refactor(app): Code structured and grouped
This commit is contained in:
140
bluemastofeed.py
140
bluemastofeed.py
@ -18,8 +18,10 @@ from email.mime.multipart import MIMEMultipart
|
|||||||
from dateutil import parser as date_parser
|
from dateutil import parser as date_parser
|
||||||
from datetime import datetime, timezone, timedelta
|
from datetime import datetime, timezone, timedelta
|
||||||
|
|
||||||
|
# Load environment variables
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
|
# Configuration
|
||||||
FEED_URL = os.getenv("FEED_URL")
|
FEED_URL = os.getenv("FEED_URL")
|
||||||
SEEN_POSTS_FILE = "/data/seen_posts.txt"
|
SEEN_POSTS_FILE = "/data/seen_posts.txt"
|
||||||
MASTODON_BASE_URL = os.getenv("MASTODON_API_BASE_URL")
|
MASTODON_BASE_URL = os.getenv("MASTODON_API_BASE_URL")
|
||||||
@ -29,6 +31,7 @@ BSKY_PASSWORD = os.getenv("BSKY_PASSWORD")
|
|||||||
MAX_POST_AGE_DAYS = int(os.getenv("MAX_POST_AGE_DAYS", 0))
|
MAX_POST_AGE_DAYS = int(os.getenv("MAX_POST_AGE_DAYS", 0))
|
||||||
POST_TARGETS = os.getenv("POST_TARGETS", "both").lower()
|
POST_TARGETS = os.getenv("POST_TARGETS", "both").lower()
|
||||||
|
|
||||||
|
# Logger setup
|
||||||
logger = logging.getLogger()
|
logger = logging.getLogger()
|
||||||
logger.setLevel(logging.INFO)
|
logger.setLevel(logging.INFO)
|
||||||
handler = logging.StreamHandler()
|
handler = logging.StreamHandler()
|
||||||
@ -36,7 +39,7 @@ formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
|
|||||||
handler.setFormatter(formatter)
|
handler.setFormatter(formatter)
|
||||||
logger.addHandler(handler)
|
logger.addHandler(handler)
|
||||||
|
|
||||||
|
# Healthcheck server
|
||||||
class HealthHandler(BaseHTTPRequestHandler):
|
class HealthHandler(BaseHTTPRequestHandler):
|
||||||
def do_GET(self):
|
def do_GET(self):
|
||||||
if self.path == "/health":
|
if self.path == "/health":
|
||||||
@ -50,58 +53,18 @@ class HealthHandler(BaseHTTPRequestHandler):
|
|||||||
def log_message(self, format, *args):
|
def log_message(self, format, *args):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def start_health_server():
|
def start_health_server():
|
||||||
server = HTTPServer(("0.0.0.0", 8000), HealthHandler)
|
server = HTTPServer(("0.0.0.0", 8000), HealthHandler)
|
||||||
thread = threading.Thread(target=server.serve_forever, daemon=True)
|
thread = threading.Thread(target=server.serve_forever, daemon=True)
|
||||||
thread.start()
|
thread.start()
|
||||||
logger.info(f"💡 Healthcheck server running on port 8000.")
|
logger.info(f"✨ Healthcheck server running on port 8000.")
|
||||||
|
|
||||||
|
# Email helper
|
||||||
|
|
||||||
def should_send_email(on_success: bool):
|
def should_send_email(on_success: bool):
|
||||||
mode = os.getenv("EMAIL_MODE", "errors").lower()
|
mode = os.getenv("EMAIL_MODE", "errors").lower()
|
||||||
return (mode == "all") or (mode == "errors" and not on_success)
|
return (mode == "all") or (mode == "errors" and not on_success)
|
||||||
|
|
||||||
|
|
||||||
def extract_facets_utf8(text: str):
    """Return Bluesky rich-text facets for the hashtags and links in *text*.

    Each facet is a dict with an ``index`` (``byteStart``/``byteEnd``
    offsets into the UTF-8 encoding of *text*) and a single ``features``
    entry of type ``app.bsky.richtext.facet#tag`` or
    ``app.bsky.richtext.facet#link``.

    Args:
        text: The complete post text that will be sent together with the
            facets.

    Returns:
        A list of facet dicts: all hashtag facets in order of appearance,
        followed by all link facets in order of appearance. Empty when
        *text* contains neither.
    """
    import re

    facets = []

    def get_byte_range(char_start, char_end):
        # Facet indices are byte offsets into the UTF-8 encoded text, not
        # character offsets, so encode the prefix up to each boundary.
        byte_start = len(text[:char_start].encode("utf-8"))
        byte_end = len(text[:char_end].encode("utf-8"))
        return byte_start, byte_end

    def strip_trailing_punctuation(url):
        # Sentence punctuation (and a closing parenthesis that has no opening
        # partner inside the URL) directly after a link is not part of it.
        while url:
            last = url[-1]
            if last in ".,;:!?" or (last == ")" and "(" not in url):
                url = url[:-1]
            else:
                break
        return url

    # Links are located first so that the hashtag pass can ignore "#fragment"
    # parts of URLs; they are appended after the hashtags to keep the
    # established output order (tags, then links).
    link_spans = []
    link_facets = []
    for match in re.finditer(r"https?://[^\s]+", text):
        url = strip_trailing_punctuation(match.group(0))
        if not url.partition("://")[2]:
            # Nothing but the scheme is left - not a usable link.
            continue
        char_start = match.start()
        char_end = char_start + len(url)
        link_spans.append((char_start, char_end))
        byte_start, byte_end = get_byte_range(char_start, char_end)

        link_facets.append({
            "index": {"byteStart": byte_start, "byteEnd": byte_end},
            "features": [{
                "$type": "app.bsky.richtext.facet#link",
                "uri": url
            }]
        })

    # Hashtags
    for match in re.finditer(r"#(\w+)", text):
        char_start, char_end = match.span()
        if any(start <= char_start < end for start, end in link_spans):
            # A '#' inside a URL is a fragment identifier, not a hashtag.
            continue
        tag = match.group(1)
        byte_start, byte_end = get_byte_range(char_start, char_end)

        facets.append({
            "index": {"byteStart": byte_start, "byteEnd": byte_end},
            "features": [{
                "$type": "app.bsky.richtext.facet#tag",
                "tag": tag
            }]
        })

    # Links
    facets.extend(link_facets)

    return facets
|
|
||||||
|
|
||||||
|
|
||||||
def generate_email_html(status: str, title: str, link: str, error_message: str = None) -> str:
|
def generate_email_html(status: str, title: str, link: str, error_message: str = None) -> str:
|
||||||
color = "#2e7d32" if status == "success" else "#d32f2f"
|
color = "#2e7d32" if status == "success" else "#d32f2f"
|
||||||
bg_color = "#f5f5f5" if status == "success" else "#fff3f3"
|
bg_color = "#f5f5f5" if status == "success" else "#fff3f3"
|
||||||
@ -139,7 +102,6 @@ def generate_email_html(status: str, title: str, link: str, error_message: str =
|
|||||||
</html>
|
</html>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
def send_status_email(subject, html_content):
|
def send_status_email(subject, html_content):
|
||||||
try:
|
try:
|
||||||
smtp_host = os.getenv("SMTP_HOST")
|
smtp_host = os.getenv("SMTP_HOST")
|
||||||
@ -164,6 +126,33 @@ def send_status_email(subject, html_content):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"❌ Error sending email: {e}")
|
logger.error(f"❌ Error sending email: {e}")
|
||||||
|
|
||||||
|
# Utility functions
|
||||||
|
|
||||||
|
def extract_facets_utf8(text: str):
    """Build Bluesky rich-text facets (hashtags and links) for *text*.

    A facet marks a byte range of the post text and attaches a feature to
    it: ``app.bsky.richtext.facet#tag`` for ``#hashtags`` and
    ``app.bsky.richtext.facet#link`` for ``http(s)://`` URLs. The ranges
    are offsets into the UTF-8 encoding of *text*, not character offsets.

    Args:
        text: The full post text that will be sent alongside the facets.

    Returns:
        A list of facet dicts, hashtag facets first (in order of
        appearance), then link facets (in order of appearance). The list
        is empty when *text* has no hashtags or links.
    """
    # Imported locally so the function does not rely on a module-level
    # ``import re`` being present.
    import re

    def get_byte_range(char_start, char_end):
        # Convert character offsets to UTF-8 byte offsets by encoding the
        # prefix that ends at each boundary.
        byte_start = len(text[:char_start].encode("utf-8"))
        byte_end = len(text[:char_end].encode("utf-8"))
        return byte_start, byte_end

    def trim_url(url):
        # Drop sentence punctuation that directly follows a URL, and a
        # closing parenthesis that has no opening partner inside the URL
        # (e.g. "(see https://example.com)").
        while url:
            last = url[-1]
            if last in ".,;:!?" or (last == ")" and "(" not in url):
                url = url[:-1]
            else:
                break
        return url

    # Scan links first: their spans are needed to keep URL fragments
    # ("https://site/page#top") from being reported as hashtags.
    link_spans = []
    link_facets = []
    for match in re.finditer(r"https?://[^\s]+", text):
        url = trim_url(match.group(0))
        if not url.partition("://")[2]:
            # Only the scheme survived trimming - nothing to link to.
            continue
        char_start = match.start()
        char_end = char_start + len(url)
        link_spans.append((char_start, char_end))
        byte_start, byte_end = get_byte_range(char_start, char_end)
        link_facets.append({
            "index": {"byteStart": byte_start, "byteEnd": byte_end},
            "features": [{"$type": "app.bsky.richtext.facet#link", "uri": url}]
        })

    facets = []
    for match in re.finditer(r"#(\w+)", text):
        tag_start = match.start()
        if any(start <= tag_start < end for start, end in link_spans):
            # '#' inside a URL is a fragment identifier, not a hashtag.
            continue
        tag = match.group(1)
        byte_start, byte_end = get_byte_range(*match.span())
        facets.append({
            "index": {"byteStart": byte_start, "byteEnd": byte_end},
            "features": [{"$type": "app.bsky.richtext.facet#tag", "tag": tag}]
        })

    # Keep the established output order: hashtags first, then links.
    facets.extend(link_facets)
    return facets
|
||||||
|
|
||||||
def load_seen_ids():
|
def load_seen_ids():
|
||||||
os.makedirs(os.path.dirname(SEEN_POSTS_FILE), exist_ok=True)
|
os.makedirs(os.path.dirname(SEEN_POSTS_FILE), exist_ok=True)
|
||||||
@ -172,12 +161,10 @@ def load_seen_ids():
|
|||||||
with open(SEEN_POSTS_FILE, "r") as f:
|
with open(SEEN_POSTS_FILE, "r") as f:
|
||||||
return set(line.strip() for line in f)
|
return set(line.strip() for line in f)
|
||||||
|
|
||||||
|
|
||||||
def save_seen_id(post_id):
|
def save_seen_id(post_id):
|
||||||
with open(SEEN_POSTS_FILE, "a") as f:
|
with open(SEEN_POSTS_FILE, "a") as f:
|
||||||
f.write(post_id + "\n")
|
f.write(post_id + "\n")
|
||||||
|
|
||||||
|
|
||||||
def post_to_mastodon(title, link, tags):
|
def post_to_mastodon(title, link, tags):
|
||||||
mastodon = Mastodon(access_token=MASTODON_TOKEN, api_base_url=MASTODON_BASE_URL)
|
mastodon = Mastodon(access_token=MASTODON_TOKEN, api_base_url=MASTODON_BASE_URL)
|
||||||
hashtags = " ".join(f"#{tag}" for tag in tags) if tags else ""
|
hashtags = " ".join(f"#{tag}" for tag in tags) if tags else ""
|
||||||
@ -186,7 +173,6 @@ def post_to_mastodon(title, link, tags):
|
|||||||
message += f"\n\n{hashtags}"
|
message += f"\n\n{hashtags}"
|
||||||
mastodon.toot(message)
|
mastodon.toot(message)
|
||||||
|
|
||||||
|
|
||||||
def fetch_og_data(url):
|
def fetch_og_data(url):
|
||||||
try:
|
try:
|
||||||
resp = requests.get(url, timeout=10)
|
resp = requests.get(url, timeout=10)
|
||||||
@ -201,7 +187,6 @@ def fetch_og_data(url):
|
|||||||
logger.error(f"❌ Error fetching OG data: {e}")
|
logger.error(f"❌ Error fetching OG data: {e}")
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
|
|
||||||
def post_to_bluesky(title, link, tags):
|
def post_to_bluesky(title, link, tags):
|
||||||
client = Client()
|
client = Client()
|
||||||
client.login(BSKY_HANDLE, BSKY_PASSWORD)
|
client.login(BSKY_HANDLE, BSKY_PASSWORD)
|
||||||
@ -211,9 +196,8 @@ def post_to_bluesky(title, link, tags):
|
|||||||
if hashtags:
|
if hashtags:
|
||||||
message += f"\n\n{hashtags}"
|
message += f"\n\n{hashtags}"
|
||||||
|
|
||||||
facets = extract_facets_utf8(message) # <-- NEU
|
facets = extract_facets_utf8(message)
|
||||||
|
|
||||||
# Versuche OG-Vorschau
|
|
||||||
try:
|
try:
|
||||||
og_title, image_url = fetch_og_data(link)
|
og_title, image_url = fetch_og_data(link)
|
||||||
if og_title and image_url:
|
if og_title and image_url:
|
||||||
@ -223,42 +207,25 @@ def post_to_bluesky(title, link, tags):
|
|||||||
"uri": link,
|
"uri": link,
|
||||||
"title": title,
|
"title": title,
|
||||||
"description": "",
|
"description": "",
|
||||||
"thumb": {
|
"thumb": {"$type": "blob", "ref": None, "mimeType": "", "size": 0}
|
||||||
"$type": "blob",
|
|
||||||
"ref": None,
|
|
||||||
"mimeType": "",
|
|
||||||
"size": 0
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
img_resp = requests.get(image_url, timeout=10)
|
img_resp = requests.get(image_url, timeout=10)
|
||||||
img_resp.raise_for_status()
|
img_resp.raise_for_status()
|
||||||
blob = client.upload_blob(BytesIO(img_resp.content))
|
blob = client.upload_blob(BytesIO(img_resp.content))
|
||||||
embed["external"]["thumb"] = blob.blob
|
embed["external"]["thumb"] = blob.blob
|
||||||
|
client.send_post(text=message, embed=embed, facets=facets)
|
||||||
client.send_post(text=message, embed=embed, facets=facets) # <-- facets hier
|
|
||||||
logger.info(f"✅ Posted to Bluesky with preview.")
|
logger.info(f"✅ Posted to Bluesky with preview.")
|
||||||
return
|
return
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"❌ Error uploading preview to Bluesky: {e}")
|
logger.error(f"❌ Error uploading preview to Bluesky: {e}")
|
||||||
|
|
||||||
# Fallback: Nur Text, aber mit Facets
|
client.send_post(text=message, facets=facets)
|
||||||
client.send_post(text=message, facets=facets) # <-- facets hier
|
|
||||||
logger.info(f"💡 Posted to Bluesky without preview.")
|
logger.info(f"💡 Posted to Bluesky without preview.")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def extract_post_date(entry):
|
def extract_post_date(entry):
|
||||||
date_fields = [
|
date_fields = [entry.get(k) for k in ("published", "updated", "date_published", "date_modified", "pubDate")]
|
||||||
entry.get("published"),
|
|
||||||
entry.get("updated"),
|
|
||||||
entry.get("date_published"),
|
|
||||||
entry.get("date_modified"),
|
|
||||||
entry.get("pubDate")
|
|
||||||
]
|
|
||||||
dates = []
|
dates = []
|
||||||
|
|
||||||
for d in date_fields:
|
for d in date_fields:
|
||||||
if d:
|
if d:
|
||||||
try:
|
try:
|
||||||
@ -268,10 +235,8 @@ def extract_post_date(entry):
|
|||||||
dates.append(dt)
|
dates.append(dt)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"⚠️ Could not parse date: {d} ({e})")
|
logger.warning(f"⚠️ Could not parse date: {d} ({e})")
|
||||||
|
|
||||||
return min(dates) if dates else datetime.now(timezone.utc)
|
return min(dates) if dates else datetime.now(timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
seen_ids = load_seen_ids()
|
seen_ids = load_seen_ids()
|
||||||
feed = feedparser.parse(FEED_URL)
|
feed = feedparser.parse(FEED_URL)
|
||||||
@ -284,11 +249,6 @@ def main():
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
post_date = extract_post_date(entry)
|
post_date = extract_post_date(entry)
|
||||||
age = now - post_date
|
|
||||||
age_days = age.days
|
|
||||||
age_hours = age.seconds // 3600
|
|
||||||
#logger.info(f"Post '{entry.get('title', '').strip()}' is {age_days} days and {age_hours} hours old.")
|
|
||||||
|
|
||||||
if post_date < now - max_age:
|
if post_date < now - max_age:
|
||||||
logger.info(f"⏩ Skipping old post ({MAX_POST_AGE_DAYS}+ days): {post_id}")
|
logger.info(f"⏩ Skipping old post ({MAX_POST_AGE_DAYS}+ days): {post_id}")
|
||||||
continue
|
continue
|
||||||
@ -304,18 +264,9 @@ def main():
|
|||||||
|
|
||||||
tags = []
|
tags = []
|
||||||
if "tags" in entry:
|
if "tags" in entry:
|
||||||
raw_tags = [
|
raw_tags = [tag.get("term") if isinstance(tag, dict) else getattr(tag, "term", None) for tag in entry.tags]
|
||||||
tag.get("term") if isinstance(tag, dict) else getattr(tag, "term", None)
|
|
||||||
for tag in entry.tags
|
|
||||||
]
|
|
||||||
tags = [sanitize_tag(t) for t in raw_tags if t]
|
tags = [sanitize_tag(t) for t in raw_tags if t]
|
||||||
|
|
||||||
if tags:
|
|
||||||
hashtags = " ".join(f"#{tag}" for tag in tags)
|
|
||||||
message = f"{link} {hashtags}"
|
|
||||||
else:
|
|
||||||
message = link
|
|
||||||
|
|
||||||
logger.info(f"💡 New post found: {title}")
|
logger.info(f"💡 New post found: {title}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -330,28 +281,19 @@ def main():
|
|||||||
logger.info(f"✅ Post successfully published.")
|
logger.info(f"✅ Post successfully published.")
|
||||||
|
|
||||||
if should_send_email(on_success=True):
|
if should_send_email(on_success=True):
|
||||||
send_status_email(
|
send_status_email(f"✅ Post published: {title}", generate_email_html("success", title, link))
|
||||||
f"✅ Post published: {title}",
|
|
||||||
generate_email_html("success", title, link)
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"❌ Posting failed: {e}")
|
logger.error(f"❌ Posting failed: {e}")
|
||||||
if should_send_email(on_success=False):
|
if should_send_email(on_success=False):
|
||||||
send_status_email(
|
send_status_email(f"❌ Error posting: {title}", generate_email_html("error", title, link, str(e)))
|
||||||
f"❌ Error posting: {title}",
|
|
||||||
generate_email_html("error", title, link, str(e))
|
|
||||||
)
|
|
||||||
|
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
INTERVAL_MINUTES = int(os.getenv("INTERVAL_MINUTES", 30))
|
INTERVAL_MINUTES = int(os.getenv("INTERVAL_MINUTES", 30))
|
||||||
logger.info(f"🔁 Starting feed check every {INTERVAL_MINUTES} minutes.")
|
logger.info(f"🔁 Starting feed check every {INTERVAL_MINUTES} minutes.")
|
||||||
|
|
||||||
start_health_server()
|
start_health_server()
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
main()
|
main()
|
||||||
|
Reference in New Issue
Block a user