From e04c838ede0d59c66f61880cce9c8764608e1197 Mon Sep 17 00:00:00 2001 From: Ralf Kirchner Date: Mon, 2 Jun 2025 17:35:55 +0200 Subject: [PATCH] refactor(app): Code structured and grouped --- bluemastofeed.py | 140 ++++++++++++++--------------------------------- 1 file changed, 41 insertions(+), 99 deletions(-) diff --git a/bluemastofeed.py b/bluemastofeed.py index cfcfb3c..ec85765 100644 --- a/bluemastofeed.py +++ b/bluemastofeed.py @@ -18,8 +18,10 @@ from email.mime.multipart import MIMEMultipart from dateutil import parser as date_parser from datetime import datetime, timezone, timedelta +# Load environment variables load_dotenv() +# Configuration FEED_URL = os.getenv("FEED_URL") SEEN_POSTS_FILE = "/data/seen_posts.txt" MASTODON_BASE_URL = os.getenv("MASTODON_API_BASE_URL") @@ -29,6 +31,7 @@ BSKY_PASSWORD = os.getenv("BSKY_PASSWORD") MAX_POST_AGE_DAYS = int(os.getenv("MAX_POST_AGE_DAYS", 0)) POST_TARGETS = os.getenv("POST_TARGETS", "both").lower() +# Logger setup logger = logging.getLogger() logger.setLevel(logging.INFO) handler = logging.StreamHandler() @@ -36,7 +39,7 @@ formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') handler.setFormatter(formatter) logger.addHandler(handler) - +# Healthcheck server class HealthHandler(BaseHTTPRequestHandler): def do_GET(self): if self.path == "/health": @@ -50,58 +53,18 @@ class HealthHandler(BaseHTTPRequestHandler): def log_message(self, format, *args): pass - def start_health_server(): server = HTTPServer(("0.0.0.0", 8000), HealthHandler) thread = threading.Thread(target=server.serve_forever, daemon=True) thread.start() - logger.info(f"💡 Healthcheck server running on port 8000.") + logger.info(f"✨ Healthcheck server running on port 8000.") +# Email helper def should_send_email(on_success: bool): mode = os.getenv("EMAIL_MODE", "errors").lower() return (mode == "all") or (mode == "errors" and not on_success) - -def extract_facets_utf8(text: str): - import re - facets = [] - def get_byte_range(char_start, char_end): - byte_start = len(text[:char_start].encode("utf-8")) - byte_end = len(text[:char_end].encode("utf-8")) - return byte_start, byte_end - - # Hashtags - for match in re.finditer(r"#(\w+)", text): - tag = match.group(1) - char_start, char_end = match.span() - byte_start, byte_end = get_byte_range(char_start, char_end) - - facets.append({ - "index": {"byteStart": byte_start, "byteEnd": byte_end}, - "features": [{ - "$type": "app.bsky.richtext.facet#tag", - "tag": tag - }] - }) - - # Links - for match in re.finditer(r"https?://[^\s]+", text): - url = match.group(0) - char_start, char_end = match.span() - byte_start, byte_end = get_byte_range(char_start, char_end) - - facets.append({ - "index": {"byteStart": byte_start, "byteEnd": byte_end}, - "features": [{ - "$type": "app.bsky.richtext.facet#link", - "uri": url - }] - }) - - return facets - - def generate_email_html(status: str, title: str, link: str, error_message: str = None) -> str: color = "#2e7d32" if status == "success" else "#d32f2f" bg_color = "#f5f5f5" if status == "success" else "#fff3f3" @@ -139,7 +102,6 @@ def generate_email_html(status: str, title: str, link: str, error_message: str = """ - def send_status_email(subject, html_content): try: smtp_host = os.getenv("SMTP_HOST") @@ -164,6 +126,33 @@ def send_status_email(subject, html_content): except Exception as e: logger.error(f"❌ Error sending email: {e}") +# Utility functions + +def extract_facets_utf8(text: str): + facets = [] + + def get_byte_range(char_start, char_end): + byte_start = len(text[:char_start].encode("utf-8")) + byte_end = len(text[:char_end].encode("utf-8")) + return byte_start, byte_end + + for match in re.finditer(r"#(\w+)", text): + tag = match.group(1) + byte_start, byte_end = get_byte_range(*match.span()) + facets.append({ + "index": {"byteStart": byte_start, "byteEnd": byte_end}, + "features": [{"$type": "app.bsky.richtext.facet#tag", "tag": tag}] + }) + + for match in re.finditer(r"https?://[^\s]+", text): + url = match.group(0) + byte_start, byte_end = get_byte_range(*match.span()) + facets.append({ + "index": {"byteStart": byte_start, "byteEnd": byte_end}, + "features": [{"$type": "app.bsky.richtext.facet#link", "uri": url}] + }) + + return facets def load_seen_ids(): os.makedirs(os.path.dirname(SEEN_POSTS_FILE), exist_ok=True) @@ -172,12 +161,10 @@ def load_seen_ids(): with open(SEEN_POSTS_FILE, "r") as f: return set(line.strip() for line in f) - def save_seen_id(post_id): with open(SEEN_POSTS_FILE, "a") as f: f.write(post_id + "\n") - def post_to_mastodon(title, link, tags): mastodon = Mastodon(access_token=MASTODON_TOKEN, api_base_url=MASTODON_BASE_URL) hashtags = " ".join(f"#{tag}" for tag in tags) if tags else "" @@ -186,7 +173,6 @@ def post_to_mastodon(title, link, tags): message += f"\n\n{hashtags}" mastodon.toot(message) - def fetch_og_data(url): try: resp = requests.get(url, timeout=10) @@ -201,7 +187,6 @@ def fetch_og_data(url): logger.error(f"❌ Error fetching OG data: {e}") return None, None - def post_to_bluesky(title, link, tags): client = Client() client.login(BSKY_HANDLE, BSKY_PASSWORD) @@ -211,9 +196,8 @@ def post_to_bluesky(title, link, tags): if hashtags: message += f"\n\n{hashtags}" - facets = extract_facets_utf8(message) # <-- NEU + facets = extract_facets_utf8(message) - # Versuche OG-Vorschau try: og_title, image_url = fetch_og_data(link) if og_title and image_url: @@ -223,42 +207,25 @@ def post_to_bluesky(title, link, tags): "uri": link, "title": title, "description": "", - "thumb": { - "$type": "blob", - "ref": None, - "mimeType": "", - "size": 0 - } + "thumb": {"$type": "blob", "ref": None, "mimeType": "", "size": 0} } } - img_resp = requests.get(image_url, timeout=10) img_resp.raise_for_status() blob = client.upload_blob(BytesIO(img_resp.content)) embed["external"]["thumb"] = blob.blob - - client.send_post(text=message, embed=embed, facets=facets) # <-- facets hier + client.send_post(text=message, embed=embed, facets=facets) logger.info(f"✅ Posted to Bluesky with preview.") return except Exception as e: logger.error(f"❌ Error uploading preview to Bluesky: {e}") - # Fallback: Nur Text, aber mit Facets - client.send_post(text=message, facets=facets) # <-- facets hier + client.send_post(text=message, facets=facets) logger.info(f"💡 Posted to Bluesky without preview.") - - def extract_post_date(entry): - date_fields = [ - entry.get("published"), - entry.get("updated"), - entry.get("date_published"), - entry.get("date_modified"), - entry.get("pubDate") - ] + date_fields = [entry.get(k) for k in ("published", "updated", "date_published", "date_modified", "pubDate")] dates = [] - for d in date_fields: if d: try: @@ -268,10 +235,8 @@ def extract_post_date(entry): dates.append(dt) except Exception as e: logger.warning(f"⚠️ Could not parse date: {d} ({e})") - return min(dates) if dates else datetime.now(timezone.utc) - def main(): seen_ids = load_seen_ids() feed = feedparser.parse(FEED_URL) @@ -284,11 +249,6 @@ def main(): continue post_date = extract_post_date(entry) - age = now - post_date - age_days = age.days - age_hours = age.seconds // 3600 - #logger.info(f"Post '{entry.get('title', '').strip()}' is {age_days} days and {age_hours} hours old.") - if post_date < now - max_age: logger.info(f"⏩ Skipping old post ({MAX_POST_AGE_DAYS}+ days): {post_id}") continue @@ -304,18 +264,9 @@ def main(): tags = [] if "tags" in entry: - raw_tags = [ - tag.get("term") if isinstance(tag, dict) else getattr(tag, "term", None) - for tag in entry.tags - ] + raw_tags = [tag.get("term") if isinstance(tag, dict) else getattr(tag, "term", None) for tag in entry.tags] tags = [sanitize_tag(t) for t in raw_tags if t] - if tags: - hashtags = " ".join(f"#{tag}" for tag in tags) - message = f"{link} {hashtags}" - else: - message = link - logger.info(f"💡 New post found: {title}") try: @@ -330,28 +281,19 @@ def main(): logger.info(f"✅ Post successfully published.") if should_send_email(on_success=True): - send_status_email( - f"✅ Post published: {title}", - generate_email_html("success", title, link) - ) + send_status_email(f"✅ Post published: {title}", generate_email_html("success", title, link)) except Exception as e: logger.error(f"❌ Posting failed: {e}") if should_send_email(on_success=False): - send_status_email( - f"❌ Error posting: {title}", - generate_email_html("error", title, link, str(e)) - ) + send_status_email(f"❌ Error posting: {title}", generate_email_html("error", title, link, str(e))) time.sleep(5) - if __name__ == "__main__": INTERVAL_MINUTES = int(os.getenv("INTERVAL_MINUTES", 30)) logger.info(f"🔁 Starting feed check every {INTERVAL_MINUTES} minutes.") - start_health_server() - while True: try: main()