def _strip_url_trailing_punctuation(url: str) -> str:
    """Drop sentence punctuation that a greedy URL match swallowed at the end."""
    while url:
        last = url[-1]
        if last in ".,;:!?":
            url = url[:-1]
        elif last == ")" and "(" not in url:
            # A closing parenthesis without an opening one inside the URL
            # belongs to the surrounding sentence, not to the link.
            url = url[:-1]
        else:
            break
    return url


def extract_facets_utf8(text: str):
    """Build Bluesky rich-text facets for the hashtags and links in *text*.

    Each facet is a dict with an ``index`` (``byteStart``/``byteEnd`` offsets
    into the UTF-8 encoding of *text*) and a single ``features`` entry that is
    either an ``app.bsky.richtext.facet#tag`` or an
    ``app.bsky.richtext.facet#link``.

    Args:
        text: The post text to scan. Empty or ``None`` yields no facets.

    Returns:
        A list of facet dicts: all hashtag facets in order of appearance,
        followed by all link facets in order of appearance.
    """
    if not text:
        return []

    def get_byte_range(char_start, char_end):
        # Facet indices are expressed in UTF-8 bytes, not in characters, so
        # multi-byte characters before the match shift the offsets.
        byte_start = len(text[:char_start].encode("utf-8"))
        byte_end = byte_start + len(text[char_start:char_end].encode("utf-8"))
        return byte_start, byte_end

    # Links are located first so that hashtag detection can ignore any '#'
    # that is really a URL fragment.
    link_spans = []
    link_facets = []
    for match in re.finditer(r"https?://\S+", text):
        url = _strip_url_trailing_punctuation(match.group(0))
        if url.endswith("://"):
            # Nothing but the scheme is left after trimming; not a real link.
            continue
        char_start = match.start()
        char_end = char_start + len(url)
        link_spans.append((char_start, char_end))
        byte_start, byte_end = get_byte_range(char_start, char_end)
        link_facets.append({
            "index": {"byteStart": byte_start, "byteEnd": byte_end},
            "features": [{
                "$type": "app.bsky.richtext.facet#link",
                "uri": url,
            }],
        })

    tag_facets = []
    # The look-behind rejects a '#' glued to a preceding word (e.g. "issue#42").
    for match in re.finditer(r"(?<!\w)#(\w+)", text):
        tag = match.group(1)
        if tag.isdigit():
            # Purely numeric tokens such as "#1" are not treated as hashtags.
            continue
        char_start, char_end = match.span()
        if any(start <= char_start < end for start, end in link_spans):
            # The '#' is part of a URL (fragment identifier), not a hashtag.
            continue
        byte_start, byte_end = get_byte_range(char_start, char_end)
        tag_facets.append({
            "index": {"byteStart": byte_start, "byteEnd": byte_end},
            "features": [{
                "$type": "app.bsky.richtext.facet#tag",
                "tag": tag,
            }],
        })

    return tag_facets + link_facets
def sanitize_tag(tag):
    """Reduce a feed tag to a lowercase ASCII token usable after a ``#``.

    The tag is case-folded, decomposed with NFKD so accented letters lose
    their diacritics, stripped of every remaining non-ASCII character, and
    finally stripped of all non-word characters (spaces, punctuation).

    Args:
        tag: The raw tag text.

    Returns:
        The sanitized tag. This may be an empty string when the input holds
        no characters that survive the ASCII reduction.
    """
    # casefold() rather than lower(): it expands "ß" to "ss", which would
    # otherwise be dropped entirely by the ASCII step below ("Fußball" must
    # become "fussball", not "fuball"). For ASCII input both are identical.
    tag = tag.casefold()
    # NFKD splits e.g. "ü" into "u" + combining diaeresis; encoding with
    # errors="ignore" then discards the combining mark and keeps the "u".
    tag = unicodedata.normalize("NFKD", tag).encode("ascii", "ignore").decode("ascii")
    tag = re.sub(r"\W+", "", tag)
    return tag