6 Commits

| SHA1 | Message | Checks | Date |
|------------|---------|--------|------|
| dbcb624abf | chore(base): Release 0.9.8 | All checks successful: Build and Push Docker Image on Tag / build_and_push (push) in 30s; Create Release / release (push) in 8s | 2025-06-02 20:07:12 +02:00 |
| b20103fbc3 | test(app): Debugging features added and README.md extended | | 2025-06-02 19:53:09 +02:00 |
| 3d58068291 | chore(base): Release v0.9.7 | All checks successful: Build and Push Docker Image on Tag / build_and_push (push) in 45s; Create Release / release (push) in 8s | 2025-06-02 19:23:11 +02:00 |
| e04c838ede | refactor(app): Code structured and grouped | | 2025-06-02 17:35:55 +02:00 |
| 3e1255ccdc | fix(main): Fixed error in hashtag display | | 2025-06-02 17:22:00 +02:00 |
| 3bb33ca379 | fix(app): Add hashtags to bluesky post | | 2025-06-02 16:27:49 +02:00 |

3 changed files with 110 additions and 73 deletions

Dockerfile

```diff
@@ -1,5 +1,5 @@
 FROM python:3.11-slim
-LABEL version="0.9.6"
+LABEL version="0.9.8"
 RUN apt-get update && apt-get install -y curl && apt-get clean && rm -rf /var/lib/apt/lists/*
```

README.md

```diff
@@ -112,3 +112,5 @@ The following environment variables control the behavior of the container. They can
 | `SMTP_PASSWORD` | Password for SMTP | `sicherespasswort` | _optional_ |
 | `EMAIL_FROM` | Sender address for e-mails | `noreply@example.com` | _optional_ |
 | `EMAIL_TO` | Recipient address for e-mails | `admin@example.com` | _optional_ |
+| `LOG_LEVEL` | Logging level: `DEBUG`, `INFO`, `WARNING`, `ERROR` | `DEBUG` | `INFO` |
```

Python application script

@ -5,6 +5,8 @@ import logging
import requests import requests
import threading import threading
import smtplib import smtplib
import re
import unicodedata
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from io import BytesIO from io import BytesIO
from mastodon import Mastodon from mastodon import Mastodon
@ -16,8 +18,10 @@ from email.mime.multipart import MIMEMultipart
from dateutil import parser as date_parser from dateutil import parser as date_parser
from datetime import datetime, timezone, timedelta from datetime import datetime, timezone, timedelta
# Load environment variables
load_dotenv() load_dotenv()
# Configuration
FEED_URL = os.getenv("FEED_URL") FEED_URL = os.getenv("FEED_URL")
SEEN_POSTS_FILE = "/data/seen_posts.txt" SEEN_POSTS_FILE = "/data/seen_posts.txt"
MASTODON_BASE_URL = os.getenv("MASTODON_API_BASE_URL") MASTODON_BASE_URL = os.getenv("MASTODON_API_BASE_URL")
@ -27,40 +31,50 @@ BSKY_PASSWORD = os.getenv("BSKY_PASSWORD")
MAX_POST_AGE_DAYS = int(os.getenv("MAX_POST_AGE_DAYS", 0)) MAX_POST_AGE_DAYS = int(os.getenv("MAX_POST_AGE_DAYS", 0))
POST_TARGETS = os.getenv("POST_TARGETS", "both").lower() POST_TARGETS = os.getenv("POST_TARGETS", "both").lower()
# Logger setup
logger = logging.getLogger() logger = logging.getLogger()
logger.setLevel(logging.INFO) log_level = os.getenv("LOG_LEVEL", "INFO").upper() # Enable DEBUG level via env variable
logger.setLevel(getattr(logging, log_level, logging.INFO))
handler = logging.StreamHandler() handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter) handler.setFormatter(formatter)
logger.addHandler(handler) logger.addHandler(handler)
# Configuration overview (INFO level)
logger.info(f"📡 Feed URL: {FEED_URL}")
logger.info(f"📤 Posting targets: {POST_TARGETS}")
logger.info(f"🕒 Max post age: {MAX_POST_AGE_DAYS} days")
logger.info(f"📨 Email mode: {os.getenv('EMAIL_MODE', 'errors')}")
logger.debug(f"🛠 Full environment variables: {dict(os.environ)}") # DEBUG: All environment variables
# Healthcheck server handler
class HealthHandler(BaseHTTPRequestHandler): class HealthHandler(BaseHTTPRequestHandler):
def do_GET(self): def do_GET(self):
if self.path == "/health": if self.path == "/health":
self.send_response(200) self.send_response(200)
self.end_headers() self.end_headers()
self.wfile.write(b"OK") self.wfile.write(b"OK")
logger.debug("🏥 Healthcheck requested and responded OK.") # DEBUG Healthcheck log
else: else:
self.send_response(404) self.send_response(404)
self.end_headers() self.end_headers()
def log_message(self, format, *args): def log_message(self, format, *args):
# Suppress default HTTP request logging
pass pass
def start_health_server(): def start_health_server():
server = HTTPServer(("0.0.0.0", 8000), HealthHandler) server = HTTPServer(("0.0.0.0", 8000), HealthHandler)
thread = threading.Thread(target=server.serve_forever, daemon=True) thread = threading.Thread(target=server.serve_forever, daemon=True)
thread.start() thread.start()
logger.info(f"💡 Healthcheck server running on port 8000.") logger.info(f" Healthcheck server running on port 8000.")
# Email helper functions
def should_send_email(on_success: bool): def should_send_email(on_success: bool):
mode = os.getenv("EMAIL_MODE", "errors").lower() mode = os.getenv("EMAIL_MODE", "errors").lower()
return (mode == "all") or (mode == "errors" and not on_success) return (mode == "all") or (mode == "errors" and not on_success)
def generate_email_html(status: str, title: str, link: str, error_message: str = None) -> str: def generate_email_html(status: str, title: str, link: str, error_message: str = None) -> str:
color = "#2e7d32" if status == "success" else "#d32f2f" color = "#2e7d32" if status == "success" else "#d32f2f"
bg_color = "#f5f5f5" if status == "success" else "#fff3f3" bg_color = "#f5f5f5" if status == "success" else "#fff3f3"
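
The notable change in this hunk is the configurable log level: `getattr(logging, log_level, logging.INFO)` resolves the `LOG_LEVEL` name and silently falls back to `INFO` when the value is missing or misspelled. A minimal standalone sketch of that lookup (not part of the repository):

```python
import logging
import os

# Standalone sketch: resolve LOG_LEVEL the same way the script does.
# Unknown names fall back to INFO instead of raising.
for value in ("DEBUG", "warning", "VERBOSE"):
    os.environ["LOG_LEVEL"] = value
    log_level = os.getenv("LOG_LEVEL", "INFO").upper()
    resolved = getattr(logging, log_level, logging.INFO)
    print(f"LOG_LEVEL={value!r} -> {logging.getLevelName(resolved)}")

# LOG_LEVEL='DEBUG' -> DEBUG
# LOG_LEVEL='warning' -> WARNING
# LOG_LEVEL='VERBOSE' -> INFO  (no logging.VERBOSE attribute, so the getattr default wins)
```
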
```diff
@@ -70,7 +84,7 @@ def generate_email_html(status: str, title: str, link: str, error_message: str =
     meta = "This is an automated success notification." if status == "success" else "Please check logs or configuration."
 
     error_html = f"""
-    <p><strong>Error:</strong></p>
+    <p><strong>Error Details:</strong></p>
     <div class=\"error\">{error_message}</div>
     """ if error_message else ""
@@ -98,7 +112,6 @@ def generate_email_html(status: str, title: str, link: str, error_message: str =
     </html>
     """
 
 def send_status_email(subject, html_content):
     try:
         smtp_host = os.getenv("SMTP_HOST")
@@ -108,6 +121,8 @@ def send_status_email(subject, html_content):
         email_from = os.getenv("EMAIL_FROM")
         email_to = os.getenv("EMAIL_TO")
 
+        logger.debug(f"📧 Preparing to send email to {email_to} with subject: {subject}")  # DEBUG
+
         msg = MIMEMultipart("alternative")
         msg["Subject"] = subject
         msg["From"] = email_from
@@ -120,22 +135,53 @@ def send_status_email(subject, html_content):
             server.sendmail(email_from, email_to, msg.as_string())
 
         logger.info(f"✅ Status email sent successfully.")
-    except Exception as e:
-        logger.error(f"❌ Error sending email: {e}")
+    except Exception:
+        logger.exception(f"❌ Error sending email:")  # Full stack trace on error
 
+# Utility functions
+def extract_facets_utf8(text: str):
+    facets = []
+
+    def get_byte_range(char_start, char_end):
+        byte_start = len(text[:char_start].encode("utf-8"))
+        byte_end = len(text[:char_end].encode("utf-8"))
+        return byte_start, byte_end
+
+    # Extract hashtags
+    for match in re.finditer(r"#(\w+)", text):
+        tag = match.group(1)
+        byte_start, byte_end = get_byte_range(*match.span())
+        facets.append({
+            "index": {"byteStart": byte_start, "byteEnd": byte_end},
+            "features": [{"$type": "app.bsky.richtext.facet#tag", "tag": tag}]
+        })
+
+    # Extract links
+    for match in re.finditer(r"https?://[^\s]+", text):
+        url = match.group(0)
+        byte_start, byte_end = get_byte_range(*match.span())
+        facets.append({
+            "index": {"byteStart": byte_start, "byteEnd": byte_end},
+            "features": [{"$type": "app.bsky.richtext.facet#link", "uri": url}]
+        })
+
+    logger.debug(f"🏷 Extracted facets: {facets}")  # DEBUG
+    return facets
+
 def load_seen_ids():
     os.makedirs(os.path.dirname(SEEN_POSTS_FILE), exist_ok=True)
     if not os.path.exists(SEEN_POSTS_FILE):
         open(SEEN_POSTS_FILE, "w").close()
     with open(SEEN_POSTS_FILE, "r") as f:
-        return set(line.strip() for line in f)
+        seen = set(line.strip() for line in f)
+    logger.debug(f"🗂 Loaded {len(seen)} seen post IDs.")  # DEBUG
+    return seen
 
 def save_seen_id(post_id):
     with open(SEEN_POSTS_FILE, "a") as f:
         f.write(post_id + "\n")
+    logger.debug(f"📝 Saved post ID as seen: {post_id}")  # DEBUG
 
 def post_to_mastodon(title, link, tags):
     mastodon = Mastodon(access_token=MASTODON_TOKEN, api_base_url=MASTODON_BASE_URL)
```
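
Bluesky rich-text facets index into the UTF-8 byte representation of the post text, which is why the new `extract_facets_utf8` helper converts the character spans reported by `re` into byte offsets. A standalone illustration (assumed sample text, not from the repository) of how the two offsets diverge once an umlaut is involved:

```python
import re

text = "Schöner Beitrag https://example.com/post #Nachhaltigkeit"

# Character span reported by the regex vs. the UTF-8 byte span the facet needs.
match = re.search(r"#(\w+)", text)
char_start, char_end = match.span()
byte_start = len(text[:char_start].encode("utf-8"))
byte_end = len(text[:char_end].encode("utf-8"))

print(match.span())            # (41, 56)  character positions
print((byte_start, byte_end))  # (42, 57)  byte positions: "ö" occupies two UTF-8 bytes

# Facet built from the byte offsets, mirroring extract_facets_utf8:
facet = {
    "index": {"byteStart": byte_start, "byteEnd": byte_end},
    "features": [{"$type": "app.bsky.richtext.facet#tag", "tag": match.group(1)}],
}
```
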
```diff
@@ -143,11 +189,13 @@ def post_to_mastodon(title, link, tags):
     message = f"{title}\n\n{link}"
     if hashtags:
         message += f"\n\n{hashtags}"
+    logger.debug(f"🐘 Posting to Mastodon: {message}")  # DEBUG
     mastodon.toot(message)
+    logger.info("✅ Posted to Mastodon.")
 
 def fetch_og_data(url):
     try:
+        logger.debug(f"🔍 Fetching OpenGraph data from {url}")  # DEBUG
         resp = requests.get(url, timeout=10)
         resp.raise_for_status()
         soup = BeautifulSoup(resp.text, "html.parser")
@@ -155,61 +203,54 @@ def fetch_og_data(url):
         og_image = soup.find("meta", property="og:image")
         title = og_title["content"] if og_title and og_title.has_attr("content") else None
         image_url = og_image["content"] if og_image and og_image.has_attr("content") else None
+        if not title or not image_url:
+            logger.debug(f"⚠️ OpenGraph data incomplete. Title: {title}, Image: {image_url}")  # DEBUG
         return title, image_url
-    except Exception as e:
-        logger.error(f"❌ Error fetching OG data: {e}")
+    except Exception:
+        logger.exception(f"❌ Error fetching OpenGraph data:")
         return None, None
 
-def post_to_bluesky(message, link):
+def post_to_bluesky(title, link, tags):
     client = Client()
+    logger.debug(f"🔑 Logging in to Bluesky as {BSKY_HANDLE}")  # DEBUG
     client.login(BSKY_HANDLE, BSKY_PASSWORD)
 
-    title, image_url = fetch_og_data(link)
-    text = title or message
+    hashtags = " ".join(f"#{tag}" for tag in tags) if tags else ""
+    message = f"{title}\n\n{link}"
+    if hashtags:
+        message += f"\n\n{hashtags}"
 
-    if title and image_url:
-        try:
+    facets = extract_facets_utf8(message)
+
+    try:
+        og_title, image_url = fetch_og_data(link)
+        if og_title and image_url:
             embed = {
                 "$type": "app.bsky.embed.external",
                 "external": {
                     "uri": link,
                     "title": title,
                     "description": "",
-                    "thumb": {
-                        "$type": "blob",
-                        "ref": None,
-                        "mimeType": "",
-                        "size": 0
-                    }
+                    "thumb": {"$type": "blob", "ref": None, "mimeType": "", "size": 0}
                 }
             }
+            logger.debug(f"📸 Attempting to upload preview image from: {image_url}")  # DEBUG
             img_resp = requests.get(image_url, timeout=10)
             img_resp.raise_for_status()
             blob = client.upload_blob(BytesIO(img_resp.content))
             embed["external"]["thumb"] = blob.blob
-            client.send_post(text=text, embed=embed)
+            client.send_post(text=message, embed=embed, facets=facets)
             logger.info(f"✅ Posted to Bluesky with preview.")
             return
-        except Exception as e:
-            logger.error(f"❌ Error uploading preview to Bluesky: {e}")
+    except Exception:
+        logger.exception(f"❌ Error uploading preview to Bluesky:")
 
-    client.send_post(f"{text}\n{link}")
+    client.send_post(text=message, facets=facets)
     logger.info(f"💡 Posted to Bluesky without preview.")
 
 def extract_post_date(entry):
-    date_fields = [
-        entry.get("published"),
-        entry.get("updated"),
-        entry.get("date_published"),
-        entry.get("date_modified"),
-        entry.get("pubDate")
-    ]
+    date_fields = [entry.get(k) for k in ("published", "updated", "date_published", "date_modified", "pubDate")]
     dates = []
     for d in date_fields:
         if d:
             try:
```
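
The rewritten `post_to_bluesky` now builds its text the same way `post_to_mastodon` does: title, blank line, link, and, if tags exist, another blank line plus space-separated hashtags, with facets computed over that final string. A short sketch of the composition with assumed sample values:

```python
# Assumed sample values, mirroring the message layout in post_to_bluesky / post_to_mastodon.
title = "Neuer Blogartikel"
link = "https://example.com/blog/neuer-artikel"
tags = ["python", "selfhosting"]

hashtags = " ".join(f"#{tag}" for tag in tags) if tags else ""
message = f"{title}\n\n{link}"
if hashtags:
    message += f"\n\n{hashtags}"

print(message)
# Neuer Blogartikel
#
# https://example.com/blog/neuer-artikel
#
# #python #selfhosting
```
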
```diff
@@ -219,9 +260,9 @@ def extract_post_date(entry):
                 dates.append(dt)
             except Exception as e:
                 logger.warning(f"⚠️ Could not parse date: {d} ({e})")
 
-    return min(dates) if dates else datetime.now(timezone.utc)
+    chosen_date = min(dates) if dates else datetime.now(timezone.utc)
+    logger.debug(f"📅 Extracted post date: {chosen_date}")  # DEBUG
+    return chosen_date
 
 def main():
     seen_ids = load_seen_ids()
```
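
`extract_post_date` collects every date-like field a feed entry offers, parses each with dateutil, and keeps the earliest one; if nothing parses, it falls back to the current time, so the entry is not treated as too old. A standalone sketch of that selection logic, using made-up field values:

```python
from datetime import datetime, timezone
from dateutil import parser as date_parser

# Hypothetical feed entry with both RSS- and Atom-style date fields.
entry = {
    "published": "Mon, 02 Jun 2025 14:05:00 +0200",
    "updated": "2025-06-02T16:30:00+02:00",
}

date_fields = [entry.get(k) for k in ("published", "updated", "date_published", "date_modified", "pubDate")]
dates = [date_parser.parse(d) for d in date_fields if d]

chosen = min(dates) if dates else datetime.now(timezone.utc)
print(chosen)  # 2025-06-02 14:05:00+02:00 (the earlier "published" value wins)
```
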
```diff
@@ -229,33 +270,36 @@ def main():
     now = datetime.now(timezone.utc)
     max_age = timedelta(days=MAX_POST_AGE_DAYS)
 
+    logger.debug(f"📰 Number of feed entries found: {len(feed.entries)}")  # DEBUG
+
     for entry in feed.entries:
         post_id = entry.get("id") or entry.get("link")
+        logger.debug(f"🆔 Checking post ID: {post_id}")  # DEBUG
 
         if post_id in seen_ids:
+            logger.debug(f"⏭️ Post already processed: {post_id}")  # DEBUG
             continue
 
         post_date = extract_post_date(entry)
-        age = now - post_date
-        age_days = age.days
-        age_hours = age.seconds // 3600
-        #logger.info(f"Post '{entry.get('title', '').strip()}' is {age_days} days and {age_hours} hours old.")
 
         if post_date < now - max_age:
-            logger.info(f"⏩ Skipping old post ({MAX_POST_AGE_DAYS}+ days): {post_id}")
+            logger.info(f"⏩ Skipping old post (older than {MAX_POST_AGE_DAYS} days): {post_id}")
             continue
 
         title = entry.get("title", "").strip()
         link = entry.get("link", "").strip()
 
+        def sanitize_tag(tag):
+            tag = tag.lower()
+            tag = unicodedata.normalize("NFKD", tag).encode("ascii", "ignore").decode("ascii")
+            tag = re.sub(r"\W+", "", tag)
+            return tag
+
         tags = []
         if "tags" in entry:
-            tags = [tag["term"] for tag in entry.tags if "term" in tag]
+            raw_tags = [tag.get("term") if isinstance(tag, dict) else getattr(tag, "term", None) for tag in entry.tags]
+            tags = [sanitize_tag(t) for t in raw_tags if t]
 
-        if tags:
-            hashtags = " ".join(f"#{tag}" for tag in tags)
-            message = f"{link} {hashtags}"
-        else:
-            message = link
+        logger.debug(f"🏷 Extracted tags: {tags}")  # DEBUG
 
         logger.info(f"💡 New post found: {title}")
```
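
The new `sanitize_tag` helper reduces feed category terms to something that works as a single hashtag: lowercase, ASCII-folded via NFKD normalization, and with all non-word characters removed. A standalone sketch with assumed sample terms:

```python
import re
import unicodedata

def sanitize_tag(tag):
    # Same transformation as in the diff: lowercase, fold to ASCII, drop non-word characters.
    tag = tag.lower()
    tag = unicodedata.normalize("NFKD", tag).encode("ascii", "ignore").decode("ascii")
    tag = re.sub(r"\W+", "", tag)
    return tag

# Assumed example category terms from a feed entry:
for term in ["Künstliche Intelligenz", "Open-Source", "C++", "Nachhaltigkeit"]:
    print(f"{term!r} -> #{sanitize_tag(term)}")

# 'Künstliche Intelligenz' -> #kunstlicheintelligenz
# 'Open-Source' -> #opensource
# 'C++' -> #c
# 'Nachhaltigkeit' -> #nachhaltigkeit
```
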
```diff
@@ -265,39 +309,30 @@ def main():
             time.sleep(2)
 
             if POST_TARGETS in ("bluesky", "both"):
-                post_to_bluesky(f"{title}\n{link}", link)
+                post_to_bluesky(title, link, tags)
 
             save_seen_id(post_id)
             logger.info(f"✅ Post successfully published.")
 
             if should_send_email(on_success=True):
-                send_status_email(
-                    f"✅ Post published: {title}",
-                    generate_email_html("success", title, link)
-                )
+                send_status_email(f"✅ Post published: {title}", generate_email_html("success", title, link))
 
-        except Exception as e:
-            logger.error(f"❌ Posting failed: {e}")
+        except Exception:
+            logger.exception(f"❌ Posting failed for post: {post_id}")
             if should_send_email(on_success=False):
-                send_status_email(
-                    f"❌ Error posting: {title}",
-                    generate_email_html("error", title, link, str(e))
-                )
+                send_status_email(f"❌ Error posting: {title}", generate_email_html("error", title, link, str(Exception)))
 
         time.sleep(5)
 
 if __name__ == "__main__":
     INTERVAL_MINUTES = int(os.getenv("INTERVAL_MINUTES", 30))
     logger.info(f"🔁 Starting feed check every {INTERVAL_MINUTES} minutes.")
 
     start_health_server()
 
     while True:
         try:
             main()
-        except Exception as e:
-            logger.error(f"Unhandled error during execution: {e}")
+        except Exception:
+            logger.exception("Unhandled error during execution:")
 
         logger.info(f"⏳ Waiting {INTERVAL_MINUTES} minutes until next run...")
         time.sleep(INTERVAL_MINUTES * 60)
```