Added function to control maximum post age
All checks were successful
Build and Push Docker Image on Tag / build_and_push (push) Successful in 27s

This commit is contained in:
2025-05-25 12:53:18 +02:00
parent 539a8abf28
commit 6b52107afa
3 changed files with 108 additions and 73 deletions

View File

@@ -81,18 +81,21 @@ docker run -d \
Die folgenden Umgebungsvariablen steuern das Verhalten des Containers. Sie können entweder direkt beim Start übergeben oder über eine `.env`-Datei definiert werden.
| Variable | Beschreibung |
| ----------------------- | ------------------------------------------------------------ |
| `FEED_URL` | URL des RSS-Feeds, der regelmäßig überprüft wird (z.B. `https://example.com/rss.xml`) | | Variable | Beschreibung | Beispielwert | Standardwert |
| `MASTODON_API_BASE_URL` | Basis-URL der Mastodon-Instanz (z.B. `https://mastodon.social`) | | ----------------------- | ------------------------------------------------------------ | -------------------------- | -------------- |
| `MASTODON_ACCESS_TOKEN` | Access Token des Mastodon-Accounts für die API-Nutzung | | `FEED_URL` | URL zum RSS- oder Atom-Feed | `https://example.com/feed` | _erforderlich_ |
| `BSKY_IDENTIFIER` | Bluesky-Handle oder Login (z.B. `user.bsky.social`) | | `MAX_POST_AGE_DAYS` | Maximales Alter eines Beitrags (in Tagen), der gepostet werden darf | `0` = nur heutige Beiträge | `0` |
| `BSKY_PASSWORD` | Passwort des Bluesky-Accounts | | `MASTODON_API_BASE_URL` | Basis-URL deiner Mastodon-Instanz | `https://mastodon.social` | _erforderlich_ |
| `INTERVAL_MINUTES` | Zeitintervall in Minuten zur Prüfung des RSS-Feeds (z.B. `30`) | | `MASTODON_ACCESS_TOKEN` | Access Token für die Mastodon API | `abc123...` | _erforderlich_ |
| `SMTP_HOST` | SMTP-Server für den Versand von E-Mails | | `BSKY_IDENTIFIER` | Bluesky-Handle | `name.bsky.social` | _erforderlich_ |
| `SMTP_PORT` | Port des SMTP-Servers (z.B. `587` für STARTTLS) | | `BSKY_PASSWORD` | Passwort für das Bluesky-Konto | `passwort123` | _erforderlich_ |
| `SMTP_USER` | SMTP-Benutzername | | `INTERVAL_MINUTES` | Zeitintervall in Minuten zwischen den Feed-Prüfungen | `30` | `30` |
| `SMTP_PASSWORD` | SMTP-Passwort | | `EMAIL_MODE` | Wann eine Status-E-Mail gesendet werden soll (`none`, `errors`, `all`) | `errors` | `errors` |
| `EMAIL_FROM` | Absenderadresse für E-Mail-Benachrichtigungen | | `SMTP_HOST` | SMTP-Server für Status-E-Mails | `smtp.example.com` | _optional_ |
| `EMAIL_TO` | Empfängeradresse für Benachrichtigungen | | `SMTP_PORT` | Port des SMTP-Servers | `587` | `587` |
| `EMAIL_MODE` | Wann E-Mails versendet werden:<br>`none` keine Mails<br>`errors` nur bei Fehlern<br>`all` bei jedem Durchlauf | | `SMTP_USER` | Benutzername für SMTP | `user@example.com` | _optional_ |
| `SMTP_PASSWORD` | Passwort für SMTP | `sicherespasswort` | _optional_ |
| `EMAIL_FROM` | Absenderadresse für E-Mails | `noreply@example.com` | _optional_ |
| `EMAIL_TO` | Empfängeradresse für E-Mails | `admin@example.com` | _optional_ |

View File

@@ -14,26 +14,28 @@ from dotenv import load_dotenv
from http.server import HTTPServer, BaseHTTPRequestHandler from http.server import HTTPServer, BaseHTTPRequestHandler
from email.mime.text import MIMEText from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart from email.mime.multipart import MIMEMultipart
from dateutil import parser as date_parser
from datetime import datetime, timezone, timedelta
# Pull variables from a local .env file (if any) into the process environment
# before any of the settings below are read.
load_dotenv()

# --- Feed settings -----------------------------------------------------------
FEED_URL = os.environ.get("FEED_URL")
# Maximum post age in days (default 0); main() compares each entry's date
# against this limit. A non-integer value raises ValueError at import time.
MAX_POST_AGE_DAYS = int(os.environ.get("MAX_POST_AGE_DAYS", 0))
# File in which the IDs of already handled posts are stored, one per line.
SEEN_POSTS_FILE = "/data/seen_posts.txt"

# --- Mastodon credentials ----------------------------------------------------
MASTODON_BASE_URL = os.environ.get("MASTODON_API_BASE_URL")
MASTODON_TOKEN = os.environ.get("MASTODON_ACCESS_TOKEN")

# --- Bluesky credentials -----------------------------------------------------
BSKY_HANDLE = os.environ.get("BSKY_IDENTIFIER")
BSKY_PASSWORD = os.environ.get("BSKY_PASSWORD")

# --- Logging -----------------------------------------------------------------
# Configure the root logger with a single stream handler.
# NOTE: logging.StreamHandler() without arguments writes to sys.stderr.
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.addHandler(handler)
class HealthHandler(BaseHTTPRequestHandler): class HealthHandler(BaseHTTPRequestHandler):
"""Handles HTTP GET requests for the health check endpoint."""
def do_GET(self): def do_GET(self):
if self.path == "/health": if self.path == "/health":
self.send_response(200) self.send_response(200)
@@ -43,25 +45,27 @@ class HealthHandler(BaseHTTPRequestHandler):
self.send_response(404) self.send_response(404)
self.end_headers() self.end_headers()
def log_message(self, format, *args):
"""Suppress default HTTP request logging."""
pass
def start_health_server():
    """Launch the health check HTTP server without blocking the caller.

    An ``HTTPServer`` bound to ``0.0.0.0:8000`` and served by
    ``HealthHandler`` is run via ``serve_forever`` in a daemon thread, so the
    thread does not keep the process alive on exit.
    """
    httpd = HTTPServer(("0.0.0.0", 8000), HealthHandler)
    worker = threading.Thread(target=httpd.serve_forever, daemon=True)
    worker.start()
    logger.info("Healthcheck server is running on port 8000.")
def should_send_email(on_success: bool) -> bool:
    """Decide whether a status email should be sent for the current result.

    The decision is driven by the ``EMAIL_MODE`` environment variable, which
    is read on every call and defaults to ``"errors"`` when unset:

    * ``all``    -- always send.
    * ``errors`` -- send only when ``on_success`` is false.
    * any other value (including ``none``) -- never send.

    Args:
        on_success: True when the operation succeeded, False when it failed.

    Returns:
        True if an email should be sent, False otherwise.
    """
    # Strip surrounding whitespace so that padded values such as "all " (easy
    # to produce in a hand-edited .env file) are still recognised instead of
    # silently disabling all notifications.
    mode = os.getenv("EMAIL_MODE", "errors").strip().lower()
    if mode == "all":
        return True
    return mode == "errors" and not on_success
def send_status_email(subject, html_content): def send_status_email(subject, html_content):
"""Sends a formatted HTML email with the given subject and content."""
try: try:
smtp_host = os.getenv("SMTP_HOST") smtp_host = os.getenv("SMTP_HOST")
smtp_port = int(os.getenv("SMTP_PORT", 587)) smtp_port = int(os.getenv("SMTP_PORT", 587))
@@ -74,36 +78,40 @@ def send_status_email(subject, html_content):
msg["Subject"] = subject msg["Subject"] = subject
msg["From"] = email_from msg["From"] = email_from
msg["To"] = email_to msg["To"] = email_to
msg.attach(MIMEText(html_content, "html"))
part = MIMEText(html_content, "html")
msg.attach(part)
with smtplib.SMTP(smtp_host, smtp_port) as server: with smtplib.SMTP(smtp_host, smtp_port) as server:
server.starttls() server.starttls()
server.login(smtp_user, smtp_password) server.login(smtp_user, smtp_password)
server.sendmail(email_from, email_to, msg.as_string()) server.sendmail(email_from, email_to, msg.as_string())
logger.info("Status E-Mail gesendet.") logger.info("Status email sent.")
except Exception as e: except Exception as e:
logger.error(f"Fehler beim Senden der E-Mail: {e}") logger.error(f"Error sending status email: {e}")
def load_seen_ids():
    """Return the set of post IDs recorded in ``SEEN_POSTS_FILE``.

    The parent directory is created if necessary. When the file does not
    exist yet, an empty file is created and an empty set is returned.
    Otherwise every line of the file, stripped of surrounding whitespace,
    becomes one element of the returned set.
    """
    directory = os.path.dirname(SEEN_POSTS_FILE)
    os.makedirs(directory, exist_ok=True)

    if os.path.exists(SEEN_POSTS_FILE):
        with open(SEEN_POSTS_FILE, "r") as handle:
            return {raw_line.strip() for raw_line in handle}

    # First run: create the (empty) file so later appends have a target.
    with open(SEEN_POSTS_FILE, "w"):
        pass
    return set()
def save_seen_id(post_id):
    """Record ``post_id`` as seen by appending it as a line to ``SEEN_POSTS_FILE``."""
    record = post_id + "\n"
    with open(SEEN_POSTS_FILE, "a") as handle:
        handle.write(record)
def post_to_mastodon(message):
    """Publish ``message`` via ``Mastodon.toot``.

    A client is created for every call from the module-level
    ``MASTODON_BASE_URL`` and ``MASTODON_TOKEN`` settings.
    """
    client = Mastodon(
        api_base_url=MASTODON_BASE_URL,
        access_token=MASTODON_TOKEN,
    )
    client.toot(message)
def fetch_og_data(url): def fetch_og_data(url):
"""Fetches Open Graph title and image URL from a web page."""
try: try:
resp = requests.get(url, timeout=10) resp = requests.get(url, timeout=10)
resp.raise_for_status() resp.raise_for_status()
@@ -117,7 +125,9 @@ def fetch_og_data(url):
logger.error(f"Error loading OG data: {e}") logger.error(f"Error loading OG data: {e}")
return None, None return None, None
def post_to_bluesky(message, link): def post_to_bluesky(message, link):
"""Posts a message and optional preview to Bluesky."""
client = Client() client = Client()
client.login(BSKY_HANDLE, BSKY_PASSWORD) client.login(BSKY_HANDLE, BSKY_PASSWORD)
@@ -131,23 +141,20 @@ def post_to_bluesky(message, link):
"external": { "external": {
"uri": link, "uri": link,
"title": title, "title": title,
"description": "", # Optional: Beschreibung kannst du per OG:description holen "description": "",
"thumb": { "thumb": {
"$type": "blob", "$type": "blob",
"ref": None, # Wird vom Upload ersetzt "ref": None,
"mimeType": "", # Wird vom Upload ersetzt "mimeType": "",
"size": 0 # Wird vom Upload ersetzt "size": 0
} }
} }
} }
# Bild herunterladen und hochladen
img_resp = requests.get(image_url, timeout=10) img_resp = requests.get(image_url, timeout=10)
img_resp.raise_for_status() img_resp.raise_for_status()
image_bytes = BytesIO(img_resp.content) blob = client.upload_blob(BytesIO(img_resp.content))
embed["external"]["thumb"] = blob.blob
blob = client.upload_blob(image_bytes)
embed["external"]["thumb"] = blob.blob # Automatisch ersetzt
client.send_post(text=text, embed=embed) client.send_post(text=text, embed=embed)
logger.info("Posted with OG preview.") logger.info("Posted with OG preview.")
@@ -155,22 +162,59 @@ def post_to_bluesky(message, link):
except Exception as e: except Exception as e:
logger.error(f"Error uploading OG preview: {e}") logger.error(f"Error uploading OG preview: {e}")
# Fallback: Nur Text + Link
client.send_post(f"{text}\n{link}") client.send_post(f"{text}\n{link}")
logger.info("Posted without OG preview.") logger.info("Posted without preview.")
def extract_post_date(entry):
    """Return the earliest parseable timestamp found on a feed entry.

    The fields ``published``, ``updated``, ``date_published``,
    ``date_modified`` and ``pubDate`` are read from ``entry``. Every
    non-empty value is parsed; values that parse without timezone
    information are tagged as UTC. Values that cannot be parsed are logged
    as warnings and ignored.

    Returns:
        The minimum of all successfully parsed datetimes, or the current UTC
        time when none of the fields yielded a usable date.
    """
    field_names = (
        "published",
        "updated",
        "date_published",
        "date_modified",
        "pubDate",
    )
    raw_values = [entry.get(name) for name in field_names]

    parsed = []
    for raw in raw_values:
        if not raw:
            continue
        try:
            stamp = date_parser.parse(raw)
            if stamp.tzinfo is None:
                # Naive timestamps are tagged as UTC so that all candidates
                # are timezone-aware and can be compared with each other.
                stamp = stamp.replace(tzinfo=timezone.utc)
            parsed.append(stamp)
        except Exception as exc:
            logger.warning(f"⚠️ Cannot parse date field: {raw} ({exc})")

    if not parsed:
        return datetime.now(timezone.utc)
    return min(parsed)
def main(): def main():
"""Main function to process feed entries and post new items."""
seen_ids = load_seen_ids() seen_ids = load_seen_ids()
feed = feedparser.parse(FEED_URL) feed = feedparser.parse(FEED_URL)
now = datetime.now(timezone.utc)
max_age = timedelta(days=MAX_POST_AGE_DAYS)
for entry in feed.entries: for entry in feed.entries:
post_id = entry.get("id") or entry.get("link") post_id = entry.get("id") or entry.get("link")
if post_id in seen_ids: if post_id in seen_ids:
continue continue
post_date = extract_post_date(entry)
age = now - post_date
age_days = age.days
age_hours = age.seconds // 3600
logger.info(f"Post '{entry.get('title', '').strip()}' is {age_days} days and {age_hours} hours old.")
if post_date < now - max_age:
logger.info(f"⏩ Skipping old post (older than {MAX_POST_AGE_DAYS} days): {post_id}")
continue
title = entry.get("title", "").strip() title = entry.get("title", "").strip()
link = entry.get("link", "").strip() link = entry.get("link", "").strip()
message = link # Link alleine posten für Mastodon OG-Vorschau message = link
logger.info(f"New post: {title}") logger.info(f"New post: {title}")
@@ -179,45 +223,30 @@ def main():
time.sleep(2) time.sleep(2)
post_to_bluesky(message, link) post_to_bluesky(message, link)
save_seen_id(post_id) save_seen_id(post_id)
logger.info("Successfully posted.") logger.info("Successfully posted.")
if should_send_email(on_success=True): if should_send_email(on_success=True):
email_subject = f"✅ Erfolgreich gepostet: {title}" send_status_email(
email_body = f""" f"✅ Successfully posted: {title}",
<html> f"<html><body><h2>Post successfully published</h2><p><b>Title:</b> {title}</p><p><b>Link:</b> <a href='{link}'>{link}</a></p></body></html>"
<body> )
<h2>Beitrag erfolgreich gepostet</h2>
<p><strong>Titel:</strong> {title}</p>
<p><strong>Link:</strong> <a href="{link}">{link}</a></p>
</body>
</html>
"""
send_status_email(email_subject, email_body)
except Exception as e: except Exception as e:
logger.error(f"Error posting: {e}") logger.error(f"Error posting: {e}")
if should_send_email(on_success=False): if should_send_email(on_success=False):
email_subject = f"❌ Fehler beim Posten: {title}" send_status_email(
email_body = f""" f"❌ Error posting: {title}",
<html> f"<html><body><h2>Error posting</h2><p><b>Title:</b> {title}</p><p><b>Link:</b> <a href='{link}'>{link}</a></p><p><b>Error message:</b> {str(e)}</p></body></html>"
<body> )
<h2>Fehler beim Posten</h2>
<p><strong>Titel:</strong> {title}</p>
<p><strong>Link:</strong> <a href="{link}">{link}</a></p>
<p><strong>Fehlermeldung:</strong> {str(e)}</p>
</body>
</html>
"""
send_status_email(email_subject, email_body)
time.sleep(5) time.sleep(5)
if __name__ == "__main__": if __name__ == "__main__":
INTERVAL_MINUTES = int(os.getenv("INTERVAL_MINUTES", 30)) # Default: 30 Minuten INTERVAL_MINUTES = int(os.getenv("INTERVAL_MINUTES", 30))
logger.info(f"Start feed check every {INTERVAL_MINUTES} minutes.") logger.info(f"Start feed check every {INTERVAL_MINUTES} minutes.")
start_health_server() # HTTP-Healthcheck starten start_health_server()
while True: while True:
try: try:

3
env
View File

@@ -12,6 +12,9 @@ BSKY_PASSWORD=your_bluesky_password
# Intervall in Minuten für Feedprüfung # Intervall in Minuten für Feedprüfung
INTERVAL_MINUTES=30 INTERVAL_MINUTES=30
# Maximales Alter eines Beitrags (in Tagen), der gepostet werden darf (0 = nur heute, 1 = bis gestern, usw.)
MAX_POST_AGE_DAYS=0
# E-Mail Einstellungen # E-Mail Einstellungen
SMTP_HOST=smtp.example.com SMTP_HOST=smtp.example.com
SMTP_PORT=587 SMTP_PORT=587