Added function to control maximum post age
All checks were successful
Build and Push Docker Image on Tag / build_and_push (push) Successful in 27s

This commit is contained in:
2025-05-25 12:53:18 +02:00
parent 539a8abf28
commit 6b52107afa
3 changed files with 108 additions and 73 deletions

View File

@ -14,26 +14,28 @@ from dotenv import load_dotenv
from http.server import HTTPServer, BaseHTTPRequestHandler
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from dateutil import parser as date_parser
from datetime import datetime, timezone, timedelta
# Load variables from a local .env file (if any) before reading the environment.
load_dotenv()

# Runtime configuration, read once at import time.
FEED_URL = os.getenv("FEED_URL")
SEEN_POSTS_FILE = "/data/seen_posts.txt"
MASTODON_BASE_URL = os.getenv("MASTODON_API_BASE_URL")
MASTODON_TOKEN = os.getenv("MASTODON_ACCESS_TOKEN")
BSKY_HANDLE = os.getenv("BSKY_IDENTIFIER")
BSKY_PASSWORD = os.getenv("BSKY_PASSWORD")
# Maximum post age in days; 0 when the variable is unset.
# NOTE(review): main() skips entries where `post_date < now - max_age`, so the
# default of 0 skips every entry dated before the current run - confirm that
# 0 is not meant to disable the age limit.
MAX_POST_AGE_DAYS = int(os.getenv("MAX_POST_AGE_DAYS", 0))

# Configure logging (standard format) on the root logger.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# A single stream handler is enough; StreamHandler() without an explicit
# stream writes to sys.stderr.
handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
class HealthHandler(BaseHTTPRequestHandler):
"""Handles HTTP GET requests for the health check endpoint."""
def do_GET(self):
if self.path == "/health":
self.send_response(200)
@ -43,25 +45,27 @@ class HealthHandler(BaseHTTPRequestHandler):
self.send_response(404)
self.end_headers()
def log_message(self, format, *args):
"""Suppress default HTTP request logging."""
pass
def start_health_server():
    """Start the health check HTTP server in a background thread.

    Binds an ``HTTPServer`` using ``HealthHandler`` to ``0.0.0.0:8000`` and
    runs ``serve_forever`` on a daemon thread, so the server never keeps the
    process alive on its own. Returns ``None``.

    Raises:
        OSError: if the port cannot be bound.
    """
    server = HTTPServer(("0.0.0.0", 8000), HealthHandler)
    thread = threading.Thread(target=server.serve_forever, daemon=True)
    thread.start()
    # Log the start-up message exactly once.
    logger.info("Healthcheck server is running on port 8000.")
def should_send_email(on_success: bool) -> bool:
    """Decide whether a status email should be sent.

    The decision is driven by the ``EMAIL_MODE`` environment variable
    (case-insensitive, default ``"errors"``):

    * ``"all"``    - always send.
    * ``"errors"`` - send only when ``on_success`` is false.
    * anything else (including ``"none"``) - never send.

    Args:
        on_success: True when reporting a successful post, False for a failure.

    Returns:
        True if an email should be sent, otherwise False.
    """
    mode = os.getenv("EMAIL_MODE", "errors").lower()
    if mode == "all":
        return True
    # "none" and unknown modes fall through to False here.
    return mode == "errors" and not on_success
def send_status_email(subject, html_content):
"""Sends a formatted HTML email with the given subject and content."""
try:
smtp_host = os.getenv("SMTP_HOST")
smtp_port = int(os.getenv("SMTP_PORT", 587))
@ -74,36 +78,40 @@ def send_status_email(subject, html_content):
msg["Subject"] = subject
msg["From"] = email_from
msg["To"] = email_to
part = MIMEText(html_content, "html")
msg.attach(part)
msg.attach(MIMEText(html_content, "html"))
with smtplib.SMTP(smtp_host, smtp_port) as server:
server.starttls()
server.login(smtp_user, smtp_password)
server.sendmail(email_from, email_to, msg.as_string())
logger.info("Status E-Mail gesendet.")
logger.info("Status email sent.")
except Exception as e:
logger.error(f"Fehler beim Senden der E-Mail: {e}")
logger.error(f"Error sending status email: {e}")
def load_seen_ids():
    """Load the set of already seen post IDs from ``SEEN_POSTS_FILE``.

    Ensures the parent directory exists and creates an empty file on first
    use, so later appends always have a target.

    Returns:
        A set with one stripped ID per non-empty line of the file; an empty
        set when the file did not exist yet.
    """
    directory = os.path.dirname(SEEN_POSTS_FILE)
    if directory:
        # os.makedirs("") raises, so only create a directory when one is named.
        os.makedirs(directory, exist_ok=True)

    if not os.path.exists(SEEN_POSTS_FILE):
        # Create the empty file through a context manager so the handle is
        # always closed.
        with open(SEEN_POSTS_FILE, "w"):
            pass
        return set()

    with open(SEEN_POSTS_FILE, "r") as f:
        stripped_lines = (line.strip() for line in f)
        # Blank lines carry no ID and are dropped.
        return {post_id for post_id in stripped_lines if post_id}
def save_seen_id(post_id):
    """Record a post ID as seen by appending it to ``SEEN_POSTS_FILE``.

    Args:
        post_id: The ID string to persist; written followed by a newline.
    """
    with open(SEEN_POSTS_FILE, "a") as seen_file:
        record = post_id + "\n"
        seen_file.write(record)
def post_to_mastodon(message):
    """Publish ``message`` as a status on Mastodon.

    A client is created for each call from the module-level
    ``MASTODON_BASE_URL`` and ``MASTODON_TOKEN`` settings.

    Args:
        message: The text to post.
    """
    client = Mastodon(
        access_token=MASTODON_TOKEN,
        api_base_url=MASTODON_BASE_URL,
    )
    client.toot(message)
def fetch_og_data(url):
"""Fetches Open Graph title and image URL from a web page."""
try:
resp = requests.get(url, timeout=10)
resp.raise_for_status()
@ -117,7 +125,9 @@ def fetch_og_data(url):
logger.error(f"Error loading OG data: {e}")
return None, None
def post_to_bluesky(message, link):
"""Posts a message and optional preview to Bluesky."""
client = Client()
client.login(BSKY_HANDLE, BSKY_PASSWORD)
@ -131,23 +141,20 @@ def post_to_bluesky(message, link):
"external": {
"uri": link,
"title": title,
"description": "", # Optional: Beschreibung kannst du per OG:description holen
"description": "",
"thumb": {
"$type": "blob",
"ref": None, # Wird vom Upload ersetzt
"mimeType": "", # Wird vom Upload ersetzt
"size": 0 # Wird vom Upload ersetzt
"ref": None,
"mimeType": "",
"size": 0
}
}
}
# Bild herunterladen und hochladen
img_resp = requests.get(image_url, timeout=10)
img_resp.raise_for_status()
image_bytes = BytesIO(img_resp.content)
blob = client.upload_blob(image_bytes)
embed["external"]["thumb"] = blob.blob # Automatisch ersetzt
blob = client.upload_blob(BytesIO(img_resp.content))
embed["external"]["thumb"] = blob.blob
client.send_post(text=text, embed=embed)
logger.info("Posted with OG preview.")
@ -155,22 +162,59 @@ def post_to_bluesky(message, link):
except Exception as e:
logger.error(f"Error uploading OG preview: {e}")
# Fallback: Nur Text + Link
client.send_post(f"{text}\n{link}")
logger.info("Posted without OG preview.")
logger.info("Posted without preview.")
def extract_post_date(entry):
    """Return the earliest parseable date found on a feed entry.

    The fields ``published``, ``updated``, ``date_published``,
    ``date_modified`` and ``pubDate`` are read via ``entry.get``. Every
    non-empty value is parsed; values without timezone information are
    treated as UTC. Values that fail to parse are logged as warnings and
    ignored.

    Args:
        entry: A mapping-like feed entry exposing ``get``.

    Returns:
        The oldest timezone-aware datetime among the parsed fields, or the
        current UTC time when no field could be parsed.
    """
    field_names = (
        "published",
        "updated",
        "date_published",
        "date_modified",
        "pubDate",
    )
    raw_values = [entry.get(name) for name in field_names]

    parsed = []
    for raw in raw_values:
        if not raw:
            continue
        try:
            moment = date_parser.parse(raw)
            if moment.tzinfo is None:
                # Naive timestamps are interpreted as UTC.
                moment = moment.replace(tzinfo=timezone.utc)
            parsed.append(moment)
        except Exception as e:
            logger.warning(f"⚠️ Cannot parse date field: {raw} ({e})")

    if not parsed:
        return datetime.now(timezone.utc)
    return min(parsed)
def main():
"""Main function to process feed entries and post new items."""
seen_ids = load_seen_ids()
feed = feedparser.parse(FEED_URL)
now = datetime.now(timezone.utc)
max_age = timedelta(days=MAX_POST_AGE_DAYS)
for entry in feed.entries:
post_id = entry.get("id") or entry.get("link")
if post_id in seen_ids:
continue
post_date = extract_post_date(entry)
age = now - post_date
age_days = age.days
age_hours = age.seconds // 3600
logger.info(f"Post '{entry.get('title', '').strip()}' is {age_days} days and {age_hours} hours old.")
if post_date < now - max_age:
logger.info(f"⏩ Skipping old post (older than {MAX_POST_AGE_DAYS} days): {post_id}")
continue
title = entry.get("title", "").strip()
link = entry.get("link", "").strip()
message = link # Link alleine posten für Mastodon OG-Vorschau
message = link
logger.info(f"New post: {title}")
@ -179,45 +223,30 @@ def main():
time.sleep(2)
post_to_bluesky(message, link)
save_seen_id(post_id)
logger.info("Successfully posted.")
logger.info("Successfully posted.")
if should_send_email(on_success=True):
email_subject = f"✅ Erfolgreich gepostet: {title}"
email_body = f"""
<html>
<body>
<h2>Beitrag erfolgreich gepostet</h2>
<p><strong>Titel:</strong> {title}</p>
<p><strong>Link:</strong> <a href="{link}">{link}</a></p>
</body>
</html>
"""
send_status_email(email_subject, email_body)
send_status_email(
f"✅ Successfully posted: {title}",
f"<html><body><h2>Post successfully published</h2><p><b>Title:</b> {title}</p><p><b>Link:</b> <a href='{link}'>{link}</a></p></body></html>"
)
except Exception as e:
logger.error(f"Error posting: {e}")
logger.error(f"Error posting: {e}")
if should_send_email(on_success=False):
email_subject = f"❌ Fehler beim Posten: {title}"
email_body = f"""
<html>
<body>
<h2>Fehler beim Posten</h2>
<p><strong>Titel:</strong> {title}</p>
<p><strong>Link:</strong> <a href="{link}">{link}</a></p>
<p><strong>Fehlermeldung:</strong> {str(e)}</p>
</body>
</html>
"""
send_status_email(email_subject, email_body)
send_status_email(
f"❌ Error posting: {title}",
f"<html><body><h2>Error posting</h2><p><b>Title:</b> {title}</p><p><b>Link:</b> <a href='{link}'>{link}</a></p><p><b>Error message:</b> {str(e)}</p></body></html>"
)
time.sleep(5)
if __name__ == "__main__":
INTERVAL_MINUTES = int(os.getenv("INTERVAL_MINUTES", 30)) # Default: 30 Minuten
INTERVAL_MINUTES = int(os.getenv("INTERVAL_MINUTES", 30))
logger.info(f"Start feed check every {INTERVAL_MINUTES} minutes.")
start_health_server() # HTTP-Healthcheck starten
start_health_server()
while True:
try: