Files
BlueMastoFeed/bluemastofeed.py

259 lines
8.5 KiB
Python
Raw Normal View History

2025-05-24 16:50:42 +02:00
import os
import time
import feedparser
import json
import logging
import requests
import threading
import smtplib
from bs4 import BeautifulSoup
from io import BytesIO
from mastodon import Mastodon
from atproto import Client
from dotenv import load_dotenv
from http.server import HTTPServer, BaseHTTPRequestHandler
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from dateutil import parser as date_parser
from datetime import datetime, timezone, timedelta
2025-05-24 16:50:42 +02:00
# Merge values from a local .env file (if present) into the process
# environment before any of the settings below are read.
load_dotenv()

# Feed that is polled for new entries.
FEED_URL = os.environ.get("FEED_URL")

# Record of entries that were already posted, one ID per line.
SEEN_POSTS_FILE = "/data/seen_posts.txt"

# Mastodon instance URL and access token.
MASTODON_BASE_URL = os.environ.get("MASTODON_API_BASE_URL")
MASTODON_TOKEN = os.environ.get("MASTODON_ACCESS_TOKEN")

# Bluesky login identifier and password.
BSKY_HANDLE = os.environ.get("BSKY_IDENTIFIER")
BSKY_PASSWORD = os.environ.get("BSKY_PASSWORD")

# Age threshold in days used by main() when filtering entries; defaults to 0.
MAX_POST_AGE_DAYS = int(os.environ.get("MAX_POST_AGE_DAYS", 0))
2025-05-24 16:50:42 +02:00
# Configure the root logger: INFO level, one stream handler, and a
# "timestamp - level - message" line format.
logger = logging.getLogger()
handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)
class HealthHandler(BaseHTTPRequestHandler):
    """Minimal HTTP handler that exposes a ``/health`` liveness endpoint."""

    def do_GET(self):
        """Answer ``GET /health`` with ``200`` and body ``OK``; anything else with ``404``."""
        is_health_probe = self.path == "/health"
        self.send_response(200 if is_health_probe else 404)
        self.end_headers()
        if is_health_probe:
            self.wfile.write(b"OK")

    def log_message(self, format, *args):
        """Discard the per-request log line the base class would emit."""
        return None
2025-05-24 16:50:42 +02:00
def start_health_server():
    """Serve the health endpoint on 0.0.0.0:8000 from a daemon thread.

    The thread is a daemon, so it does not keep the process alive once the
    main thread ends.
    """
    httpd = HTTPServer(("0.0.0.0", 8000), HealthHandler)
    threading.Thread(target=httpd.serve_forever, daemon=True).start()
    logger.info("Healthcheck server is running on port 8000.")
2025-05-24 16:50:42 +02:00
def should_send_email(on_success: bool):
    """Decide whether a status email is due for the given outcome.

    The decision is driven by the EMAIL_MODE environment variable
    (case-insensitive, default ``"errors"``):

    * ``"all"``    -> always send.
    * ``"errors"`` -> send only when ``on_success`` is false.
    * other values -> never send.
    """
    mode = os.environ.get("EMAIL_MODE", "errors").lower()
    if mode == "all":
        return True
    if mode == "errors":
        return not on_success
    return False
2025-05-24 16:50:42 +02:00
def send_status_email(subject, html_content):
    """Send an HTML status email using SMTP settings from the environment.

    Reads SMTP_HOST, SMTP_PORT (default 587), SMTP_USER, SMTP_PASSWORD,
    EMAIL_FROM and EMAIL_TO. EMAIL_TO may contain several comma-separated
    addresses. The connection is upgraded with STARTTLS before logging in.

    Delivery is best-effort: every failure (including incomplete
    configuration) is logged and swallowed, so a mail problem never
    interrupts feed processing.

    Args:
        subject: Subject line of the message.
        html_content: HTML document used as the message body.
    """
    try:
        smtp_host = os.getenv("SMTP_HOST")
        smtp_port = int(os.getenv("SMTP_PORT", 587))
        smtp_user = os.getenv("SMTP_USER")
        smtp_password = os.getenv("SMTP_PASSWORD")
        email_from = os.getenv("EMAIL_FROM")
        # sendmail() treats a plain string as ONE address, so split a
        # comma-separated list into individual recipients.
        recipients = [
            address.strip()
            for address in (os.getenv("EMAIL_TO") or "").split(",")
            if address.strip()
        ]

        if not smtp_host or not email_from or not recipients:
            logger.error(
                "Error sending status email: SMTP_HOST, EMAIL_FROM and EMAIL_TO must be set."
            )
            return

        msg = MIMEMultipart("alternative")
        msg["Subject"] = subject
        msg["From"] = email_from
        msg["To"] = ", ".join(recipients)
        msg.attach(MIMEText(html_content, "html"))

        # Without a timeout an unresponsive mail server would block the
        # caller (and therefore the whole feed loop) indefinitely.
        with smtplib.SMTP(smtp_host, smtp_port, timeout=30) as server:
            server.starttls()
            server.login(smtp_user, smtp_password)
            server.sendmail(email_from, recipients, msg.as_string())
        logger.info("Status email sent.")
    except Exception as e:
        logger.error(f"Error sending status email: {e}")
2025-05-24 16:50:42 +02:00
def load_seen_ids():
    """Return the set of post IDs recorded in SEEN_POSTS_FILE.

    The parent directory and the file itself are created when missing, so
    the first run yields an empty set. Each line is stripped of surrounding
    whitespace before being added to the set.
    """
    os.makedirs(os.path.dirname(SEEN_POSTS_FILE), exist_ok=True)

    # Create an empty file on first use so the read below cannot fail.
    if not os.path.exists(SEEN_POSTS_FILE):
        with open(SEEN_POSTS_FILE, "w"):
            pass

    with open(SEEN_POSTS_FILE, "r") as seen_file:
        return {entry_line.strip() for entry_line in seen_file}
2025-05-24 16:50:42 +02:00
def save_seen_id(post_id):
    """Record ``post_id`` as handled by appending it as a new line to SEEN_POSTS_FILE."""
    record = post_id + "\n"
    with open(SEEN_POSTS_FILE, "a") as seen_file:
        seen_file.write(record)
2025-05-24 16:50:42 +02:00
def post_to_mastodon(message):
    """Publish ``message`` as a status on the configured Mastodon instance.

    A new API client is built from MASTODON_TOKEN and MASTODON_BASE_URL on
    every call. Errors raised by the client propagate to the caller.
    """
    api = Mastodon(
        access_token=MASTODON_TOKEN,
        api_base_url=MASTODON_BASE_URL,
    )
    api.toot(message)
2025-05-24 16:50:42 +02:00
def fetch_og_data(url):
    """Return ``(title, image_url)`` from a page's Open Graph meta tags.

    Reads the ``content`` attribute of ``<meta property="og:title">`` and
    ``<meta property="og:image">``. A missing tag or attribute yields
    ``None`` for that element. Any failure (network, HTTP status, parsing)
    is logged and reported as ``(None, None)``.
    """

    def _meta_content(document, prop):
        # Value of the tag's "content" attribute, or None if unavailable.
        tag = document.find("meta", property=prop)
        if tag and tag.has_attr("content"):
            return tag["content"]
        return None

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        document = BeautifulSoup(response.text, "html.parser")
        return _meta_content(document, "og:title"), _meta_content(document, "og:image")
    except Exception as e:
        logger.error(f"Error loading OG data: {e}")
        return None, None
2025-05-24 16:50:42 +02:00
def post_to_bluesky(message, link):
    """Post to Bluesky, attaching a link preview card when possible.

    The post text is the page's Open Graph title when available, otherwise
    ``message``. If both an OG title and an OG image were found, the image
    is uploaded as a blob and the post is sent with an
    ``app.bsky.embed.external`` card. If building or sending the preview
    fails, the error is logged and a plain-text post is sent instead.

    Args:
        message: Fallback text used when the page has no OG title.
        link: URL of the article; used for the preview card and appended to
            the plain-text fallback post.

    Raises:
        Exception: Login errors and errors from the fallback ``send_post``
            propagate to the caller.
    """
    client = Client()
    client.login(BSKY_HANDLE, BSKY_PASSWORD)

    title, image_url = fetch_og_data(link)
    text = title or message

    if title and image_url:
        try:
            img_resp = requests.get(image_url, timeout=10)
            img_resp.raise_for_status()
            # upload_blob() expects the raw bytes, not a file-like wrapper.
            blob = client.upload_blob(img_resp.content)

            # Build the card only once the thumbnail reference is known.
            embed = {
                "$type": "app.bsky.embed.external",
                "external": {
                    "uri": link,
                    "title": title,
                    "description": "",
                    "thumb": blob.blob,
                },
            }
            client.send_post(text=text, embed=embed)
            logger.info("Posted with OG preview.")
            return
        except Exception as e:
            logger.error(f"Error uploading OG preview: {e}")

    # Do not repeat the link when the text already contains it (e.g. the
    # caller passed the bare link as message and no OG title was found).
    if link and link in text:
        fallback_text = text
    else:
        fallback_text = f"{text}\n{link}"
    client.send_post(fallback_text)
    logger.info("Posted without preview.")
def extract_post_date(entry):
    """Return the earliest timestamp found among an entry's date fields.

    The fields ``published``, ``updated``, ``date_published``,
    ``date_modified`` and ``pubDate`` are examined. Values without timezone
    information are treated as UTC. Fields that cannot be parsed are logged
    and ignored. If no field yields a date, the current UTC time is
    returned.
    """
    candidate_keys = (
        "published",
        "updated",
        "date_published",
        "date_modified",
        "pubDate",
    )
    parsed_dates = []
    for key in candidate_keys:
        raw = entry.get(key)
        if not raw:
            continue
        try:
            moment = date_parser.parse(raw)
        except Exception as e:
            logger.warning(f"⚠️ Cannot parse date field: {raw} ({e})")
        else:
            if moment.tzinfo is None:
                moment = moment.replace(tzinfo=timezone.utc)
            parsed_dates.append(moment)

    if parsed_dates:
        return min(parsed_dates)
    return datetime.now(timezone.utc)
2025-05-24 16:50:42 +02:00
def main():
    """Process the feed once and publish every new entry.

    For each entry that is not yet recorded as seen, the entry's link is
    posted to Mastodon and then to Bluesky. On success the entry ID is
    persisted; depending on EMAIL_MODE a status email is sent for successes
    and/or failures.

    Entries older than MAX_POST_AGE_DAYS days are skipped. A value of 0 (the
    default) or below disables the age filter; otherwise every dated entry
    would count as "too old" and nothing would ever be posted.

    Entries that have neither an ``id`` nor a ``link`` are skipped because
    they can be neither de-duplicated nor posted.
    """
    # Local import: only needed to escape feed data embedded in HTML emails.
    from html import escape

    seen_ids = load_seen_ids()
    feed = feedparser.parse(FEED_URL)
    now = datetime.now(timezone.utc)
    age_filter_enabled = MAX_POST_AGE_DAYS > 0
    max_age = timedelta(days=MAX_POST_AGE_DAYS)

    for entry in feed.entries:
        post_id = entry.get("id") or entry.get("link")
        if not post_id:
            logger.warning("Skipping feed entry without id or link.")
            continue
        if post_id in seen_ids:
            continue

        post_date = extract_post_date(entry)
        age = now - post_date
        age_days = age.days
        age_hours = age.seconds // 3600
        logger.info(f"Post '{entry.get('title', '').strip()}' is {age_days} days and {age_hours} hours old.")

        if age_filter_enabled and post_date < now - max_age:
            logger.info(f"⏩ Skipping old post (older than {MAX_POST_AGE_DAYS} days): {post_id}")
            continue

        title = entry.get("title", "").strip()
        link = entry.get("link", "").strip()
        message = link

        # Title and link come from the remote feed; escape them before
        # embedding them in the HTML body of a status email.
        safe_title = escape(title)
        safe_link = escape(link)

        logger.info(f"New post: {title}")
        try:
            # NOTE(review): if Mastodon succeeds but Bluesky fails, the entry
            # is not marked as seen and will be posted to Mastodon again on
            # the next run.
            post_to_mastodon(message)
            time.sleep(2)
            post_to_bluesky(message, link)

            save_seen_id(post_id)
            # Keep the in-memory set in sync so a duplicate of this entry
            # later in the same feed is not posted twice.
            seen_ids.add(post_id)
            logger.info("✅ Successfully posted.")

            if should_send_email(on_success=True):
                send_status_email(
                    f"✅ Successfully posted: {title}",
                    f"<html><body><h2>Post successfully published</h2><p><b>Title:</b> {safe_title}</p><p><b>Link:</b> <a href='{safe_link}'>{safe_link}</a></p></body></html>"
                )

        except Exception as e:
            logger.error(f"❌ Error posting: {e}")

            if should_send_email(on_success=False):
                send_status_email(
                    f"❌ Error posting: {title}",
                    f"<html><body><h2>Error posting</h2><p><b>Title:</b> {safe_title}</p><p><b>Link:</b> <a href='{safe_link}'>{safe_link}</a></p><p><b>Error message:</b> {escape(str(e))}</p></body></html>"
                )

        # Short pause between entries to avoid hammering the remote APIs.
        time.sleep(5)
2025-05-24 16:50:42 +02:00
if __name__ == "__main__":
    # Minutes to wait between two feed checks (default: 30).
    INTERVAL_MINUTES = int(os.environ.get("INTERVAL_MINUTES", 30))

    logger.info(f"Start feed check every {INTERVAL_MINUTES} minutes.")
    start_health_server()

    # Run forever: one feed pass, then sleep for the configured interval.
    while True:
        try:
            main()
        except Exception as e:
            # A failed pass is logged and must not terminate the service.
            logger.error(f"Error in main execution: {e}")

        logger.info(f"Wait {INTERVAL_MINUTES} minutes until next execution...")
        time.sleep(60 * INTERVAL_MINUTES)