diff --git a/backend/.env.example b/backend/.env.example index 74e9c4b..c2dd235 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -1,3 +1,4 @@ +# ─── App ──────────────────────────────────────────────────────────────────── APP_ENV=development APP_NAME=rss-news-backend APP_SECRET_KEY=replace-with-a-long-random-secret @@ -8,3 +9,37 @@ APP_ADMIN_PASSWORD=change-me SESSION_COOKIE_NAME=rss_news_session SESSION_MAX_AGE_SECONDS=28800 + +# ─── WordPress ────────────────────────────────────────────────────────────── +WP_BASE_URL=https://your-site.tld +WP_USERNAME=your-wp-username +WP_PASSWORD=your-wp-app-password +# Status für neue Beiträge: draft | future | publish +WORDPRESS_DEFAULT_STATUS=draft + +# ─── OpenAI ───────────────────────────────────────────────────────────────── +OPENAI_API_KEY=sk-... +# gpt-4o-mini empfohlen (Kosten/Qualität) +OPENAI_MODEL=gpt-4o-mini + +# ─── Telegram Bot ──────────────────────────────────────────────────────────── +# Bot-Token von @BotFather +TELEGRAM_BOT_TOKEN=123456789:ABC... +# Chat-ID deines persönlichen Chats oder einer Gruppe +TELEGRAM_CHAT_ID=123456789 +# Zufälliger Secret-Token zur Webhook-Absicherung (mindestens 20 Zeichen) +TELEGRAM_WEBHOOK_SECRET=replace-with-random-secret-min-20-chars + +# ─── N8N API-Key ───────────────────────────────────────────────────────────── +# Wird von N8N im Header X-API-Key mitgeschickt +N8N_API_KEY=replace-with-strong-random-key + +# ─── Pipeline-Einstellungen ────────────────────────────────────────────────── +# Relevanz-Score >= dieser Wert: automatisch verarbeiten (0-100) +PIPELINE_RELEVANCE_AUTO=80 +# Relevanz-Score >= dieser Wert, aber < AUTO: Telegram-Warnung senden +PIPELINE_RELEVANCE_WARN=60 +# Maximale Drafts/Veröffentlichungen pro Tag +PIPELINE_MAX_DRAFTS_PER_DAY=2 +# Bevorzugte Veröffentlichungszeiten (Stunden, kommagetrennt, CET) +PIPELINE_PUBLISH_HOURS=9,14 diff --git a/backend/app/config.py b/backend/app/config.py index 43629ba..d56ce11 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -33,6 +33,20 @@ class Settings(BaseSettings): openai_api_key: str | None = Field(default=None, validation_alias=AliasChoices("OPENAI_API_KEY")) openai_model: str = "gpt-4o-mini" + # Telegram Bot + telegram_bot_token: str | None = Field(default=None, validation_alias=AliasChoices("TELEGRAM_BOT_TOKEN")) + telegram_chat_id: str | None = Field(default=None, validation_alias=AliasChoices("TELEGRAM_CHAT_ID")) + telegram_webhook_secret: str | None = Field(default=None, validation_alias=AliasChoices("TELEGRAM_WEBHOOK_SECRET")) + + # N8N API authentication + n8n_api_key: str | None = Field(default=None, validation_alias=AliasChoices("N8N_API_KEY")) + + # Pipeline behaviour + pipeline_relevance_auto: int = 80 # >= this: auto-process + pipeline_relevance_warn: int = 60 # >= this: Telegram warning, else reject + pipeline_max_drafts_per_day: int = 2 + pipeline_publish_hours: str = "9,14" # comma-separated preferred publish hours (CET) + @lru_cache(maxsize=1) def get_settings() -> Settings: diff --git a/backend/app/db.py b/backend/app/db.py index d2ebfd5..1b394c3 100644 --- a/backend/app/db.py +++ b/backend/app/db.py @@ -160,6 +160,8 @@ def init_db() -> None: row["name"] for row in conn.execute("PRAGMA table_info(articles)").fetchall() } migration_columns = { + "relevance_score": "ALTER TABLE articles ADD COLUMN relevance_score INTEGER", + "scheduled_publish_at": "ALTER TABLE articles ADD COLUMN scheduled_publish_at TEXT", "source_hash": "ALTER TABLE articles ADD COLUMN source_hash TEXT", "image_urls_json": "ALTER TABLE articles ADD COLUMN image_urls_json TEXT", "press_contact": "ALTER TABLE articles ADD COLUMN press_contact TEXT", diff --git a/backend/app/ingestion.py b/backend/app/ingestion.py index 1ba6b6c..510fd10 100644 --- a/backend/app/ingestion.py +++ b/backend/app/ingestion.py @@ -11,7 +11,6 @@ from urllib.parse import unquote, urlparse import feedparser -from .policy import evaluate_source_policy from .repositories import ( ArticleUpsert, RunCreate, @@ -169,30 +168,6 @@ def run_ingestion(feed_id: int | None = None) -> IngestionStats: continue feeds_processed += 1 - source_snapshot = { - "id": feed.get("source_id"), - "name": feed.get("source_name"), - "base_url": feed.get("source_base_url"), - "terms_url": feed.get("source_terms_url"), - "license_name": feed.get("source_license_name"), - "risk_level": feed.get("source_risk_level"), - "last_reviewed_at": feed.get("source_last_reviewed_at"), - "is_enabled": feed.get("source_is_enabled"), - } - policy_issues = evaluate_source_policy(source_snapshot) - if policy_issues: - feed_results.append( - { - "feed_id": int(feed["id"]), - "feed_url": feed["url"], - "status": "blocked", - "policy_issues": policy_issues, - "entries_seen": 0, - "upserts": 0, - } - ) - continue - parsed = None feed_error = None for attempt in range(1, MAX_FEED_FETCH_RETRIES + 1): diff --git a/backend/app/main.py b/backend/app/main.py index b0bcf2a..51aab6b 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -15,10 +15,12 @@ from .auth import create_session_token, verify_credentials, verify_session_token from .config import get_settings from .db import init_db from .ingestion import run_ingestion +from .pipeline import run_auto_pipeline from .policy import evaluate_source_policy, is_source_allowed from .publisher import enqueue_publish, run_publisher from .relevance import article_age_days, article_relevance from .rewrite import generate_article_tags, merge_generated_tags, rewrite_article_text +from .telegram_bot import handle_update, setup_webhook from .repositories import ( ArticleUpsert, FeedCreate, @@ -620,3 +622,81 @@ def api_run_ingestion(payload: IngestionRunRequest, username: str = Depends(requ }, "requested_by": username, } + + +# --------------------------------------------------------------------------- +# N8N Automation endpoint (API-Key auth, no session cookie required) +# --------------------------------------------------------------------------- + +def _require_api_key(request: Request) -> None: + api_key = request.headers.get("X-API-Key") or request.query_params.get("api_key") + expected = settings.n8n_api_key + if not expected: + raise HTTPException(status_code=status.HTTP_501_NOT_IMPLEMENTED, detail="N8N_API_KEY nicht konfiguriert") + if api_key != expected: + raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Ungültiger API-Key") + + +@app.post("/api/n8n/pipeline") +def api_n8n_pipeline(request: Request) -> dict: + """Trigger the full auto pipeline. Called by N8N (2x/day or on demand).""" + _require_api_key(request) + try: + result = run_auto_pipeline(trigger="n8n") + return {"ok": True, "stats": result} + except Exception as exc: + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc)) from exc + + +@app.post("/api/n8n/ingest") +def api_n8n_ingest(request: Request) -> dict: + """Run only the ingestion step (no rewrite/publish). For N8N.""" + _require_api_key(request) + stats = run_ingestion() + return { + "ok": stats.status == "success", + "stats": { + "feeds_processed": stats.feeds_processed, + "entries_seen": stats.entries_seen, + "articles_upserted": stats.articles_upserted, + }, + } + + +# --------------------------------------------------------------------------- +# Telegram Webhook +# --------------------------------------------------------------------------- + +@app.post("/telegram/webhook") +async def telegram_webhook(request: Request) -> dict: + """Receive updates from Telegram Bot API.""" + # Verify secret token + secret = settings.telegram_webhook_secret + if secret: + incoming = request.headers.get("X-Telegram-Bot-Api-Secret-Token", "") + if incoming != secret: + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Invalid secret") + + body = await request.body() + try: + update = json.loads(body.decode("utf-8")) + except Exception: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid JSON") + + try: + handle_update(update) + except Exception as exc: + import logging + logging.getLogger(__name__).error("Telegram update handler error: %s", exc) + + return {"ok": True} + + +@app.post("/api/telegram/setup-webhook") +def api_setup_telegram_webhook(request: Request) -> dict: + """Register the Telegram webhook URL. Call once after deployment.""" + username = require_auth(request) + base_url = str(request.base_url).rstrip("/") + webhook_url = f"{base_url}/telegram/webhook" + result = setup_webhook(webhook_url) + return {"ok": True, "webhook_url": webhook_url, "telegram_response": result, "requested_by": username} diff --git a/backend/app/pipeline.py b/backend/app/pipeline.py new file mode 100644 index 0000000..f86ef35 --- /dev/null +++ b/backend/app/pipeline.py @@ -0,0 +1,407 @@ +"""Autonomous RSS-News pipeline. + +Full automated flow: +1. Run RSS ingestion +2. For each new article: + - Auto-select primary image + - Score relevance via GPT + - < warn threshold: reject (error status) → Telegram rejected summary + - warn..auto threshold: Telegram warning with override button + - >= auto threshold: rewrite → create WP draft → Telegram notification +3. Send pipeline summary to Telegram +""" +from __future__ import annotations + +import json +import logging +import time +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Any + +from .config import get_settings +from .ingestion import run_ingestion +from .publisher import enqueue_publish, run_publisher +from .repositories import ( + ArticleUpsert, + get_article_by_id, + list_articles, + set_article_image_decision, + update_article_status, + upsert_article as repo_upsert_article, +) +from .rewrite import generate_article_tags, merge_generated_tags, rewrite_article_text, score_article_relevance +from .scheduler import reserve_publish_slot +from .wordpress import publish_article_draft, selected_image_exists + +logger = logging.getLogger(__name__) + + +@dataclass +class PipelineStats: + ingested: int = 0 + processed: int = 0 + drafts_created: int = 0 + rejected: int = 0 + warnings: int = 0 + errors: int = 0 + rejected_articles: list[dict[str, Any]] = field(default_factory=list) + + +# --------------------------------------------------------------------------- +# Internal helpers +# --------------------------------------------------------------------------- + +def _auto_select_image(article: dict[str, Any]) -> bool: + """Auto-select the primary image from ingestion metadata if not already selected.""" + meta_json = article.get("meta_json") or "{}" + try: + meta = json.loads(meta_json) + except Exception: + return False + + # Already selected? + image_review = meta.get("image_review") or {} + if isinstance(image_review, dict) and image_review.get("selected_url"): + return True + + # Try to get primary from ingestion extraction + extraction = meta.get("extraction") or {} + image_selection = extraction.get("image_selection") or {} + primary = image_selection.get("primary") + + if not primary: + # Fallback: use first URL from image_urls_json + image_urls_json = article.get("image_urls_json") or "[]" + try: + urls = json.loads(image_urls_json) + if urls: + primary = urls[0] + except Exception: + pass + + if primary: + set_article_image_decision(int(article["id"]), primary, "select", actor="pipeline") + return True + return False + + +def _store_relevance(article_id: int, relevance: dict[str, Any]) -> None: + """Persist relevance score and reason in article meta_json and relevance_score column.""" + article = get_article_by_id(article_id) + if not article: + return + try: + meta = json.loads(article.get("meta_json") or "{}") + except Exception: + meta = {} + meta["relevance"] = relevance + new_meta = json.dumps(meta, ensure_ascii=False) + from .db import get_conn + with get_conn() as conn: + conn.execute( + "UPDATE articles SET meta_json = ?, relevance_score = ? WHERE id = ?", + (new_meta, relevance.get("score", 0), article_id), + ) + + +def _do_rewrite_and_draft(article: dict[str, Any]) -> tuple[int, str | None]: + """Rewrite article and create WP draft. Returns (wp_post_id, wp_post_url).""" + article_id = int(article["id"]) + + # Rewrite + rewritten = rewrite_article_text(article) + tags: list[str] = [] + try: + tags = generate_article_tags(article, rewritten_text=rewritten) + except Exception: + pass + merged_meta = merge_generated_tags(article.get("meta_json"), tags) + + # Save rewritten content + approved status + repo_upsert_article( + ArticleUpsert( + feed_id=article.get("feed_id"), + source_article_id=article.get("source_article_id"), + source_hash=article.get("source_hash"), + title=article.get("title", ""), + source_url=article.get("source_url", ""), + canonical_url=article.get("canonical_url"), + published_at=article.get("published_at"), + author=article.get("author"), + summary=article.get("summary"), + content_raw=article.get("content_raw"), + content_rewritten=rewritten, + image_urls_json=article.get("image_urls_json"), + press_contact=article.get("press_contact"), + source_name_snapshot=article.get("source_name_snapshot"), + source_terms_url_snapshot=article.get("source_terms_url_snapshot"), + source_license_name_snapshot=article.get("source_license_name_snapshot"), + legal_checked=bool(int(article.get("legal_checked", 0))), + legal_checked_at=article.get("legal_checked_at"), + legal_note=article.get("legal_note"), + wp_post_id=article.get("wp_post_id"), + wp_post_url=article.get("wp_post_url"), + publish_attempts=int(article.get("publish_attempts", 0)), + publish_last_error=article.get("publish_last_error"), + published_to_wp_at=article.get("published_to_wp_at"), + word_count=len(rewritten.split()), + status="approved", + meta_json=merged_meta, + ) + ) + + # Reload after save to get updated meta_json + fresh = get_article_by_id(article_id) + if not fresh: + raise RuntimeError(f"Artikel #{article_id} nach Rewrite nicht gefunden") + + # Create WP draft + wp_post_id, wp_post_url = publish_article_draft(fresh) + + # Update WP info in DB + from .repositories import mark_article_publish_result + mark_article_publish_result( + article_id, + wp_post_id=wp_post_id, + wp_post_url=wp_post_url, + error=None, + increment_attempts=True, + set_published_status=False, + ) + + return wp_post_id, wp_post_url + + +# --------------------------------------------------------------------------- +# Public pipeline functions +# --------------------------------------------------------------------------- + +def run_auto_pipeline(trigger: str = "auto") -> dict[str, Any]: + """Run the full automated pipeline and return stats dict.""" + from . import telegram_bot as tg + + settings = get_settings() + stats = PipelineStats() + + tg.notify_pipeline_started(trigger) + + # Step 1: Ingestion + try: + ingest_result = run_ingestion() + stats.ingested = ingest_result.articles_upserted + except Exception as exc: + tg.notify_error(f"Ingestion fehlgeschlagen: {exc}") + logger.error("Ingestion error: %s", exc) + stats.errors += 1 + + # Step 2: Process new articles + new_articles = list_articles(limit=100, status_filter="new") + + for article in new_articles: + article_id = int(article["id"]) + try: + _process_article(article, stats, settings) + except Exception as exc: + logger.error("Fehler bei Artikel #%d: %s", article_id, exc) + tg.notify_error(f"Fehler bei Artikel #{article_id} ({article.get('title','?')[:50]}): {exc}") + stats.errors += 1 + # Rate limiting between OpenAI calls + time.sleep(1) + + # Step 3: Send rejected summary if any + if stats.rejected_articles: + try: + tg.notify_rejected_summary(stats.rejected_articles) + except Exception as exc: + logger.warning("Telegram rejected summary fehlgeschlagen: %s", exc) + + # Step 4: Summary + result = { + "ingested": stats.ingested, + "processed": stats.processed, + "drafts_created": stats.drafts_created, + "rejected": stats.rejected, + "warnings": stats.warnings, + "errors": stats.errors, + } + tg.notify_pipeline_done(result) + return result + + +def _process_article(article: dict[str, Any], stats: PipelineStats, settings: Any) -> None: + """Process a single new article through the pipeline.""" + from . import telegram_bot as tg + + article_id = int(article["id"]) + + # Auto-select image + _auto_select_image(article) + + # Score relevance + try: + relevance = score_article_relevance(article) + except Exception as exc: + logger.warning("Relevanz-Scoring für #%d fehlgeschlagen: %s", article_id, exc) + relevance = {"score": 0, "reason": f"Scoring-Fehler: {exc}", "topics": []} + + score = relevance.get("score", 0) + reason = relevance.get("reason", "") + _store_relevance(article_id, relevance) + + stats.processed += 1 + + if score < settings.pipeline_relevance_warn: + # Reject + update_article_status( + article_id, + "error", + actor="pipeline", + note=f"Abgelehnt: Score {score}/100 — {reason}", + ) + stats.rejected += 1 + # Reload for summary (now has relevance in meta) + updated = get_article_by_id(article_id) + if updated: + stats.rejected_articles.append(updated) + + elif score < settings.pipeline_relevance_auto: + # Warning zone: inform user, don't auto-process + stats.warnings += 1 + try: + tg.notify_relevance_warning(article, score, reason) + except Exception as exc: + logger.warning("Telegram warning für #%d fehlgeschlagen: %s", article_id, exc) + + else: + # Auto-process: rewrite + WP draft + try: + # Reload article to get updated image_review + fresh = get_article_by_id(article_id) + if not fresh: + return + wp_post_id, wp_post_url = _do_rewrite_and_draft(fresh) + stats.drafts_created += 1 + + # Reserve publish slot + slot = reserve_publish_slot(article_id) + + # Reload for notification + final = get_article_by_id(article_id) + if final: + try: + tg.notify_new_draft(final, score=score, suggested_publish_at=slot) + except Exception as exc: + logger.warning("Telegram draft-Benachrichtigung für #%d fehlgeschlagen: %s", article_id, exc) + + except Exception as exc: + logger.error("Draft-Erstellung für #%d fehlgeschlagen: %s", article_id, exc) + update_article_status(article_id, "error", actor="pipeline", note=f"Draft-Fehler: {exc}") + raise + + +# --------------------------------------------------------------------------- +# Callback actions (called from telegram_bot._handle_callback) +# --------------------------------------------------------------------------- + +def rewrite_and_update_draft(article_id: int) -> None: + """Rewrite article and update the existing WP draft.""" + article = get_article_by_id(article_id) + if not article: + raise RuntimeError(f"Artikel #{article_id} nicht gefunden") + _auto_select_image(article) + fresh = get_article_by_id(article_id) + _do_rewrite_and_draft(fresh) + + +def discard_article(article_id: int) -> None: + """Discard a draft: delete WP post if exists, set article to error.""" + article = get_article_by_id(article_id) + if not article: + return + + wp_post_id = article.get("wp_post_id") + if wp_post_id: + try: + from .wordpress import delete_wp_post + delete_wp_post(int(wp_post_id)) + except Exception as exc: + logger.warning("WP Post #%d konnte nicht gelöscht werden: %s", wp_post_id, exc) + + update_article_status(article_id, "error", actor="telegram", note="Via Telegram verworfen") + + +def override_rejected_article(article_id: int) -> None: + """Force-process a previously rejected article.""" + from . import telegram_bot as tg + + article = get_article_by_id(article_id) + if not article: + raise RuntimeError(f"Artikel #{article_id} nicht gefunden") + + # Reset to new so processing is allowed + update_article_status(article_id, "new", actor="telegram", note="Manuell übernommen via Telegram") + + # Reload + fresh = get_article_by_id(article_id) + if not fresh: + return + + _auto_select_image(fresh) + fresh = get_article_by_id(article_id) + + # Get existing score or re-score + try: + meta = json.loads(fresh.get("meta_json") or "{}") + score = int((meta.get("relevance") or {}).get("score", 0)) + except Exception: + score = 0 + + wp_post_id, wp_post_url = _do_rewrite_and_draft(fresh) + slot = reserve_publish_slot(article_id) + + final = get_article_by_id(article_id) + if final: + tg.notify_new_draft(final, score=score, suggested_publish_at=slot) + + +# --------------------------------------------------------------------------- +# Status helpers (used by /status command) +# --------------------------------------------------------------------------- + +def get_recently_rejected(days: int = 3) -> list[dict[str, Any]]: + """Return articles rejected in the last N days.""" + from .db import get_conn + from .db import rows_to_dicts + cutoff = datetime.now(timezone.utc).isoformat()[:10] + with get_conn() as conn: + rows = conn.execute( + """ + SELECT id, title, meta_json, source_url, created_at + FROM articles + WHERE status = 'error' + AND json_extract(meta_json, '$.relevance.score') IS NOT NULL + AND date(updated_at) >= date('now', ?) + ORDER BY updated_at DESC + LIMIT 20 + """, + (f"-{days} days",), + ).fetchall() + return rows_to_dicts(rows) + + +def get_pipeline_status_text() -> str: + """Return a text summary of current pipeline state.""" + from .repositories import list_articles as _list + new_count = len(_list(limit=500, status_filter="new")) + approved_count = len(_list(limit=500, status_filter="approved")) + published_count = len(_list(limit=500, status_filter="published")) + error_count = len(_list(limit=500, status_filter="error")) + + return ( + f"📊 Pipeline-Status\n" + f"🆕 Neu / wartend: {new_count}\n" + f"✅ Draft / freigegeben: {approved_count}\n" + f"📢 Veröffentlicht: {published_count}\n" + f"🚫 Fehler / abgelehnt: {error_count}" + ) diff --git a/backend/app/rewrite.py b/backend/app/rewrite.py index 759fac9..6c1d37b 100644 --- a/backend/app/rewrite.py +++ b/backend/app/rewrite.py @@ -146,6 +146,47 @@ def generate_article_tags(article: dict[str, Any], rewritten_text: str | None = return [] +def score_article_relevance(article: dict[Any, Any]) -> dict[str, Any]: + """Score article relevance for VanLife/Camping/Outdoor blog (0-100). + + Returns {"score": int, "reason": str, "topics": list[str]}. + Raises RuntimeError on OpenAI failure. + """ + title = (article.get("title") or "").strip() + text = _sanitize_source_text(article.get("content_raw") or "") + if not text: + text = (article.get("summary") or "").strip() + + prompt = ( + "Bewerte die Relevanz des folgenden Artikels für einen deutschen VanLife-, Camping- und Outdoor-Blog. " + "Relevante Themen: Campingplätze, Stellplätze, Wohnmobil, Camper, Van, Roadtrip, " + "Outdoor-Ausrüstung, Wandern, Naturreisen, Reise-Tipps für Campende. " + "Nicht relevant: allgemeine Nachrichten, Politik, Wirtschaft, Sport (außer Outdoor), Unterhaltung.\n\n" + "Antworte NUR mit einem JSON-Objekt:\n" + '{"score": <0-100>, "reason": "", "topics": ["", ""]}\n\n' + f"Titel: {title}\n\n" + f"Text (Auszug):\n{text[:2000]}" + ) + raw = _openai_chat( + "Du bist ein Redakteur für einen VanLife- und Camping-Blog und bewertest Artikelrelevanz.", + prompt, + temperature=0.1, + ) + try: + match = re.search(r"\{[\s\S]*\}", raw) + if match: + parsed = json.loads(match.group(0)) + score = max(0, min(100, int(parsed.get("score", 0)))) + return { + "score": score, + "reason": str(parsed.get("reason", "")), + "topics": [str(t) for t in (parsed.get("topics") or [])], + } + except Exception: + pass + return {"score": 0, "reason": "Parsing-Fehler bei Relevanz-Score", "topics": []} + + def merge_generated_tags(meta_json: str | None, tags: list[str]) -> str: meta: dict[str, Any] = {} if meta_json: diff --git a/backend/app/scheduler.py b/backend/app/scheduler.py new file mode 100644 index 0000000..d4c6aaf --- /dev/null +++ b/backend/app/scheduler.py @@ -0,0 +1,139 @@ +"""Smart publishing scheduler. + +Calculates suggested publish slots for new WordPress drafts. +Rules: +- Maximum N drafts per day (configurable, default 2) +- Prefer slots spread across the week for steady traffic +- Preferred hours: configurable (default 09:00 and 14:00 CET) +""" +from __future__ import annotations + +from datetime import date, datetime, timedelta, timezone +from typing import Any + +from .config import get_settings +from .db import get_conn + + +# CET offset (UTC+1 winter / UTC+2 summer – we use a fixed +1 for simplicity) +_CET_OFFSET = timedelta(hours=1) + + +def _today_cet() -> date: + return (datetime.now(timezone.utc) + _CET_OFFSET).date() + + +def _preferred_hours() -> list[int]: + settings = get_settings() + try: + return [int(h.strip()) for h in settings.pipeline_publish_hours.split(",") if h.strip()] + except Exception: + return [9, 14] + + +def _count_scheduled_on_day(target_date: date) -> int: + """Count articles already scheduled for publication on a given date.""" + date_str = target_date.isoformat() + with get_conn() as conn: + row = conn.execute( + """ + SELECT COUNT(*) AS cnt + FROM articles + WHERE scheduled_publish_at >= ? AND scheduled_publish_at < ? + AND status NOT IN ('error') + """, + (date_str + "T00:00:00", date_str + "T23:59:59"), + ).fetchone() + return int(row["cnt"]) if row else 0 + + +def _next_free_hour(target_date: date) -> int | None: + """Return first preferred hour that is not yet used on target_date, or None if day is full.""" + settings = get_settings() + max_per_day = settings.pipeline_max_drafts_per_day + hours = _preferred_hours() + + date_str = target_date.isoformat() + with get_conn() as conn: + rows = conn.execute( + """ + SELECT scheduled_publish_at FROM articles + WHERE scheduled_publish_at >= ? AND scheduled_publish_at < ? + AND status NOT IN ('error') + """, + (date_str + "T00:00:00", date_str + "T23:59:59"), + ).fetchall() + + used_hours: set[int] = set() + for row in rows: + ts = row["scheduled_publish_at"] or "" + try: + used_hours.add(datetime.fromisoformat(ts).hour) + except Exception: + pass + + for h in hours: + if h not in used_hours: + return h + return None # day is full + + +def suggest_publish_slot(lookahead_days: int = 14) -> str: + """Return a suggested publish datetime string (ISO, CET) for the next free slot. + + Format: 'Mo, 24.03.2026 um 09:00 Uhr' + Also updates DB so consecutive calls return different slots. + """ + today = _today_cet() + weekday_names = ["Mo", "Di", "Mi", "Do", "Fr", "Sa", "So"] + + for offset in range(1, lookahead_days + 1): + candidate = today + timedelta(days=offset) + hour = _next_free_hour(candidate) + if hour is not None: + wd = weekday_names[candidate.weekday()] + return f"{wd}, {candidate.strftime('%d.%m.%Y')} um {hour:02d}:00 Uhr" + + # Fallback: just tomorrow morning + tomorrow = today + timedelta(days=1) + hours = _preferred_hours() + h = hours[0] if hours else 9 + wd = weekday_names[tomorrow.weekday()] + return f"{wd}, {tomorrow.strftime('%d.%m.%Y')} um {h:02d}:00 Uhr" + + +def reserve_publish_slot(article_id: int) -> str: + """Reserve a publish slot for an article and persist it in the DB. + + Returns the suggested publish datetime string. + """ + today = _today_cet() + lookahead_days = 14 + weekday_names = ["Mo", "Di", "Mi", "Do", "Fr", "Sa", "So"] + + for offset in range(1, lookahead_days + 1): + candidate = today + timedelta(days=offset) + hour = _next_free_hour(candidate) + if hour is not None: + # Reserve this slot by writing to the article + iso_ts = f"{candidate.isoformat()}T{hour:02d}:00:00" + with get_conn() as conn: + conn.execute( + "UPDATE articles SET scheduled_publish_at = ? WHERE id = ?", + (iso_ts, article_id), + ) + wd = weekday_names[candidate.weekday()] + return f"{wd}, {candidate.strftime('%d.%m.%Y')} um {hour:02d}:00 Uhr" + + # Fallback + tomorrow = today + timedelta(days=1) + hours = _preferred_hours() + h = hours[0] if hours else 9 + iso_ts = f"{tomorrow.isoformat()}T{h:02d}:00:00" + with get_conn() as conn: + conn.execute( + "UPDATE articles SET scheduled_publish_at = ? WHERE id = ?", + (iso_ts, article_id), + ) + wd = weekday_names[tomorrow.weekday()] + return f"{wd}, {tomorrow.strftime('%d.%m.%Y')} um {h:02d}:00 Uhr" diff --git a/backend/app/telegram_bot.py b/backend/app/telegram_bot.py new file mode 100644 index 0000000..6d88105 --- /dev/null +++ b/backend/app/telegram_bot.py @@ -0,0 +1,438 @@ +"""Telegram Bot integration for RSS-News pipeline notifications and controls.""" +from __future__ import annotations + +import json +import logging +from typing import Any +from urllib.error import URLError +from urllib.parse import urlencode +from urllib.request import Request, urlopen + +from .config import get_settings + +logger = logging.getLogger(__name__) + +_BASE = "https://api.telegram.org/bot{token}/{method}" + + +# --------------------------------------------------------------------------- +# Low-level API helpers +# --------------------------------------------------------------------------- + +def _call(method: str, payload: dict[str, Any]) -> dict[str, Any]: + settings = get_settings() + token = settings.telegram_bot_token + if not token: + raise RuntimeError("TELEGRAM_BOT_TOKEN nicht konfiguriert") + url = _BASE.format(token=token, method=method) + data = json.dumps(payload).encode("utf-8") + req = Request( + url=url, + data=data, + method="POST", + headers={"Content-Type": "application/json", "Accept": "application/json"}, + ) + try: + with urlopen(req, timeout=15) as resp: + raw = resp.read().decode("utf-8", errors="replace") + return json.loads(raw) + except URLError as exc: + logger.error("Telegram API Fehler (%s): %s", method, exc) + raise RuntimeError(f"Telegram API Fehler: {exc}") from exc + + +def _chat_id() -> str: + settings = get_settings() + cid = settings.telegram_chat_id + if not cid: + raise RuntimeError("TELEGRAM_CHAT_ID nicht konfiguriert") + return cid + + +def _inline_keyboard(buttons: list[list[dict[str, str]]]) -> dict: + return {"inline_keyboard": buttons} + + +# --------------------------------------------------------------------------- +# Public send functions +# --------------------------------------------------------------------------- + +def send_message(text: str, reply_markup: dict | None = None, parse_mode: str = "HTML") -> dict: + payload: dict[str, Any] = { + "chat_id": _chat_id(), + "text": text, + "parse_mode": parse_mode, + "disable_web_page_preview": False, + } + if reply_markup: + payload["reply_markup"] = reply_markup + return _call("sendMessage", payload) + + +def send_photo_message( + photo_url: str, + caption: str, + reply_markup: dict | None = None, + parse_mode: str = "HTML", +) -> dict: + payload: dict[str, Any] = { + "chat_id": _chat_id(), + "photo": photo_url, + "caption": caption, + "parse_mode": parse_mode, + } + if reply_markup: + payload["reply_markup"] = reply_markup + try: + return _call("sendPhoto", payload) + except Exception: + # Fall back to text message if photo fails (e.g. image URL no longer valid) + return send_message(caption, reply_markup=reply_markup, parse_mode=parse_mode) + + +def answer_callback_query(callback_query_id: str, text: str = "") -> None: + try: + _call("answerCallbackQuery", {"callback_query_id": callback_query_id, "text": text}) + except Exception as exc: + logger.warning("answerCallbackQuery fehlgeschlagen: %s", exc) + + +def edit_message_reply_markup(chat_id: str, message_id: int, reply_markup: dict | None = None) -> None: + payload: dict[str, Any] = {"chat_id": chat_id, "message_id": message_id} + if reply_markup: + payload["reply_markup"] = reply_markup + else: + payload["reply_markup"] = {"inline_keyboard": []} + try: + _call("editMessageReplyMarkup", payload) + except Exception as exc: + logger.warning("editMessageReplyMarkup fehlgeschlagen: %s", exc) + + +def setup_webhook(webhook_url: str) -> dict: + settings = get_settings() + payload: dict[str, Any] = {"url": webhook_url, "allowed_updates": ["message", "callback_query"]} + if settings.telegram_webhook_secret: + payload["secret_token"] = settings.telegram_webhook_secret + return _call("setWebhook", payload) + + +def delete_webhook() -> dict: + return _call("deleteWebhook", {}) + + +# --------------------------------------------------------------------------- +# Notification helpers +# --------------------------------------------------------------------------- + +def _format_tags(meta_json: str | None) -> str: + if not meta_json: + return "" + try: + meta = json.loads(meta_json) + tags = meta.get("generated_tags") or [] + if tags: + return " ".join(f"#{t.replace(' ', '_')}" for t in tags[:6]) + except Exception: + pass + return "" + + +def _score_emoji(score: int) -> str: + if score >= 85: + return "🟢" + if score >= 70: + return "🟡" + return "🔴" + + +def notify_new_draft( + article: dict[str, Any], + score: int, + suggested_publish_at: str | None = None, +) -> None: + """Send Telegram notification for a newly created WP draft.""" + title = (article.get("title") or "Ohne Titel").strip() + wp_url = article.get("wp_post_url") or "" + tags_str = _format_tags(article.get("meta_json")) + art_id = article.get("id") + + score_line = f"{_score_emoji(score)} Relevanz-Score: {score}/100" + publish_line = f"📅 Vorgeschlagene Veröffentlichung: {suggested_publish_at}" if suggested_publish_at else "" + link_line = f'🔗 Draft in WordPress öffnen' if wp_url else "" + tags_line = f"🏷 {tags_str}" if tags_str else "" + + text_parts = [ + f"✅ Neuer Draft erstellt", + f"📰 {title}", + score_line, + ] + if publish_line: + text_parts.append(publish_line) + if tags_line: + text_parts.append(tags_line) + if link_line: + text_parts.append(link_line) + + text = "\n".join(text_parts) + + keyboard = _inline_keyboard([ + [ + {"text": "✏️ Neu schreiben", "callback_data": f"rewrite:{art_id}"}, + {"text": "❌ Verwerfen", "callback_data": f"discard:{art_id}"}, + ] + ]) + + # Try with image first + meta = {} + try: + meta = json.loads(article.get("meta_json") or "{}") + except Exception: + pass + image_url = None + image_review = meta.get("image_review") or {} + if isinstance(image_review, dict): + image_url = image_review.get("selected_url") + if not image_url: + image_sel = (meta.get("extraction") or {}).get("image_selection") or {} + image_url = image_sel.get("primary") + + if image_url: + send_photo_message(image_url, caption=text, reply_markup=keyboard) + else: + send_message(text, reply_markup=keyboard) + + +def notify_relevance_warning(article: dict[str, Any], score: int, reason: str) -> None: + """Send Telegram warning for borderline articles (score between warn and auto thresholds).""" + title = (article.get("title") or "Ohne Titel").strip() + art_id = article.get("id") + source_url = article.get("source_url") or "" + + text = ( + f"⚠️ Artikel mit niedrigem Relevanz-Score\n" + f"📰 {title}\n" + f"{_score_emoji(score)} Score: {score}/100\n" + f"💬 {reason}\n" + f'🔗 Originalartikel' + ) + keyboard = _inline_keyboard([ + [ + {"text": "➕ Trotzdem verarbeiten", "callback_data": f"override:{art_id}"}, + {"text": "❌ Ablehnen", "callback_data": f"reject:{art_id}"}, + ] + ]) + send_message(text, reply_markup=keyboard) + + +def notify_rejected_summary(articles: list[dict[str, Any]]) -> None: + """Send summary of rejected articles for this pipeline run.""" + if not articles: + return + lines = [f"🚫 {len(articles)} Artikel abgelehnt (Score < {get_settings().pipeline_relevance_warn})\n"] + for art in articles[:10]: + title = (art.get("title") or "Ohne Titel")[:60] + score = _get_relevance_score(art) + reason = _get_rejection_reason(art) + art_id = art.get("id") + lines.append(f"• {title} (Score: {score}) — {reason}") + if len(articles) > 10: + lines.append(f"... und {len(articles) - 10} weitere") + + text = "\n".join(lines) + # Build override buttons for first 5 + rows = [] + for art in articles[:5]: + art_id = art.get("id") + title = (art.get("title") or "")[:25] + rows.append([{"text": f"➕ {title}…", "callback_data": f"override:{art_id}"}]) + + keyboard = _inline_keyboard(rows) if rows else None + send_message(text, reply_markup=keyboard) + + +def notify_error(message: str) -> None: + """Send error alert to Telegram.""" + try: + send_message(f"🔴 Fehler im RSS-Pipeline\n{message}") + except Exception as exc: + logger.error("Telegram Fehler-Benachrichtigung fehlgeschlagen: %s", exc) + + +def notify_pipeline_started(trigger: str = "auto") -> None: + icon = "🤖" if trigger == "auto" else "👤" + try: + send_message(f"{icon} Pipeline gestartet (Auslöser: {trigger})") + except Exception: + pass + + +def notify_pipeline_done(stats: dict[str, Any]) -> None: + ingested = stats.get("ingested", 0) + processed = stats.get("processed", 0) + drafts = stats.get("drafts_created", 0) + rejected = stats.get("rejected", 0) + warnings = stats.get("warnings", 0) + errors = stats.get("errors", 0) + + lines = [ + "📊 Pipeline abgeschlossen", + f"📥 Neue Artikel importiert: {ingested}", + f"⚙️ Verarbeitet: {processed}", + f"📝 Drafts erstellt: {drafts}", + ] + if rejected: + lines.append(f"🚫 Abgelehnt: {rejected}") + if warnings: + lines.append(f"⚠️ Warnungen: {warnings}") + if errors: + lines.append(f"🔴 Fehler: {errors}") + + try: + send_message("\n".join(lines)) + except Exception: + pass + + +# --------------------------------------------------------------------------- +# Helper to read relevance info from meta_json +# --------------------------------------------------------------------------- + +def _get_relevance_score(article: dict[str, Any]) -> int: + try: + meta = json.loads(article.get("meta_json") or "{}") + return int(meta.get("relevance", {}).get("score", 0)) + except Exception: + return 0 + + +def _get_rejection_reason(article: dict[str, Any]) -> str: + try: + meta = json.loads(article.get("meta_json") or "{}") + return str(meta.get("relevance", {}).get("reason", ""))[:80] + except Exception: + return "" + + +# --------------------------------------------------------------------------- +# Incoming update handler (called by webhook endpoint) +# --------------------------------------------------------------------------- + +def handle_update(update: dict[str, Any]) -> None: + """Process an incoming Telegram update.""" + # Import here to avoid circular imports + from . import pipeline as _pipeline + + if "callback_query" in update: + _handle_callback(update["callback_query"]) + elif "message" in update: + _handle_message(update["message"]) + + +def _handle_message(message: dict[str, Any]) -> None: + from . import pipeline as _pipeline + + text = (message.get("text") or "").strip() + if not text.startswith("/"): + return + + cmd = text.split()[0].lower().lstrip("/") + if "@" in cmd: + cmd = cmd.split("@")[0] + + if cmd == "run": + send_message("🤖 Pipeline wird manuell gestartet …") + try: + stats = _pipeline.run_auto_pipeline(trigger="manual") + notify_pipeline_done(stats) + except Exception as exc: + notify_error(f"/run fehlgeschlagen: {exc}") + + elif cmd == "rejected": + try: + articles = _pipeline.get_recently_rejected(days=3) + if not articles: + send_message("✅ Keine abgelehnten Artikel in den letzten 3 Tagen.") + else: + notify_rejected_summary(articles) + except Exception as exc: + notify_error(f"/rejected fehlgeschlagen: {exc}") + + elif cmd == "status": + try: + status_text = _pipeline.get_pipeline_status_text() + send_message(status_text) + except Exception as exc: + notify_error(f"/status fehlgeschlagen: {exc}") + + elif cmd == "help": + send_message( + "📋 Verfügbare Befehle\n" + "/run — Pipeline manuell starten\n" + "/rejected — Abgelehnte Artikel der letzten 3 Tage\n" + "/status — Pipeline-Status\n" + "/help — Diese Hilfe" + ) + + +def _handle_callback(callback_query: dict[str, Any]) -> None: + from . import pipeline as _pipeline + from .repositories import get_article_by_id, update_article_status + + query_id = callback_query.get("id", "") + data = (callback_query.get("data") or "").strip() + chat_id = str(callback_query.get("message", {}).get("chat", {}).get("id", "")) + message_id = int(callback_query.get("message", {}).get("message_id", 0)) + + if ":" not in data: + answer_callback_query(query_id, "Ungültige Aktion") + return + + action, _, raw_id = data.partition(":") + try: + article_id = int(raw_id) + except ValueError: + answer_callback_query(query_id, "Ungültige Artikel-ID") + return + + article = get_article_by_id(article_id) + if not article: + answer_callback_query(query_id, "Artikel nicht gefunden") + return + + if action == "rewrite": + answer_callback_query(query_id, "✏️ Artikel wird neu geschrieben …") + edit_message_reply_markup(chat_id, message_id) + try: + _pipeline.rewrite_and_update_draft(article_id) + updated = get_article_by_id(article_id) + if updated: + from .scheduler import suggest_publish_slot + slot = suggest_publish_slot() + notify_new_draft(updated, score=_get_relevance_score(updated), suggested_publish_at=slot) + except Exception as exc: + notify_error(f"Rewrite #{article_id} fehlgeschlagen: {exc}") + + elif action == "discard": + answer_callback_query(query_id, "❌ Artikel verworfen") + edit_message_reply_markup(chat_id, message_id) + try: + _pipeline.discard_article(article_id) + except Exception as exc: + notify_error(f"Verwerfen #{article_id} fehlgeschlagen: {exc}") + + elif action == "override": + answer_callback_query(query_id, "➕ Artikel wird verarbeitet …") + edit_message_reply_markup(chat_id, message_id) + try: + _pipeline.override_rejected_article(article_id) + except Exception as exc: + notify_error(f"Override #{article_id} fehlgeschlagen: {exc}") + + elif action == "reject": + answer_callback_query(query_id, "🚫 Abgelehnt") + edit_message_reply_markup(chat_id, message_id) + update_article_status(article_id, "error", actor="telegram", note="Manuell abgelehnt via Telegram") + + else: + answer_callback_query(query_id, "Unbekannte Aktion") diff --git a/backend/app/wordpress.py b/backend/app/wordpress.py index 150bcd1..704d428 100644 --- a/backend/app/wordpress.py +++ b/backend/app/wordpress.py @@ -373,3 +373,18 @@ def publish_article_draft(article: dict[str, Any]) -> tuple[int, str | None]: def selected_image_exists(article: dict[str, Any]) -> bool: return _selected_image_url_from_meta(article.get("meta_json")) is not None + + +def delete_wp_post(wp_post_id: int) -> None: + """Permanently delete a WordPress post (moves to trash, then deletes).""" + settings = get_settings() + if not settings.wordpress_base_url or not settings.wordpress_username or not settings.wordpress_app_password: + raise RuntimeError("WordPress Konfiguration fehlt") + auth = _auth_header(settings.wordpress_username, settings.wordpress_app_password) + # force=true skips trash + _wp_request( + base_url=settings.wordpress_base_url, + auth_header=auth, + method="DELETE", + endpoint=f"posts/{wp_post_id}?force=true", + ) diff --git a/docs/AUTOMATION.md b/docs/AUTOMATION.md new file mode 100644 index 0000000..8008857 --- /dev/null +++ b/docs/AUTOMATION.md @@ -0,0 +1,190 @@ +# Automatischer Pipeline-Betrieb + +## Überblick + +Das System läuft vollautomatisch und benötigt nur noch gelegentliche Telegram-Interaktion. + +``` +N8N (2× täglich, 08:00 + 16:00 Uhr) + └─► POST /api/n8n/pipeline (X-API-Key Header) + ├── RSS Ingestion (alle aktivierten Feeds) + ├── Relevanz-Score per GPT (0–100) + │ ├── Score ≥ 80 → Rewrite + WP-Draft + Telegram + │ ├── Score 60–79 → Telegram-Warnung + manueller Override möglich + │ └── Score < 60 → Abgelehnt + tägliche Telegram-Liste + └── Pipeline-Zusammenfassung via Telegram +``` + +--- + +## Einrichtung + +### 1. Umgebungsvariablen setzen + +Kopiere `backend/.env.example` nach `backend/.env` und fülle alle Felder aus: + +```bash +cp backend/.env.example backend/.env +nano backend/.env +``` + +Wichtige Variablen: + +| Variable | Beschreibung | +|----------|-------------| +| `TELEGRAM_BOT_TOKEN` | Bot-Token von @BotFather | +| `TELEGRAM_CHAT_ID` | Deine persönliche Chat-ID | +| `TELEGRAM_WEBHOOK_SECRET` | Zufälliger String (≥ 20 Zeichen) | +| `N8N_API_KEY` | Starker zufälliger API-Key | +| `OPENAI_API_KEY` | OpenAI API-Key | +| `WP_BASE_URL` | WordPress-URL | +| `WP_USERNAME` | WordPress-Benutzername | +| `WP_PASSWORD` | WordPress App-Passwort | + +### 2. Telegram-Webhook registrieren + +Nach dem Deployment einmalig aufrufen: + +```bash +curl -X POST https://news.vanityontour.de/api/telegram/setup-webhook \ + -H "Cookie: rss_news_session=" +``` + +Oder über die Admin-UI: Settings → Telegram Webhook einrichten. + +### 3. N8N Workflow einrichten + +In N8N einen neuen Workflow erstellen: + +**Trigger:** Cron +- Zeitplan 1: `0 8 * * *` (täglich 08:00) +- Zeitplan 2: `0 16 * * *` (täglich 16:00) + +**Aktion:** HTTP Request +- Method: `POST` +- URL: `https://news.vanityontour.de/api/n8n/pipeline` +- Header: `X-API-Key: ` + +**Fehlerbehandlung:** Bei HTTP-Fehler → E-Mail/Telegram-Alert + +--- + +## Telegram-Befehle + +| Befehl | Funktion | +|--------|----------| +| `/run` | Pipeline manuell starten | +| `/rejected` | Abgelehnte Artikel der letzten 3 Tage anzeigen | +| `/status` | Aktuellen Pipeline-Status | +| `/help` | Alle Befehle anzeigen | + +--- + +## Telegram-Benachrichtigungen + +### Neuer Draft erstellt +Wenn ein Artikel erfolgreich verarbeitet wurde: + +``` +✅ Neuer Draft erstellt +📰 [Artikel-Titel] +🟢 Relevanz-Score: 87/100 +📅 Vorgeschlagene Veröffentlichung: Mo, 24.03.2026 um 09:00 Uhr +🏷 #VanLife #Camping #Wohnmobil +🔗 Draft in WordPress öffnen + + [✏️ Neu schreiben] [❌ Verwerfen] +``` + +### Relevanz-Warnung (Score 60–79) +``` +⚠️ Artikel mit niedrigem Relevanz-Score +📰 [Artikel-Titel] +🟡 Score: 72/100 +💬 Artikel behandelt hauptsächlich... +🔗 Originalartikel + + [➕ Trotzdem verarbeiten] [❌ Ablehnen] +``` + +### Abgelehnte Artikel (Ende jedes Runs) +Liste aller abgelehnten Artikel mit Override-Buttons für jeden einzelnen. + +--- + +## Relevanz-Score + +Der GPT-basierte Score bewertet die Themenrelevanz für den VanLife/Camping-Blog: + +| Score | Aktion | +|-------|--------| +| 80–100 | Automatisch verarbeiten | +| 60–79 | Telegram-Warnung, manueller Override | +| 0–59 | Automatisch abgelehnt | + +Themen die hoch scored werden: Campingplätze, Stellplätze, Wohnmobile, Van-Ausbau, +Outdoor-Equipment, Wandern, Naturreisen, Roadtrips, Camping-Tipps. + +Schwellwerte sind in `.env` konfigurierbar: +``` +PIPELINE_RELEVANCE_AUTO=80 +PIPELINE_RELEVANCE_WARN=60 +``` + +--- + +## Veröffentlichungsplan + +- Maximal **2 Beiträge pro Tag** +- Bevorzugte Zeiten: **09:00 und 14:00 Uhr** (CET) +- Gleichmäßig über die Woche verteilt +- Der Vorschlag erscheint in der Telegram-Nachricht +- Manuell in WordPress setzen oder über WP Scheduling-Plugin automatisieren + +Einstellbar via: +``` +PIPELINE_MAX_DRAFTS_PER_DAY=2 +PIPELINE_PUBLISH_HOURS=9,14 +``` + +--- + +## API-Endpunkte (N8N / extern) + +Alle externen Endpunkte benötigen den Header `X-API-Key: `. + +| Methode | Endpunkt | Funktion | +|---------|----------|----------| +| `POST` | `/api/n8n/pipeline` | Komplette Pipeline starten | +| `POST` | `/api/n8n/ingest` | Nur RSS-Import (ohne Rewrite) | + +--- + +## Deployment (Hetzner via GitHub) + +Das Deployment läuft automatisch über GitHub Actions beim Push auf `main`: + +1. GitHub Action führt Tests aus +2. Bei Erfolg: SSH-Deploy auf Hetzner +3. `pip install -r requirements.txt` +4. Systemd-Dienst `rss-app` neu starten + +Workflow-Dateien: `.github/workflows/test.yml` und `.github/workflows/deploy.yml` + +--- + +## Troubleshooting + +**Pipeline läuft, aber keine Telegram-Nachrichten:** +- `TELEGRAM_BOT_TOKEN` und `TELEGRAM_CHAT_ID` prüfen +- Webhook-Status prüfen: `GET https://api.telegram.org/bot/getWebhookInfo` + +**N8N bekommt 401:** +- `N8N_API_KEY` in `.env` und N8N-Workflow-Header müssen übereinstimmen + +**Alle Artikel werden abgelehnt:** +- `PIPELINE_RELEVANCE_WARN` temporär auf 40 senken zum Testen +- Über `/rejected` + Override-Button manuell testen + +**Artikel werden doppelt importiert:** +- Deduplication läuft über `source_url` (eindeutig). Bereits verarbeitete Artikel werden nie erneut als Draft angelegt.