diff --git a/backend/app/admin_ui.py b/backend/app/admin_ui.py index bc6d9d9..c401ad1 100644 --- a/backend/app/admin_ui.py +++ b/backend/app/admin_ui.py @@ -23,6 +23,7 @@ from .repositories import ( list_feeds, list_runs, list_sources, + set_article_legal_review, update_article_status, ) @@ -104,22 +105,22 @@ def _legal_checklist(article: dict, feed: dict | None) -> list[dict[str, str]]: checks.append( { "label": "Bilder extrahiert", - "status": "ok" if extraction.get("images") else "missing", + "status": "ok" if article.get("image_urls_json") else "missing", "value": str(len(extraction.get("images", []))) if isinstance(extraction.get("images"), list) else "0", } ) checks.append( { "label": "Pressekontakt", - "status": "ok" if extraction.get("press_contact") else "missing", - "value": extraction.get("press_contact") or "-", + "status": "ok" if article.get("press_contact") else "missing", + "value": article.get("press_contact") or extraction.get("press_contact") or "-", } ) checks.append( { "label": "Lizenz/Terms", - "status": "ok" if attribution.get("source_license_name") and attribution.get("source_terms_url") else "missing", - "value": f"{attribution.get('source_license_name') or '-'} | {attribution.get('source_terms_url') or '-'}", + "status": "ok" if article.get("source_license_name_snapshot") and article.get("source_terms_url_snapshot") else "missing", + "value": f"{article.get('source_license_name_snapshot') or attribution.get('source_license_name') or '-'} | {article.get('source_terms_url_snapshot') or attribution.get('source_terms_url') or '-'}", } ) checks.append( @@ -129,6 +130,13 @@ def _legal_checklist(article: dict, feed: dict | None) -> list[dict[str, str]]: "value": feed.get("source_risk_level") if feed else "-", } ) + checks.append( + { + "label": "Manuelle Rechtsfreigabe", + "status": "ok" if int(article.get("legal_checked", 0)) == 1 else "missing", + "value": article.get("legal_checked_at") or "-", + } + ) return checks @@ -193,9 +201,20 @@ def admin_dashboard(request: Request): for article in articles: meta = _parse_meta_json(article.get("meta_json")) extraction = meta.get("extraction") if isinstance(meta.get("extraction"), dict) else {} + images = [] + if article.get("image_urls_json"): + try: + parsed_images = json.loads(article["image_urls_json"]) + if isinstance(parsed_images, list): + images = [str(item) for item in parsed_images if item] + except Exception: + images = [] + if not images and isinstance(extraction.get("images"), list): + images = extraction.get("images") article["meta"] = meta - article["extracted_images"] = extraction.get("images") if isinstance(extraction.get("images"), list) else [] - article["press_contact"] = extraction.get("press_contact") if isinstance(extraction.get("press_contact"), str) else None + article["extracted_images"] = images + if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str): + article["press_contact"] = extraction.get("press_contact") article["extraction_error"] = extraction.get("extraction_error") if isinstance(extraction.get("extraction_error"), str) else None return templates.TemplateResponse( @@ -232,6 +251,15 @@ def admin_article_detail(request: Request, article_id: int): meta = _parse_meta_json(article.get("meta_json")) article["meta"] = meta extraction = meta.get("extraction") if isinstance(meta.get("extraction"), dict) else {} + if article.get("image_urls_json"): + try: + parsed_images = json.loads(article["image_urls_json"]) + if isinstance(parsed_images, list): + extraction["images"] = [str(item) for item in parsed_images if item] + except Exception: + pass + if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str): + article["press_contact"] = extraction.get("press_contact") article["extraction"] = extraction feed = get_feed_by_id(int(article["feed_id"])) if article.get("feed_id") else None checklist = _legal_checklist(article, feed) @@ -251,6 +279,19 @@ def admin_article_detail(request: Request, article_id: int): ) +@router.post("/admin/articles/{article_id}/legal-review") +def admin_article_legal_review(request: Request, article_id: int, approved: str = Form("0"), note: str = Form("")): + user = _admin_user(request) + if not user: + return RedirectResponse(url="/admin/login", status_code=303) + + is_approved = approved == "1" + ok = set_article_legal_review(article_id, approved=is_approved, note=note or None, actor=user) + if not ok: + return _dashboard_redirect(msg=f"Artikel #{article_id} nicht gefunden", msg_type="error") + return RedirectResponse(url=f"/admin/articles/{article_id}", status_code=303) + + @router.post("/admin/sources/create") def admin_create_source( request: Request, @@ -344,6 +385,8 @@ def admin_transition_article(request: Request, article_id: int, target_status: s if article: current = article.get("status") if target_status in ALLOWED_TRANSITIONS.get(current, ()): + if target_status == "published" and int(article.get("legal_checked", 0)) != 1: + return _dashboard_redirect(msg=f"Publish blockiert fuer Artikel #{article_id}: Rechtsfreigabe fehlt", msg_type="error") update_article_status(article_id, target_status, actor=user, note=note or None) return _dashboard_redirect(msg=f"Artikel #{article_id}: {current} -> {target_status}") return _dashboard_redirect(msg=f"Ungueltiger Statuswechsel fuer Artikel #{article_id}", msg_type="error") diff --git a/backend/app/db.py b/backend/app/db.py index c914044..27bbc10 100644 --- a/backend/app/db.py +++ b/backend/app/db.py @@ -81,6 +81,14 @@ def init_db() -> None: summary TEXT, content_raw TEXT, content_rewritten TEXT, + image_urls_json TEXT, + press_contact TEXT, + source_name_snapshot TEXT, + source_terms_url_snapshot TEXT, + source_license_name_snapshot TEXT, + legal_checked INTEGER NOT NULL DEFAULT 0, + legal_checked_at TEXT, + legal_note TEXT, word_count INTEGER DEFAULT 0, status TEXT NOT NULL DEFAULT 'new' CHECK (status IN ('new', 'rewrite', 'review', 'approved', 'published', 'error')), meta_json TEXT, @@ -130,8 +138,20 @@ def init_db() -> None: existing_columns = { row["name"] for row in conn.execute("PRAGMA table_info(articles)").fetchall() } - if "source_hash" not in existing_columns: - conn.execute("ALTER TABLE articles ADD COLUMN source_hash TEXT") + migration_columns = { + "source_hash": "ALTER TABLE articles ADD COLUMN source_hash TEXT", + "image_urls_json": "ALTER TABLE articles ADD COLUMN image_urls_json TEXT", + "press_contact": "ALTER TABLE articles ADD COLUMN press_contact TEXT", + "source_name_snapshot": "ALTER TABLE articles ADD COLUMN source_name_snapshot TEXT", + "source_terms_url_snapshot": "ALTER TABLE articles ADD COLUMN source_terms_url_snapshot TEXT", + "source_license_name_snapshot": "ALTER TABLE articles ADD COLUMN source_license_name_snapshot TEXT", + "legal_checked": "ALTER TABLE articles ADD COLUMN legal_checked INTEGER NOT NULL DEFAULT 0", + "legal_checked_at": "ALTER TABLE articles ADD COLUMN legal_checked_at TEXT", + "legal_note": "ALTER TABLE articles ADD COLUMN legal_note TEXT", + } + for column, ddl in migration_columns.items(): + if column not in existing_columns: + conn.execute(ddl) def rows_to_dicts(rows: list[sqlite3.Row]) -> list[dict[str, Any]]: diff --git a/backend/app/ingestion.py b/backend/app/ingestion.py index 87e44c2..37703de 100644 --- a/backend/app/ingestion.py +++ b/backend/app/ingestion.py @@ -201,6 +201,14 @@ def run_ingestion(feed_id: int | None = None) -> IngestionStats: summary=final_summary, content_raw=final_content_raw, content_rewritten=None, + image_urls_json=json.dumps(extracted.images, ensure_ascii=False) if extracted.images else None, + press_contact=extracted.press_contact, + source_name_snapshot=feed.get("source_name"), + source_terms_url_snapshot=feed.get("source_terms_url"), + source_license_name_snapshot=feed.get("source_license_name"), + legal_checked=False, + legal_checked_at=None, + legal_note=None, word_count=len((final_content_raw or "").split()), status="new", meta_json=json.dumps({"attribution": attribution, "extraction": extraction_meta}, ensure_ascii=False), diff --git a/backend/app/main.py b/backend/app/main.py index 616dd77..4fe6458 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -28,6 +28,7 @@ from .repositories import ( list_feeds as repo_list_feeds, list_runs, list_sources as repo_list_sources, + set_article_legal_review, update_article_status, upsert_article as repo_upsert_article, ) @@ -96,6 +97,14 @@ class ArticleUpsertRequest(BaseModel): summary: str | None = None content_raw: str | None = None content_rewritten: str | None = None + image_urls_json: str | None = None + press_contact: str | None = None + source_name_snapshot: str | None = None + source_terms_url_snapshot: str | None = None + source_license_name_snapshot: str | None = None + legal_checked: bool = False + legal_checked_at: str | None = None + legal_note: str | None = None word_count: int = 0 status: str = Field(default="new", pattern="^(new|rewrite|review|approved|published|error)$") meta_json: str | None = None @@ -115,6 +124,11 @@ class ArticleReviewRequest(BaseModel): note: str | None = None +class ArticleLegalReviewRequest(BaseModel): + approved: bool + note: str | None = None + + ALLOWED_ARTICLE_TRANSITIONS: dict[str, set[str]] = { "new": {"review", "rewrite", "error"}, "rewrite": {"review", "error"}, @@ -330,6 +344,14 @@ def api_upsert_article(payload: ArticleUpsertRequest, username: str = Depends(re summary=payload.summary, content_raw=payload.content_raw, content_rewritten=payload.content_rewritten, + image_urls_json=payload.image_urls_json, + press_contact=payload.press_contact, + source_name_snapshot=payload.source_name_snapshot, + source_terms_url_snapshot=payload.source_terms_url_snapshot, + source_license_name_snapshot=payload.source_license_name_snapshot, + legal_checked=payload.legal_checked, + legal_checked_at=payload.legal_checked_at, + legal_note=payload.legal_note, word_count=payload.word_count, status=payload.status, meta_json=payload.meta_json, @@ -351,6 +373,11 @@ def api_article_transition(article_id: int, payload: ArticleTransitionRequest, u status_code=status.HTTP_400_BAD_REQUEST, detail=f"Ungueltiger Statuswechsel: {current_status} -> {payload.target_status}", ) + if payload.target_status == "published" and int(article.get("legal_checked", 0)) != 1: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Publish gesperrt: Rechtscheck wurde noch nicht freigegeben", + ) updated = update_article_status(article_id, payload.target_status, actor=username, note=payload.note) if not updated: @@ -358,6 +385,22 @@ def api_article_transition(article_id: int, payload: ArticleTransitionRequest, u return {"ok": True, "id": article_id, "from_status": current_status, "to_status": payload.target_status} +@app.post("/api/articles/{article_id}/legal-review") +def api_article_legal_review(article_id: int, payload: ArticleLegalReviewRequest, username: str = Depends(require_auth)) -> dict: + article = get_article_by_id(article_id) + if not article: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Artikel nicht gefunden") + + updated = set_article_legal_review(article_id, approved=payload.approved, note=payload.note, actor=username) + if not updated: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Artikel nicht gefunden") + return { + "ok": True, + "id": article_id, + "legal_checked": payload.approved, + } + + @app.post("/api/articles/{article_id}/review") def api_article_review(article_id: int, payload: ArticleReviewRequest, username: str = Depends(require_auth)) -> dict: article = get_article_by_id(article_id) diff --git a/backend/app/repositories.py b/backend/app/repositories.py index e170a20..9d9883c 100644 --- a/backend/app/repositories.py +++ b/backend/app/repositories.py @@ -48,6 +48,14 @@ class ArticleUpsert: summary: str | None content_raw: str | None content_rewritten: str | None + image_urls_json: str | None + press_contact: str | None + source_name_snapshot: str | None + source_terms_url_snapshot: str | None + source_license_name_snapshot: str | None + legal_checked: bool + legal_checked_at: str | None + legal_note: str | None word_count: int status: str meta_json: str | None @@ -224,7 +232,10 @@ def get_article_by_id(article_id: int) -> dict[str, Any] | None: row = conn.execute( """ SELECT a.id, a.feed_id, a.source_article_id, a.source_hash, a.title, a.source_url, a.canonical_url, a.published_at, a.author, - a.summary, a.content_raw, a.content_rewritten, a.word_count, a.status, a.meta_json, a.created_at, a.updated_at + a.summary, a.content_raw, a.content_rewritten, a.image_urls_json, a.press_contact, + a.source_name_snapshot, a.source_terms_url_snapshot, a.source_license_name_snapshot, + a.legal_checked, a.legal_checked_at, a.legal_note, + a.word_count, a.status, a.meta_json, a.created_at, a.updated_at FROM articles a WHERE a.id = ? """, @@ -281,6 +292,31 @@ def update_article_status( return True +def set_article_legal_review(article_id: int, approved: bool, note: str | None, actor: str | None = None) -> bool: + article = get_article_by_id(article_id) + if not article: + return False + + event = { + "timestamp": datetime.now(timezone.utc).isoformat(), + "event": "legal_review", + "approved": approved, + "actor": actor or "system", + "note": note, + } + merged_meta = _merge_review_event(article.get("meta_json"), event) + with get_conn() as conn: + conn.execute( + """ + UPDATE articles + SET legal_checked = ?, legal_checked_at = datetime('now'), legal_note = ?, meta_json = ? + WHERE id = ? + """, + (1 if approved else 0, note, merged_meta, article_id), + ) + return True + + def _resolve_existing_article_id(payload: ArticleUpsert) -> int | None: with get_conn() as conn: # 1) strongest key: source_url @@ -320,8 +356,11 @@ def upsert_article(payload: ArticleUpsert) -> int: """ INSERT INTO articles ( feed_id, source_article_id, source_hash, title, source_url, canonical_url, published_at, author, - summary, content_raw, content_rewritten, word_count, status, meta_json - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + summary, content_raw, content_rewritten, image_urls_json, press_contact, + source_name_snapshot, source_terms_url_snapshot, source_license_name_snapshot, + legal_checked, legal_checked_at, legal_note, + word_count, status, meta_json + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, ( payload.feed_id, @@ -335,6 +374,14 @@ def upsert_article(payload: ArticleUpsert) -> int: payload.summary, payload.content_raw, payload.content_rewritten, + payload.image_urls_json, + payload.press_contact, + payload.source_name_snapshot, + payload.source_terms_url_snapshot, + payload.source_license_name_snapshot, + 1 if payload.legal_checked else 0, + payload.legal_checked_at, + payload.legal_note, payload.word_count, payload.status, payload.meta_json, @@ -356,6 +403,14 @@ def upsert_article(payload: ArticleUpsert) -> int: summary = ?, content_raw = ?, content_rewritten = ?, + image_urls_json = ?, + press_contact = ?, + source_name_snapshot = ?, + source_terms_url_snapshot = ?, + source_license_name_snapshot = ?, + legal_checked = ?, + legal_checked_at = ?, + legal_note = ?, word_count = ?, status = ?, meta_json = ? @@ -373,6 +428,14 @@ def upsert_article(payload: ArticleUpsert) -> int: payload.summary, payload.content_raw, payload.content_rewritten, + payload.image_urls_json, + payload.press_contact, + payload.source_name_snapshot, + payload.source_terms_url_snapshot, + payload.source_license_name_snapshot, + 1 if payload.legal_checked else 0, + payload.legal_checked_at, + payload.legal_note, payload.word_count, payload.status, payload.meta_json, @@ -392,7 +455,9 @@ def list_articles(limit: int = 100, status_filter: str | None = None) -> list[di rows = conn.execute( """ SELECT a.id, a.feed_id, a.source_article_id, a.source_hash, a.title, a.source_url, a.canonical_url, a.published_at, a.author, - a.summary, a.content_raw, a.word_count, a.status, a.meta_json, a.created_at, a.updated_at, f.name AS feed_name + a.summary, a.content_raw, a.word_count, a.status, a.meta_json, a.created_at, a.updated_at, f.name AS feed_name, + a.image_urls_json, a.press_contact, a.source_name_snapshot, a.source_terms_url_snapshot, + a.source_license_name_snapshot, a.legal_checked, a.legal_checked_at, a.legal_note FROM articles a LEFT JOIN feeds f ON f.id = a.feed_id WHERE a.status = ? @@ -405,7 +470,9 @@ def list_articles(limit: int = 100, status_filter: str | None = None) -> list[di rows = conn.execute( """ SELECT a.id, a.feed_id, a.source_article_id, a.source_hash, a.title, a.source_url, a.canonical_url, a.published_at, a.author, - a.summary, a.content_raw, a.word_count, a.status, a.meta_json, a.created_at, a.updated_at, f.name AS feed_name + a.summary, a.content_raw, a.word_count, a.status, a.meta_json, a.created_at, a.updated_at, f.name AS feed_name, + a.image_urls_json, a.press_contact, a.source_name_snapshot, a.source_terms_url_snapshot, + a.source_license_name_snapshot, a.legal_checked, a.legal_checked_at, a.legal_note FROM articles a LEFT JOIN feeds f ON f.id = a.feed_id ORDER BY a.id DESC diff --git a/backend/templates/admin_article_detail.html b/backend/templates/admin_article_detail.html index 098c273..62c7e70 100644 --- a/backend/templates/admin_article_detail.html +++ b/backend/templates/admin_article_detail.html @@ -26,6 +26,9 @@

Status: {{ article.status }}

Autor: {{ article.author or "-" }}

Feed: {{ feed.name if feed else "-" }}

+

Quelle Snapshot: {{ article.source_name_snapshot or "-" }}

+

Lizenz Snapshot: {{ article.source_license_name_snapshot or "-" }}

+

Terms Snapshot: {{ article.source_terms_url_snapshot or "-" }}

Quelle: {{ article.source_url }}

{% if article.canonical_url %}

Canonical: {{ article.canonical_url }}

@@ -69,9 +72,9 @@ {% endfor %} {% endif %} - {% if article.extraction.press_contact %} + {% if article.press_contact or article.extraction.press_contact %}

Pressekontakt

-
{{ article.extraction.press_contact }}
+
{{ article.press_contact or article.extraction.press_contact }}
{% endif %} {% if article.extraction.extraction_error %}

Extraktionsfehler: {{ article.extraction.extraction_error }}

@@ -83,8 +86,32 @@
{{ article.content_raw or "-" }}
+
+

Rechtsfreigabe

+

Freigabe: + {% if article.legal_checked %} + Freigegeben + {% else %} + Nicht freigegeben + {% endif %} +

+

Zeitpunkt: {{ article.legal_checked_at or "-" }}

+

Notiz: {{ article.legal_note or "-" }}

+
+ + + +
+
+

Status ändern

+ {% if not article.legal_checked %} +

Hinweis: `published` ist erst nach manueller Rechtsfreigabe erlaubt.

+ {% endif %}