diff --git a/backend/app/admin_ui.py b/backend/app/admin_ui.py index c401ad1..44cb7c5 100644 --- a/backend/app/admin_ui.py +++ b/backend/app/admin_ui.py @@ -12,6 +12,7 @@ from .auth import create_session_token, verify_credentials, verify_session_token from .config import get_settings from .ingestion import run_ingestion from .policy import evaluate_source_policy +from .relevance import article_age_days, article_relevance from .repositories import ( FeedCreate, SourceCreate, @@ -216,6 +217,8 @@ def admin_dashboard(request: Request): if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str): article["press_contact"] = extraction.get("press_contact") article["extraction_error"] = extraction.get("extraction_error") if isinstance(extraction.get("extraction_error"), str) else None + article["days_old"] = article_age_days(article.get("published_at")) + article["relevance"] = article_relevance(article.get("published_at")) return templates.TemplateResponse( request, @@ -261,6 +264,8 @@ def admin_article_detail(request: Request, article_id: int): if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str): article["press_contact"] = extraction.get("press_contact") article["extraction"] = extraction + article["days_old"] = article_age_days(article.get("published_at")) + article["relevance"] = article_relevance(article.get("published_at")) feed = get_feed_by_id(int(article["feed_id"])) if article.get("feed_id") else None checklist = _legal_checklist(article, feed) diff --git a/backend/app/main.py b/backend/app/main.py index 4fe6458..277630b 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1,7 +1,12 @@ from contextlib import asynccontextmanager +import csv +from datetime import datetime, timezone +import io +import json from pathlib import Path from fastapi import Depends, FastAPI, HTTPException, Request, Response, status +from fastapi.responses import JSONResponse from pydantic import BaseModel, Field 
# ===========================================================================
# backend/app/main.py -- article export endpoint
# (patch hunk @@ -321,6 +327,81 @@, inserted after api_list_articles).
#
# The route is declared above /api/articles/{article_id}; keep it there,
# because routes are matched in declaration order and "export" is not a
# valid integer article id.
# ===========================================================================
from .relevance import article_age_days, article_relevance

# Maximum number of articles fetched for a single export request.
_EXPORT_MAX_ROWS = 500

# Export columns in output order. Shared by the JSON items and the CSV header
# so the two formats cannot drift apart.
_EXPORT_FIELDS = (
    "id",
    "title",
    "status",
    "published_at",
    "days_old",
    "relevance",
    "author",
    "source_url",
    "canonical_url",
    "source_name_snapshot",
    "source_license_name_snapshot",
    "source_terms_url_snapshot",
    "press_contact",
    "image_urls_json",
    "legal_checked",
    "legal_checked_at",
    "legal_note",
)

# Leading characters that spreadsheet applications may evaluate as a formula.
_CSV_FORMULA_PREFIXES = ("=", "+", "-", "@", "\t", "\r")


def _export_row(article):
    """Build one export record (keys in ``_EXPORT_FIELDS`` order) from an article.

    ``article`` only needs a ``.get`` method; stored columns are copied as-is,
    while ``days_old``, ``relevance`` and ``legal_checked`` are derived.
    """
    published_at = article.get("published_at")
    row = {field: article.get(field) for field in _EXPORT_FIELDS}
    row["days_old"] = article_age_days(published_at)
    row["relevance"] = article_relevance(published_at)
    # ``.get(key, 0)`` still returns None when the key exists with a NULL
    # value, and int(None) would abort the whole export -- hence ``or 0``.
    row["legal_checked"] = bool(int(article.get("legal_checked") or 0))
    return row


def _csv_cell(value):
    """Return *value* made safe for a CSV cell opened in a spreadsheet.

    Article text originates from external feeds. A string starting with a
    formula trigger character is prefixed with an apostrophe so it is shown
    as text instead of being evaluated. Non-string values pass through.
    """
    if isinstance(value, str) and value.startswith(_CSV_FORMULA_PREFIXES):
        return "'" + value
    return value


@app.get("/api/articles/export")
def api_export_articles(
    format: str = "json",
    status_filter: str | None = None,
    username: str = Depends(require_auth),
):
    """Export up to ``_EXPORT_MAX_ROWS`` articles as JSON or as a CSV download.

    Args:
        format: ``"json"`` (default) or ``"csv"``; matched case-insensitively.
            The name shadows the builtin because it is the public query
            parameter of this endpoint.
        status_filter: Passed through unchanged to ``repo_list_articles``.
        username: Authenticated user, resolved by ``require_auth``.

    Returns:
        A ``JSONResponse`` with ``ok``, ``count``, ``generated_at``,
        ``status_filter``, ``items`` and ``requested_by``; or, for CSV, a
        ``Response`` with an attachment ``Content-Disposition`` header.

    Raises:
        HTTPException: 400 when ``format`` is neither ``json`` nor ``csv``
            (previously any unknown value silently produced JSON).
    """
    export_format = format.strip().lower()
    if export_format not in ("json", "csv"):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="format must be 'json' or 'csv'",
        )

    articles = repo_list_articles(limit=_EXPORT_MAX_ROWS, status_filter=status_filter)
    rows = [_export_row(article) for article in articles]

    if export_format == "csv":
        out = io.StringIO()
        writer = csv.DictWriter(out, fieldnames=_EXPORT_FIELDS)
        writer.writeheader()
        writer.writerows(
            {name: _csv_cell(value) for name, value in row.items()} for row in rows
        )
        return Response(
            content=out.getvalue(),
            media_type="text/csv; charset=utf-8",
            headers={"Content-Disposition": 'attachment; filename="articles_export.csv"'},
        )

    return JSONResponse(
        {
            "ok": True,
            "count": len(rows),
            # Only the JSON envelope carries a timestamp, so compute it here.
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "status_filter": status_filter,
            "items": rows,
            "requested_by": username,
        }
    )


# ===========================================================================
# backend/app/relevance.py -- new module (patch hunk @@ -0,0 +1,44 @@)
# Age and relevance labels derived from an article's publication timestamp.
# ===========================================================================
from datetime import datetime, timezone

# (inclusive upper bound in whole days, label), checked in ascending order.
# Anything older than the last bound is labelled "alt".
_RELEVANCE_BUCKETS = (
    (2, "hoch"),
    (7, "mittel"),
    (30, "niedrig"),
)


def _as_utc_aware(moment: datetime) -> datetime:
    """Return *moment* unchanged if timezone-aware, else tagged as UTC."""
    if moment.tzinfo is None:
        return moment.replace(tzinfo=timezone.utc)
    return moment


def _parse_iso_datetime(value: str | datetime | None) -> datetime | None:
    """Parse an ISO-8601 timestamp into a timezone-aware ``datetime``.

    Accepts a trailing ``Z``/``z`` as UTC and treats naive values as UTC.
    A ``datetime`` instance is accepted directly. Returns ``None`` for
    ``None``, blank or unparsable strings, and any other input type.
    """
    if isinstance(value, datetime):
        return _as_utc_aware(value)
    if not isinstance(value, str):
        # Covers None as well as unexpected types, which used to raise
        # AttributeError on .strip().
        return None
    raw = value.strip()
    if not raw:
        return None
    if raw[-1] in "Zz":
        # fromisoformat() only understands the "Z" suffix on newer Pythons.
        raw = raw[:-1] + "+00:00"
    try:
        parsed = datetime.fromisoformat(raw)
    except ValueError:
        return None
    return _as_utc_aware(parsed)


def article_age_days(published_at: str | None, now: datetime | None = None) -> int | None:
    """Return the article's age in whole days, or ``None`` if the date is unusable.

    Args:
        published_at: ISO-8601 publication timestamp.
        now: Reference instant; defaults to the current UTC time. A naive
            value is interpreted as UTC instead of raising ``TypeError``
            when subtracted from the aware publication time.

    Returns:
        Completed days between ``published_at`` and ``now``; ``0`` for
        publication dates in the future.
    """
    published = _parse_iso_datetime(published_at)
    if published is None:
        return None
    reference = _as_utc_aware(now) if now is not None else datetime.now(timezone.utc)
    # timedelta.days is negative exactly when the delta is negative, so
    # clamping at zero maps future dates to age 0.
    return max((reference - published).days, 0)


def article_relevance(published_at: str | None, now: datetime | None = None) -> str:
    """Map an article's age to a relevance label.

    Returns ``"unbekannt"`` when the age cannot be determined, ``"hoch"`` up
    to 2 days, ``"mittel"`` up to 7, ``"niedrig"`` up to 30, else ``"alt"``.
    """
    days = article_age_days(published_at, now=now)
    if days is None:
        return "unbekannt"
    for upper_bound, label in _RELEVANCE_BUCKETS:
        if days <= upper_bound:
            return label
    return "alt"


# Patch continues with backend/templates/admin_article_detail.html,
# hunk @@ -24,6 +24,9 @@ (template lines follow).

{{ article.title }}

Status: {{ article.status }}

+

Artikel-Datum: {{ article.published_at or "-" }}

+

Alter: {{ article.days_old if article.days_old is not none else "-" }} Tage

+

Relevanz: {{ article.relevance }}

Autor: {{ article.author or "-" }}

Feed: {{ feed.name if feed else "-" }}

Quelle Snapshot: {{ article.source_name_snapshot or "-" }}

diff --git a/backend/templates/admin_dashboard.html b/backend/templates/admin_dashboard.html index de0974a..27bcaf5 100644 --- a/backend/templates/admin_dashboard.html +++ b/backend/templates/admin_dashboard.html @@ -131,6 +131,8 @@ Reset + Export JSON + Export CSV @@ -143,6 +145,7 @@
{{ a.title }}
Autor: {{ a.author or "-" }}
+ Datum: {{ a.published_at or "-" }} | Alter: {{ a.days_old if a.days_old is not none else "-" }} Tage | Relevanz: {{ a.relevance }}
Original öffnen
# Preceding patch context (backend/templates/admin_dashboard.html):
#   Details anzeigen {% if a.canonical_url and a.canonical_url != a.source_url %}

# ===========================================================================
# backend/tests/test_api_auth.py -- test added to the existing TestApiAuth
# class (patch hunk @@ -72,6 +72,73 @@). The class's other members,
# including whatever provides ``self.client``, are outside this hunk.
# ===========================================================================
class TestApiAuth(unittest.TestCase):
    def test_articles_export_json_and_csv_contains_relevance(self) -> None:
        """Export must expose date, age and relevance columns in JSON and CSV."""
        # Local imports: this file's import block is not part of the hunk.
        import csv
        import io

        login = self.client.post("/auth/login", json={"username": "admin", "password": "secret"})
        self.assertEqual(login.status_code, 200)

        source = self.client.post(
            "/api/sources",
            json={
                "name": "Export Source",
                "base_url": "https://example.org",
                "terms_url": "https://example.org/terms",
                "license_name": "cc-by",
                "risk_level": "green",
                "is_enabled": True,
                "last_reviewed_at": "2026-02-18T00:00:00Z",
            },
        )
        self.assertEqual(source.status_code, 200)
        source_id = source.json()["id"]

        feed = self.client.post(
            "/api/feeds",
            json={
                "name": "Export Feed",
                "url": "https://example.org/feed.xml",
                "source_id": source_id,
                "is_enabled": True,
            },
        )
        self.assertEqual(feed.status_code, 200)
        feed_id = feed.json()["id"]

        article = self.client.post(
            "/api/articles/upsert",
            json={
                "feed_id": feed_id,
                "source_article_id": "exp-1",
                "source_hash": "exp-hash-1",
                "title": "Export Artikel",
                "source_url": "https://example.org/article/1",
                "canonical_url": "https://example.org/article/1",
                "published_at": "2026-02-18T00:00:00Z",
                "author": "Autor",
                "summary": "Kurz",
                "content_raw": "Langtext",
                "image_urls_json": "[\"https://example.org/img.jpg\"]",
                "press_contact": "Kontakt",
                "source_name_snapshot": "Export Source",
                "source_terms_url_snapshot": "https://example.org/terms",
                "source_license_name_snapshot": "cc-by",
                "status": "review",
            },
        )
        self.assertEqual(article.status_code, 200)

        export_json = self.client.get("/api/articles/export?format=json")
        self.assertEqual(export_json.status_code, 200)
        body = export_json.json()
        self.assertTrue(body.get("ok"))
        self.assertGreaterEqual(body.get("count", 0), 1)
        self.assertEqual(body["count"], len(body["items"]))

        # Look the article up by title: items[0] depends on repository
        # ordering and on what other tests have already inserted.
        exported = next(
            (item for item in body["items"] if item.get("title") == "Export Artikel"),
            None,
        )
        self.assertIsNotNone(exported)
        self.assertIn("published_at", exported)
        self.assertIn("days_old", exported)
        self.assertIn("relevance", exported)
        self.assertIn(exported["relevance"], {"hoch", "mittel", "niedrig", "alt", "unbekannt"})
        days_old = exported["days_old"]
        self.assertTrue(days_old is None or (isinstance(days_old, int) and days_old >= 0))

        export_csv = self.client.get("/api/articles/export?format=csv")
        self.assertEqual(export_csv.status_code, 200)
        self.assertIn("text/csv", export_csv.headers.get("content-type", ""))

        # Parse the payload instead of substring matching, so the columns are
        # verified as real header fields and the data row is checked too.
        reader = csv.DictReader(io.StringIO(export_csv.text))
        csv_rows = list(reader)
        for column in ("published_at", "days_old", "relevance"):
            self.assertIn(column, reader.fieldnames or [])
        self.assertTrue(any(row.get("title") == "Export Artikel" for row in csv_rows))


# ===========================================================================
# backend/tests/test_relevance.py -- new file (patch hunk @@ -0,0 +1,21 @@)
# ===========================================================================
from datetime import datetime, timezone
import unittest

from backend.app.relevance import article_age_days, article_relevance


class TestRelevance(unittest.TestCase):
    """Unit tests for the age and relevance helpers, using a fixed ``now``."""

    NOW = datetime(2026, 2, 18, 12, 0, 0, tzinfo=timezone.utc)

    def test_article_age_and_relevance(self) -> None:
        now = self.NOW
        self.assertEqual(article_age_days("2026-02-18T10:00:00Z", now=now), 0)
        self.assertEqual(article_relevance("2026-02-18T10:00:00Z", now=now), "hoch")

        self.assertEqual(article_age_days("2026-02-14T12:00:00Z", now=now), 4)
        self.assertEqual(article_relevance("2026-02-14T12:00:00Z", now=now), "mittel")

        self.assertEqual(article_relevance("2025-12-01T00:00:00Z", now=now), "alt")
        self.assertEqual(article_relevance(None, now=now), "unbekannt")

    def test_bucket_boundaries_are_inclusive(self) -> None:
        now = self.NOW
        cases = [
            ("2026-02-16T12:00:00Z", 2, "hoch"),
            ("2026-02-15T12:00:00Z", 3, "mittel"),
            ("2026-02-11T12:00:00Z", 7, "mittel"),
            ("2026-02-10T12:00:00Z", 8, "niedrig"),
            ("2026-01-19T12:00:00Z", 30, "niedrig"),
            ("2026-01-18T12:00:00Z", 31, "alt"),
        ]
        for published_at, expected_days, expected_label in cases:
            with self.subTest(published_at=published_at):
                self.assertEqual(article_age_days(published_at, now=now), expected_days)
                self.assertEqual(article_relevance(published_at, now=now), expected_label)

    def test_future_dates_count_as_zero_days(self) -> None:
        now = self.NOW
        self.assertEqual(article_age_days("2026-02-19T00:00:00Z", now=now), 0)
        self.assertEqual(article_relevance("2026-02-19T00:00:00Z", now=now), "hoch")

    def test_unusable_input_is_unknown(self) -> None:
        now = self.NOW
        for value in (None, "", "   ", "not-a-date"):
            with self.subTest(value=value):
                self.assertIsNone(article_age_days(value, now=now))
                self.assertEqual(article_relevance(value, now=now), "unbekannt")

    def test_naive_and_offset_timestamps(self) -> None:
        now = self.NOW
        # A timestamp without an offset is interpreted as UTC.
        self.assertEqual(article_age_days("2026-02-14T12:00:00", now=now), 4)
        # 13:00+02:00 is 11:00 UTC, i.e. one hour before ``now``.
        self.assertEqual(article_age_days("2026-02-18T13:00:00+02:00", now=now), 0)


if __name__ == "__main__":
    unittest.main()