feat(export): add csv/json article export with date relevance scoring

This commit is contained in:
Oliver 2026-02-18 10:04:38 +01:00
parent 5159a6e3b4
commit 6691db8051
7 changed files with 224 additions and 0 deletions

View file

@ -12,6 +12,7 @@ from .auth import create_session_token, verify_credentials, verify_session_token
from .config import get_settings
from .ingestion import run_ingestion
from .policy import evaluate_source_policy
from .relevance import article_age_days, article_relevance
from .repositories import (
FeedCreate,
SourceCreate,
@ -216,6 +217,8 @@ def admin_dashboard(request: Request):
if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str):
article["press_contact"] = extraction.get("press_contact")
article["extraction_error"] = extraction.get("extraction_error") if isinstance(extraction.get("extraction_error"), str) else None
article["days_old"] = article_age_days(article.get("published_at"))
article["relevance"] = article_relevance(article.get("published_at"))
return templates.TemplateResponse(
request,
@ -261,6 +264,8 @@ def admin_article_detail(request: Request, article_id: int):
if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str):
article["press_contact"] = extraction.get("press_contact")
article["extraction"] = extraction
article["days_old"] = article_age_days(article.get("published_at"))
article["relevance"] = article_relevance(article.get("published_at"))
feed = get_feed_by_id(int(article["feed_id"])) if article.get("feed_id") else None
checklist = _legal_checklist(article, feed)

View file

@ -1,7 +1,12 @@
from contextlib import asynccontextmanager
import csv
from datetime import datetime, timezone
import io
import json
from pathlib import Path
from fastapi import Depends, FastAPI, HTTPException, Request, Response, status
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
from fastapi.staticfiles import StaticFiles
@ -11,6 +16,7 @@ from .config import get_settings
from .db import init_db
from .ingestion import run_ingestion
from .policy import evaluate_source_policy, is_source_allowed
from .relevance import article_age_days, article_relevance
from .repositories import (
ArticleUpsert,
FeedCreate,
@ -321,6 +327,81 @@ def api_list_articles(limit: int = 100, status_filter: str | None = None, userna
return {"ok": True, "items": repo_list_articles(limit=limit, status_filter=status_filter), "requested_by": username}
# Column order of the article export. Shared by the row builder and the CSV
# header so the two cannot drift apart.
_ARTICLE_EXPORT_FIELDS = (
    "id",
    "title",
    "status",
    "published_at",
    "days_old",
    "relevance",
    "author",
    "source_url",
    "canonical_url",
    "source_name_snapshot",
    "source_license_name_snapshot",
    "source_terms_url_snapshot",
    "press_contact",
    "image_urls_json",
    "legal_checked",
    "legal_checked_at",
    "legal_note",
)


def _article_export_row(article: dict, now: datetime) -> dict:
    """Build one export row from an article record, keyed in export column order."""
    published_at = article.get("published_at")
    computed = {
        # One shared reference instant keeps age and relevance in agreement.
        "days_old": article_age_days(published_at, now=now),
        "relevance": article_relevance(published_at, now=now),
        # ``or 0`` also covers a key that is present but None, which int() rejects.
        "legal_checked": bool(int(article.get("legal_checked") or 0)),
    }
    return {
        name: computed[name] if name in computed else article.get(name)
        for name in _ARTICLE_EXPORT_FIELDS
    }


# Registered ahead of "/api/articles/{article_id}" so that the literal path
# segment "export" is not matched as an article id.
@app.get("/api/articles/export")
def api_export_articles(
    format: str = "json",
    status_filter: str | None = None,
    username: str = Depends(require_auth),
):
    """Export up to 500 articles, enriched with age and relevance, as JSON or CSV.

    Args:
        format: ``"csv"`` returns a CSV attachment; any other value returns JSON.
        status_filter: Optional status filter passed through to the repository.
        username: Authenticated user, echoed back as ``requested_by`` in JSON.

    Returns:
        A ``text/csv`` attachment named ``articles_export.csv`` or a JSON
        document with ``ok``, ``count``, ``generated_at``, ``status_filter``,
        ``items`` and ``requested_by``.
    """
    articles = repo_list_articles(limit=500, status_filter=status_filter)
    # Single timestamp for the whole export: every row's age/relevance and the
    # reported generation time refer to the same instant.
    now = datetime.now(timezone.utc)
    rows = [_article_export_row(article, now) for article in articles]

    if format == "csv":
        # NOTE(review): cell values are written verbatim. If exports are opened
        # in spreadsheet software, consider neutralising leading "=", "+", "-"
        # and "@" to guard against formula injection.
        out = io.StringIO()
        writer = csv.DictWriter(out, fieldnames=_ARTICLE_EXPORT_FIELDS)
        writer.writeheader()
        writer.writerows(rows)
        return Response(
            content=out.getvalue(),
            media_type="text/csv; charset=utf-8",
            headers={"Content-Disposition": 'attachment; filename="articles_export.csv"'},
        )

    return JSONResponse(
        {
            "ok": True,
            "count": len(rows),
            "generated_at": now.isoformat(),
            "status_filter": status_filter,
            "items": rows,
            "requested_by": username,
        }
    )
@app.get("/api/articles/{article_id}")
def api_get_article(article_id: int, username: str = Depends(require_auth)) -> dict:
article = get_article_by_id(article_id)

44
backend/app/relevance.py Normal file
View file

@ -0,0 +1,44 @@
from __future__ import annotations
from datetime import datetime, timezone
def _parse_iso_datetime(value: str | None) -> datetime | None:
    """Parse an ISO-8601 timestamp into a timezone-aware ``datetime``.

    Surrounding whitespace is ignored and a trailing ``Z`` is read as UTC.
    Timestamps without an offset are assumed to be UTC.

    Returns:
        The parsed datetime, or ``None`` when ``value`` is empty, blank, or
        not accepted by ``datetime.fromisoformat``.
    """
    text = value.strip() if value else ""
    if not text:
        return None
    if text.endswith("Z"):
        # Spell the UTC designator as an explicit offset before parsing.
        text = f"{text[:-1]}+00:00"
    try:
        moment = datetime.fromisoformat(text)
    except ValueError:
        return None
    return moment if moment.tzinfo is not None else moment.replace(tzinfo=timezone.utc)
def article_age_days(published_at: str | None, now: datetime | None = None) -> int | None:
    """Return the article's age in whole days, or ``None`` if the date is unusable.

    Args:
        published_at: ISO-8601 publication timestamp, parsed with
            ``_parse_iso_datetime``.
        now: Reference instant; defaults to the current UTC time. A naive
            value is interpreted as UTC.

    Returns:
        ``None`` when ``published_at`` is missing or cannot be parsed, ``0``
        for publication dates in the future, otherwise the number of complete
        24-hour periods between publication and the reference instant.
    """
    published = _parse_iso_datetime(published_at)
    if published is None:
        return None
    ref = datetime.now(timezone.utc) if now is None else now
    if ref.tzinfo is None:
        # The parsed timestamp is always timezone-aware; subtracting it from a
        # naive reference raises TypeError, so apply the same "naive means
        # UTC" rule the parser uses.
        ref = ref.replace(tzinfo=timezone.utc)
    elapsed = ref - published
    # A negative timedelta has a negative ``days`` component, so clamping at
    # zero maps every future publication date to an age of 0.
    return max(elapsed.days, 0)
def article_relevance(published_at: str | None, now: datetime | None = None) -> str:
    """Classify an article's relevance from its age in days.

    Returns ``"unbekannt"`` when the age cannot be determined, ``"hoch"`` up
    to 2 days, ``"mittel"`` up to 7 days, ``"niedrig"`` up to 30 days and
    ``"alt"`` for anything older.
    """
    age = article_age_days(published_at, now=now)
    if age is None:
        return "unbekannt"
    # Inclusive upper bounds in ascending order; the first match wins.
    for max_days, label in ((2, "hoch"), (7, "mittel"), (30, "niedrig")):
        if age <= max_days:
            return label
    return "alt"

View file

@ -24,6 +24,9 @@
<section class="card">
<h2>{{ article.title }}</h2>
<p><strong>Status:</strong> <span class="badge">{{ article.status }}</span></p>
<p><strong>Artikel-Datum:</strong> {{ article.published_at or "-" }}</p>
<p><strong>Alter:</strong> {{ article.days_old if article.days_old is not none else "-" }} Tage</p>
<p><strong>Relevanz:</strong> {{ article.relevance }}</p>
<p><strong>Autor:</strong> {{ article.author or "-" }}</p>
<p><strong>Feed:</strong> {{ feed.name if feed else "-" }}</p>
<p><strong>Quelle Snapshot:</strong> {{ article.source_name_snapshot or "-" }}</p>

View file

@ -131,6 +131,8 @@
</select>
<button type="submit" class="secondary">Filtern</button>
<a href="/admin/dashboard" class="linkbtn">Reset</a>
<a href="/api/articles/export?format=json{% if status_filter %}&status_filter={{ status_filter }}{% endif %}" class="linkbtn">Export JSON</a>
<a href="/api/articles/export?format=csv{% if status_filter %}&status_filter={{ status_filter }}{% endif %}" class="linkbtn">Export CSV</a>
</form>
<table>
<thead>
@ -143,6 +145,7 @@
<td>
<strong>{{ a.title }}</strong><br />
<span class="subtle">Autor: {{ a.author or "-" }}</span><br />
<span class="subtle">Datum: {{ a.published_at or "-" }} | Alter: {{ a.days_old if a.days_old is not none else "-" }} Tage | Relevanz: {{ a.relevance }}</span><br />
<a href="{{ a.source_url }}" target="_blank" rel="noopener">Original öffnen</a>
<br /><a href="/admin/articles/{{ a.id }}">Details anzeigen</a>
{% if a.canonical_url and a.canonical_url != a.source_url %}

View file

@ -72,6 +72,73 @@ class TestApiAuth(unittest.TestCase):
self.assertFalse(body["allowed"])
self.assertGreaterEqual(len(body["issues"]), 1)
def test_articles_export_json_and_csv_contains_relevance(self) -> None:
    """Create a source, feed and article, then check both export formats."""
    derived_columns = ("published_at", "days_old", "relevance")

    login = self.client.post("/auth/login", json={"username": "admin", "password": "secret"})
    self.assertEqual(login.status_code, 200)

    # Arrange: source -> feed -> article, each created through the public API.
    source_payload = {
        "name": "Export Source",
        "base_url": "https://example.org",
        "terms_url": "https://example.org/terms",
        "license_name": "cc-by",
        "risk_level": "green",
        "is_enabled": True,
        "last_reviewed_at": "2026-02-18T00:00:00Z",
    }
    source = self.client.post("/api/sources", json=source_payload)
    self.assertEqual(source.status_code, 200)

    feed_payload = {
        "name": "Export Feed",
        "url": "https://example.org/feed.xml",
        "source_id": source.json()["id"],
        "is_enabled": True,
    }
    feed = self.client.post("/api/feeds", json=feed_payload)
    self.assertEqual(feed.status_code, 200)

    article_payload = {
        "feed_id": feed.json()["id"],
        "source_article_id": "exp-1",
        "source_hash": "exp-hash-1",
        "title": "Export Artikel",
        "source_url": "https://example.org/article/1",
        "canonical_url": "https://example.org/article/1",
        "published_at": "2026-02-18T00:00:00Z",
        "author": "Autor",
        "summary": "Kurz",
        "content_raw": "Langtext",
        "image_urls_json": "[\"https://example.org/img.jpg\"]",
        "press_contact": "Kontakt",
        "source_name_snapshot": "Export Source",
        "source_terms_url_snapshot": "https://example.org/terms",
        "source_license_name_snapshot": "cc-by",
        "status": "review",
    }
    article = self.client.post("/api/articles/upsert", json=article_payload)
    self.assertEqual(article.status_code, 200)

    # JSON export: envelope fields plus the derived columns on the first item.
    json_response = self.client.get("/api/articles/export?format=json")
    self.assertEqual(json_response.status_code, 200)
    payload = json_response.json()
    self.assertTrue(payload.get("ok"))
    self.assertGreaterEqual(payload.get("count", 0), 1)
    first_item = payload["items"][0]
    for column in derived_columns:
        self.assertIn(column, first_item)

    # CSV export: content type and the derived column names in the output.
    csv_response = self.client.get("/api/articles/export?format=csv")
    self.assertEqual(csv_response.status_code, 200)
    self.assertIn("text/csv", csv_response.headers.get("content-type", ""))
    csv_body = csv_response.text
    for column in derived_columns:
        self.assertIn(column, csv_body)
if __name__ == "__main__":
unittest.main()

View file

@ -0,0 +1,21 @@
from datetime import datetime, timezone
import unittest
from backend.app.relevance import article_age_days, article_relevance
class TestRelevance(unittest.TestCase):
    """Tests for ``article_age_days`` and ``article_relevance`` against a fixed ``now``."""

    # Fixed reference instant so results do not depend on the wall clock.
    NOW = datetime(2026, 2, 18, 12, 0, 0, tzinfo=timezone.utc)

    def test_article_age_and_relevance(self) -> None:
        now = self.NOW
        self.assertEqual(article_age_days("2026-02-18T10:00:00Z", now=now), 0)
        self.assertEqual(article_relevance("2026-02-18T10:00:00Z", now=now), "hoch")
        self.assertEqual(article_age_days("2026-02-14T12:00:00Z", now=now), 4)
        self.assertEqual(article_relevance("2026-02-14T12:00:00Z", now=now), "mittel")
        self.assertEqual(article_relevance("2025-12-01T00:00:00Z", now=now), "alt")
        self.assertEqual(article_relevance(None, now=now), "unbekannt")

    def test_relevance_tier_boundaries(self) -> None:
        # (published_at, expected age in days, expected label) on both sides
        # of every inclusive threshold: 2, 7 and 30 days.
        cases = (
            ("2026-02-16T12:00:00Z", 2, "hoch"),
            ("2026-02-15T12:00:00Z", 3, "mittel"),
            ("2026-02-11T12:00:00Z", 7, "mittel"),
            ("2026-02-10T12:00:00Z", 8, "niedrig"),
            ("2026-01-19T12:00:00Z", 30, "niedrig"),
            ("2026-01-18T12:00:00Z", 31, "alt"),
        )
        for published_at, days, label in cases:
            with self.subTest(published_at=published_at):
                self.assertEqual(article_age_days(published_at, now=self.NOW), days)
                self.assertEqual(article_relevance(published_at, now=self.NOW), label)

    def test_future_publication_is_clamped_to_zero_days(self) -> None:
        self.assertEqual(article_age_days("2026-02-19T12:00:00Z", now=self.NOW), 0)
        self.assertEqual(article_relevance("2026-02-19T12:00:00Z", now=self.NOW), "hoch")

    def test_missing_or_unparsable_dates(self) -> None:
        for value in (None, "", "   ", "not-a-date"):
            with self.subTest(value=value):
                self.assertIsNone(article_age_days(value, now=self.NOW))
                self.assertEqual(article_relevance(value, now=self.NOW), "unbekannt")

    def test_naive_timestamp_is_treated_as_utc(self) -> None:
        self.assertEqual(article_age_days("2026-02-14T12:00:00", now=self.NOW), 4)
if __name__ == "__main__":
unittest.main()