feat(export): add csv/json article export with date relevance scoring

This commit is contained in:
Oliver 2026-02-18 10:04:38 +01:00
parent 5159a6e3b4
commit 6691db8051
7 changed files with 224 additions and 0 deletions

View file

@ -12,6 +12,7 @@ from .auth import create_session_token, verify_credentials, verify_session_token
from .config import get_settings
from .ingestion import run_ingestion
from .policy import evaluate_source_policy
from .relevance import article_age_days, article_relevance
from .repositories import (
FeedCreate,
SourceCreate,
@ -216,6 +217,8 @@ def admin_dashboard(request: Request):
if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str):
article["press_contact"] = extraction.get("press_contact")
article["extraction_error"] = extraction.get("extraction_error") if isinstance(extraction.get("extraction_error"), str) else None
article["days_old"] = article_age_days(article.get("published_at"))
article["relevance"] = article_relevance(article.get("published_at"))
return templates.TemplateResponse(
request,
@ -261,6 +264,8 @@ def admin_article_detail(request: Request, article_id: int):
if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str):
article["press_contact"] = extraction.get("press_contact")
article["extraction"] = extraction
article["days_old"] = article_age_days(article.get("published_at"))
article["relevance"] = article_relevance(article.get("published_at"))
feed = get_feed_by_id(int(article["feed_id"])) if article.get("feed_id") else None
checklist = _legal_checklist(article, feed)

View file

@ -1,7 +1,12 @@
from contextlib import asynccontextmanager
import csv
from datetime import datetime, timezone
import io
import json
from pathlib import Path
from fastapi import Depends, FastAPI, HTTPException, Request, Response, status
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
from fastapi.staticfiles import StaticFiles
@ -11,6 +16,7 @@ from .config import get_settings
from .db import init_db
from .ingestion import run_ingestion
from .policy import evaluate_source_policy, is_source_allowed
from .relevance import article_age_days, article_relevance
from .repositories import (
ArticleUpsert,
FeedCreate,
@ -321,6 +327,81 @@ def api_list_articles(limit: int = 100, status_filter: str | None = None, userna
return {"ok": True, "items": repo_list_articles(limit=limit, status_filter=status_filter), "requested_by": username}
# NOTE: this route must stay registered before "/api/articles/{article_id}",
# otherwise "export" would be matched as an article_id path parameter.
@app.get("/api/articles/export")
def api_export_articles(
    format: str = "json",
    status_filter: str | None = None,
    username: str = Depends(require_auth),
):
    """Export up to 500 articles, enriched with age and relevance, as JSON or CSV.

    Args:
        format: "csv" (case-insensitive, surrounding whitespace ignored)
            selects a CSV attachment; any other value yields the JSON
            payload. The name shadows the builtin but is the public query
            parameter, so it is kept.
        status_filter: Passed through unchanged to repo_list_articles.
        username: Authenticated user supplied by require_auth; echoed in the
            JSON payload as "requested_by".

    Returns:
        A text/csv Response with a Content-Disposition attachment header, or
        a JSONResponse with the keys ok, count, generated_at, status_filter,
        items and requested_by.
    """
    # Single source of truth for the exported columns and their order; used
    # both to build each row and as the CSV header.
    fieldnames = (
        "id",
        "title",
        "status",
        "published_at",
        "days_old",
        "relevance",
        "author",
        "source_url",
        "canonical_url",
        "source_name_snapshot",
        "source_license_name_snapshot",
        "source_terms_url_snapshot",
        "press_contact",
        "image_urls_json",
        "legal_checked",
        "legal_checked_at",
        "legal_note",
    )

    # One reference instant for the whole export so every row is scored
    # against the same clock and agrees with "generated_at".
    now = datetime.now(timezone.utc)

    rows = []
    for article in repo_list_articles(limit=500, status_filter=status_filter):
        published_at = article.get("published_at")
        derived = {
            "days_old": article_age_days(published_at, now=now),
            "relevance": article_relevance(published_at, now=now),
            # "or 0" also covers a key that is present but None, which
            # int() would otherwise reject with a TypeError.
            "legal_checked": bool(int(article.get("legal_checked") or 0)),
        }
        rows.append(
            {
                name: derived[name] if name in derived else article.get(name)
                for name in fieldnames
            }
        )

    if format.strip().lower() == "csv":
        # NOTE(review): cell values are written verbatim. If these fields can
        # carry externally sourced text, consider neutralising leading
        # "=", "+", "-" and "@" to prevent spreadsheet formula injection.
        out = io.StringIO()
        writer = csv.DictWriter(out, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
        return Response(
            content=out.getvalue(),
            media_type="text/csv; charset=utf-8",
            headers={"Content-Disposition": 'attachment; filename="articles_export.csv"'},
        )

    return JSONResponse(
        {
            "ok": True,
            "count": len(rows),
            "generated_at": now.isoformat(),
            "status_filter": status_filter,
            "items": rows,
            "requested_by": username,
        }
    )
@app.get("/api/articles/{article_id}")
def api_get_article(article_id: int, username: str = Depends(require_auth)) -> dict:
article = get_article_by_id(article_id)

44
backend/app/relevance.py Normal file
View file

@ -0,0 +1,44 @@
from __future__ import annotations
from datetime import datetime, timezone
def _parse_iso_datetime(value: str | None) -> datetime | None:
    """Parse an ISO-8601 timestamp string into a timezone-aware datetime.

    Returns None for a missing value, an empty or whitespace-only string,
    and any text that datetime.fromisoformat rejects. A trailing "Z" is
    rewritten to "+00:00" before parsing, and a result without an offset is
    tagged as UTC.
    """
    text = value.strip() if value else ""
    if text == "":
        return None

    # Spell the "Z" suffix out as an explicit offset before parsing.
    if text[-1] == "Z":
        text = f"{text[:-1]}+00:00"

    try:
        moment = datetime.fromisoformat(text)
    except ValueError:
        return None

    # Naive timestamps are interpreted as UTC so callers always get an
    # aware datetime back.
    return moment if moment.tzinfo is not None else moment.replace(tzinfo=timezone.utc)
def article_age_days(published_at: str | None, now: datetime | None = None) -> int | None:
    """Return the article's age in whole days, or None if the date is unusable.

    Args:
        published_at: Publication timestamp as an ISO-8601 string, parsed by
            _parse_iso_datetime.
        now: Reference instant to measure against; defaults to the current
            UTC time. A naive datetime is interpreted as UTC.

    Returns:
        The number of completed 24-hour periods between published_at and
        now, 0 when published_at lies in the future, or None when
        published_at is missing or cannot be parsed.
    """
    published = _parse_iso_datetime(published_at)
    if published is None:
        return None

    ref = datetime.now(timezone.utc) if now is None else now
    if ref.utcoffset() is None:
        # The parsed timestamp is always timezone-aware; subtracting it from
        # a naive datetime would raise TypeError, so treat naive input as UTC.
        ref = ref.replace(tzinfo=timezone.utc)

    # timedelta.days is negative exactly when the delta is negative, so
    # clamping maps future publication dates to an age of 0.
    return max((ref - published).days, 0)
def article_relevance(published_at: str | None, now: datetime | None = None) -> str:
    """Map an article's age to a relevance label.

    The age comes from article_age_days(published_at, now=now). The label
    is "unbekannt" when no age can be determined, "hoch" for up to 2 days,
    "mittel" for up to 7 days, "niedrig" for up to 30 days, and "alt" for
    anything older.
    """
    age = article_age_days(published_at, now=now)
    if age is None:
        return "unbekannt"

    # Inclusive upper bounds in ascending order; the first match wins.
    tiers = ((2, "hoch"), (7, "mittel"), (30, "niedrig"))
    for upper_bound, label in tiers:
        if age <= upper_bound:
            return label
    return "alt"