feat(export): add csv/json article export with date relevance scoring
This commit is contained in:
parent
5159a6e3b4
commit
6691db8051
7 changed files with 224 additions and 0 deletions
|
|
@ -12,6 +12,7 @@ from .auth import create_session_token, verify_credentials, verify_session_token
|
|||
from .config import get_settings
|
||||
from .ingestion import run_ingestion
|
||||
from .policy import evaluate_source_policy
|
||||
from .relevance import article_age_days, article_relevance
|
||||
from .repositories import (
|
||||
FeedCreate,
|
||||
SourceCreate,
|
||||
|
|
@ -216,6 +217,8 @@ def admin_dashboard(request: Request):
|
|||
if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str):
|
||||
article["press_contact"] = extraction.get("press_contact")
|
||||
article["extraction_error"] = extraction.get("extraction_error") if isinstance(extraction.get("extraction_error"), str) else None
|
||||
article["days_old"] = article_age_days(article.get("published_at"))
|
||||
article["relevance"] = article_relevance(article.get("published_at"))
|
||||
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
|
|
@ -261,6 +264,8 @@ def admin_article_detail(request: Request, article_id: int):
|
|||
if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str):
|
||||
article["press_contact"] = extraction.get("press_contact")
|
||||
article["extraction"] = extraction
|
||||
article["days_old"] = article_age_days(article.get("published_at"))
|
||||
article["relevance"] = article_relevance(article.get("published_at"))
|
||||
feed = get_feed_by_id(int(article["feed_id"])) if article.get("feed_id") else None
|
||||
checklist = _legal_checklist(article, feed)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,12 @@
|
|||
from contextlib import asynccontextmanager
|
||||
import csv
|
||||
from datetime import datetime, timezone
|
||||
import io
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import Depends, FastAPI, HTTPException, Request, Response, status
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel, Field
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
|
||||
|
|
@ -11,6 +16,7 @@ from .config import get_settings
|
|||
from .db import init_db
|
||||
from .ingestion import run_ingestion
|
||||
from .policy import evaluate_source_policy, is_source_allowed
|
||||
from .relevance import article_age_days, article_relevance
|
||||
from .repositories import (
|
||||
ArticleUpsert,
|
||||
FeedCreate,
|
||||
|
|
@ -321,6 +327,81 @@ def api_list_articles(limit: int = 100, status_filter: str | None = None, userna
|
|||
return {"ok": True, "items": repo_list_articles(limit=limit, status_filter=status_filter), "requested_by": username}
|
||||
|
||||
|
||||
# Export columns in output order. The JSON items and the CSV header are both
# derived from this tuple so the two formats cannot drift apart.
_ARTICLE_EXPORT_FIELDS = (
    "id",
    "title",
    "status",
    "published_at",
    "days_old",
    "relevance",
    "author",
    "source_url",
    "canonical_url",
    "source_name_snapshot",
    "source_license_name_snapshot",
    "source_terms_url_snapshot",
    "press_contact",
    "image_urls_json",
    "legal_checked",
    "legal_checked_at",
    "legal_note",
)


def _article_export_row(article: dict) -> dict:
    """Build one export row from an article record.

    ``days_old`` and ``relevance`` are computed from ``published_at``,
    ``legal_checked`` is coerced to a bool, and every other column is copied
    with ``article.get`` (``None`` when the key is missing).
    """
    published_at = article.get("published_at")
    derived = {
        "days_old": article_age_days(published_at),
        "relevance": article_relevance(published_at),
        # `or 0` also covers a key that is present but None; the previous
        # `.get("legal_checked", 0)` default only applied to a missing key,
        # so int(None) raised TypeError for such records.
        "legal_checked": bool(int(article.get("legal_checked") or 0)),
    }
    return {
        column: derived[column] if column in derived else article.get(column)
        for column in _ARTICLE_EXPORT_FIELDS
    }


@app.get("/api/articles/export")
def api_export_articles(
    format: str = "json",
    status_filter: str | None = None,
    username: str = Depends(require_auth),
):
    """Export up to 500 articles as a CSV download or a JSON document.

    Args:
        format: ``"csv"`` produces a CSV attachment; any other value produces
            JSON. The name shadows the builtin, but it is the public query
            parameter and therefore must not be renamed.
        status_filter: Passed through unchanged to ``repo_list_articles``.
        username: Resolved by the ``require_auth`` dependency; echoed in the
            JSON payload as ``requested_by``.

    Returns:
        A ``Response`` with media type ``text/csv`` and an attachment
        ``Content-Disposition`` header, or a ``JSONResponse`` carrying
        ``ok``, ``count``, ``generated_at``, ``status_filter``, ``items`` and
        ``requested_by``.
    """
    rows = [
        _article_export_row(article)
        for article in repo_list_articles(limit=500, status_filter=status_filter)
    ]

    if format == "csv":
        # NOTE(review): text columns are written verbatim. If titles or
        # authors can originate from external feeds, cells starting with
        # "=", "+", "-" or "@" may be evaluated as formulas by spreadsheet
        # applications - confirm whether they need to be neutralised.
        buffer = io.StringIO()
        writer = csv.DictWriter(buffer, fieldnames=_ARTICLE_EXPORT_FIELDS)
        writer.writeheader()
        writer.writerows(rows)
        return Response(
            content=buffer.getvalue(),
            media_type="text/csv; charset=utf-8",
            headers={"Content-Disposition": 'attachment; filename="articles_export.csv"'},
        )

    return JSONResponse(
        {
            "ok": True,
            "count": len(rows),
            # Only the JSON payload carries a timestamp, so it is taken here
            # rather than before the CSV branch.
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "status_filter": status_filter,
            "items": rows,
            "requested_by": username,
        }
    )
|
||||
|
||||
|
||||
@app.get("/api/articles/{article_id}")
|
||||
def api_get_article(article_id: int, username: str = Depends(require_auth)) -> dict:
|
||||
article = get_article_by_id(article_id)
|
||||
|
|
|
|||
44
backend/app/relevance.py
Normal file
44
backend/app/relevance.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
|
||||
|
||||
def _parse_iso_datetime(value: str | datetime | None) -> datetime | None:
    """Parse an ISO 8601 timestamp into a timezone-aware ``datetime``.

    Args:
        value: An ISO 8601 string (a trailing ``Z``/``z`` is read as UTC), an
            existing ``datetime``, or ``None``.

    Returns:
        The parsed timestamp, with naive values assumed to be UTC. ``None``
        is returned for ``None``, blank strings, strings that
        ``datetime.fromisoformat`` rejects, and values of any other type.
    """
    if isinstance(value, datetime):
        # Accept an already-parsed timestamp instead of failing on .strip().
        parsed = value
    elif isinstance(value, str):
        raw = value.strip()
        if not raw:
            return None
        # Normalise the "Zulu" suffix to an explicit offset, because
        # fromisoformat does not accept it on every Python version.
        if raw.endswith(("Z", "z")):
            raw = raw[:-1] + "+00:00"
        try:
            parsed = datetime.fromisoformat(raw)
        except ValueError:
            return None
    else:
        return None

    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def article_age_days(published_at: str | None, now: datetime | None = None) -> int | None:
    """Return the article's age in whole days, or ``None`` if it is unknown.

    Args:
        published_at: Publication timestamp as accepted by
            ``_parse_iso_datetime``.
        now: Reference time; defaults to the current UTC time. A naive value
            is treated as UTC, matching how naive publication timestamps are
            handled, so the subtraction never mixes naive and aware values.

    Returns:
        ``None`` when ``published_at`` cannot be parsed, ``0`` when the
        publication time lies after the reference time, otherwise the number
        of complete days between the two.
    """
    published = _parse_iso_datetime(published_at)
    if published is None:
        return None

    reference = now if now is not None else datetime.now(timezone.utc)
    if reference.tzinfo is None:
        # aware - naive raises TypeError; assume UTC like the parser does.
        reference = reference.replace(tzinfo=timezone.utc)

    elapsed = reference - published
    if elapsed.total_seconds() < 0:
        # Future-dated articles are clamped to an age of zero.
        return 0
    return elapsed.days
|
||||
|
||||
|
||||
def article_relevance(published_at: str | None, now: datetime | None = None) -> str:
    """Map an article's age to a relevance label.

    The age comes from ``article_age_days``. An unknown age yields
    ``"unbekannt"``; otherwise the first tier whose upper bound (inclusive,
    in days) covers the age wins: 2 -> ``"hoch"``, 7 -> ``"mittel"``,
    30 -> ``"niedrig"``. Anything older is ``"alt"``.
    """
    age = article_age_days(published_at, now=now)
    if age is None:
        return "unbekannt"

    # Tiers are ordered by ascending upper bound, so the first match is the
    # tightest one.
    tiers = ((2, "hoch"), (7, "mittel"), (30, "niedrig"))
    for max_days, label in tiers:
        if age <= max_days:
            return label
    return "alt"
|
||||
|
|
@ -24,6 +24,9 @@
|
|||
<section class="card">
|
||||
<h2>{{ article.title }}</h2>
|
||||
<p><strong>Status:</strong> <span class="badge">{{ article.status }}</span></p>
|
||||
<p><strong>Artikel-Datum:</strong> {{ article.published_at or "-" }}</p>
|
||||
<p><strong>Alter:</strong> {{ article.days_old if article.days_old is not none else "-" }} Tage</p>
|
||||
<p><strong>Relevanz:</strong> {{ article.relevance }}</p>
|
||||
<p><strong>Autor:</strong> {{ article.author or "-" }}</p>
|
||||
<p><strong>Feed:</strong> {{ feed.name if feed else "-" }}</p>
|
||||
<p><strong>Quelle Snapshot:</strong> {{ article.source_name_snapshot or "-" }}</p>
|
||||
|
|
|
|||
|
|
@ -131,6 +131,8 @@
|
|||
</select>
|
||||
<button type="submit" class="secondary">Filtern</button>
|
||||
<a href="/admin/dashboard" class="linkbtn">Reset</a>
|
||||
<a href="/api/articles/export?format=json{% if status_filter %}&status_filter={{ status_filter }}{% endif %}" class="linkbtn">Export JSON</a>
|
||||
<a href="/api/articles/export?format=csv{% if status_filter %}&status_filter={{ status_filter }}{% endif %}" class="linkbtn">Export CSV</a>
|
||||
</form>
|
||||
<table>
|
||||
<thead>
|
||||
|
|
@ -143,6 +145,7 @@
|
|||
<td>
|
||||
<strong>{{ a.title }}</strong><br />
|
||||
<span class="subtle">Autor: {{ a.author or "-" }}</span><br />
|
||||
<span class="subtle">Datum: {{ a.published_at or "-" }} | Alter: {{ a.days_old if a.days_old is not none else "-" }} Tage | Relevanz: {{ a.relevance }}</span><br />
|
||||
<a href="{{ a.source_url }}" target="_blank" rel="noopener">Original öffnen</a>
|
||||
<br /><a href="/admin/articles/{{ a.id }}">Details anzeigen</a>
|
||||
{% if a.canonical_url and a.canonical_url != a.source_url %}
|
||||
|
|
|
|||
|
|
@ -72,6 +72,73 @@ class TestApiAuth(unittest.TestCase):
|
|||
self.assertFalse(body["allowed"])
|
||||
self.assertGreaterEqual(len(body["issues"]), 1)
|
||||
|
||||
def test_articles_export_json_and_csv_contains_relevance(self) -> None:
    """Create a source, feed and article, then request both export formats.

    Only the presence of the date and relevance columns is asserted; their
    values depend on the current date and are deliberately not checked.
    """
    expected_columns = ("published_at", "days_old", "relevance")

    login_response = self.client.post("/auth/login", json={"username": "admin", "password": "secret"})
    self.assertEqual(login_response.status_code, 200)

    source_payload = {
        "name": "Export Source",
        "base_url": "https://example.org",
        "terms_url": "https://example.org/terms",
        "license_name": "cc-by",
        "risk_level": "green",
        "is_enabled": True,
        "last_reviewed_at": "2026-02-18T00:00:00Z",
    }
    source_response = self.client.post("/api/sources", json=source_payload)
    self.assertEqual(source_response.status_code, 200)
    source_id = source_response.json()["id"]

    feed_payload = {
        "name": "Export Feed",
        "url": "https://example.org/feed.xml",
        "source_id": source_id,
        "is_enabled": True,
    }
    feed_response = self.client.post("/api/feeds", json=feed_payload)
    self.assertEqual(feed_response.status_code, 200)
    feed_id = feed_response.json()["id"]

    article_payload = {
        "feed_id": feed_id,
        "source_article_id": "exp-1",
        "source_hash": "exp-hash-1",
        "title": "Export Artikel",
        "source_url": "https://example.org/article/1",
        "canonical_url": "https://example.org/article/1",
        "published_at": "2026-02-18T00:00:00Z",
        "author": "Autor",
        "summary": "Kurz",
        "content_raw": "Langtext",
        "image_urls_json": "[\"https://example.org/img.jpg\"]",
        "press_contact": "Kontakt",
        "source_name_snapshot": "Export Source",
        "source_terms_url_snapshot": "https://example.org/terms",
        "source_license_name_snapshot": "cc-by",
        "status": "review",
    }
    article_response = self.client.post("/api/articles/upsert", json=article_payload)
    self.assertEqual(article_response.status_code, 200)

    # JSON export: envelope fields plus the computed columns on the first item.
    json_response = self.client.get("/api/articles/export?format=json")
    self.assertEqual(json_response.status_code, 200)
    payload = json_response.json()
    self.assertTrue(payload.get("ok"))
    self.assertGreaterEqual(payload.get("count", 0), 1)
    first_item = payload["items"][0]
    for column in expected_columns:
        self.assertIn(column, first_item)

    # CSV export: content type and the column names somewhere in the text.
    csv_response = self.client.get("/api/articles/export?format=csv")
    self.assertEqual(csv_response.status_code, 200)
    self.assertIn("text/csv", csv_response.headers.get("content-type", ""))
    csv_body = csv_response.text
    for column in expected_columns:
        self.assertIn(column, csv_body)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
|||
21
backend/tests/test_relevance.py
Normal file
21
backend/tests/test_relevance.py
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
from datetime import datetime, timezone
|
||||
import unittest
|
||||
|
||||
from backend.app.relevance import article_age_days, article_relevance
|
||||
|
||||
|
||||
class TestRelevance(unittest.TestCase):
    """Unit tests for the date-based age and relevance helpers."""

    # Fixed reference time so every expectation is independent of the clock.
    NOW = datetime(2026, 2, 18, 12, 0, 0, tzinfo=timezone.utc)

    def test_article_age_and_relevance(self) -> None:
        """Representative ages map to the expected day count and label."""
        now = self.NOW
        self.assertEqual(article_age_days("2026-02-18T10:00:00Z", now=now), 0)
        self.assertEqual(article_relevance("2026-02-18T10:00:00Z", now=now), "hoch")

        self.assertEqual(article_age_days("2026-02-14T12:00:00Z", now=now), 4)
        self.assertEqual(article_relevance("2026-02-14T12:00:00Z", now=now), "mittel")

        self.assertEqual(article_relevance("2025-12-01T00:00:00Z", now=now), "alt")
        self.assertEqual(article_relevance(None, now=now), "unbekannt")

    def test_relevance_tier_boundaries(self) -> None:
        """Each tier's upper bound is inclusive: 2, 7 and 30 days."""
        cases = (
            ("2026-02-16T12:00:00Z", 2, "hoch"),
            ("2026-02-15T12:00:00Z", 3, "mittel"),
            ("2026-02-11T12:00:00Z", 7, "mittel"),
            ("2026-02-10T12:00:00Z", 8, "niedrig"),
            ("2026-01-19T12:00:00Z", 30, "niedrig"),
            ("2026-01-18T12:00:00Z", 31, "alt"),
        )
        for published_at, expected_days, expected_label in cases:
            with self.subTest(published_at=published_at):
                self.assertEqual(article_age_days(published_at, now=self.NOW), expected_days)
                self.assertEqual(article_relevance(published_at, now=self.NOW), expected_label)

    def test_future_naive_and_unparseable_timestamps(self) -> None:
        """Future dates clamp to 0, naive strings are read as UTC, junk is unknown."""
        now = self.NOW
        self.assertEqual(article_age_days("2026-02-19T12:00:00Z", now=now), 0)
        self.assertEqual(article_relevance("2026-02-19T12:00:00Z", now=now), "hoch")

        self.assertEqual(article_age_days("2026-02-14T12:00:00", now=now), 4)

        for bad_value in ("", "   ", "not-a-date"):
            with self.subTest(bad_value=bad_value):
                self.assertIsNone(article_age_days(bad_value, now=now))
                self.assertEqual(article_relevance(bad_value, now=now), "unbekannt")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue