feat(export): add csv/json article export with date relevance scoring
This commit is contained in:
parent
5159a6e3b4
commit
6691db8051
7 changed files with 224 additions and 0 deletions
|
|
@ -12,6 +12,7 @@ from .auth import create_session_token, verify_credentials, verify_session_token
|
||||||
from .config import get_settings
|
from .config import get_settings
|
||||||
from .ingestion import run_ingestion
|
from .ingestion import run_ingestion
|
||||||
from .policy import evaluate_source_policy
|
from .policy import evaluate_source_policy
|
||||||
|
from .relevance import article_age_days, article_relevance
|
||||||
from .repositories import (
|
from .repositories import (
|
||||||
FeedCreate,
|
FeedCreate,
|
||||||
SourceCreate,
|
SourceCreate,
|
||||||
|
|
@ -216,6 +217,8 @@ def admin_dashboard(request: Request):
|
||||||
if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str):
|
if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str):
|
||||||
article["press_contact"] = extraction.get("press_contact")
|
article["press_contact"] = extraction.get("press_contact")
|
||||||
article["extraction_error"] = extraction.get("extraction_error") if isinstance(extraction.get("extraction_error"), str) else None
|
article["extraction_error"] = extraction.get("extraction_error") if isinstance(extraction.get("extraction_error"), str) else None
|
||||||
|
article["days_old"] = article_age_days(article.get("published_at"))
|
||||||
|
article["relevance"] = article_relevance(article.get("published_at"))
|
||||||
|
|
||||||
return templates.TemplateResponse(
|
return templates.TemplateResponse(
|
||||||
request,
|
request,
|
||||||
|
|
@ -261,6 +264,8 @@ def admin_article_detail(request: Request, article_id: int):
|
||||||
if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str):
|
if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str):
|
||||||
article["press_contact"] = extraction.get("press_contact")
|
article["press_contact"] = extraction.get("press_contact")
|
||||||
article["extraction"] = extraction
|
article["extraction"] = extraction
|
||||||
|
article["days_old"] = article_age_days(article.get("published_at"))
|
||||||
|
article["relevance"] = article_relevance(article.get("published_at"))
|
||||||
feed = get_feed_by_id(int(article["feed_id"])) if article.get("feed_id") else None
|
feed = get_feed_by_id(int(article["feed_id"])) if article.get("feed_id") else None
|
||||||
checklist = _legal_checklist(article, feed)
|
checklist = _legal_checklist(article, feed)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,12 @@
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
|
import csv
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
import io
|
||||||
|
import json
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from fastapi import Depends, FastAPI, HTTPException, Request, Response, status
|
from fastapi import Depends, FastAPI, HTTPException, Request, Response, status
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
|
||||||
|
|
@ -11,6 +16,7 @@ from .config import get_settings
|
||||||
from .db import init_db
|
from .db import init_db
|
||||||
from .ingestion import run_ingestion
|
from .ingestion import run_ingestion
|
||||||
from .policy import evaluate_source_policy, is_source_allowed
|
from .policy import evaluate_source_policy, is_source_allowed
|
||||||
|
from .relevance import article_age_days, article_relevance
|
||||||
from .repositories import (
|
from .repositories import (
|
||||||
ArticleUpsert,
|
ArticleUpsert,
|
||||||
FeedCreate,
|
FeedCreate,
|
||||||
|
|
@ -321,6 +327,81 @@ def api_list_articles(limit: int = 100, status_filter: str | None = None, userna
|
||||||
return {"ok": True, "items": repo_list_articles(limit=limit, status_filter=status_filter), "requested_by": username}
|
return {"ok": True, "items": repo_list_articles(limit=limit, status_filter=status_filter), "requested_by": username}
|
||||||
|
|
||||||
|
|
||||||
|
# Column order shared by the CSV header and every exported record, so the
# two representations cannot drift apart.
_EXPORT_FIELDS = (
    "id",
    "title",
    "status",
    "published_at",
    "days_old",
    "relevance",
    "author",
    "source_url",
    "canonical_url",
    "source_name_snapshot",
    "source_license_name_snapshot",
    "source_terms_url_snapshot",
    "press_contact",
    "image_urls_json",
    "legal_checked",
    "legal_checked_at",
    "legal_note",
)


def _export_row(article: dict) -> dict:
    """Build one export record (keys in ``_EXPORT_FIELDS`` order) from an article."""
    published_at = article.get("published_at")
    row = {name: article.get(name) for name in _EXPORT_FIELDS}
    # Derived columns: computed from the publication date, not stored values.
    row["days_old"] = article_age_days(published_at)
    row["relevance"] = article_relevance(published_at)
    # ``or 0`` guards against the key being present with a None value,
    # which would otherwise make int() raise TypeError.
    row["legal_checked"] = bool(int(article.get("legal_checked") or 0))
    return row


@app.get("/api/articles/export")
def api_export_articles(
    format: str = "json",
    status_filter: str | None = None,
    username: str = Depends(require_auth),
):
    """Export up to 500 articles as a CSV attachment or a JSON document.

    Args:
        format: ``"csv"`` (matched case-insensitively, surrounding whitespace
            ignored) selects the CSV download; any other value yields JSON.
            The name shadows the builtin on purpose: it is the public query
            parameter name.
        status_filter: Passed through to ``repo_list_articles``.
        username: Value resolved by the ``require_auth`` dependency; echoed
            back as ``requested_by`` in the JSON payload.

    Returns:
        A ``text/csv`` ``Response`` with a ``Content-Disposition`` attachment
        header, or a ``JSONResponse`` carrying ``ok``, ``count``,
        ``generated_at``, ``status_filter``, ``items`` and ``requested_by``.
    """
    articles = repo_list_articles(limit=500, status_filter=status_filter)
    rows = [_export_row(article) for article in articles]

    if format.strip().lower() == "csv":
        out = io.StringIO()
        writer = csv.DictWriter(out, fieldnames=list(_EXPORT_FIELDS))
        writer.writeheader()
        writer.writerows(rows)
        return Response(
            content=out.getvalue(),
            media_type="text/csv; charset=utf-8",
            headers={"Content-Disposition": 'attachment; filename="articles_export.csv"'},
        )

    return JSONResponse(
        {
            "ok": True,
            "count": len(rows),
            # Only the JSON payload carries a generation timestamp.
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "status_filter": status_filter,
            "items": rows,
            "requested_by": username,
        }
    )
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/articles/{article_id}")
|
@app.get("/api/articles/{article_id}")
|
||||||
def api_get_article(article_id: int, username: str = Depends(require_auth)) -> dict:
|
def api_get_article(article_id: int, username: str = Depends(require_auth)) -> dict:
|
||||||
article = get_article_by_id(article_id)
|
article = get_article_by_id(article_id)
|
||||||
|
|
|
||||||
44
backend/app/relevance.py
Normal file
44
backend/app/relevance.py
Normal file
|
|
@ -0,0 +1,44 @@
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_iso_datetime(value: str | None) -> datetime | None:
|
||||||
|
if not value:
|
||||||
|
return None
|
||||||
|
raw = value.strip()
|
||||||
|
if not raw:
|
||||||
|
return None
|
||||||
|
if raw.endswith("Z"):
|
||||||
|
raw = raw[:-1] + "+00:00"
|
||||||
|
try:
|
||||||
|
parsed = datetime.fromisoformat(raw)
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
if parsed.tzinfo is None:
|
||||||
|
parsed = parsed.replace(tzinfo=timezone.utc)
|
||||||
|
return parsed
|
||||||
|
|
||||||
|
|
||||||
|
def article_age_days(published_at: str | None, now: datetime | None = None) -> int | None:
    """Return an article's age in whole days, or ``None`` if it has no usable date.

    Args:
        published_at: Publication timestamp as an ISO-8601 string.
        now: Reference instant to measure against; defaults to the current
            UTC time. A naive value is interpreted as UTC, the same way a
            naive publication timestamp is.

    Returns:
        ``None`` when *published_at* cannot be parsed, ``0`` when the
        publication time lies after the reference instant, otherwise the
        ``days`` component of the elapsed time.
    """
    published = _parse_iso_datetime(published_at)
    if published is None:
        return None
    ref = datetime.now(timezone.utc) if now is None else now
    if ref.tzinfo is None:
        # Subtracting an aware datetime from a naive one raises TypeError.
        ref = ref.replace(tzinfo=timezone.utc)
    delta = ref - published
    if delta.total_seconds() < 0:
        # Future-dated articles are reported as zero days old.
        return 0
    return delta.days
|
||||||
|
|
||||||
|
|
||||||
|
def article_relevance(published_at: str | None, now: datetime | None = None) -> str:
    """Classify an article's freshness from its publication timestamp.

    The age in days comes from ``article_age_days`` and maps to a label:
    no usable date -> "unbekannt", up to 2 days -> "hoch", up to 7 days ->
    "mittel", up to 30 days -> "niedrig", anything older -> "alt".
    """
    age = article_age_days(published_at, now=now)
    if age is None:
        return "unbekannt"
    # Inclusive upper bounds in ascending order; the first match wins.
    buckets = ((2, "hoch"), (7, "mittel"), (30, "niedrig"))
    return next((label for limit, label in buckets if age <= limit), "alt")
|
||||||
|
|
@ -24,6 +24,9 @@
|
||||||
<section class="card">
|
<section class="card">
|
||||||
<h2>{{ article.title }}</h2>
|
<h2>{{ article.title }}</h2>
|
||||||
<p><strong>Status:</strong> <span class="badge">{{ article.status }}</span></p>
|
<p><strong>Status:</strong> <span class="badge">{{ article.status }}</span></p>
|
||||||
|
<p><strong>Artikel-Datum:</strong> {{ article.published_at or "-" }}</p>
|
||||||
|
<p><strong>Alter:</strong> {{ article.days_old if article.days_old is not none else "-" }} Tage</p>
|
||||||
|
<p><strong>Relevanz:</strong> {{ article.relevance }}</p>
|
||||||
<p><strong>Autor:</strong> {{ article.author or "-" }}</p>
|
<p><strong>Autor:</strong> {{ article.author or "-" }}</p>
|
||||||
<p><strong>Feed:</strong> {{ feed.name if feed else "-" }}</p>
|
<p><strong>Feed:</strong> {{ feed.name if feed else "-" }}</p>
|
||||||
<p><strong>Quelle Snapshot:</strong> {{ article.source_name_snapshot or "-" }}</p>
|
<p><strong>Quelle Snapshot:</strong> {{ article.source_name_snapshot or "-" }}</p>
|
||||||
|
|
|
||||||
|
|
@ -131,6 +131,8 @@
|
||||||
</select>
|
</select>
|
||||||
<button type="submit" class="secondary">Filtern</button>
|
<button type="submit" class="secondary">Filtern</button>
|
||||||
<a href="/admin/dashboard" class="linkbtn">Reset</a>
|
<a href="/admin/dashboard" class="linkbtn">Reset</a>
|
||||||
|
<a href="/api/articles/export?format=json{% if status_filter %}&status_filter={{ status_filter }}{% endif %}" class="linkbtn">Export JSON</a>
|
||||||
|
<a href="/api/articles/export?format=csv{% if status_filter %}&status_filter={{ status_filter }}{% endif %}" class="linkbtn">Export CSV</a>
|
||||||
</form>
|
</form>
|
||||||
<table>
|
<table>
|
||||||
<thead>
|
<thead>
|
||||||
|
|
@ -143,6 +145,7 @@
|
||||||
<td>
|
<td>
|
||||||
<strong>{{ a.title }}</strong><br />
|
<strong>{{ a.title }}</strong><br />
|
||||||
<span class="subtle">Autor: {{ a.author or "-" }}</span><br />
|
<span class="subtle">Autor: {{ a.author or "-" }}</span><br />
|
||||||
|
<span class="subtle">Datum: {{ a.published_at or "-" }} | Alter: {{ a.days_old if a.days_old is not none else "-" }} Tage | Relevanz: {{ a.relevance }}</span><br />
|
||||||
<a href="{{ a.source_url }}" target="_blank" rel="noopener">Original öffnen</a>
|
<a href="{{ a.source_url }}" target="_blank" rel="noopener">Original öffnen</a>
|
||||||
<br /><a href="/admin/articles/{{ a.id }}">Details anzeigen</a>
|
<br /><a href="/admin/articles/{{ a.id }}">Details anzeigen</a>
|
||||||
{% if a.canonical_url and a.canonical_url != a.source_url %}
|
{% if a.canonical_url and a.canonical_url != a.source_url %}
|
||||||
|
|
|
||||||
|
|
@ -72,6 +72,73 @@ class TestApiAuth(unittest.TestCase):
|
||||||
self.assertFalse(body["allowed"])
|
self.assertFalse(body["allowed"])
|
||||||
self.assertGreaterEqual(len(body["issues"]), 1)
|
self.assertGreaterEqual(len(body["issues"]), 1)
|
||||||
|
|
||||||
|
def test_articles_export_json_and_csv_contains_relevance(self) -> None:
    """The export endpoint exposes date, age and relevance in JSON and CSV."""
    expected_columns = ("published_at", "days_old", "relevance")

    login_resp = self.client.post("/auth/login", json={"username": "admin", "password": "secret"})
    self.assertEqual(login_resp.status_code, 200)

    # Create the source -> feed -> article chain the export reads from.
    source_payload = {
        "name": "Export Source",
        "base_url": "https://example.org",
        "terms_url": "https://example.org/terms",
        "license_name": "cc-by",
        "risk_level": "green",
        "is_enabled": True,
        "last_reviewed_at": "2026-02-18T00:00:00Z",
    }
    source_resp = self.client.post("/api/sources", json=source_payload)
    self.assertEqual(source_resp.status_code, 200)
    source_id = source_resp.json()["id"]

    feed_payload = {
        "name": "Export Feed",
        "url": "https://example.org/feed.xml",
        "source_id": source_id,
        "is_enabled": True,
    }
    feed_resp = self.client.post("/api/feeds", json=feed_payload)
    self.assertEqual(feed_resp.status_code, 200)
    feed_id = feed_resp.json()["id"]

    article_payload = {
        "feed_id": feed_id,
        "source_article_id": "exp-1",
        "source_hash": "exp-hash-1",
        "title": "Export Artikel",
        "source_url": "https://example.org/article/1",
        "canonical_url": "https://example.org/article/1",
        "published_at": "2026-02-18T00:00:00Z",
        "author": "Autor",
        "summary": "Kurz",
        "content_raw": "Langtext",
        "image_urls_json": "[\"https://example.org/img.jpg\"]",
        "press_contact": "Kontakt",
        "source_name_snapshot": "Export Source",
        "source_terms_url_snapshot": "https://example.org/terms",
        "source_license_name_snapshot": "cc-by",
        "status": "review",
    }
    article_resp = self.client.post("/api/articles/upsert", json=article_payload)
    self.assertEqual(article_resp.status_code, 200)

    # JSON export: envelope fields plus the date-derived keys on the first item.
    json_resp = self.client.get("/api/articles/export?format=json")
    self.assertEqual(json_resp.status_code, 200)
    payload = json_resp.json()
    self.assertTrue(payload.get("ok"))
    self.assertGreaterEqual(payload.get("count", 0), 1)
    first_item = payload["items"][0]
    for column in expected_columns:
        self.assertIn(column, first_item)

    # CSV export: content type plus the same column names in the text.
    csv_resp = self.client.get("/api/articles/export?format=csv")
    self.assertEqual(csv_resp.status_code, 200)
    self.assertIn("text/csv", csv_resp.headers.get("content-type", ""))
    csv_body = csv_resp.text
    for column in expected_columns:
        self.assertIn(column, csv_body)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
||||||
21
backend/tests/test_relevance.py
Normal file
21
backend/tests/test_relevance.py
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from backend.app.relevance import article_age_days, article_relevance
|
||||||
|
|
||||||
|
|
||||||
|
class TestRelevance(unittest.TestCase):
    """Checks age computation and relevance labels against a fixed clock."""

    def test_article_age_and_relevance(self) -> None:
        """Same-day, four-day-old, stale and undated inputs get the expected results."""
        fixed_now = datetime(2026, 2, 18, 12, 0, 0, tzinfo=timezone.utc)

        # Published two hours before the reference instant.
        same_day = "2026-02-18T10:00:00Z"
        self.assertEqual(article_age_days(same_day, now=fixed_now), 0)
        self.assertEqual(article_relevance(same_day, now=fixed_now), "hoch")

        # Published exactly four days before the reference instant.
        four_days_ago = "2026-02-14T12:00:00Z"
        self.assertEqual(article_age_days(four_days_ago, now=fixed_now), 4)
        self.assertEqual(article_relevance(four_days_ago, now=fixed_now), "mittel")

        # Older than the 30-day window, and a missing date.
        self.assertEqual(article_relevance("2025-12-01T00:00:00Z", now=fixed_now), "alt")
        self.assertEqual(article_relevance(None, now=fixed_now), "unbekannt")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
Loading…
Add table
Add a link
Reference in a new issue