feat(export): add csv/json article export with date relevance scoring
This commit is contained in:
parent
5159a6e3b4
commit
6691db8051
7 changed files with 224 additions and 0 deletions
|
|
@ -12,6 +12,7 @@ from .auth import create_session_token, verify_credentials, verify_session_token
|
|||
from .config import get_settings
|
||||
from .ingestion import run_ingestion
|
||||
from .policy import evaluate_source_policy
|
||||
from .relevance import article_age_days, article_relevance
|
||||
from .repositories import (
|
||||
FeedCreate,
|
||||
SourceCreate,
|
||||
|
|
@ -216,6 +217,8 @@ def admin_dashboard(request: Request):
|
|||
if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str):
|
||||
article["press_contact"] = extraction.get("press_contact")
|
||||
article["extraction_error"] = extraction.get("extraction_error") if isinstance(extraction.get("extraction_error"), str) else None
|
||||
article["days_old"] = article_age_days(article.get("published_at"))
|
||||
article["relevance"] = article_relevance(article.get("published_at"))
|
||||
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
|
|
@ -261,6 +264,8 @@ def admin_article_detail(request: Request, article_id: int):
|
|||
if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str):
|
||||
article["press_contact"] = extraction.get("press_contact")
|
||||
article["extraction"] = extraction
|
||||
article["days_old"] = article_age_days(article.get("published_at"))
|
||||
article["relevance"] = article_relevance(article.get("published_at"))
|
||||
feed = get_feed_by_id(int(article["feed_id"])) if article.get("feed_id") else None
|
||||
checklist = _legal_checklist(article, feed)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,12 @@
|
|||
from contextlib import asynccontextmanager
|
||||
import csv
|
||||
from datetime import datetime, timezone
|
||||
import io
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import Depends, FastAPI, HTTPException, Request, Response, status
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel, Field
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
|
||||
|
|
@ -11,6 +16,7 @@ from .config import get_settings
|
|||
from .db import init_db
|
||||
from .ingestion import run_ingestion
|
||||
from .policy import evaluate_source_policy, is_source_allowed
|
||||
from .relevance import article_age_days, article_relevance
|
||||
from .repositories import (
|
||||
ArticleUpsert,
|
||||
FeedCreate,
|
||||
|
|
@ -321,6 +327,81 @@ def api_list_articles(limit: int = 100, status_filter: str | None = None, userna
|
|||
return {"ok": True, "items": repo_list_articles(limit=limit, status_filter=status_filter), "requested_by": username}
|
||||
|
||||
|
||||
# Single source of truth for the export columns: it fixes both the key order
# of every exported row and the CSV header, so the two cannot drift apart.
_EXPORT_FIELDS = (
    "id",
    "title",
    "status",
    "published_at",
    "days_old",
    "relevance",
    "author",
    "source_url",
    "canonical_url",
    "source_name_snapshot",
    "source_license_name_snapshot",
    "source_terms_url_snapshot",
    "press_contact",
    "image_urls_json",
    "legal_checked",
    "legal_checked_at",
    "legal_note",
)


def _export_row(article) -> dict:
    """Project one stored article onto the export columns, adding age and relevance."""
    published_at = article.get("published_at")
    row = {field: article.get(field) for field in _EXPORT_FIELDS}
    row["days_old"] = article_age_days(published_at)
    row["relevance"] = article_relevance(published_at)
    # The flag may be present but None; ``int(None)`` would raise and fail the
    # whole export, so a missing or empty flag counts as "not checked".
    row["legal_checked"] = bool(int(article.get("legal_checked") or 0))
    return row


# Registered ahead of "/api/articles/{article_id}" so that the literal path
# segment "export" is matched here instead of being parsed as an article id.
@app.get("/api/articles/export")
def api_export_articles(
    format: str = "json",
    status_filter: str | None = None,
    username: str = Depends(require_auth),
):
    """Export up to 500 articles as JSON (default) or as a CSV attachment.

    Args:
        format: ``"csv"`` (case-insensitive, surrounding whitespace ignored)
            selects the CSV download; any other value yields JSON.
        status_filter: Passed through to ``repo_list_articles`` unchanged.
        username: Resolved by the ``require_auth`` dependency; echoed back as
            ``requested_by`` in the JSON payload.

    Returns:
        A ``text/csv`` ``Response`` with a ``Content-Disposition`` attachment
        header, or a ``JSONResponse`` carrying the rows plus export metadata.
    """
    articles = repo_list_articles(limit=500, status_filter=status_filter)
    rows = [_export_row(article) for article in articles]

    if format.strip().lower() == "csv":
        # NOTE(review): cell values originate from ingested articles. If the
        # CSV is meant to be opened in a spreadsheet, consider neutralising
        # values that start with "=", "+", "-" or "@" (formula injection).
        out = io.StringIO()
        writer = csv.DictWriter(out, fieldnames=_EXPORT_FIELDS)
        writer.writeheader()
        writer.writerows(rows)
        return Response(
            content=out.getvalue(),
            media_type="text/csv; charset=utf-8",
            headers={"Content-Disposition": 'attachment; filename="articles_export.csv"'},
        )

    # The timestamp is only part of the JSON envelope, so build it here.
    generated_at = datetime.now(timezone.utc).isoformat()
    return JSONResponse(
        {
            "ok": True,
            "count": len(rows),
            "generated_at": generated_at,
            "status_filter": status_filter,
            "items": rows,
            "requested_by": username,
        }
    )
|
||||
|
||||
|
||||
@app.get("/api/articles/{article_id}")
|
||||
def api_get_article(article_id: int, username: str = Depends(require_auth)) -> dict:
|
||||
article = get_article_by_id(article_id)
|
||||
|
|
|
|||
44
backend/app/relevance.py
Normal file
44
backend/app/relevance.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
|
||||
|
||||
def _parse_iso_datetime(value: str | None) -> datetime | None:
|
||||
if not value:
|
||||
return None
|
||||
raw = value.strip()
|
||||
if not raw:
|
||||
return None
|
||||
if raw.endswith("Z"):
|
||||
raw = raw[:-1] + "+00:00"
|
||||
try:
|
||||
parsed = datetime.fromisoformat(raw)
|
||||
except ValueError:
|
||||
return None
|
||||
if parsed.tzinfo is None:
|
||||
parsed = parsed.replace(tzinfo=timezone.utc)
|
||||
return parsed
|
||||
|
||||
|
||||
def article_age_days(published_at: str | None, now: datetime | None = None) -> int | None:
    """Return an article's age in whole days.

    Args:
        published_at: Publication timestamp as an ISO-8601 string.
        now: Reference instant to measure against; defaults to the current
            UTC time. A naive value is interpreted as UTC.

    Returns:
        The number of completed days between ``published_at`` and ``now``,
        ``0`` when the publication date lies in the future, or ``None`` when
        ``published_at`` is missing or cannot be parsed.
    """
    published = _parse_iso_datetime(published_at)
    if published is None:
        return None

    ref = datetime.now(timezone.utc) if now is None else now
    # ``published`` is always timezone-aware; subtracting it from a naive
    # datetime raises TypeError, so give a naive reference the same UTC default.
    if ref.tzinfo is None:
        ref = ref.replace(tzinfo=timezone.utc)

    # ``timedelta.days`` is negative for any negative delta, so clamping at
    # zero covers publication dates that lie in the future.
    return max((ref - published).days, 0)
|
||||
|
||||
|
||||
def article_relevance(published_at: str | None, now: datetime | None = None) -> str:
    """Classify an article's relevance from its age in days.

    Args:
        published_at: Publication timestamp, forwarded to ``article_age_days``.
        now: Optional reference instant, forwarded to ``article_age_days``.

    Returns:
        ``"unbekannt"`` when the age cannot be determined, otherwise
        ``"hoch"`` (up to 2 days), ``"mittel"`` (up to 7 days),
        ``"niedrig"`` (up to 30 days) or ``"alt"`` (older than 30 days).
    """
    age = article_age_days(published_at, now=now)
    if age is None:
        return "unbekannt"

    # Inclusive upper bounds in ascending order; the first match wins.
    bands = ((2, "hoch"), (7, "mittel"), (30, "niedrig"))
    for upper_bound, label in bands:
        if age <= upper_bound:
            return label
    return "alt"
|
||||
Loading…
Add table
Add a link
Reference in a new issue