feat(legal): add structured attribution fields and publish legal gate
This commit is contained in:
parent
c52363f1a7
commit
5159a6e3b4
10 changed files with 259 additions and 16 deletions
|
|
@ -23,6 +23,7 @@ from .repositories import (
|
|||
list_feeds,
|
||||
list_runs,
|
||||
list_sources,
|
||||
set_article_legal_review,
|
||||
update_article_status,
|
||||
)
|
||||
|
||||
|
|
@ -104,22 +105,22 @@ def _legal_checklist(article: dict, feed: dict | None) -> list[dict[str, str]]:
|
|||
checks.append(
|
||||
{
|
||||
"label": "Bilder extrahiert",
|
||||
"status": "ok" if extraction.get("images") else "missing",
|
||||
"status": "ok" if article.get("image_urls_json") else "missing",
|
||||
"value": str(len(extraction.get("images", []))) if isinstance(extraction.get("images"), list) else "0",
|
||||
}
|
||||
)
|
||||
checks.append(
|
||||
{
|
||||
"label": "Pressekontakt",
|
||||
"status": "ok" if extraction.get("press_contact") else "missing",
|
||||
"value": extraction.get("press_contact") or "-",
|
||||
"status": "ok" if article.get("press_contact") else "missing",
|
||||
"value": article.get("press_contact") or extraction.get("press_contact") or "-",
|
||||
}
|
||||
)
|
||||
checks.append(
|
||||
{
|
||||
"label": "Lizenz/Terms",
|
||||
"status": "ok" if attribution.get("source_license_name") and attribution.get("source_terms_url") else "missing",
|
||||
"value": f"{attribution.get('source_license_name') or '-'} | {attribution.get('source_terms_url') or '-'}",
|
||||
"status": "ok" if article.get("source_license_name_snapshot") and article.get("source_terms_url_snapshot") else "missing",
|
||||
"value": f"{article.get('source_license_name_snapshot') or attribution.get('source_license_name') or '-'} | {article.get('source_terms_url_snapshot') or attribution.get('source_terms_url') or '-'}",
|
||||
}
|
||||
)
|
||||
checks.append(
|
||||
|
|
@ -129,6 +130,13 @@ def _legal_checklist(article: dict, feed: dict | None) -> list[dict[str, str]]:
|
|||
"value": feed.get("source_risk_level") if feed else "-",
|
||||
}
|
||||
)
|
||||
checks.append(
|
||||
{
|
||||
"label": "Manuelle Rechtsfreigabe",
|
||||
"status": "ok" if int(article.get("legal_checked", 0)) == 1 else "missing",
|
||||
"value": article.get("legal_checked_at") or "-",
|
||||
}
|
||||
)
|
||||
return checks
|
||||
|
||||
|
||||
|
|
@ -193,9 +201,20 @@ def admin_dashboard(request: Request):
|
|||
for article in articles:
|
||||
meta = _parse_meta_json(article.get("meta_json"))
|
||||
extraction = meta.get("extraction") if isinstance(meta.get("extraction"), dict) else {}
|
||||
images = []
|
||||
if article.get("image_urls_json"):
|
||||
try:
|
||||
parsed_images = json.loads(article["image_urls_json"])
|
||||
if isinstance(parsed_images, list):
|
||||
images = [str(item) for item in parsed_images if item]
|
||||
except Exception:
|
||||
images = []
|
||||
if not images and isinstance(extraction.get("images"), list):
|
||||
images = extraction.get("images")
|
||||
article["meta"] = meta
|
||||
article["extracted_images"] = extraction.get("images") if isinstance(extraction.get("images"), list) else []
|
||||
article["press_contact"] = extraction.get("press_contact") if isinstance(extraction.get("press_contact"), str) else None
|
||||
article["extracted_images"] = images
|
||||
if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str):
|
||||
article["press_contact"] = extraction.get("press_contact")
|
||||
article["extraction_error"] = extraction.get("extraction_error") if isinstance(extraction.get("extraction_error"), str) else None
|
||||
|
||||
return templates.TemplateResponse(
|
||||
|
|
@ -232,6 +251,15 @@ def admin_article_detail(request: Request, article_id: int):
|
|||
meta = _parse_meta_json(article.get("meta_json"))
|
||||
article["meta"] = meta
|
||||
extraction = meta.get("extraction") if isinstance(meta.get("extraction"), dict) else {}
|
||||
if article.get("image_urls_json"):
|
||||
try:
|
||||
parsed_images = json.loads(article["image_urls_json"])
|
||||
if isinstance(parsed_images, list):
|
||||
extraction["images"] = [str(item) for item in parsed_images if item]
|
||||
except Exception:
|
||||
pass
|
||||
if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str):
|
||||
article["press_contact"] = extraction.get("press_contact")
|
||||
article["extraction"] = extraction
|
||||
feed = get_feed_by_id(int(article["feed_id"])) if article.get("feed_id") else None
|
||||
checklist = _legal_checklist(article, feed)
|
||||
|
|
@ -251,6 +279,19 @@ def admin_article_detail(request: Request, article_id: int):
|
|||
)
|
||||
|
||||
|
||||
@router.post("/admin/articles/{article_id}/legal-review")
def admin_article_legal_review(request: Request, article_id: int, approved: str = Form("0"), note: str = Form("")):
    """Store a manual legal-review decision submitted from the admin UI.

    Args:
        request: Incoming request; used to resolve the logged-in admin user.
        article_id: Id of the article the decision applies to.
        approved: Form field; only the exact string "1" counts as approval.
        note: Optional free-text note; an empty string is stored as None.

    Returns:
        A redirect to the login page when no admin user is resolved, a
        dashboard redirect carrying an error message when the article could
        not be updated, otherwise a 303 redirect to the article detail page.
    """
    admin = _admin_user(request)
    if not admin:
        return RedirectResponse(url="/admin/login", status_code=303)

    saved = set_article_legal_review(
        article_id,
        approved=(approved == "1"),
        note=note if note else None,
        actor=admin,
    )
    if saved:
        return RedirectResponse(url=f"/admin/articles/{article_id}", status_code=303)
    return _dashboard_redirect(msg=f"Artikel #{article_id} nicht gefunden", msg_type="error")
|
||||
|
||||
|
||||
@router.post("/admin/sources/create")
|
||||
def admin_create_source(
|
||||
request: Request,
|
||||
|
|
@ -344,6 +385,8 @@ def admin_transition_article(request: Request, article_id: int, target_status: s
|
|||
if article:
|
||||
current = article.get("status")
|
||||
if target_status in ALLOWED_TRANSITIONS.get(current, ()):
|
||||
if target_status == "published" and int(article.get("legal_checked", 0)) != 1:
|
||||
return _dashboard_redirect(msg=f"Publish blockiert fuer Artikel #{article_id}: Rechtsfreigabe fehlt", msg_type="error")
|
||||
update_article_status(article_id, target_status, actor=user, note=note or None)
|
||||
return _dashboard_redirect(msg=f"Artikel #{article_id}: {current} -> {target_status}")
|
||||
return _dashboard_redirect(msg=f"Ungueltiger Statuswechsel fuer Artikel #{article_id}", msg_type="error")
|
||||
|
|
|
|||
|
|
@ -81,6 +81,14 @@ def init_db() -> None:
|
|||
summary TEXT,
|
||||
content_raw TEXT,
|
||||
content_rewritten TEXT,
|
||||
image_urls_json TEXT,
|
||||
press_contact TEXT,
|
||||
source_name_snapshot TEXT,
|
||||
source_terms_url_snapshot TEXT,
|
||||
source_license_name_snapshot TEXT,
|
||||
legal_checked INTEGER NOT NULL DEFAULT 0,
|
||||
legal_checked_at TEXT,
|
||||
legal_note TEXT,
|
||||
word_count INTEGER DEFAULT 0,
|
||||
status TEXT NOT NULL DEFAULT 'new' CHECK (status IN ('new', 'rewrite', 'review', 'approved', 'published', 'error')),
|
||||
meta_json TEXT,
|
||||
|
|
@ -130,8 +138,20 @@ def init_db() -> None:
|
|||
existing_columns = {
|
||||
row["name"] for row in conn.execute("PRAGMA table_info(articles)").fetchall()
|
||||
}
|
||||
if "source_hash" not in existing_columns:
|
||||
conn.execute("ALTER TABLE articles ADD COLUMN source_hash TEXT")
|
||||
migration_columns = {
|
||||
"source_hash": "ALTER TABLE articles ADD COLUMN source_hash TEXT",
|
||||
"image_urls_json": "ALTER TABLE articles ADD COLUMN image_urls_json TEXT",
|
||||
"press_contact": "ALTER TABLE articles ADD COLUMN press_contact TEXT",
|
||||
"source_name_snapshot": "ALTER TABLE articles ADD COLUMN source_name_snapshot TEXT",
|
||||
"source_terms_url_snapshot": "ALTER TABLE articles ADD COLUMN source_terms_url_snapshot TEXT",
|
||||
"source_license_name_snapshot": "ALTER TABLE articles ADD COLUMN source_license_name_snapshot TEXT",
|
||||
"legal_checked": "ALTER TABLE articles ADD COLUMN legal_checked INTEGER NOT NULL DEFAULT 0",
|
||||
"legal_checked_at": "ALTER TABLE articles ADD COLUMN legal_checked_at TEXT",
|
||||
"legal_note": "ALTER TABLE articles ADD COLUMN legal_note TEXT",
|
||||
}
|
||||
for column, ddl in migration_columns.items():
|
||||
if column not in existing_columns:
|
||||
conn.execute(ddl)
|
||||
|
||||
|
||||
def rows_to_dicts(rows: list[sqlite3.Row]) -> list[dict[str, Any]]:
|
||||
|
|
|
|||
|
|
@ -201,6 +201,14 @@ def run_ingestion(feed_id: int | None = None) -> IngestionStats:
|
|||
summary=final_summary,
|
||||
content_raw=final_content_raw,
|
||||
content_rewritten=None,
|
||||
image_urls_json=json.dumps(extracted.images, ensure_ascii=False) if extracted.images else None,
|
||||
press_contact=extracted.press_contact,
|
||||
source_name_snapshot=feed.get("source_name"),
|
||||
source_terms_url_snapshot=feed.get("source_terms_url"),
|
||||
source_license_name_snapshot=feed.get("source_license_name"),
|
||||
legal_checked=False,
|
||||
legal_checked_at=None,
|
||||
legal_note=None,
|
||||
word_count=len((final_content_raw or "").split()),
|
||||
status="new",
|
||||
meta_json=json.dumps({"attribution": attribution, "extraction": extraction_meta}, ensure_ascii=False),
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ from .repositories import (
|
|||
list_feeds as repo_list_feeds,
|
||||
list_runs,
|
||||
list_sources as repo_list_sources,
|
||||
set_article_legal_review,
|
||||
update_article_status,
|
||||
upsert_article as repo_upsert_article,
|
||||
)
|
||||
|
|
@ -96,6 +97,14 @@ class ArticleUpsertRequest(BaseModel):
|
|||
summary: str | None = None
|
||||
content_raw: str | None = None
|
||||
content_rewritten: str | None = None
|
||||
image_urls_json: str | None = None
|
||||
press_contact: str | None = None
|
||||
source_name_snapshot: str | None = None
|
||||
source_terms_url_snapshot: str | None = None
|
||||
source_license_name_snapshot: str | None = None
|
||||
legal_checked: bool = False
|
||||
legal_checked_at: str | None = None
|
||||
legal_note: str | None = None
|
||||
word_count: int = 0
|
||||
status: str = Field(default="new", pattern="^(new|rewrite|review|approved|published|error)$")
|
||||
meta_json: str | None = None
|
||||
|
|
@ -115,6 +124,11 @@ class ArticleReviewRequest(BaseModel):
|
|||
note: str | None = None
|
||||
|
||||
|
||||
class ArticleLegalReviewRequest(BaseModel):
    """Request body for the article legal-review API endpoint."""

    # True records an approval, False records a missing/revoked approval.
    approved: bool
    # Optional free-text note stored alongside the decision.
    note: str | None = None
|
||||
|
||||
|
||||
ALLOWED_ARTICLE_TRANSITIONS: dict[str, set[str]] = {
|
||||
"new": {"review", "rewrite", "error"},
|
||||
"rewrite": {"review", "error"},
|
||||
|
|
@ -330,6 +344,14 @@ def api_upsert_article(payload: ArticleUpsertRequest, username: str = Depends(re
|
|||
summary=payload.summary,
|
||||
content_raw=payload.content_raw,
|
||||
content_rewritten=payload.content_rewritten,
|
||||
image_urls_json=payload.image_urls_json,
|
||||
press_contact=payload.press_contact,
|
||||
source_name_snapshot=payload.source_name_snapshot,
|
||||
source_terms_url_snapshot=payload.source_terms_url_snapshot,
|
||||
source_license_name_snapshot=payload.source_license_name_snapshot,
|
||||
legal_checked=payload.legal_checked,
|
||||
legal_checked_at=payload.legal_checked_at,
|
||||
legal_note=payload.legal_note,
|
||||
word_count=payload.word_count,
|
||||
status=payload.status,
|
||||
meta_json=payload.meta_json,
|
||||
|
|
@ -351,6 +373,11 @@ def api_article_transition(article_id: int, payload: ArticleTransitionRequest, u
|
|||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=f"Ungueltiger Statuswechsel: {current_status} -> {payload.target_status}",
|
||||
)
|
||||
if payload.target_status == "published" and int(article.get("legal_checked", 0)) != 1:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail="Publish gesperrt: Rechtscheck wurde noch nicht freigegeben",
|
||||
)
|
||||
|
||||
updated = update_article_status(article_id, payload.target_status, actor=username, note=payload.note)
|
||||
if not updated:
|
||||
|
|
@ -358,6 +385,22 @@ def api_article_transition(article_id: int, payload: ArticleTransitionRequest, u
|
|||
return {"ok": True, "id": article_id, "from_status": current_status, "to_status": payload.target_status}
|
||||
|
||||
|
||||
@app.post("/api/articles/{article_id}/legal-review")
def api_article_legal_review(article_id: int, payload: ArticleLegalReviewRequest, username: str = Depends(require_auth)) -> dict:
    """Record a legal-review decision for an article via the JSON API.

    Args:
        article_id: Id of the article the decision applies to.
        payload: Decision (``approved``) and optional ``note``.
        username: Authenticated caller, recorded as the actor of the review.

    Returns:
        ``{"ok": True, "id": ..., "legal_checked": ...}`` echoing the decision.

    Raises:
        HTTPException: 404 when the article does not exist.
    """
    # set_article_legal_review() looks the article up itself and returns False
    # when it is missing, so a separate get_article_by_id() pre-check would
    # only add a second database round-trip for the same 404 outcome.
    updated = set_article_legal_review(article_id, approved=payload.approved, note=payload.note, actor=username)
    if not updated:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Artikel nicht gefunden")
    return {
        "ok": True,
        "id": article_id,
        "legal_checked": payload.approved,
    }
|
||||
|
||||
|
||||
@app.post("/api/articles/{article_id}/review")
|
||||
def api_article_review(article_id: int, payload: ArticleReviewRequest, username: str = Depends(require_auth)) -> dict:
|
||||
article = get_article_by_id(article_id)
|
||||
|
|
|
|||
|
|
@ -48,6 +48,14 @@ class ArticleUpsert:
|
|||
summary: str | None
|
||||
content_raw: str | None
|
||||
content_rewritten: str | None
|
||||
image_urls_json: str | None
|
||||
press_contact: str | None
|
||||
source_name_snapshot: str | None
|
||||
source_terms_url_snapshot: str | None
|
||||
source_license_name_snapshot: str | None
|
||||
legal_checked: bool
|
||||
legal_checked_at: str | None
|
||||
legal_note: str | None
|
||||
word_count: int
|
||||
status: str
|
||||
meta_json: str | None
|
||||
|
|
@ -224,7 +232,10 @@ def get_article_by_id(article_id: int) -> dict[str, Any] | None:
|
|||
row = conn.execute(
|
||||
"""
|
||||
SELECT a.id, a.feed_id, a.source_article_id, a.source_hash, a.title, a.source_url, a.canonical_url, a.published_at, a.author,
|
||||
a.summary, a.content_raw, a.content_rewritten, a.word_count, a.status, a.meta_json, a.created_at, a.updated_at
|
||||
a.summary, a.content_raw, a.content_rewritten, a.image_urls_json, a.press_contact,
|
||||
a.source_name_snapshot, a.source_terms_url_snapshot, a.source_license_name_snapshot,
|
||||
a.legal_checked, a.legal_checked_at, a.legal_note,
|
||||
a.word_count, a.status, a.meta_json, a.created_at, a.updated_at
|
||||
FROM articles a
|
||||
WHERE a.id = ?
|
||||
""",
|
||||
|
|
@ -281,6 +292,31 @@ def update_article_status(
|
|||
return True
|
||||
|
||||
|
||||
def set_article_legal_review(article_id: int, approved: bool, note: str | None, actor: str | None = None) -> bool:
    """Persist a manual legal-review decision for one article.

    Writes ``legal_checked`` (1/0), ``legal_note`` and ``legal_checked_at`` and
    appends a ``legal_review`` event to the article's ``meta_json`` via
    ``_merge_review_event``.

    Args:
        article_id: Id of the article to update.
        approved: True to mark the article as legally cleared, False otherwise.
        note: Optional free-text note stored in ``legal_note`` and the event.
        actor: Name of the reviewer; recorded as "system" when falsy.

    Returns:
        True when an article row was updated, False when the article does not
        exist (or no longer exists by the time the UPDATE runs).
    """
    article = get_article_by_id(article_id)
    if not article:
        return False

    event = {
        # Event timestamp is generated in Python (UTC, ISO 8601); the column
        # legal_checked_at below is filled by SQLite's datetime('now').
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "event": "legal_review",
        "approved": approved,
        "actor": actor or "system",
        "note": note,
    }
    merged_meta = _merge_review_event(article.get("meta_json"), event)
    with get_conn() as conn:
        # legal_checked_at is refreshed on every decision, approvals and
        # rejections alike.
        # NOTE(review): assumes conn.execute() returns a sqlite3 cursor, as the
        # PRAGMA call in init_db suggests - confirm against get_conn().
        cursor = conn.execute(
            """
            UPDATE articles
            SET legal_checked = ?, legal_checked_at = datetime('now'), legal_note = ?, meta_json = ?
            WHERE id = ?
            """,
            (1 if approved else 0, note, merged_meta, article_id),
        )
        # Report what actually happened instead of assuming success: the row
        # may have been deleted between the lookup above and this UPDATE.
        updated = cursor.rowcount > 0
    return updated
|
||||
|
||||
|
||||
def _resolve_existing_article_id(payload: ArticleUpsert) -> int | None:
|
||||
with get_conn() as conn:
|
||||
# 1) strongest key: source_url
|
||||
|
|
@ -320,8 +356,11 @@ def upsert_article(payload: ArticleUpsert) -> int:
|
|||
"""
|
||||
INSERT INTO articles (
|
||||
feed_id, source_article_id, source_hash, title, source_url, canonical_url, published_at, author,
|
||||
summary, content_raw, content_rewritten, word_count, status, meta_json
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
summary, content_raw, content_rewritten, image_urls_json, press_contact,
|
||||
source_name_snapshot, source_terms_url_snapshot, source_license_name_snapshot,
|
||||
legal_checked, legal_checked_at, legal_note,
|
||||
word_count, status, meta_json
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
payload.feed_id,
|
||||
|
|
@ -335,6 +374,14 @@ def upsert_article(payload: ArticleUpsert) -> int:
|
|||
payload.summary,
|
||||
payload.content_raw,
|
||||
payload.content_rewritten,
|
||||
payload.image_urls_json,
|
||||
payload.press_contact,
|
||||
payload.source_name_snapshot,
|
||||
payload.source_terms_url_snapshot,
|
||||
payload.source_license_name_snapshot,
|
||||
1 if payload.legal_checked else 0,
|
||||
payload.legal_checked_at,
|
||||
payload.legal_note,
|
||||
payload.word_count,
|
||||
payload.status,
|
||||
payload.meta_json,
|
||||
|
|
@ -356,6 +403,14 @@ def upsert_article(payload: ArticleUpsert) -> int:
|
|||
summary = ?,
|
||||
content_raw = ?,
|
||||
content_rewritten = ?,
|
||||
image_urls_json = ?,
|
||||
press_contact = ?,
|
||||
source_name_snapshot = ?,
|
||||
source_terms_url_snapshot = ?,
|
||||
source_license_name_snapshot = ?,
|
||||
legal_checked = ?,
|
||||
legal_checked_at = ?,
|
||||
legal_note = ?,
|
||||
word_count = ?,
|
||||
status = ?,
|
||||
meta_json = ?
|
||||
|
|
@ -373,6 +428,14 @@ def upsert_article(payload: ArticleUpsert) -> int:
|
|||
payload.summary,
|
||||
payload.content_raw,
|
||||
payload.content_rewritten,
|
||||
payload.image_urls_json,
|
||||
payload.press_contact,
|
||||
payload.source_name_snapshot,
|
||||
payload.source_terms_url_snapshot,
|
||||
payload.source_license_name_snapshot,
|
||||
1 if payload.legal_checked else 0,
|
||||
payload.legal_checked_at,
|
||||
payload.legal_note,
|
||||
payload.word_count,
|
||||
payload.status,
|
||||
payload.meta_json,
|
||||
|
|
@ -392,7 +455,9 @@ def list_articles(limit: int = 100, status_filter: str | None = None) -> list[di
|
|||
rows = conn.execute(
|
||||
"""
|
||||
SELECT a.id, a.feed_id, a.source_article_id, a.source_hash, a.title, a.source_url, a.canonical_url, a.published_at, a.author,
|
||||
a.summary, a.content_raw, a.word_count, a.status, a.meta_json, a.created_at, a.updated_at, f.name AS feed_name
|
||||
a.summary, a.content_raw, a.word_count, a.status, a.meta_json, a.created_at, a.updated_at, f.name AS feed_name,
|
||||
a.image_urls_json, a.press_contact, a.source_name_snapshot, a.source_terms_url_snapshot,
|
||||
a.source_license_name_snapshot, a.legal_checked, a.legal_checked_at, a.legal_note
|
||||
FROM articles a
|
||||
LEFT JOIN feeds f ON f.id = a.feed_id
|
||||
WHERE a.status = ?
|
||||
|
|
@ -405,7 +470,9 @@ def list_articles(limit: int = 100, status_filter: str | None = None) -> list[di
|
|||
rows = conn.execute(
|
||||
"""
|
||||
SELECT a.id, a.feed_id, a.source_article_id, a.source_hash, a.title, a.source_url, a.canonical_url, a.published_at, a.author,
|
||||
a.summary, a.content_raw, a.word_count, a.status, a.meta_json, a.created_at, a.updated_at, f.name AS feed_name
|
||||
a.summary, a.content_raw, a.word_count, a.status, a.meta_json, a.created_at, a.updated_at, f.name AS feed_name,
|
||||
a.image_urls_json, a.press_contact, a.source_name_snapshot, a.source_terms_url_snapshot,
|
||||
a.source_license_name_snapshot, a.legal_checked, a.legal_checked_at, a.legal_note
|
||||
FROM articles a
|
||||
LEFT JOIN feeds f ON f.id = a.feed_id
|
||||
ORDER BY a.id DESC
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue