feat(images): add thumbnail gallery with select/exclude workflow

This commit is contained in:
Oliver 2026-02-18 10:11:22 +01:00
parent 6691db8051
commit efaf132936
7 changed files with 282 additions and 24 deletions

View file

@ -2,6 +2,7 @@ from __future__ import annotations
import json import json
from pathlib import Path from pathlib import Path
import re
from urllib.parse import urlencode from urllib.parse import urlencode
from fastapi import APIRouter, Form, Request from fastapi import APIRouter, Form, Request
@ -24,6 +25,7 @@ from .repositories import (
list_feeds, list_feeds,
list_runs, list_runs,
list_sources, list_sources,
set_article_image_decision,
set_article_legal_review, set_article_legal_review,
update_article_status, update_article_status,
) )
@ -83,6 +85,63 @@ def _parse_meta_json(raw: str | None) -> dict:
return {} return {}
def _read_article_images(article: dict, extraction: dict) -> list[str]:
    """Return the article's image URLs, de-duplicated in first-seen order.

    The JSON list stored under ``article["image_urls_json"]`` takes
    precedence; ``extraction["images"]`` is consulted only when the stored
    list yields no usable URL.

    Args:
        article: Article mapping; only ``image_urls_json`` is read.
        extraction: Extraction metadata mapping; only ``images`` is read.

    Returns:
        URLs as strings with surrounding whitespace removed. Falsy and
        whitespace-only items are dropped. Empty when neither source holds
        a list with usable entries.
    """

    def _clean(items: list) -> list[str]:
        # Strip so each URL compares equal to the stripped value that the
        # image-decision handler stores as selected/excluded; otherwise a
        # URL with stray whitespace could never be shown as selected.
        stripped = (str(item).strip() for item in items if item)
        return [url for url in stripped if url]

    images: list[str] = []
    raw = article.get("image_urls_json")
    if raw:
        try:
            parsed = json.loads(raw)
        except (TypeError, ValueError, RecursionError):
            # Best effort: unreadable stored JSON falls through to the
            # extraction images instead of failing the caller.
            parsed = None
        if isinstance(parsed, list):
            images = _clean(parsed)

    if not images:
        fallback = extraction.get("images")
        if isinstance(fallback, list):
            images = _clean(fallback)

    # dict.fromkeys keeps the first occurrence of every key, in order.
    return list(dict.fromkeys(images))
# Lower-case URL fragments that flag an image as probably irrelevant.
# They are plain substrings (no regex syntax), so a simple ``in`` test suffices.
_IRRELEVANT_IMAGE_MARKERS: tuple[str, ...] = (
    "logo",
    "icon",
    "sprite",
    "avatar",
    "favicon",
    "/ads/",
    "tracking",
    "pixel",
    "banner",
)


def _is_probably_irrelevant_image(url: str) -> bool:
    """Return True when *url* contains one of the known marker fragments.

    The comparison is case-insensitive and purely substring based, so the
    result is only a hint: a marker embedded in a longer word also matches.

    Args:
        url: Image URL to inspect.

    Returns:
        True if any marker occurs anywhere in the lower-cased URL.
    """
    lowered = url.lower()
    return any(marker in lowered for marker in _IRRELEVANT_IMAGE_MARKERS)
def _build_image_entries(article: dict, extraction: dict, meta: dict) -> list[dict[str, object]]:
    """Pair every article image URL with its stored review state.

    Args:
        article: Article mapping passed on to ``_read_article_images``.
        extraction: Extraction metadata passed on to ``_read_article_images``.
        meta: Parsed article metadata; ``meta["image_review"]`` is read when
            it is a dict, otherwise the review state is treated as empty.

    Returns:
        One dict per image, in image order, with the keys ``url``,
        ``is_selected``, ``is_excluded`` and ``is_irrelevant_hint``.
    """
    urls = _read_article_images(article, extraction)

    review = meta.get("image_review")
    if not isinstance(review, dict):
        review = {}

    # Anything other than a string counts as "no image selected".
    chosen = review.get("selected_url")
    if not isinstance(chosen, str):
        chosen = None

    # Anything other than a list counts as "nothing excluded".
    hidden_raw = review.get("excluded_urls")
    if isinstance(hidden_raw, list):
        hidden = {str(entry) for entry in hidden_raw if entry}
    else:
        hidden = set()

    return [
        {
            "url": candidate,
            "is_selected": chosen == candidate,
            "is_excluded": candidate in hidden,
            "is_irrelevant_hint": _is_probably_irrelevant_image(candidate),
        }
        for candidate in urls
    ]
def _legal_checklist(article: dict, feed: dict | None) -> list[dict[str, str]]: def _legal_checklist(article: dict, feed: dict | None) -> list[dict[str, str]]:
meta = article.get("meta", {}) meta = article.get("meta", {})
extraction = meta.get("extraction") if isinstance(meta.get("extraction"), dict) else {} extraction = meta.get("extraction") if isinstance(meta.get("extraction"), dict) else {}
@ -138,6 +197,15 @@ def _legal_checklist(article: dict, feed: dict | None) -> list[dict[str, str]]:
"value": article.get("legal_checked_at") or "-", "value": article.get("legal_checked_at") or "-",
} }
) )
image_review = meta.get("image_review") if isinstance(meta.get("image_review"), dict) else {}
selected_image = image_review.get("selected_url") if isinstance(image_review.get("selected_url"), str) else None
checks.append(
{
"label": "Hauptbild ausgewählt",
"status": "ok" if selected_image else "missing",
"value": selected_image or "-",
}
)
return checks return checks
@ -202,18 +270,12 @@ def admin_dashboard(request: Request):
for article in articles: for article in articles:
meta = _parse_meta_json(article.get("meta_json")) meta = _parse_meta_json(article.get("meta_json"))
extraction = meta.get("extraction") if isinstance(meta.get("extraction"), dict) else {} extraction = meta.get("extraction") if isinstance(meta.get("extraction"), dict) else {}
images = [] images = _read_article_images(article, extraction)
if article.get("image_urls_json"):
try:
parsed_images = json.loads(article["image_urls_json"])
if isinstance(parsed_images, list):
images = [str(item) for item in parsed_images if item]
except Exception:
images = []
if not images and isinstance(extraction.get("images"), list):
images = extraction.get("images")
article["meta"] = meta article["meta"] = meta
article["extracted_images"] = images article["extracted_images"] = images
article["image_entries"] = _build_image_entries(article, extraction, meta)
image_review = meta.get("image_review") if isinstance(meta.get("image_review"), dict) else {}
article["selected_image_url"] = image_review.get("selected_url") if isinstance(image_review.get("selected_url"), str) else None
if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str): if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str):
article["press_contact"] = extraction.get("press_contact") article["press_contact"] = extraction.get("press_contact")
article["extraction_error"] = extraction.get("extraction_error") if isinstance(extraction.get("extraction_error"), str) else None article["extraction_error"] = extraction.get("extraction_error") if isinstance(extraction.get("extraction_error"), str) else None
@ -254,16 +316,13 @@ def admin_article_detail(request: Request, article_id: int):
meta = _parse_meta_json(article.get("meta_json")) meta = _parse_meta_json(article.get("meta_json"))
article["meta"] = meta article["meta"] = meta
extraction = meta.get("extraction") if isinstance(meta.get("extraction"), dict) else {} extraction = meta.get("extraction") if isinstance(meta.get("extraction"), dict) else {}
if article.get("image_urls_json"): extraction["images"] = _read_article_images(article, extraction)
try:
parsed_images = json.loads(article["image_urls_json"])
if isinstance(parsed_images, list):
extraction["images"] = [str(item) for item in parsed_images if item]
except Exception:
pass
if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str): if not article.get("press_contact") and isinstance(extraction.get("press_contact"), str):
article["press_contact"] = extraction.get("press_contact") article["press_contact"] = extraction.get("press_contact")
article["extraction"] = extraction article["extraction"] = extraction
article["image_entries"] = _build_image_entries(article, extraction, meta)
image_review = meta.get("image_review") if isinstance(meta.get("image_review"), dict) else {}
article["selected_image_url"] = image_review.get("selected_url") if isinstance(image_review.get("selected_url"), str) else None
article["days_old"] = article_age_days(article.get("published_at")) article["days_old"] = article_age_days(article.get("published_at"))
article["relevance"] = article_relevance(article.get("published_at")) article["relevance"] = article_relevance(article.get("published_at"))
feed = get_feed_by_id(int(article["feed_id"])) if article.get("feed_id") else None feed = get_feed_by_id(int(article["feed_id"])) if article.get("feed_id") else None
@ -284,6 +343,23 @@ def admin_article_detail(request: Request, article_id: int):
) )
# Form endpoint behind the gallery buttons on the article detail page.
# Both form fields are forwarded as-is to set_article_image_decision.
# Unauthenticated requests are redirected to the login page; a failed
# decision goes back to the dashboard with an error message; success
# returns to the article detail page.
@router.post("/admin/articles/{article_id}/images/decision")
def admin_article_image_decision(
    request: Request,
    article_id: int,
    image_url: str = Form(...),
    action: str = Form(...),
):
    admin = _admin_user(request)
    if not admin:
        return RedirectResponse(url="/admin/login", status_code=303)

    saved = set_article_image_decision(
        article_id=article_id,
        image_url=image_url,
        action=action,
        actor=admin,
    )
    if saved:
        return RedirectResponse(url=f"/admin/articles/{article_id}", status_code=303)

    return _dashboard_redirect(
        msg=f"Bildaktion fehlgeschlagen fuer Artikel #{article_id}",
        msg_type="error",
    )
@router.post("/admin/articles/{article_id}/legal-review") @router.post("/admin/articles/{article_id}/legal-review")
def admin_article_legal_review(request: Request, article_id: int, approved: str = Form("0"), note: str = Form("")): def admin_article_legal_review(request: Request, article_id: int, approved: str = Form("0"), note: str = Form("")):
user = _admin_user(request) user = _admin_user(request)

View file

@ -336,6 +336,17 @@ def api_export_articles(
articles = repo_list_articles(limit=500, status_filter=status_filter) articles = repo_list_articles(limit=500, status_filter=status_filter)
rows = [] rows = []
for article in articles: for article in articles:
meta: dict = {}
if article.get("meta_json"):
try:
parsed = json.loads(article["meta_json"])
if isinstance(parsed, dict):
meta = parsed
except Exception:
meta = {}
image_review = meta.get("image_review") if isinstance(meta.get("image_review"), dict) else {}
selected_image_url = image_review.get("selected_url") if isinstance(image_review.get("selected_url"), str) else None
days_old = article_age_days(article.get("published_at")) days_old = article_age_days(article.get("published_at"))
rows.append( rows.append(
{ {
@ -353,6 +364,7 @@ def api_export_articles(
"source_terms_url_snapshot": article.get("source_terms_url_snapshot"), "source_terms_url_snapshot": article.get("source_terms_url_snapshot"),
"press_contact": article.get("press_contact"), "press_contact": article.get("press_contact"),
"image_urls_json": article.get("image_urls_json"), "image_urls_json": article.get("image_urls_json"),
"selected_image_url": selected_image_url,
"legal_checked": bool(int(article.get("legal_checked", 0))), "legal_checked": bool(int(article.get("legal_checked", 0))),
"legal_checked_at": article.get("legal_checked_at"), "legal_checked_at": article.get("legal_checked_at"),
"legal_note": article.get("legal_note"), "legal_note": article.get("legal_note"),
@ -377,6 +389,7 @@ def api_export_articles(
"source_terms_url_snapshot", "source_terms_url_snapshot",
"press_contact", "press_contact",
"image_urls_json", "image_urls_json",
"selected_image_url",
"legal_checked", "legal_checked",
"legal_checked_at", "legal_checked_at",
"legal_note", "legal_note",

View file

@ -262,6 +262,16 @@ def _merge_review_event(meta_json: str | None, event: dict[str, Any]) -> str:
return json.dumps(meta, ensure_ascii=False) return json.dumps(meta, ensure_ascii=False)
def _load_meta(meta_json: str | None) -> dict[str, Any]:
if not meta_json:
return {}
try:
parsed = json.loads(meta_json)
return parsed if isinstance(parsed, dict) else {}
except Exception:
return {}
def update_article_status( def update_article_status(
article_id: int, article_id: int,
new_status: str, new_status: str,
@ -317,6 +327,54 @@ def set_article_legal_review(article_id: int, approved: bool, note: str | None,
return True return True
def set_article_image_decision(article_id: int, image_url: str, action: str, actor: str | None = None) -> bool:
    """Record a select / exclude / restore decision for one article image.

    The decision is stored under ``meta["image_review"]`` inside the
    article's ``meta_json`` column.

    Args:
        article_id: Id of the article to update.
        image_url: URL the decision applies to; surrounding whitespace is
            stripped before it is compared or stored.
        action: ``"select"`` makes the URL the selected image and removes it
            from the excluded set; ``"exclude"`` adds it to the excluded set
            and clears the selection if it pointed at this URL;
            ``"restore"`` only removes it from the excluded set.
        actor: Name recorded as ``updated_by``; ``"system"`` when falsy.

    Returns:
        False when the article is not found, the URL is empty after
        stripping, or the action is unknown; True after the update ran.
    """
    article = get_article_by_id(article_id)
    if not article:
        return False

    url = (image_url or "").strip()
    if not url or action not in {"select", "exclude", "restore"}:
        return False

    meta = _load_meta(article.get("meta_json"))
    review = meta.get("image_review")
    if not isinstance(review, dict):
        review = {}

    stored_excluded = review.get("excluded_urls")
    if isinstance(stored_excluded, list):
        hidden = {str(entry) for entry in stored_excluded if entry}
    else:
        hidden = set()

    chosen = review.get("selected_url")
    if not isinstance(chosen, str):
        chosen = None

    if action == "exclude":
        hidden.add(url)
        if chosen == url:
            chosen = None
    else:
        # "select" and "restore" both make the image visible again.
        hidden.discard(url)
        if action == "select":
            chosen = url

    review.update(
        selected_url=chosen,
        excluded_urls=sorted(hidden),
        updated_at=datetime.now(timezone.utc).isoformat(),
        updated_by=actor or "system",
    )
    meta["image_review"] = review
    payload = json.dumps(meta, ensure_ascii=False)

    # NOTE(review): meta_json is read above and written here as separate
    # steps - confirm that concurrent writers of meta_json cannot be lost.
    with get_conn() as conn:
        conn.execute(
            "UPDATE articles SET meta_json = ? WHERE id = ?",
            (payload, article_id),
        )
    return True
def _resolve_existing_article_id(payload: ArticleUpsert) -> int | None: def _resolve_existing_article_id(payload: ArticleUpsert) -> int | None:
with get_conn() as conn: with get_conn() as conn:
# 1) strongest key: source_url # 1) strongest key: source_url

View file

@ -179,6 +179,60 @@ button.secondary {
background: #f8fafc; background: #f8fafc;
} }
/* Small square preview of the selected main image (dashboard table). */
.thumb {
width: 72px;
height: 72px;
object-fit: cover;
border-radius: 8px;
border: 1px solid #cbd5e1;
margin-top: 6px;
}
/* Gallery container: as many columns of at least 180px as fit the row. */
.image-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
gap: 10px;
}
/* One gallery tile per image. */
.image-card {
border: 1px solid #e2e8f0;
border-radius: 8px;
padding: 8px;
background: #fff;
}
/* Fixed-height, cropped preview inside a tile. */
.image-card img {
width: 100%;
height: 120px;
object-fit: cover;
border-radius: 6px;
border: 1px solid #e2e8f0;
background: #f8fafc;
}
/* Wrapping row of status badges under the preview. */
.image-meta {
margin-top: 6px;
display: flex;
gap: 6px;
flex-wrap: wrap;
}
/* Wrapping row of the per-image action forms. */
.image-actions {
margin-top: 8px;
display: flex;
gap: 6px;
flex-wrap: wrap;
}
/* Modifier for the tile of the selected image: green outline. */
.image-selected {
border-color: #10b981;
box-shadow: 0 0 0 1px rgba(16, 185, 129, 0.25);
}
/* Modifier for tiles of excluded images: dimmed. */
.image-excluded {
opacity: 0.65;
}
@media (max-width: 920px) { @media (max-width: 920px) {
.stats { .stats {
grid-template-columns: repeat(2, minmax(0, 1fr)); grid-template-columns: repeat(2, minmax(0, 1fr));

View file

@ -67,13 +67,46 @@
<section class="card"> <section class="card">
<h2>Extrahierte Daten</h2> <h2>Extrahierte Daten</h2>
<p><strong>Bilder:</strong> {{ article.extraction.images|length if article.extraction.images else 0 }}</p> <p><strong>Bilder:</strong> {{ article.image_entries|length if article.image_entries else 0 }}</p>
{% if article.extraction.images %} {% if article.selected_image_url %}
<ul> <p><strong>Ausgewähltes Hauptbild:</strong> <a href="{{ article.selected_image_url }}" target="_blank" rel="noopener">{{ article.selected_image_url }}</a></p>
{% for img in article.extraction.images %} {% endif %}
<li><a href="{{ img }}" target="_blank" rel="noopener">{{ img }}</a></li> {% if article.image_entries %}
<div class="image-grid">
{% for image in article.image_entries %}
<article class="image-card {{ 'image-selected' if image.is_selected else '' }} {{ 'image-excluded' if image.is_excluded else '' }}">
<a href="{{ image.url }}" target="_blank" rel="noopener">
<img src="{{ image.url }}" alt="Artikelbild" loading="lazy" />
</a>
<div class="image-meta">
{% if image.is_selected %}<span class="badge ok">Ausgewählt</span>{% endif %}
{% if image.is_excluded %}<span class="badge bad">Ausgeblendet</span>{% endif %}
{% if image.is_irrelevant_hint %}<span class="badge">evtl. irrelevant</span>{% endif %}
</div>
<div class="image-actions">
<form method="post" action="/admin/articles/{{ article.id }}/images/decision">
<input type="hidden" name="image_url" value="{{ image.url }}" />
<input type="hidden" name="action" value="select" />
<button type="submit">Als Hauptbild</button>
</form>
{% if not image.is_excluded %}
<form method="post" action="/admin/articles/{{ article.id }}/images/decision">
<input type="hidden" name="image_url" value="{{ image.url }}" />
<input type="hidden" name="action" value="exclude" />
<button type="submit" class="secondary">Ausblenden</button>
</form>
{% else %}
<form method="post" action="/admin/articles/{{ article.id }}/images/decision">
<input type="hidden" name="image_url" value="{{ image.url }}" />
<input type="hidden" name="action" value="restore" />
<button type="submit" class="secondary">Einblenden</button>
</form>
{% endif %}
</div>
<div class="subtle"><a href="{{ image.url }}" target="_blank" rel="noopener">{{ image.url }}</a></div>
</article>
{% endfor %} {% endfor %}
</ul> </div>
{% endif %} {% endif %}
{% if article.press_contact or article.extraction.press_contact %} {% if article.press_contact or article.extraction.press_contact %}
<p><strong>Pressekontakt</strong></p> <p><strong>Pressekontakt</strong></p>

View file

@ -155,6 +155,10 @@
<td><span class="badge">{{ a.status }}</span></td> <td><span class="badge">{{ a.status }}</span></td>
<td> <td>
<div class="subtle">Legal: {{ "OK" if a.legal_checked else "offen" }}</div> <div class="subtle">Legal: {{ "OK" if a.legal_checked else "offen" }}</div>
{% if a.selected_image_url %}
<div class="subtle">Hauptbild gesetzt</div>
<a href="{{ a.selected_image_url }}" target="_blank" rel="noopener"><img src="{{ a.selected_image_url }}" alt="Hauptbild" class="thumb" loading="lazy" /></a>
{% endif %}
{% if a.summary %} {% if a.summary %}
<div><strong>Summary:</strong> {{ a.summary }}</div> <div><strong>Summary:</strong> {{ a.summary }}</div>
{% endif %} {% endif %}

View file

@ -8,7 +8,15 @@ from fastapi.testclient import TestClient
from backend.app import config as config_module from backend.app import config as config_module
from backend.app.db import init_db from backend.app.db import init_db
from backend.app.main import app from backend.app.main import app
from backend.app.repositories import ArticleUpsert, FeedCreate, SourceCreate, create_feed, create_source, upsert_article from backend.app.repositories import (
ArticleUpsert,
FeedCreate,
SourceCreate,
create_feed,
create_source,
get_article_by_id,
upsert_article,
)
class TestAdminUi(unittest.TestCase): class TestAdminUi(unittest.TestCase):
@ -119,6 +127,18 @@ class TestAdminUi(unittest.TestCase):
self.assertIn("Artikel-Detail", res.text) self.assertIn("Artikel-Detail", res.text)
self.assertIn("Rechts-Checkliste", res.text) self.assertIn("Rechts-Checkliste", res.text)
decision = self.client.post(
f"/admin/articles/{article_id}/images/decision",
data={"image_url": "https://example.org/img.jpg", "action": "select"},
follow_redirects=True,
)
self.assertEqual(decision.status_code, 200)
self.assertIn("Ausgewähltes Hauptbild", decision.text)
article = get_article_by_id(article_id)
self.assertIsNotNone(article)
self.assertIn("selected_url", article.get("meta_json", ""))
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()