feat(admin): add feed/source management, rewrite editor, reopen flow, and WP block output

This commit is contained in:
Oliver 2026-02-21 14:03:49 +01:00
parent 50f737f434
commit 88b2ee1d01
No known key found for this signature in database
9 changed files with 555 additions and 70 deletions

View file

@ -23,7 +23,11 @@ from .relevance import article_age_days, article_relevance
from .rewrite import rewrite_article_text
from .repositories import (
FeedCreate,
FeedUpdate,
SourceCreate,
SourceUpdate,
delete_feed,
delete_source,
create_feed,
create_source,
get_article_by_id,
@ -36,6 +40,8 @@ from .repositories import (
set_article_image_decision,
set_article_legal_review,
upsert_article,
update_feed,
update_source,
update_article_status,
ArticleUpsert,
)
@ -48,10 +54,11 @@ ALLOWED_TRANSITIONS: dict[str, tuple[str, ...]] = {
"new": ("rewrite", "close"),
"rewrite": ("publish", "close"),
"publish": ("published", "close"),
"published": ("close",),
"published": ("rewrite", "close"),
"close": ("rewrite",),
}
IMAGE_PROXY_USER_AGENT = "rss-news-admin/1.0"
_UNSET = object()
def _admin_user(request: Request) -> str | None:
@ -364,6 +371,51 @@ def _run_connectivity_check(target: dict[str, str]) -> dict[str, object]:
row["duration_ms"] = int((time.perf_counter() - started) * 1000)
def _upsert_article_from_existing(
    article: dict,
    *,
    content_rewritten: str | None = None,
    status: str | None = None,
    wp_post_id: int | None | object = _UNSET,
    wp_post_url: str | None | object = _UNSET,
    publish_attempts: int | object = _UNSET,
    publish_last_error: str | None | object = _UNSET,
    published_to_wp_at: str | None | object = _UNSET,
) -> None:
    """Pass an existing article back through ``upsert_article`` with selected overrides.

    Every ``ArticleUpsert`` field is copied from ``article`` unless an override
    is supplied.

    Args:
        article: Mapping holding the article's current field values.
        content_rewritten: Replacement rewritten text; ``None`` keeps the
            value found in ``article``.
        status: Replacement status; ``None`` keeps the value found in ``article``.
        wp_post_id, wp_post_url, publish_attempts, publish_last_error,
        published_to_wp_at: Replacement publish fields. They default to the
            ``_UNSET`` sentinel (meaning "keep the value from ``article``") so
            that callers can explicitly pass ``None`` to clear a field.

    ``word_count`` is always recomputed from the rewritten text that is written.
    """

    def _pick(key: str, override: object) -> object:
        # _UNSET means "no override given"; None is a legitimate override value.
        return article.get(key) if override is _UNSET else override

    rewritten = article.get("content_rewritten") if content_rewritten is None else content_rewritten
    upsert_article(
        ArticleUpsert(
            feed_id=article.get("feed_id"),
            source_article_id=article.get("source_article_id"),
            source_hash=article.get("source_hash"),
            title=article.get("title"),
            source_url=article.get("source_url"),
            canonical_url=article.get("canonical_url"),
            published_at=article.get("published_at"),
            author=article.get("author"),
            summary=article.get("summary"),
            content_raw=article.get("content_raw"),
            content_rewritten=rewritten,
            image_urls_json=article.get("image_urls_json"),
            press_contact=article.get("press_contact"),
            source_name_snapshot=article.get("source_name_snapshot"),
            source_terms_url_snapshot=article.get("source_terms_url_snapshot"),
            source_license_name_snapshot=article.get("source_license_name_snapshot"),
            # dict.get(key, 0) only falls back when the key is missing; a key
            # that is present with value None would reach int() and raise
            # TypeError. "or 0" covers both the missing and the None case.
            legal_checked=bool(int(article.get("legal_checked") or 0)),
            legal_checked_at=article.get("legal_checked_at"),
            legal_note=article.get("legal_note"),
            wp_post_id=_pick("wp_post_id", wp_post_id),
            wp_post_url=_pick("wp_post_url", wp_post_url),
            publish_attempts=(
                int(article.get("publish_attempts") or 0)
                if publish_attempts is _UNSET
                else publish_attempts
            ),
            publish_last_error=_pick("publish_last_error", publish_last_error),
            published_to_wp_at=_pick("published_to_wp_at", published_to_wp_at),
            word_count=len(str(rewritten or "").split()),
            status=article.get("status") if status is None else status,
            meta_json=article.get("meta_json"),
        )
    )
@router.get("/admin", response_class=HTMLResponse)
def admin_index(request: Request):
user = _admin_user(request)
@ -427,7 +479,7 @@ def admin_dashboard(request: Request):
articles = list_articles(limit=100, status_filter=internal_filter)
else:
status_filter = ""
articles = list_articles(limit=100)
articles = [a for a in list_articles(limit=250) if internal_to_ui_status(a.get("status")) != "close"][:100]
for article in articles:
meta = _parse_meta_json(article.get("meta_json"))
extraction = meta.get("extraction") if isinstance(meta.get("extraction"), dict) else {}
@ -659,6 +711,54 @@ def admin_create_source(
return _dashboard_redirect(msg="Quelle gespeichert")
@router.post("/admin/sources/{source_id}/update")
def admin_update_source(
    request: Request,
    source_id: int,
    name: str = Form(...),
    base_url: str = Form(""),
    terms_url: str = Form(""),
    license_name: str = Form(""),
    risk_level: str = Form("yellow"),
    is_enabled: str = Form("1"),
    notes: str = Form(""),
    last_reviewed_at: str = Form(""),
):
    """Apply the submitted form values to source ``source_id``.

    Redirects to the login page when ``_admin_user`` yields no user. Empty
    optional form fields are stored as ``None``; ``is_enabled`` is true only
    for the literal form value ``"1"``. The outcome (updated, not found, or
    failed with the exception text) is reported via a dashboard redirect.
    """
    if not _admin_user(request):
        return RedirectResponse(url="/admin/login", status_code=303)

    try:
        changes = SourceUpdate(
            name=name,
            base_url=base_url or None,
            terms_url=terms_url or None,
            license_name=license_name or None,
            risk_level=risk_level,
            is_enabled=is_enabled == "1",
            notes=notes or None,
            last_reviewed_at=last_reviewed_at or None,
        )
        updated = update_source(source_id, changes)
    except Exception as exc:
        return _dashboard_redirect(msg=f"Quelle #{source_id} Update fehlgeschlagen: {exc}", msg_type="error")

    if updated:
        return _dashboard_redirect(msg=f"Quelle #{source_id} aktualisiert")
    return _dashboard_redirect(msg=f"Quelle #{source_id} nicht gefunden", msg_type="error")
@router.post("/admin/sources/{source_id}/delete")
def admin_delete_source(request: Request, source_id: int):
    """Delete source ``source_id`` and report the outcome on the dashboard.

    Redirects to the login page when ``_admin_user`` yields no user. A falsy
    result from ``delete_source`` is reported as "not found".
    """
    user = _admin_user(request)
    if not user:
        return RedirectResponse(url="/admin/login", status_code=303)
    try:
        ok = delete_source(source_id)
    except Exception as exc:
        # Same handling as the source update handler: surface repository
        # errors as a dashboard message instead of an unhandled server error.
        return _dashboard_redirect(msg=f"Quelle #{source_id} Löschen fehlgeschlagen: {exc}", msg_type="error")
    if not ok:
        return _dashboard_redirect(msg=f"Quelle #{source_id} nicht gefunden", msg_type="error")
    return _dashboard_redirect(msg=f"Quelle #{source_id} gelöscht")
@router.post("/admin/feeds/create")
def admin_create_feed(
request: Request,
@ -684,6 +784,46 @@ def admin_create_feed(
return _dashboard_redirect(msg="Feed gespeichert")
@router.post("/admin/feeds/{feed_id}/update")
def admin_update_feed(
    request: Request,
    feed_id: int,
    name: str = Form(...),
    url: str = Form(...),
    source_id: str = Form(""),
    is_enabled: str = Form("1"),
):
    """Apply the submitted form values to feed ``feed_id``.

    Redirects to the login page when ``_admin_user`` yields no user.
    ``source_id`` is converted with ``_to_optional_int`` and ``is_enabled`` is
    true only for the literal form value ``"1"``. The outcome (updated, not
    found, or failed with the exception text) is reported via a dashboard
    redirect.
    """
    if not _admin_user(request):
        return RedirectResponse(url="/admin/login", status_code=303)

    try:
        changes = FeedUpdate(
            name=name,
            url=url,
            source_id=_to_optional_int(source_id),
            is_enabled=is_enabled == "1",
        )
        updated = update_feed(feed_id, changes)
    except Exception as exc:
        return _dashboard_redirect(msg=f"Feed #{feed_id} Update fehlgeschlagen: {exc}", msg_type="error")

    if updated:
        return _dashboard_redirect(msg=f"Feed #{feed_id} aktualisiert")
    return _dashboard_redirect(msg=f"Feed #{feed_id} nicht gefunden", msg_type="error")
@router.post("/admin/feeds/{feed_id}/delete")
def admin_delete_feed(request: Request, feed_id: int):
    """Delete feed ``feed_id`` and report the outcome on the dashboard.

    Redirects to the login page when ``_admin_user`` yields no user. A falsy
    result from ``delete_feed`` is reported as "not found".
    """
    user = _admin_user(request)
    if not user:
        return RedirectResponse(url="/admin/login", status_code=303)
    try:
        ok = delete_feed(feed_id)
    except Exception as exc:
        # Same handling as the feed update handler: surface repository errors
        # as a dashboard message instead of an unhandled server error.
        return _dashboard_redirect(msg=f"Feed #{feed_id} Löschen fehlgeschlagen: {exc}", msg_type="error")
    if not ok:
        return _dashboard_redirect(msg=f"Feed #{feed_id} nicht gefunden", msg_type="error")
    return _dashboard_redirect(msg=f"Feed #{feed_id} gelöscht")
@router.post("/admin/ingestion/run")
def admin_run_ingestion(request: Request, feed_id: str = Form("")):
user = _admin_user(request)
@ -719,41 +859,51 @@ def admin_rewrite_run(request: Request, article_id: int):
rewritten = rewrite_article_text(article)
except Exception as exc:
return _dashboard_redirect(msg=f"Rewrite fehlgeschlagen fuer Artikel #{article_id}: {exc}", msg_type="error")
upsert_article(
ArticleUpsert(
feed_id=article.get("feed_id"),
source_article_id=article.get("source_article_id"),
source_hash=article.get("source_hash"),
title=article.get("title"),
source_url=article.get("source_url"),
canonical_url=article.get("canonical_url"),
published_at=article.get("published_at"),
author=article.get("author"),
summary=article.get("summary"),
content_raw=article.get("content_raw"),
content_rewritten=rewritten,
image_urls_json=article.get("image_urls_json"),
press_contact=article.get("press_contact"),
source_name_snapshot=article.get("source_name_snapshot"),
source_terms_url_snapshot=article.get("source_terms_url_snapshot"),
source_license_name_snapshot=article.get("source_license_name_snapshot"),
legal_checked=bool(int(article.get("legal_checked", 0))),
legal_checked_at=article.get("legal_checked_at"),
legal_note=article.get("legal_note"),
wp_post_id=article.get("wp_post_id"),
wp_post_url=article.get("wp_post_url"),
publish_attempts=int(article.get("publish_attempts", 0)),
publish_last_error=article.get("publish_last_error"),
published_to_wp_at=article.get("published_to_wp_at"),
word_count=len(rewritten.split()),
status="approved",
meta_json=article.get("meta_json"),
)
)
_upsert_article_from_existing(article, content_rewritten=rewritten, status="approved")
return _dashboard_redirect(msg=f"Rewrite fertig fuer Artikel #{article_id} -> publish")
@router.post("/admin/articles/{article_id}/rewrite-save")
def admin_rewrite_save(request: Request, article_id: int, content_rewritten: str = Form(...)):
    """Store manually edited rewrite text for article ``article_id``.

    Redirects to the login page when ``_admin_user`` yields no user and to the
    dashboard when the article does not exist. Text that is empty after
    stripping is rejected with an error message on the article page; otherwise
    the stripped text is saved and the article page shows a success message.
    """
    if not _admin_user(request):
        return RedirectResponse(url="/admin/login", status_code=303)

    existing = get_article_by_id(article_id)
    if not existing:
        return _dashboard_redirect(msg=f"Artikel #{article_id} nicht gefunden", msg_type="error")

    detail_url = f"/admin/articles/{article_id}"
    cleaned = (content_rewritten or "").strip()
    if cleaned:
        _upsert_article_from_existing(existing, content_rewritten=cleaned)
        return RedirectResponse(
            url=f"{detail_url}?msg=Rewrite-Text%20gespeichert&type=success",
            status_code=303,
        )
    return RedirectResponse(
        url=f"{detail_url}?msg=Rewrite-Text%20darf%20nicht%20leer%20sein&type=error",
        status_code=303,
    )
@router.post("/admin/articles/{article_id}/reopen")
def admin_reopen_article(request: Request, article_id: int):
    """Send article ``article_id`` back into the rewrite workflow.

    Redirects to the login page when ``_admin_user`` yields no user and to the
    dashboard when the article does not exist. Otherwise the article is saved
    with status ``"rewrite"``, its WordPress post id/URL, last publish error
    and publish timestamp cleared, and its publish attempt counter reset to 0.
    """
    if not _admin_user(request):
        return RedirectResponse(url="/admin/login", status_code=303)

    existing = get_article_by_id(article_id)
    if not existing:
        return _dashboard_redirect(msg=f"Artikel #{article_id} nicht gefunden", msg_type="error")

    # NOTE(review): the automatic rewrite handler stores status "approved";
    # confirm that "rewrite" is a valid stored status value for upsert_article.
    reset_fields = {
        "status": "rewrite",
        "wp_post_id": None,
        "wp_post_url": None,
        "publish_attempts": 0,
        "publish_last_error": None,
        "published_to_wp_at": None,
    }
    _upsert_article_from_existing(existing, **reset_fields)

    target = f"/admin/articles/{article_id}?msg=Artikel%20zurueck%20in%20Rewrite-Workflow%20gesetzt&type=success"
    return RedirectResponse(url=target, status_code=303)
@router.post("/admin/articles/{article_id}/transition")
def admin_transition_article(request: Request, article_id: int, target_status: str = Form(...), note: str = Form("")):
user = _admin_user(request)

View file

@ -28,6 +28,26 @@ class FeedCreate:
is_enabled: bool
@dataclass(frozen=True)
class SourceUpdate:
    """Immutable set of column values that ``update_source`` writes to a source row."""

    name: str  # update_source strips surrounding whitespace before saving
    base_url: str | None
    terms_url: str | None
    license_name: str | None
    risk_level: str
    is_enabled: bool  # persisted as 1/0 by update_source
    notes: str | None
    last_reviewed_at: str | None
@dataclass(frozen=True)
class FeedUpdate:
    """Immutable set of column values that ``update_feed`` writes to a feed row."""

    name: str  # update_feed strips surrounding whitespace before saving
    url: str  # update_feed strips surrounding whitespace before saving
    source_id: int | None
    is_enabled: bool  # persisted as 1/0 by update_feed
@dataclass(frozen=True)
class RunCreate:
run_type: str
@ -118,6 +138,35 @@ def get_source_by_id(source_id: int) -> dict[str, Any] | None:
return dict(row) if row else None
def update_source(source_id: int, payload: SourceUpdate) -> bool:
    """Overwrite the editable columns of source ``source_id`` with ``payload``.

    The name is stripped of surrounding whitespace and ``is_enabled`` is
    stored as 1/0. Returns ``True`` when the UPDATE affected at least one row.
    """
    statement = """
        UPDATE sources
        SET name = ?, base_url = ?, terms_url = ?, license_name = ?, risk_level = ?, is_enabled = ?, notes = ?, last_reviewed_at = ?
        WHERE id = ?
        """
    with get_conn() as conn:
        bindings = (
            payload.name.strip(),
            payload.base_url,
            payload.terms_url,
            payload.license_name,
            payload.risk_level,
            1 if payload.is_enabled else 0,
            payload.notes,
            payload.last_reviewed_at,
            source_id,
        )
        result = conn.execute(statement, bindings)
        return result.rowcount > 0
def delete_source(source_id: int) -> bool:
    """Delete source ``source_id``; return ``True`` when a row was removed."""
    statement = "DELETE FROM sources WHERE id = ?"
    with get_conn() as conn:
        result = conn.execute(statement, (source_id,))
        return result.rowcount > 0
def create_feed(payload: FeedCreate) -> int:
with get_conn() as conn:
cur = conn.execute(
@ -177,6 +226,31 @@ def get_feed_by_id(feed_id: int) -> dict[str, Any] | None:
return dict(row) if row else None
def update_feed(feed_id: int, payload: FeedUpdate) -> bool:
    """Overwrite the editable columns of feed ``feed_id`` with ``payload``.

    Name and URL are stripped of surrounding whitespace and ``is_enabled`` is
    stored as 1/0. Returns ``True`` when the UPDATE affected at least one row.
    """
    statement = """
        UPDATE feeds
        SET name = ?, url = ?, source_id = ?, is_enabled = ?
        WHERE id = ?
        """
    with get_conn() as conn:
        bindings = (
            payload.name.strip(),
            payload.url.strip(),
            payload.source_id,
            1 if payload.is_enabled else 0,
            feed_id,
        )
        result = conn.execute(statement, bindings)
        return result.rowcount > 0
def delete_feed(feed_id: int) -> bool:
    """Delete feed ``feed_id``; return ``True`` when a row was removed."""
    statement = "DELETE FROM feeds WHERE id = ?"
    with get_conn() as conn:
        result = conn.execute(statement, (feed_id,))
        return result.rowcount > 0
def update_feed_fetch_state(feed_id: int, etag: str | None, last_modified: str | None) -> None:
with get_conn() as conn:
conn.execute(

View file

@ -61,17 +61,18 @@ def _selected_image_url_from_meta(meta_json: str | None) -> str | None:
return selected if isinstance(selected, str) and selected.strip() else None
def _download_image_bytes(url: str) -> tuple[bytes, str]:
req = Request(
url=url,
headers={
"User-Agent": "rss-news-publisher/1.0",
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
},
)
def _download_image_bytes(url: str, referer: str | None = None) -> tuple[bytes, str]:
headers = {
"User-Agent": "rss-news-publisher/1.0",
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
}
if referer:
headers["Referer"] = referer
req = Request(url=url, headers=headers)
with urlopen(req, timeout=20) as resp:
raw = resp.read()
content_type = resp.headers.get("Content-Type", "application/octet-stream")
content_type = content_type.split(";")[0].strip() if content_type else "application/octet-stream"
if not content_type.lower().startswith("image/"):
raise RuntimeError(f"Ausgewählte Bild-URL liefert kein Bild ({content_type})")
return raw, content_type
@ -94,7 +95,7 @@ def _upload_featured_media(
article_title: str,
source_url: str,
) -> int:
image_bytes, content_type = _download_image_bytes(image_url)
image_bytes, content_type = _download_image_bytes(image_url, referer=source_url or None)
filename = _guess_filename(image_url, content_type)
media_url = f"{base_url.rstrip('/')}/wp-json/wp/v2/media"
@ -143,6 +144,29 @@ def _as_paragraph_html(text: str) -> str:
return "\n".join(lines)
def _as_block_paragraphs(text: str) -> str:
chunks = [chunk.strip() for chunk in re.split(r"\n{2,}", text.strip()) if chunk.strip()]
if not chunks:
return ""
lines = []
for chunk in chunks:
compact = re.sub(r"\s*\n\s*", " ", chunk)
lines.append(f"<!-- wp:paragraph --><p>{escape(compact)}</p><!-- /wp:paragraph -->")
return "\n".join(lines)
def _as_block_heading(level: int, text: str) -> str:
safe_level = min(6, max(1, int(level)))
return f'<!-- wp:heading {{"level":{safe_level}}} --><h{safe_level}>{escape(text)}</h{safe_level}><!-- /wp:heading -->'
def _as_block_list(items: list[str]) -> str:
if not items:
return ""
content = "".join(f"<li>{item}</li>" for item in items)
return f"<!-- wp:list --><ul>{content}</ul><!-- /wp:list -->"
def _sanitize_publish_text(text: str) -> str:
raw = (text or "").strip()
if not raw:
@ -164,11 +188,13 @@ def _build_post_content(article: dict[str, Any]) -> tuple[str, str | None]:
if not body_text:
body_text = summary
# Keep existing HTML if already present, otherwise wrap plain text into paragraphs.
# Keep existing HTML if already present, otherwise wrap plain text into block paragraphs.
has_html = bool(re.search(r"<[a-zA-Z][^>]*>", body_text))
body_html = body_text if has_html else _as_paragraph_html(body_text)
body_html = body_text if has_html else _as_block_paragraphs(body_text)
if not body_html:
body_html = "<p>Kein Inhalt verfügbar.</p>"
body_html = "<!-- wp:paragraph --><p>Kein Inhalt verfügbar.</p><!-- /wp:paragraph -->"
elif has_html:
body_html = f"<!-- wp:html -->\n{body_html}\n<!-- /wp:html -->"
author = (article.get("author") or "").strip()
published_at = (article.get("published_at") or "").strip()
@ -176,35 +202,35 @@ def _build_post_content(article: dict[str, Any]) -> tuple[str, str | None]:
license_name = (article.get("source_license_name_snapshot") or "").strip()
terms_url = (article.get("source_terms_url_snapshot") or "").strip()
lead_html = f"<p><em>{escape(summary)}</em></p>\n" if summary else ""
lead_html = f"<!-- wp:paragraph --><p><em>{escape(summary)}</em></p><!-- /wp:paragraph -->\n" if summary else ""
facts: list[str] = []
if author:
facts.append(f"<li><strong>Autor:</strong> {escape(author)}</li>")
facts.append(f"<strong>Autor:</strong> {escape(author)}")
if published_at:
facts.append(f"<li><strong>Veröffentlicht (Quelle):</strong> {escape(published_at)}</li>")
facts.append(f"<strong>Veröffentlicht (Quelle):</strong> {escape(published_at)}")
if source_name:
facts.append(f"<li><strong>Quelle:</strong> {escape(source_name)}</li>")
facts.append(f"<strong>Quelle:</strong> {escape(source_name)}")
if license_name:
facts.append(f"<li><strong>Lizenz:</strong> {escape(license_name)}</li>")
facts.append(f"<strong>Lizenz:</strong> {escape(license_name)}")
if terms_url:
facts.append(f"<li><strong>Lizenzhinweise:</strong> <a href=\"{escape(terms_url)}\">{escape(terms_url)}</a></li>")
facts.append(f"<strong>Lizenzhinweise:</strong> <a href=\"{escape(terms_url)}\">{escape(terms_url)}</a>")
facts_html = (
"<h3>Artikeldetails</h3>\n<ul>\n" + "\n".join(facts) + "\n</ul>\n"
if facts
else ""
)
attribution_html = (
"<hr />\n<section class=\"rss-news-attribution\">\n"
"<h3>Quelle</h3>\n"
f"<p>Originalartikel: <a href=\"{escape(source_url)}\">{escape(source_url)}</a></p>\n"
)
facts_html = ""
if facts:
facts_html = _as_block_heading(3, "Artikeldetails") + "\n" + _as_block_list(facts)
attribution_parts = [
_as_block_heading(3, "Quelle"),
f'<!-- wp:paragraph --><p>Originalartikel: <a href="{escape(source_url)}">{escape(source_url)}</a></p><!-- /wp:paragraph -->',
]
if canonical_url and canonical_url != source_url:
attribution_html += f"<p>Canonical: <a href=\"{escape(canonical_url)}\">{escape(canonical_url)}</a></p>\n"
attribution_html += "</section>"
attribution_parts.append(
f'<!-- wp:paragraph --><p>Canonical: <a href="{escape(canonical_url)}">{escape(canonical_url)}</a></p><!-- /wp:paragraph -->'
)
attribution_html = "\n".join(attribution_parts)
content = f"{lead_html}{body_html}\n\n{facts_html}{attribution_html}".strip()
content = f"{lead_html}{body_html}\n\n{facts_html}\n{attribution_html}".strip()
excerpt_source = summary or re.sub(r"\s+", " ", body_text).strip()
excerpt = excerpt_source[:220] if excerpt_source else None
return content, excerpt

View file

@ -33,7 +33,6 @@ ALLOWED_UI_TRANSITIONS: dict[str, set[str]] = {
"new": {"rewrite", "close"},
"rewrite": {"publish", "close"},
"publish": {"published", "close"},
"published": {"close"},
"published": {"rewrite", "close"},
"close": {"rewrite"},
}