feat(admin): add feed/source management, rewrite editor, reopen flow, and WP block output

2026-02-21 14:03:49 +01:00 · 2026-02-21 14:03:49 +01:00 · 88b2ee1d01
commit 88b2ee1d01
parent 50f737f434
9 changed files with 555 additions and 70 deletions
--- a/backend/app/wordpress.py
+++ b/backend/app/wordpress.py
@ -61,17 +61,18 @@ def _selected_image_url_from_meta(meta_json: str | None) -> str | None:
    return selected if isinstance(selected, str) and selected.strip() else None


-def _download_image_bytes(url: str) -> tuple[bytes, str]:
-    req = Request(
-        url=url,
-        headers={
-            "User-Agent": "rss-news-publisher/1.0",
-            "Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
-        },
-    )
+def _download_image_bytes(url: str, referer: str | None = None) -> tuple[bytes, str]:
+    headers = {
+        "User-Agent": "rss-news-publisher/1.0",
+        "Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
+    }
+    if referer:
+        headers["Referer"] = referer
+    req = Request(url=url, headers=headers)
    with urlopen(req, timeout=20) as resp:
        raw = resp.read()
        content_type = resp.headers.get("Content-Type", "application/octet-stream")
+    content_type = content_type.split(";")[0].strip() if content_type else "application/octet-stream"
    if not content_type.lower().startswith("image/"):
        raise RuntimeError(f"Ausgewählte Bild-URL liefert kein Bild ({content_type})")
    return raw, content_type
@ -94,7 +95,7 @@ def _upload_featured_media(
    article_title: str,
    source_url: str,
 ) -> int:
-    image_bytes, content_type = _download_image_bytes(image_url)
+    image_bytes, content_type = _download_image_bytes(image_url, referer=source_url or None)
    filename = _guess_filename(image_url, content_type)

    media_url = f"{base_url.rstrip('/')}/wp-json/wp/v2/media"
@ -143,6 +144,29 @@ def _as_paragraph_html(text: str) -> str:
    return "\n".join(lines)


+def _as_block_paragraphs(text: str) -> str:
+    chunks = [chunk.strip() for chunk in re.split(r"\n{2,}", text.strip()) if chunk.strip()]
+    if not chunks:
+        return ""
+    lines = []
+    for chunk in chunks:
+        compact = re.sub(r"\s*\n\s*", " ", chunk)
+        lines.append(f"<!-- wp:paragraph --><p>{escape(compact)}</p><!-- /wp:paragraph -->")
+    return "\n".join(lines)
+
+
+def _as_block_heading(level: int, text: str) -> str:
+    safe_level = min(6, max(1, int(level)))
+    return f'<!-- wp:heading {{"level":{safe_level}}} --><h{safe_level}>{escape(text)}</h{safe_level}><!-- /wp:heading -->'
+
+
+def _as_block_list(items: list[str]) -> str:
+    if not items:
+        return ""
+    content = "".join(f"<li>{item}</li>" for item in items)
+    return f"<!-- wp:list --><ul>{content}</ul><!-- /wp:list -->"
+
+
 def _sanitize_publish_text(text: str) -> str:
    raw = (text or "").strip()
    if not raw:
@ -164,11 +188,13 @@ def _build_post_content(article: dict[str, Any]) -> tuple[str, str | None]:
    if not body_text:
        body_text = summary

-    # Keep existing HTML if already present, otherwise wrap plain text into paragraphs.
+    # Pass existing HTML through unchanged (wrapped in a wp:html block below); otherwise wrap plain text into block paragraphs.
    has_html = bool(re.search(r"<[a-zA-Z][^>]*>", body_text))
-    body_html = body_text if has_html else _as_paragraph_html(body_text)
+    body_html = body_text if has_html else _as_block_paragraphs(body_text)
    if not body_html:
-        body_html = "<p>Kein Inhalt verfügbar.</p>"
+        body_html = "<!-- wp:paragraph --><p>Kein Inhalt verfügbar.</p><!-- /wp:paragraph -->"
+    elif has_html:
+        body_html = f"<!-- wp:html -->\n{body_html}\n<!-- /wp:html -->"

    author = (article.get("author") or "").strip()
    published_at = (article.get("published_at") or "").strip()
@ -176,35 +202,35 @@ def _build_post_content(article: dict[str, Any]) -> tuple[str, str | None]:
    license_name = (article.get("source_license_name_snapshot") or "").strip()
    terms_url = (article.get("source_terms_url_snapshot") or "").strip()

-    lead_html = f"<p><em>{escape(summary)}</em></p>\n" if summary else ""
+    lead_html = f"<!-- wp:paragraph --><p><em>{escape(summary)}</em></p><!-- /wp:paragraph -->\n" if summary else ""

    facts: list[str] = []
    if author:
-        facts.append(f"<li><strong>Autor:</strong> {escape(author)}</li>")
+        facts.append(f"<strong>Autor:</strong> {escape(author)}")
    if published_at:
-        facts.append(f"<li><strong>Veröffentlicht (Quelle):</strong> {escape(published_at)}</li>")
+        facts.append(f"<strong>Veröffentlicht (Quelle):</strong> {escape(published_at)}")
    if source_name:
-        facts.append(f"<li><strong>Quelle:</strong> {escape(source_name)}</li>")
+        facts.append(f"<strong>Quelle:</strong> {escape(source_name)}")
    if license_name:
-        facts.append(f"<li><strong>Lizenz:</strong> {escape(license_name)}</li>")
+        facts.append(f"<strong>Lizenz:</strong> {escape(license_name)}")
    if terms_url:
-        facts.append(f"<li><strong>Lizenzhinweise:</strong> <a href=\"{escape(terms_url)}\">{escape(terms_url)}</a></li>")
+        facts.append(f"<strong>Lizenzhinweise:</strong> <a href=\"{escape(terms_url)}\">{escape(terms_url)}</a>")

-    facts_html = (
-        "<h3>Artikeldetails</h3>\n<ul>\n" + "\n".join(facts) + "\n</ul>\n"
-        if facts
-        else ""
-    )
-    attribution_html = (
-        "<hr />\n<section class=\"rss-news-attribution\">\n"
-        "<h3>Quelle</h3>\n"
-        f"<p>Originalartikel: <a href=\"{escape(source_url)}\">{escape(source_url)}</a></p>\n"
-    )
+    facts_html = ""
+    if facts:
+        facts_html = _as_block_heading(3, "Artikeldetails") + "\n" + _as_block_list(facts)
+
+    attribution_parts = [
+        _as_block_heading(3, "Quelle"),
+        f'<!-- wp:paragraph --><p>Originalartikel: <a href="{escape(source_url)}">{escape(source_url)}</a></p><!-- /wp:paragraph -->',
+    ]
    if canonical_url and canonical_url != source_url:
-        attribution_html += f"<p>Canonical: <a href=\"{escape(canonical_url)}\">{escape(canonical_url)}</a></p>\n"
-    attribution_html += "</section>"
+        attribution_parts.append(
+            f'<!-- wp:paragraph --><p>Canonical: <a href="{escape(canonical_url)}">{escape(canonical_url)}</a></p><!-- /wp:paragraph -->'
+        )
+    attribution_html = "\n".join(attribution_parts)

-    content = f"{lead_html}{body_html}\n\n{facts_html}{attribution_html}".strip()
+    content = f"{lead_html}{body_html}\n\n{facts_html}\n{attribution_html}".strip()
    excerpt_source = summary or re.sub(r"\s+", " ", body_text).strip()
    excerpt = excerpt_source[:220] if excerpt_source else None
    return content, excerpt