feat(admin): add feed/source management, rewrite editor, reopen flow, and WP block output

This commit is contained in:
Oliver 2026-02-21 14:03:49 +01:00
parent 50f737f434
commit 88b2ee1d01
No known key found for this signature in database
9 changed files with 555 additions and 70 deletions

View file

@ -61,17 +61,18 @@ def _selected_image_url_from_meta(meta_json: str | None) -> str | None:
return selected if isinstance(selected, str) and selected.strip() else None
def _download_image_bytes(url: str) -> tuple[bytes, str]:
req = Request(
url=url,
headers={
"User-Agent": "rss-news-publisher/1.0",
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
},
)
def _download_image_bytes(url: str, referer: str | None = None) -> tuple[bytes, str]:
headers = {
"User-Agent": "rss-news-publisher/1.0",
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
}
if referer:
headers["Referer"] = referer
req = Request(url=url, headers=headers)
with urlopen(req, timeout=20) as resp:
raw = resp.read()
content_type = resp.headers.get("Content-Type", "application/octet-stream")
content_type = content_type.split(";")[0].strip() if content_type else "application/octet-stream"
if not content_type.lower().startswith("image/"):
raise RuntimeError(f"Ausgewählte Bild-URL liefert kein Bild ({content_type})")
return raw, content_type
@ -94,7 +95,7 @@ def _upload_featured_media(
article_title: str,
source_url: str,
) -> int:
image_bytes, content_type = _download_image_bytes(image_url)
image_bytes, content_type = _download_image_bytes(image_url, referer=source_url or None)
filename = _guess_filename(image_url, content_type)
media_url = f"{base_url.rstrip('/')}/wp-json/wp/v2/media"
@ -143,6 +144,29 @@ def _as_paragraph_html(text: str) -> str:
return "\n".join(lines)
def _as_block_paragraphs(text: str) -> str:
chunks = [chunk.strip() for chunk in re.split(r"\n{2,}", text.strip()) if chunk.strip()]
if not chunks:
return ""
lines = []
for chunk in chunks:
compact = re.sub(r"\s*\n\s*", " ", chunk)
lines.append(f"<!-- wp:paragraph --><p>{escape(compact)}</p><!-- /wp:paragraph -->")
return "\n".join(lines)
def _as_block_heading(level: int, text: str) -> str:
safe_level = min(6, max(1, int(level)))
return f'<!-- wp:heading {{"level":{safe_level}}} --><h{safe_level}>{escape(text)}</h{safe_level}><!-- /wp:heading -->'
def _as_block_list(items: list[str]) -> str:
if not items:
return ""
content = "".join(f"<li>{item}</li>" for item in items)
return f"<!-- wp:list --><ul>{content}</ul><!-- /wp:list -->"
def _sanitize_publish_text(text: str) -> str:
raw = (text or "").strip()
if not raw:
@ -164,11 +188,13 @@ def _build_post_content(article: dict[str, Any]) -> tuple[str, str | None]:
if not body_text:
body_text = summary
# Keep existing HTML if already present, otherwise wrap plain text into paragraphs.
# Keep existing HTML if already present, otherwise wrap plain text into block paragraphs.
has_html = bool(re.search(r"<[a-zA-Z][^>]*>", body_text))
body_html = body_text if has_html else _as_paragraph_html(body_text)
body_html = body_text if has_html else _as_block_paragraphs(body_text)
if not body_html:
body_html = "<p>Kein Inhalt verfügbar.</p>"
body_html = "<!-- wp:paragraph --><p>Kein Inhalt verfügbar.</p><!-- /wp:paragraph -->"
elif has_html:
body_html = f"<!-- wp:html -->\n{body_html}\n<!-- /wp:html -->"
author = (article.get("author") or "").strip()
published_at = (article.get("published_at") or "").strip()
@ -176,35 +202,35 @@ def _build_post_content(article: dict[str, Any]) -> tuple[str, str | None]:
license_name = (article.get("source_license_name_snapshot") or "").strip()
terms_url = (article.get("source_terms_url_snapshot") or "").strip()
lead_html = f"<p><em>{escape(summary)}</em></p>\n" if summary else ""
lead_html = f"<!-- wp:paragraph --><p><em>{escape(summary)}</em></p><!-- /wp:paragraph -->\n" if summary else ""
facts: list[str] = []
if author:
facts.append(f"<li><strong>Autor:</strong> {escape(author)}</li>")
facts.append(f"<strong>Autor:</strong> {escape(author)}")
if published_at:
facts.append(f"<li><strong>Veröffentlicht (Quelle):</strong> {escape(published_at)}</li>")
facts.append(f"<strong>Veröffentlicht (Quelle):</strong> {escape(published_at)}")
if source_name:
facts.append(f"<li><strong>Quelle:</strong> {escape(source_name)}</li>")
facts.append(f"<strong>Quelle:</strong> {escape(source_name)}")
if license_name:
facts.append(f"<li><strong>Lizenz:</strong> {escape(license_name)}</li>")
facts.append(f"<strong>Lizenz:</strong> {escape(license_name)}")
if terms_url:
facts.append(f"<li><strong>Lizenzhinweise:</strong> <a href=\"{escape(terms_url)}\">{escape(terms_url)}</a></li>")
facts.append(f"<strong>Lizenzhinweise:</strong> <a href=\"{escape(terms_url)}\">{escape(terms_url)}</a>")
facts_html = (
"<h3>Artikeldetails</h3>\n<ul>\n" + "\n".join(facts) + "\n</ul>\n"
if facts
else ""
)
attribution_html = (
"<hr />\n<section class=\"rss-news-attribution\">\n"
"<h3>Quelle</h3>\n"
f"<p>Originalartikel: <a href=\"{escape(source_url)}\">{escape(source_url)}</a></p>\n"
)
facts_html = ""
if facts:
facts_html = _as_block_heading(3, "Artikeldetails") + "\n" + _as_block_list(facts)
attribution_parts = [
_as_block_heading(3, "Quelle"),
f'<!-- wp:paragraph --><p>Originalartikel: <a href="{escape(source_url)}">{escape(source_url)}</a></p><!-- /wp:paragraph -->',
]
if canonical_url and canonical_url != source_url:
attribution_html += f"<p>Canonical: <a href=\"{escape(canonical_url)}\">{escape(canonical_url)}</a></p>\n"
attribution_html += "</section>"
attribution_parts.append(
f'<!-- wp:paragraph --><p>Canonical: <a href="{escape(canonical_url)}">{escape(canonical_url)}</a></p><!-- /wp:paragraph -->'
)
attribution_html = "\n".join(attribution_parts)
content = f"{lead_html}{body_html}\n\n{facts_html}{attribution_html}".strip()
content = f"{lead_html}{body_html}\n\n{facts_html}\n{attribution_html}".strip()
excerpt_source = summary or re.sub(r"\s+", " ", body_text).strip()
excerpt = excerpt_source[:220] if excerpt_source else None
return content, excerpt