feat(admin): add feed/source management, rewrite editor, reopen flow, and WP block output
This commit is contained in:
parent
50f737f434
commit
88b2ee1d01
9 changed files with 555 additions and 70 deletions
|
|
@ -61,17 +61,18 @@ def _selected_image_url_from_meta(meta_json: str | None) -> str | None:
|
|||
return selected if isinstance(selected, str) and selected.strip() else None
|
||||
|
||||
|
||||
def _download_image_bytes(url: str) -> tuple[bytes, str]:
|
||||
req = Request(
|
||||
url=url,
|
||||
headers={
|
||||
"User-Agent": "rss-news-publisher/1.0",
|
||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
},
|
||||
)
|
||||
def _download_image_bytes(url: str, referer: str | None = None) -> tuple[bytes, str]:
|
||||
headers = {
|
||||
"User-Agent": "rss-news-publisher/1.0",
|
||||
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
||||
}
|
||||
if referer:
|
||||
headers["Referer"] = referer
|
||||
req = Request(url=url, headers=headers)
|
||||
with urlopen(req, timeout=20) as resp:
|
||||
raw = resp.read()
|
||||
content_type = resp.headers.get("Content-Type", "application/octet-stream")
|
||||
content_type = content_type.split(";")[0].strip() if content_type else "application/octet-stream"
|
||||
if not content_type.lower().startswith("image/"):
|
||||
raise RuntimeError(f"Ausgewählte Bild-URL liefert kein Bild ({content_type})")
|
||||
return raw, content_type
|
||||
|
|
@ -94,7 +95,7 @@ def _upload_featured_media(
|
|||
article_title: str,
|
||||
source_url: str,
|
||||
) -> int:
|
||||
image_bytes, content_type = _download_image_bytes(image_url)
|
||||
image_bytes, content_type = _download_image_bytes(image_url, referer=source_url or None)
|
||||
filename = _guess_filename(image_url, content_type)
|
||||
|
||||
media_url = f"{base_url.rstrip('/')}/wp-json/wp/v2/media"
|
||||
|
|
@ -143,6 +144,29 @@ def _as_paragraph_html(text: str) -> str:
|
|||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _as_block_paragraphs(text: str) -> str:
|
||||
chunks = [chunk.strip() for chunk in re.split(r"\n{2,}", text.strip()) if chunk.strip()]
|
||||
if not chunks:
|
||||
return ""
|
||||
lines = []
|
||||
for chunk in chunks:
|
||||
compact = re.sub(r"\s*\n\s*", " ", chunk)
|
||||
lines.append(f"<!-- wp:paragraph --><p>{escape(compact)}</p><!-- /wp:paragraph -->")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _as_block_heading(level: int, text: str) -> str:
|
||||
safe_level = min(6, max(1, int(level)))
|
||||
return f'<!-- wp:heading {{"level":{safe_level}}} --><h{safe_level}>{escape(text)}</h{safe_level}><!-- /wp:heading -->'
|
||||
|
||||
|
||||
def _as_block_list(items: list[str]) -> str:
|
||||
if not items:
|
||||
return ""
|
||||
content = "".join(f"<li>{item}</li>" for item in items)
|
||||
return f"<!-- wp:list --><ul>{content}</ul><!-- /wp:list -->"
|
||||
|
||||
|
||||
def _sanitize_publish_text(text: str) -> str:
|
||||
raw = (text or "").strip()
|
||||
if not raw:
|
||||
|
|
@ -164,11 +188,13 @@ def _build_post_content(article: dict[str, Any]) -> tuple[str, str | None]:
|
|||
if not body_text:
|
||||
body_text = summary
|
||||
|
||||
# Keep existing HTML if already present, otherwise wrap plain text into paragraphs.
|
||||
# Keep existing HTML if already present, otherwise wrap plain text into block paragraphs.
|
||||
has_html = bool(re.search(r"<[a-zA-Z][^>]*>", body_text))
|
||||
body_html = body_text if has_html else _as_paragraph_html(body_text)
|
||||
body_html = body_text if has_html else _as_block_paragraphs(body_text)
|
||||
if not body_html:
|
||||
body_html = "<p>Kein Inhalt verfügbar.</p>"
|
||||
body_html = "<!-- wp:paragraph --><p>Kein Inhalt verfügbar.</p><!-- /wp:paragraph -->"
|
||||
elif has_html:
|
||||
body_html = f"<!-- wp:html -->\n{body_html}\n<!-- /wp:html -->"
|
||||
|
||||
author = (article.get("author") or "").strip()
|
||||
published_at = (article.get("published_at") or "").strip()
|
||||
|
|
@ -176,35 +202,35 @@ def _build_post_content(article: dict[str, Any]) -> tuple[str, str | None]:
|
|||
license_name = (article.get("source_license_name_snapshot") or "").strip()
|
||||
terms_url = (article.get("source_terms_url_snapshot") or "").strip()
|
||||
|
||||
lead_html = f"<p><em>{escape(summary)}</em></p>\n" if summary else ""
|
||||
lead_html = f"<!-- wp:paragraph --><p><em>{escape(summary)}</em></p><!-- /wp:paragraph -->\n" if summary else ""
|
||||
|
||||
facts: list[str] = []
|
||||
if author:
|
||||
facts.append(f"<li><strong>Autor:</strong> {escape(author)}</li>")
|
||||
facts.append(f"<strong>Autor:</strong> {escape(author)}")
|
||||
if published_at:
|
||||
facts.append(f"<li><strong>Veröffentlicht (Quelle):</strong> {escape(published_at)}</li>")
|
||||
facts.append(f"<strong>Veröffentlicht (Quelle):</strong> {escape(published_at)}")
|
||||
if source_name:
|
||||
facts.append(f"<li><strong>Quelle:</strong> {escape(source_name)}</li>")
|
||||
facts.append(f"<strong>Quelle:</strong> {escape(source_name)}")
|
||||
if license_name:
|
||||
facts.append(f"<li><strong>Lizenz:</strong> {escape(license_name)}</li>")
|
||||
facts.append(f"<strong>Lizenz:</strong> {escape(license_name)}")
|
||||
if terms_url:
|
||||
facts.append(f"<li><strong>Lizenzhinweise:</strong> <a href=\"{escape(terms_url)}\">{escape(terms_url)}</a></li>")
|
||||
facts.append(f"<strong>Lizenzhinweise:</strong> <a href=\"{escape(terms_url)}\">{escape(terms_url)}</a>")
|
||||
|
||||
facts_html = (
|
||||
"<h3>Artikeldetails</h3>\n<ul>\n" + "\n".join(facts) + "\n</ul>\n"
|
||||
if facts
|
||||
else ""
|
||||
)
|
||||
attribution_html = (
|
||||
"<hr />\n<section class=\"rss-news-attribution\">\n"
|
||||
"<h3>Quelle</h3>\n"
|
||||
f"<p>Originalartikel: <a href=\"{escape(source_url)}\">{escape(source_url)}</a></p>\n"
|
||||
)
|
||||
facts_html = ""
|
||||
if facts:
|
||||
facts_html = _as_block_heading(3, "Artikeldetails") + "\n" + _as_block_list(facts)
|
||||
|
||||
attribution_parts = [
|
||||
_as_block_heading(3, "Quelle"),
|
||||
f'<!-- wp:paragraph --><p>Originalartikel: <a href="{escape(source_url)}">{escape(source_url)}</a></p><!-- /wp:paragraph -->',
|
||||
]
|
||||
if canonical_url and canonical_url != source_url:
|
||||
attribution_html += f"<p>Canonical: <a href=\"{escape(canonical_url)}\">{escape(canonical_url)}</a></p>\n"
|
||||
attribution_html += "</section>"
|
||||
attribution_parts.append(
|
||||
f'<!-- wp:paragraph --><p>Canonical: <a href="{escape(canonical_url)}">{escape(canonical_url)}</a></p><!-- /wp:paragraph -->'
|
||||
)
|
||||
attribution_html = "\n".join(attribution_parts)
|
||||
|
||||
content = f"{lead_html}{body_html}\n\n{facts_html}{attribution_html}".strip()
|
||||
content = f"{lead_html}{body_html}\n\n{facts_html}\n{attribution_html}".strip()
|
||||
excerpt_source = summary or re.sub(r"\s+", " ", body_text).strip()
|
||||
excerpt = excerpt_source[:220] if excerpt_source else None
|
||||
return content, excerpt
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue