diff --git a/backend/app/wordpress.py b/backend/app/wordpress.py index 756a346..fbf4443 100644 --- a/backend/app/wordpress.py +++ b/backend/app/wordpress.py @@ -1,9 +1,11 @@ from __future__ import annotations import base64 +from html import escape import json import mimetypes from pathlib import Path +import re from typing import Any from urllib.parse import urlparse from urllib.request import Request, urlopen @@ -130,6 +132,75 @@ def _upload_featured_media( return media_id +def _as_paragraph_html(text: str) -> str: + chunks = [chunk.strip() for chunk in re.split(r"\n{2,}", text.strip()) if chunk.strip()] + if not chunks: + return "" + lines = [] + for chunk in chunks: + compact = re.sub(r"\s*\n\s*", " ", chunk) + lines.append(f"
{escape(compact)}
") + return "\n".join(lines) + + +def _build_post_content(article: dict[str, Any]) -> tuple[str, str | None]: + source_url = article.get("source_url") or "" + canonical_url = article.get("canonical_url") or source_url + summary = (article.get("summary") or "").strip() + body_text = (article.get("content_rewritten") or article.get("content_raw") or "").strip() + if not body_text: + body_text = summary + + # Keep existing HTML if already present, otherwise wrap plain text into paragraphs. + has_html = bool(re.search(r"<[a-zA-Z][^>]*>", body_text)) + body_html = body_text if has_html else _as_paragraph_html(body_text) + if not body_html: + body_html = "Kein Inhalt verfügbar.
" + + author = (article.get("author") or "").strip() + published_at = (article.get("published_at") or "").strip() + source_name = (article.get("source_name_snapshot") or "").strip() + license_name = (article.get("source_license_name_snapshot") or "").strip() + terms_url = (article.get("source_terms_url_snapshot") or "").strip() + press_contact = (article.get("press_contact") or "").strip() + + lead_html = f"{escape(summary)}
\n" if summary else "" + + facts: list[str] = [] + if author: + facts.append(f"{escape(press_contact)}
\n" if press_contact else "" + ) + attribution_html = ( + "Originalartikel: {escape(source_url)}
\n" + ) + if canonical_url and canonical_url != source_url: + attribution_html += f"Canonical: {escape(canonical_url)}
\n" + attribution_html += "Quelle: " - footer += f"{source_url}
" - if canonical_url and canonical_url != source_url: - footer += f"\nCanonical: {canonical_url}
" - content = f"{body}{footer}" + content, excerpt = _build_post_content(article) + source_url = article.get("source_url") or "" featured_media_id = None selected_image_url = _selected_image_url_from_meta(article.get("meta_json")) @@ -166,6 +228,8 @@ def publish_article_draft(article: dict[str, Any]) -> tuple[int, str | None]: "content": content, "status": settings.wordpress_default_status, } + if excerpt: + payload["excerpt"] = excerpt if featured_media_id: payload["featured_media"] = featured_media_id diff --git a/backend/tests/test_wordpress.py b/backend/tests/test_wordpress.py index f12c6e1..2c9094e 100644 --- a/backend/tests/test_wordpress.py +++ b/backend/tests/test_wordpress.py @@ -38,6 +38,9 @@ class TestWordpressPublish(unittest.TestCase): self.assertTrue(mock_upload_media.called) payload = mock_wp_request.call_args.kwargs["payload"] self.assertEqual(payload.get("featured_media"), 456) + self.assertIn("Inhalt
", payload.get("content", "")) if __name__ == "__main__":