diff --git a/backend/app/wordpress.py b/backend/app/wordpress.py
index 8da5fc5..150bcd1 100644
--- a/backend/app/wordpress.py
+++ b/backend/app/wordpress.py
@@ -229,6 +229,42 @@ def _as_block_paragraphs(text: str) -> str:
return "\n".join(lines)
+def _strip_html_tags(raw: str) -> str:
+    """Return *raw* as plain text with markup tags removed.
+
+    Every ``<...>`` tag is replaced by a single space, runs of whitespace
+    are then collapsed to one space, and the result is trimmed at both
+    ends. A falsy *raw* (``None`` or ``""``) yields ``""``.
+    """
+    source = raw if raw else ""
+    # Replace tags with a space (not nothing) so adjacent words stay apart.
+    without_tags = re.sub(r"<[^>]+>", " ", source)
+    collapsed = re.sub(r"\s+", " ", without_tags)
+    return collapsed.strip()
+
+
+def _html_to_wp_blocks(html: str) -> str:
+ src = (html or "").strip()
+ if not src:
+ return ""
+ pattern = re.compile(
+ r"
]*>[\s\S]*?
|Kein Inhalt verfügbar.
" - elif has_html: - body_html = f"\n{body_html}\n" - - author = (article.get("author") or "").strip() - published_at = (article.get("published_at") or "").strip() - source_name = (article.get("source_name_snapshot") or "").strip() - license_name = (article.get("source_license_name_snapshot") or "").strip() - terms_url = (article.get("source_terms_url_snapshot") or "").strip() - - lead_html = f"{escape(summary)}
\n" if summary else "" - - facts: list[str] = [] - if author: - facts.append(f"Autor: {escape(author)}") - if published_at: - facts.append(f"Veröffentlicht (Quelle): {escape(published_at)}") - if source_name: - facts.append(f"Quelle: {escape(source_name)}") - if license_name: - facts.append(f"Lizenz: {escape(license_name)}") - if terms_url: - facts.append(f"Lizenzhinweise: {escape(terms_url)}") - - facts_html = "" - if facts: - facts_html = _as_block_heading(3, "Artikeldetails") + "\n" + _as_block_list(facts) - - attribution_parts = [ - _as_block_heading(3, "Quelle"), - f'Originalartikel: {escape(source_url)}
', - ] - if canonical_url and canonical_url != source_url: - attribution_parts.append( - f'Canonical: {escape(canonical_url)}
' - ) - attribution_html = "\n".join(attribution_parts) - - content = f"{lead_html}{body_html}\n\n{facts_html}\n{attribution_html}".strip() - excerpt_source = summary or re.sub(r"\s+", " ", body_text).strip() - excerpt = excerpt_source[:220] if excerpt_source else None - return content, excerpt + content = body_html.strip() + return content, None def publish_article_draft(article: dict[str, Any]) -> tuple[int, str | None]: diff --git a/backend/tests/test_wordpress.py b/backend/tests/test_wordpress.py index 4cafc55..20b0618 100644 --- a/backend/tests/test_wordpress.py +++ b/backend/tests/test_wordpress.py @@ -38,9 +38,9 @@ class TestWordpressPublish(unittest.TestCase): self.assertTrue(mock_upload_media.called) payload = mock_wp_request.call_args.kwargs["payload"] self.assertEqual(payload.get("featured_media"), 456) - self.assertIn("Inhalt
", payload.get("content", "")) + self.assertNotIn("excerpt", payload) @patch("backend.app.wordpress._upload_featured_media") @patch("backend.app.wordpress._wp_request") @@ -79,6 +79,7 @@ class TestWordpressPublish(unittest.TestCase): self.assertNotIn("Firma GmbH", content) self.assertNotIn("Pressekontakt", content) self.assertIn("eigentliche Text", content) + self.assertNotIn("Artikeldetails", content) @patch("backend.app.wordpress._upload_featured_media") @patch("backend.app.wordpress._wp_request") @@ -114,6 +115,25 @@ class TestWordpressPublish(unittest.TestCase): payload = post_calls[0].kwargs.get("payload", {}) self.assertEqual(payload.get("tags"), [11, 12]) + @patch("backend.app.wordpress._upload_featured_media") + @patch("backend.app.wordpress._wp_request") + def test_publish_converts_html_to_wp_blocks_without_html_block(self, mock_wp_request, mock_upload_media) -> None: + mock_wp_request.return_value = {"id": 111, "link": "https://example.org/?p=111"} + article = { + "title": "Block Test", + "content_rewritten": "Absatz 1