feat(rewrite): add batch rewrite run, AI tags for WP, and agentur contact detection

This commit is contained in:
Oliver 2026-02-21 14:39:47 +01:00
parent da269d08f1
commit b0f995d5c9
No known key found for this signature in database
10 changed files with 374 additions and 36 deletions

View file

@ -271,6 +271,71 @@ class TestAdminUi(unittest.TestCase):
self.assertIn("Neu", article.get("content_rewritten") or "")
self.assertIsNone(article.get("wp_post_id"))
@patch("backend.app.admin_ui.generate_article_tags")
@patch("backend.app.admin_ui.rewrite_article_text")
def test_batch_rewrite_run_processes_planned_articles(self, mock_rewrite_text, mock_tags) -> None:
    """POST /admin/rewrite/run processes an article stored with status "rewrite".

    ``rewrite_article_text`` and ``generate_article_tags`` are patched in
    ``backend.app.admin_ui`` and return canned values. After logging in and
    posting ``max_jobs=10`` the test expects a 303 response, the article
    status to be "approved", and "generated_tags" to appear in the
    article's ``meta_json``.
    """
    mock_rewrite_text.return_value = "<h2>Neu</h2><p>Text</p>"
    mock_tags.return_value = ["Rheingas", "Monheim"]

    # Build the source -> feed -> article chain the batch run operates on.
    source_id = create_source(
        SourceCreate(
            name="Batch Source",
            base_url="https://example.org",
            terms_url="https://example.org/terms",
            license_name="cc-by",
            risk_level="green",
            is_enabled=True,
            notes=None,
            last_reviewed_at=None,
        )
    )
    feed_id = create_feed(
        FeedCreate(
            name="Batch Feed",
            url="https://example.org/feed.xml",
            source_id=source_id,
            is_enabled=True,
        )
    )
    article_id = upsert_article(
        ArticleUpsert(
            feed_id=feed_id,
            source_article_id="batch-1",
            source_hash="batch-hash-1",
            title="Batch Titel",
            source_url="https://example.org/batch",
            canonical_url="https://example.org/batch",
            published_at=None,
            author="Autor",
            summary="Summary",
            content_raw="Raw",
            content_rewritten=None,
            image_urls_json=None,
            press_contact=None,
            source_name_snapshot="Batch Source",
            source_terms_url_snapshot="https://example.org/terms",
            source_license_name_snapshot="cc-by",
            legal_checked=False,
            legal_checked_at=None,
            legal_note=None,
            wp_post_id=None,
            wp_post_url=None,
            publish_attempts=0,
            publish_last_error=None,
            published_to_wp_at=None,
            word_count=1,
            status="rewrite",
            meta_json="{}",
        )
    )

    self.client.post("/admin/login", data={"username": "admin", "password": "secret"}, follow_redirects=True)
    res = self.client.post("/admin/rewrite/run", data={"max_jobs": "10"}, follow_redirects=False)
    self.assertEqual(res.status_code, 303)

    article = get_article_by_id(article_id)
    self.assertIsNotNone(article)
    self.assertEqual(article.get("status"), "approved")
    # ``dict.get(key, "")`` only falls back when the key is missing; a stored
    # None would reach assertIn and raise TypeError. ``or ""`` turns that case
    # into an ordinary assertion failure.
    self.assertIn("generated_tags", article.get("meta_json") or "")
@patch("backend.app.admin_ui.urlopen")
def test_image_proxy_returns_image_data(self, mock_urlopen) -> None:
class _FakeHeaders:

View file

@ -26,6 +26,25 @@ SAMPLE_HTML = """
</html>
"""
# HTML fixture: a minimal article page whose <article> body holds one content
# paragraph, an "Agentur" <h3> heading followed by an agency name and an e-mail
# address, and a trailing "Original-Content von ..." line. It is served through
# _FakeResponse in test_extract_article_detects_agentur_block_as_press_contact.
SAMPLE_HTML_AGENTUR = """
<!doctype html>
<html lang="de">
<head>
<meta charset="utf-8" />
<meta property="og:title" content="Demo Meldung Agentur" />
</head>
<body>
<article>
<p>Inhalt der Meldung.</p>
<h3>Agentur</h3>
<p>Agenturname GmbH</p>
<p>presse@agentur.example</p>
<p>Original-Content von Beispiel</p>
</article>
</body>
</html>
"""
class _FakeHeaders:
@staticmethod
@ -64,6 +83,14 @@ class TestSourceExtraction(unittest.TestCase):
self.assertIn("Pressekontakt", extracted.press_contact or "")
self.assertIsNone(extracted.extraction_error)
@patch("backend.app.source_extraction.urlopen")
def test_extract_article_detects_agentur_block_as_press_contact(self, mock_urlopen) -> None:
    """The "Agentur" section of the fixture page ends up in ``press_contact``.

    ``urlopen`` is patched to return SAMPLE_HTML_AGENTUR; the extracted press
    contact must contain the heading, the agency name and the e-mail address.
    """
    mock_urlopen.return_value = _FakeResponse(SAMPLE_HTML_AGENTUR)
    result = extract_article("https://www.presseportal.de/pm/155103/6210401")

    # A missing press contact is treated as empty text so assertIn reports a
    # normal failure rather than erroring on None.
    contact_text = result.press_contact or ""
    for expected_fragment in ("Agentur", "Agenturname", "presse@agentur.example"):
        self.assertIn(expected_fragment, contact_text)
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()

View file

@ -80,6 +80,40 @@ class TestWordpressPublish(unittest.TestCase):
self.assertNotIn("Pressekontakt", content)
self.assertIn("eigentliche Text", content)
@patch("backend.app.wordpress._upload_featured_media")
@patch("backend.app.wordpress._wp_request")
def test_publish_resolves_and_sets_tags(self, mock_wp_request, mock_upload_media) -> None:
    """The post payload carries the IDs of both a found and a newly created tag.

    ``_wp_request`` is replaced by a stub: a tag search containing "Rheingas"
    returns an existing tag (id 11), any other search returns nothing,
    creating the tag "Gasflasche" returns id 12, and creating the post
    returns id 900. The single "posts" request must be sent with
    ``tags == [11, 12]``.
    """

    def _wp_stub(**kwargs):
        # Canned replies keyed on HTTP method first, then endpoint.
        verb = kwargs.get("method", "")
        route = kwargs.get("endpoint", "")
        if verb == "GET":
            if not route.startswith("tags?search="):
                return {}
            return [{"id": 11, "name": "Rheingas"}] if "Rheingas" in route else []
        if verb != "POST":
            return {}
        if route == "tags":
            tag_name = (kwargs.get("payload") or {}).get("name")
            tag_id = 12 if tag_name == "Gasflasche" else 13
            return {"id": tag_id, "name": str(tag_name)}
        if route == "posts":
            return {"id": 900, "link": "https://example.org/?p=900"}
        return {}

    mock_wp_request.side_effect = _wp_stub

    draft = {
        "title": "Tag Test",
        "content_raw": "Inhalt",
        "source_url": "https://example.com/source",
        "canonical_url": "https://example.com/source",
        "meta_json": '{"generated_tags":["Rheingas","Gasflasche"]}',
    }
    post_id, _ = publish_article_draft(draft)
    self.assertEqual(post_id, 900)

    # Keep only the recorded requests that targeted the "posts" endpoint.
    posts_requests = [
        recorded
        for recorded in mock_wp_request.call_args_list
        if recorded.kwargs.get("endpoint") == "posts"
    ]
    self.assertEqual(len(posts_requests), 1)
    sent_payload = posts_requests[0].kwargs.get("payload", {})
    self.assertEqual(sent_payload.get("tags"), [11, 12])
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()