feat(rewrite): add batch rewrite run, AI tags for WP, and agentur contact detection
This commit is contained in:
parent
da269d08f1
commit
b0f995d5c9
10 changed files with 374 additions and 36 deletions
|
|
@ -271,6 +271,71 @@ class TestAdminUi(unittest.TestCase):
|
|||
self.assertIn("Neu", article.get("content_rewritten") or "")
|
||||
self.assertIsNone(article.get("wp_post_id"))
|
||||
|
||||
@patch("backend.app.admin_ui.generate_article_tags")
@patch("backend.app.admin_ui.rewrite_article_text")
def test_batch_rewrite_run_processes_planned_articles(self, mock_rewrite_text, mock_tags) -> None:
    """Check that POST /admin/rewrite/run processes an article in status "rewrite".

    The rewrite and tag-generation helpers are patched with canned return
    values. After logging in and triggering the batch run, the test expects
    a 303 response, the article status to be "approved", and the string
    "generated_tags" to appear in the article's ``meta_json``.
    """
    mock_rewrite_text.return_value = "<h2>Neu</h2><p>Text</p>"
    mock_tags.return_value = ["Rheingas", "Monheim"]

    # Fixture chain: source -> feed -> article waiting in status "rewrite".
    source_id = create_source(
        SourceCreate(
            name="Batch Source",
            base_url="https://example.org",
            terms_url="https://example.org/terms",
            license_name="cc-by",
            risk_level="green",
            is_enabled=True,
            notes=None,
            last_reviewed_at=None,
        )
    )
    feed_id = create_feed(
        FeedCreate(
            name="Batch Feed",
            url="https://example.org/feed.xml",
            source_id=source_id,
            is_enabled=True,
        )
    )
    article_id = upsert_article(
        ArticleUpsert(
            feed_id=feed_id,
            source_article_id="batch-1",
            source_hash="batch-hash-1",
            title="Batch Titel",
            source_url="https://example.org/batch",
            canonical_url="https://example.org/batch",
            published_at=None,
            author="Autor",
            summary="Summary",
            content_raw="Raw",
            content_rewritten=None,
            image_urls_json=None,
            press_contact=None,
            source_name_snapshot="Batch Source",
            source_terms_url_snapshot="https://example.org/terms",
            source_license_name_snapshot="cc-by",
            legal_checked=False,
            legal_checked_at=None,
            legal_note=None,
            wp_post_id=None,
            wp_post_url=None,
            publish_attempts=0,
            publish_last_error=None,
            published_to_wp_at=None,
            word_count=1,
            status="rewrite",
            meta_json="{}",
        )
    )

    self.client.post("/admin/login", data={"username": "admin", "password": "secret"}, follow_redirects=True)
    res = self.client.post("/admin/rewrite/run", data={"max_jobs": "10"}, follow_redirects=False)
    self.assertEqual(res.status_code, 303)

    article = get_article_by_id(article_id)
    self.assertIsNotNone(article)
    self.assertEqual(article.get("status"), "approved")
    # `or ""` (not a .get default) so a stored None yields a clean assertion
    # failure instead of a TypeError inside assertIn.
    self.assertIn("generated_tags", article.get("meta_json") or "")
|
||||
|
||||
@patch("backend.app.admin_ui.urlopen")
|
||||
def test_image_proxy_returns_image_data(self, mock_urlopen) -> None:
|
||||
class _FakeHeaders:
|
||||
|
|
|
|||
|
|
@ -26,6 +26,25 @@ SAMPLE_HTML = """
|
|||
</html>
|
||||
"""
|
||||
|
||||
# HTML fixture whose <article> contains an "Agentur" heading followed by an
# agency name and an e-mail address; fed to extract_article() by
# test_extract_article_detects_agentur_block_as_press_contact.
SAMPLE_HTML_AGENTUR = """
|
||||
<!doctype html>
|
||||
<html lang="de">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta property="og:title" content="Demo Meldung Agentur" />
|
||||
</head>
|
||||
<body>
|
||||
<article>
|
||||
<p>Inhalt der Meldung.</p>
|
||||
<h3>Agentur</h3>
|
||||
<p>Agenturname GmbH</p>
|
||||
<p>presse@agentur.example</p>
|
||||
<p>Original-Content von Beispiel</p>
|
||||
</article>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
|
||||
class _FakeHeaders:
|
||||
@staticmethod
|
||||
|
|
@ -64,6 +83,14 @@ class TestSourceExtraction(unittest.TestCase):
|
|||
self.assertIn("Pressekontakt", extracted.press_contact or "")
|
||||
self.assertIsNone(extracted.extraction_error)
|
||||
|
||||
@patch("backend.app.source_extraction.urlopen")
def test_extract_article_detects_agentur_block_as_press_contact(self, mock_urlopen) -> None:
    """Check that an "Agentur" block in the page ends up in ``press_contact``.

    ``urlopen`` is patched to serve SAMPLE_HTML_AGENTUR; the extracted press
    contact must contain the heading, the agency name and the e-mail address.
    """
    mock_urlopen.return_value = _FakeResponse(SAMPLE_HTML_AGENTUR)

    result = extract_article("https://www.presseportal.de/pm/155103/6210401")

    # Treat a missing press contact as empty text so assertIn fails cleanly.
    contact_text = result.press_contact or ""
    for expected_fragment in ("Agentur", "Agenturname", "presse@agentur.example"):
        self.assertIn(expected_fragment, contact_text)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -80,6 +80,40 @@ class TestWordpressPublish(unittest.TestCase):
|
|||
self.assertNotIn("Pressekontakt", content)
|
||||
self.assertIn("eigentliche Text", content)
|
||||
|
||||
@patch("backend.app.wordpress._upload_featured_media")
@patch("backend.app.wordpress._wp_request")
def test_publish_resolves_and_sets_tags(self, mock_wp_request, mock_upload_media) -> None:
    """Check that generated tags are turned into tag ids on the post payload.

    ``_wp_request`` is replaced by a stub: a tag search containing
    "Rheingas" returns id 11, any other search returns nothing, creating
    the tag "Gasflasche" returns id 12, and creating the post returns id 900.
    The test expects exactly one "posts" request whose payload carries
    ``tags == [11, 12]``.
    """

    def _fake_wp_request(**kwargs):
        verb = kwargs.get("method", "")
        path = kwargs.get("endpoint", "")

        # Tag lookup: only "Rheingas" already exists.
        if verb == "GET" and path.startswith("tags?search="):
            return [{"id": 11, "name": "Rheingas"}] if "Rheingas" in path else []

        if verb != "POST":
            return {}

        # Tag creation: "Gasflasche" gets id 12, anything else id 13.
        if path == "tags":
            tag_name = (kwargs.get("payload") or {}).get("name")
            tag_id = 12 if tag_name == "Gasflasche" else 13
            return {"id": tag_id, "name": str(tag_name)}

        if path == "posts":
            return {"id": 900, "link": "https://example.org/?p=900"}

        return {}

    mock_wp_request.side_effect = _fake_wp_request

    draft = {
        "title": "Tag Test",
        "content_raw": "Inhalt",
        "source_url": "https://example.com/source",
        "canonical_url": "https://example.com/source",
        "meta_json": '{"generated_tags":["Rheingas","Gasflasche"]}',
    }
    post_id, _ = publish_article_draft(draft)
    self.assertEqual(post_id, 900)

    # Isolate the single request that created the post.
    post_requests = []
    for recorded in mock_wp_request.call_args_list:
        if recorded.kwargs.get("endpoint") == "posts":
            post_requests.append(recorded)
    self.assertEqual(len(post_requests), 1)

    sent_payload = post_requests[0].kwargs.get("payload", {})
    self.assertEqual(sent_payload.get("tags"), [11, 12])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue