Ampel system removed – all enabled feeds are now processed regardless of risk_level. Updated test to verify feeds with any risk_level are processed instead of blocked. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
245 lines
9.4 KiB
Python
245 lines
9.4 KiB
Python
import os
|
||
import tempfile
|
||
import unittest
|
||
from pathlib import Path
|
||
from unittest.mock import patch
|
||
|
||
from backend.app import config as config_module
|
||
from backend.app.db import init_db
|
||
from backend.app.ingestion import run_ingestion
|
||
from backend.app.repositories import (
|
||
ArticleUpsert,
|
||
FeedCreate,
|
||
SourceCreate,
|
||
create_feed,
|
||
create_source,
|
||
get_article_by_id,
|
||
list_articles,
|
||
upsert_article,
|
||
)
|
||
from backend.app.source_extraction import ExtractedArticle
|
||
|
||
|
||
class TestIngestion(unittest.TestCase):
    """Tests for ``run_ingestion`` against a throwaway database.

    Every test gets its own database file inside a temporary directory,
    selected through the ``APP_DB_PATH`` environment variable, and starts with
    one enabled source and one enabled feed (``self.feed_id``).

    Network access is avoided by patching ``feedparser.parse`` and
    ``extract_article`` as they are referenced from ``backend.app.ingestion``.
    """

    def setUp(self) -> None:
        """Create an isolated database containing one enabled source and feed.

        Cleanup is registered with ``addCleanup`` right after each resource is
        acquired instead of living in ``tearDown``: unittest skips ``tearDown``
        when ``setUp`` raises, which would leak the temporary directory, the
        environment override and the cached settings into later tests.
        """
        # Cleanups run in reverse registration order, i.e. settings cache
        # first, then the environment variable, then the directory.
        self.tmp_dir = tempfile.TemporaryDirectory()
        self.addCleanup(self.tmp_dir.cleanup)

        previous_db_path = os.environ.get("APP_DB_PATH")
        os.environ["APP_DB_PATH"] = str(Path(self.tmp_dir.name) / "ingestion.db")
        if previous_db_path is None:
            self.addCleanup(os.environ.pop, "APP_DB_PATH", None)
        else:
            # Put back whatever the surrounding environment had configured.
            self.addCleanup(os.environ.__setitem__, "APP_DB_PATH", previous_db_path)

        # Drop cached settings so the new APP_DB_PATH is picked up now, and
        # again afterwards so the temporary path does not outlive the test.
        config_module.get_settings.cache_clear()
        self.addCleanup(config_module.get_settings.cache_clear)
        init_db()

        source_id = create_source(
            SourceCreate(
                name="Test Source",
                base_url="https://example.org",
                terms_url="https://example.org/terms",
                license_name="cc-by",
                risk_level="green",
                is_enabled=True,
                notes=None,
                last_reviewed_at="2026-02-18T00:00:00Z",
            )
        )
        self.feed_id = create_feed(
            FeedCreate(
                name="Test Feed",
                url="https://example.org/feed.xml",
                source_id=source_id,
                is_enabled=True,
            )
        )

    def _require_article(self, article_id):
        """Return the stored article row, failing the test if it is missing.

        An explicit assertion gives a readable failure instead of the
        ``KeyError`` that indexing an empty fallback dict would produce.
        """
        row = get_article_by_id(article_id)
        self.assertIsNotNone(row, f"article {article_id} not found after ingestion")
        return row

    @patch("backend.app.ingestion.extract_article")
    @patch("backend.app.ingestion.feedparser.parse")
    def test_ingestion_deduplicates_by_feed_and_guid(self, mock_parse, mock_extract_article) -> None:
        """Two feed entries sharing one id must end up as a single article."""
        mock_extract_article.return_value = ExtractedArticle(
            title="Artikel 1 original",
            author="Autorin A",
            canonical_url="https://example.org/article/1",
            summary="Original Summary",
            content_text="Original Volltext",
            images=["https://example.org/a.jpg"],
            press_contact="Pressekontakt: Team A",
            extraction_error=None,
        )
        # Both entries carry the id "item-1" but differ in title, link and summary.
        mock_parse.return_value = {
            "etag": "etag-1",
            "modified": "Tue, 18 Feb 2026 10:00:00 GMT",
            "entries": [
                {
                    "id": "item-1",
                    "title": "Artikel 1",
                    "link": "https://example.org/article/1",
                    "summary": "A",
                },
                {
                    "id": "item-1",
                    "title": "Artikel 1 aktualisiert",
                    "link": "https://example.org/article/1-neu",
                    "summary": "B",
                },
            ],
        }

        stats = run_ingestion(feed_id=self.feed_id)

        self.assertEqual(stats.status, "success")
        self.assertEqual(stats.entries_seen, 2)

        # Query once and reuse the result for the count and the field checks.
        articles = list_articles()
        self.assertEqual(len(articles), 1)
        article = articles[0]
        # The stored fields come from the mocked extraction result.
        self.assertEqual(article["title"], "Artikel 1 original")
        self.assertEqual(article["author"], "Autorin A")
        self.assertIn("Original Volltext", article["content_raw"] or "")
        self.assertIn("Pressekontakt", article["meta_json"] or "")
        self.assertIsNotNone(article["image_urls_json"])

    @patch("backend.app.ingestion.extract_article")
    @patch("backend.app.ingestion.feedparser.parse")
    def test_ingestion_processes_any_enabled_source(self, mock_parse, mock_extract_article) -> None:
        """A feed whose source has a non-green risk level is still processed."""
        # The "Ampel" (traffic-light) risk-level gate was removed: every
        # enabled feed is processed regardless of its source's risk_level.
        source_id = create_source(
            SourceCreate(
                name="Any Risk Source",
                base_url="https://example.net",
                terms_url="https://example.net/terms",
                license_name="custom",
                risk_level="yellow",
                is_enabled=True,
                notes=None,
                last_reviewed_at="2026-02-18T00:00:00Z",
            )
        )
        feed_id = create_feed(
            FeedCreate(
                name="Any Risk Feed",
                url="https://example.net/feed.xml",
                source_id=source_id,
                is_enabled=True,
            )
        )

        # Attribute-style stand-in for a parse result with no entries.
        # NOTE(review): the other tests return a plain dict here; confirm that
        # run_ingestion deliberately accepts both shapes.
        mock_parse.return_value = type("FP", (), {"entries": [], "etag": None, "modified": None})()
        # With no entries this stub should never be consulted; it only guards
        # against an unconfigured mock leaking into the pipeline.
        mock_extract_article.return_value = type("E", (), {
            "title": None, "author": None, "summary": None, "content_text": None,
            "canonical_url": None, "images": [], "press_contact": None,
        })()

        stats = run_ingestion(feed_id=feed_id)

        self.assertEqual(stats.status, "success")
        # The feed was fetched (feedparser was called) despite the yellow risk_level.
        mock_parse.assert_called_once()

    @patch("backend.app.ingestion.extract_article")
    @patch("backend.app.ingestion.feedparser.parse")
    def test_ingestion_preserves_existing_work_and_skips_closed(self, mock_parse, mock_extract_article) -> None:
        """Re-ingesting known entries must not overwrite editorial work.

        Two articles are stored up front, one with status ``"error"`` and one
        with status ``"published"``, then the feed delivers entries with the
        same ids. Status, rewritten content and WordPress linkage must survive.
        """
        existing_closed_id = upsert_article(
            ArticleUpsert(
                feed_id=self.feed_id,
                source_article_id="closed-1",
                source_hash="closed-hash-1",
                title="Alt Closed",
                source_url="https://example.org/closed-article",
                canonical_url="https://example.org/closed-article",
                published_at=None,
                author="Autor",
                summary="Alt",
                content_raw="Alt Raw",
                content_rewritten="<p>Alt Rewrite Closed</p>",
                image_urls_json=None,
                press_contact="Kontakt Alt",
                source_name_snapshot="Test Source",
                source_terms_url_snapshot="https://example.org/terms",
                source_license_name_snapshot="cc-by",
                legal_checked=False,
                legal_checked_at=None,
                legal_note=None,
                wp_post_id=42,
                wp_post_url="https://wp.local/?p=42",
                publish_attempts=2,
                publish_last_error=None,
                published_to_wp_at="2026-02-21T12:00:00Z",
                word_count=3,
                status="error",  # UI: close
                meta_json='{"generated_tags":["AltTag"]}',
            )
        )
        existing_published_id = upsert_article(
            ArticleUpsert(
                feed_id=self.feed_id,
                source_article_id="published-1",
                source_hash="published-hash-1",
                title="Alt Published",
                source_url="https://example.org/published-article",
                canonical_url="https://example.org/published-article",
                published_at=None,
                author="Autor",
                summary="Alt",
                content_raw="Alt Raw",
                content_rewritten="<p>Alt Rewrite Published</p>",
                image_urls_json=None,
                press_contact="Kontakt Alt",
                source_name_snapshot="Test Source",
                source_terms_url_snapshot="https://example.org/terms",
                source_license_name_snapshot="cc-by",
                legal_checked=False,
                legal_checked_at=None,
                legal_note=None,
                wp_post_id=77,
                wp_post_url="https://wp.local/?p=77",
                publish_attempts=3,
                publish_last_error=None,
                published_to_wp_at="2026-02-21T12:10:00Z",
                word_count=3,
                status="published",
                meta_json='{"generated_tags":["Rheingas"],"image_review":{"selected_url":"https://img.local/1.jpg"}}',
            )
        )

        # Fresh extraction data that must NOT replace the stored work.
        mock_extract_article.return_value = ExtractedArticle(
            title="Neu Titel",
            author="Neu Autor",
            canonical_url=None,
            summary="Neu Summary",
            content_text="Neu Volltext",
            images=["https://example.org/a.jpg"],
            press_contact=None,
            extraction_error=None,
        )
        # The entry ids match the source_article_id values stored above.
        mock_parse.return_value = {
            "etag": "etag-2",
            "modified": "Tue, 18 Feb 2026 11:00:00 GMT",
            "entries": [
                {
                    "id": "closed-1",
                    "title": "Closed Entry",
                    "link": "https://example.org/closed-article",
                    "summary": "X",
                },
                {
                    "id": "published-1",
                    "title": "Published Entry",
                    "link": "https://example.org/published-article",
                    "summary": "Y",
                },
            ],
        }

        stats = run_ingestion(feed_id=self.feed_id)
        self.assertEqual(stats.status, "success")

        closed_row = self._require_article(existing_closed_id)
        self.assertEqual(closed_row["status"], "error")
        self.assertIn("Alt Rewrite Closed", closed_row.get("content_rewritten") or "")
        self.assertEqual(closed_row.get("wp_post_id"), 42)

        published_row = self._require_article(existing_published_id)
        self.assertEqual(published_row["status"], "published")
        self.assertIn("Alt Rewrite Published", published_row.get("content_rewritten") or "")
        self.assertEqual(published_row.get("wp_post_id"), 77)
        self.assertIn("generated_tags", published_row.get("meta_json") or "")
|
||
|
||
|
||
if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()
|