feat: rebuild rss-news backend, admin ui, and legal extraction pipeline
This commit is contained in:
parent
d65c55d315
commit
2c331d683b
43 changed files with 3463 additions and 73 deletions
122
backend/tests/test_ingestion.py
Normal file
122
backend/tests/test_ingestion.py
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from backend.app import config as config_module
|
||||
from backend.app.db import init_db
|
||||
from backend.app.ingestion import run_ingestion
|
||||
from backend.app.repositories import FeedCreate, SourceCreate, create_feed, create_source, list_articles
|
||||
from backend.app.source_extraction import ExtractedArticle
|
||||
|
||||
|
||||
class TestIngestion(unittest.TestCase):
    """Exercise ``run_ingestion`` against a throwaway database.

    Every test gets a fresh temporary directory whose ``ingestion.db`` path is
    published through the ``APP_DB_PATH`` environment variable, plus one
    enabled "green" source with one enabled feed (``self.feed_id``).
    Feed parsing and article extraction are always mocked.
    """

    def setUp(self) -> None:
        """Create the temporary database and the default green source/feed.

        All teardown work is registered with ``addCleanup`` right after the
        matching resource is acquired. Unlike ``tearDown``, cleanups also run
        when a later step of ``setUp`` raises, so a failing ``init_db()`` or
        ``create_*`` call cannot leak the temp directory, the environment
        variable, or cached settings into other tests.

        Cleanups run last-in-first-out: settings cache is cleared, then the
        environment is restored, then the temporary directory is removed.
        """
        self.tmp_dir = tempfile.TemporaryDirectory()
        self.addCleanup(self.tmp_dir.cleanup)

        # patch.dict restores os.environ to its previous state on stop(), so a
        # pre-existing APP_DB_PATH value is put back instead of being dropped.
        env_patch = patch.dict(
            os.environ,
            {"APP_DB_PATH": str(Path(self.tmp_dir.name) / "ingestion.db")},
        )
        env_patch.start()
        self.addCleanup(env_patch.stop)

        # Drop cached settings so they are re-read after the environment
        # change (presumably this is where APP_DB_PATH is consumed), and
        # again afterwards so the temporary path does not outlive the test.
        config_module.get_settings.cache_clear()
        self.addCleanup(config_module.get_settings.cache_clear)

        init_db()

        source_id = create_source(
            SourceCreate(
                name="Test Source",
                base_url="https://example.org",
                terms_url="https://example.org/terms",
                license_name="cc-by",
                risk_level="green",
                is_enabled=True,
                notes=None,
                last_reviewed_at="2026-02-18T00:00:00Z",
            )
        )
        self.feed_id = create_feed(
            FeedCreate(
                name="Test Feed",
                url="https://example.org/feed.xml",
                source_id=source_id,
                is_enabled=True,
            )
        )

    @patch("backend.app.ingestion.extract_article")
    @patch("backend.app.ingestion.feedparser.parse")
    def test_ingestion_deduplicates_by_feed_and_guid(self, mock_parse, mock_extract_article) -> None:
        """Two feed entries sharing one id must end up as a single article.

        The stored article is also expected to carry the values returned by
        the (mocked) extractor rather than the raw feed entry fields.
        """
        mock_extract_article.return_value = ExtractedArticle(
            title="Artikel 1 original",
            author="Autorin A",
            canonical_url="https://example.org/article/1",
            summary="Original Summary",
            content_text="Original Volltext",
            images=["https://example.org/a.jpg"],
            press_contact="Pressekontakt: Team A",
            extraction_error=None,
        )
        # Both entries share the id "item-1" but differ in title/link/summary.
        mock_parse.return_value = {
            "etag": "etag-1",
            "modified": "Tue, 18 Feb 2026 10:00:00 GMT",
            "entries": [
                {
                    "id": "item-1",
                    "title": "Artikel 1",
                    "link": "https://example.org/article/1",
                    "summary": "A",
                },
                {
                    "id": "item-1",
                    "title": "Artikel 1 aktualisiert",
                    "link": "https://example.org/article/1-neu",
                    "summary": "B",
                },
            ],
        }

        stats = run_ingestion(feed_id=self.feed_id)

        self.assertEqual(stats.status, "success")
        self.assertEqual(stats.entries_seen, 2)

        # Fetch once and reuse the result for the count and field checks.
        articles = list_articles()
        self.assertEqual(len(articles), 1)
        article = articles[0]
        self.assertEqual(article["title"], "Artikel 1 original")
        self.assertEqual(article["author"], "Autorin A")
        # "or ''" keeps assertIn from raising TypeError on a None column.
        self.assertIn("Original Volltext", article["content_raw"] or "")
        self.assertIn("Pressekontakt", article["meta_json"] or "")

    @patch("backend.app.ingestion.extract_article")
    @patch("backend.app.ingestion.feedparser.parse")
    def test_ingestion_blocks_non_green_source(self, mock_parse, mock_extract_article) -> None:
        """A feed whose source is rated "yellow" must not be fetched at all.

        The run is still expected to report "success", with zero upserted
        articles and neither the feed parser nor the extractor invoked.
        """
        # Separate source/feed with yellow risk to verify enforcement.
        source_id = create_source(
            SourceCreate(
                name="Blocked Source",
                base_url="https://example.net",
                terms_url="https://example.net/terms",
                license_name="custom",
                risk_level="yellow",
                is_enabled=True,
                notes=None,
                last_reviewed_at="2026-02-18T00:00:00Z",
            )
        )
        blocked_feed_id = create_feed(
            FeedCreate(
                name="Blocked Feed",
                url="https://example.net/feed.xml",
                source_id=source_id,
                is_enabled=True,
            )
        )

        stats = run_ingestion(feed_id=blocked_feed_id)

        self.assertEqual(stats.status, "success")
        self.assertEqual(stats.articles_upserted, 0)
        mock_parse.assert_not_called()
        mock_extract_article.assert_not_called()
|
||||
|
||||
|
||||
# Script entry point: run this module's tests when it is executed directly.
if __name__ == "__main__":
    unittest.main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue