feat: rebuild rss-news backend, admin ui, and legal extraction pipeline
This commit is contained in:
parent
d65c55d315
commit
2c331d683b
43 changed files with 3463 additions and 73 deletions
1
backend/tests/__init__.py
Normal file
1
backend/tests/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
"""Tests package."""
|
||||
65
backend/tests/test_admin_ui.py
Normal file
65
backend/tests/test_admin_ui.py
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from backend.app import config as config_module
|
||||
from backend.app.db import init_db
|
||||
from backend.app.main import app
|
||||
|
||||
|
||||
class TestAdminUi(unittest.TestCase):
    """Drive the ``/admin`` pages through ``TestClient`` against a throwaway database file."""

    # Credentials exported through the environment in ``setUp`` and reused by
    # the login helper, so both sides always agree.
    ADMIN_USERNAME = "admin"
    ADMIN_PASSWORD = "secret"

    def setUp(self) -> None:
        """Install per-test environment overrides, initialise the DB and build a client.

        All teardown work is registered with ``addCleanup`` rather than placed
        in ``tearDown``: cleanups still run when ``setUp`` fails part-way
        (e.g. ``init_db()`` raises), whereas ``tearDown`` is skipped in that
        case and the overrides would leak into later tests. ``patch.dict``
        also restores any values the variables had before the test instead of
        deleting them.
        """
        # Function-scope import: this module only imports top-level ``unittest``.
        from unittest import mock

        self.tmp_dir = tempfile.TemporaryDirectory()
        # Cleanups run last-in-first-out, so the directory is removed last.
        self.addCleanup(self.tmp_dir.cleanup)

        env_patch = mock.patch.dict(
            os.environ,
            {
                "APP_DB_PATH": str(Path(self.tmp_dir.name) / "admin_ui.db"),
                "APP_ADMIN_USERNAME": self.ADMIN_USERNAME,
                "APP_ADMIN_PASSWORD": self.ADMIN_PASSWORD,
            },
        )
        env_patch.start()
        self.addCleanup(env_patch.stop)

        # Presumably ``get_settings`` is memoised; clear it now so the overrides
        # are picked up, and again after the test (before the env is restored).
        config_module.get_settings.cache_clear()
        self.addCleanup(config_module.get_settings.cache_clear)

        init_db()
        self.client = TestClient(app)

    def _login(self):
        """Submit the admin login form, follow redirects, and return the final response."""
        return self.client.post(
            "/admin/login",
            data={"username": self.ADMIN_USERNAME, "password": self.ADMIN_PASSWORD},
            follow_redirects=True,
        )

    def test_admin_login_and_dashboard(self) -> None:
        """The login page renders and a valid login ends on the dashboard."""
        login_page = self.client.get("/admin/login")
        self.assertEqual(login_page.status_code, 200)
        self.assertIn("rss-news Admin", login_page.text)

        dashboard = self._login()
        self.assertEqual(dashboard.status_code, 200)
        self.assertIn("Admin Dashboard", dashboard.text)

    def test_dashboard_redirects_if_not_logged_in(self) -> None:
        """An anonymous dashboard request is redirected (303) to the login page."""
        response = self.client.get("/admin/dashboard", follow_redirects=False)
        self.assertEqual(response.status_code, 303)
        self.assertEqual(response.headers.get("location"), "/admin/login")

    def test_create_feed_with_empty_source_id_does_not_error(self) -> None:
        """Creating a feed with a blank ``source_id`` redirects back to the dashboard."""
        # Fail here, with a clear message, if the precondition (being logged
        # in) does not hold, instead of misreporting it as a feed-form problem.
        login = self._login()
        self.assertEqual(login.status_code, 200)

        # empty source_id used to cause validation issues in form parsing
        response = self.client.post(
            "/admin/feeds/create",
            data={"name": "Feed X", "url": "https://example.org/feed.xml", "source_id": ""},
            follow_redirects=False,
        )
        self.assertEqual(response.status_code, 303)
        self.assertTrue(response.headers.get("location", "").startswith("/admin/dashboard"))
|
||||
|
||||
|
||||
# Script entry point: run this module's tests when it is executed directly.
if __name__ == "__main__":
    unittest.main()
|
||||
77
backend/tests/test_api_auth.py
Normal file
77
backend/tests/test_api_auth.py
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from backend.app import config as config_module
|
||||
from backend.app.db import init_db
|
||||
from backend.app.main import app
|
||||
|
||||
|
||||
class TestApiAuth(unittest.TestCase):
    """Check JSON login via ``/auth/login`` and several authenticated ``/api`` endpoints."""

    # Credentials exported through the environment in ``setUp`` and sent by
    # the login helper.
    ADMIN_USERNAME = "admin"
    ADMIN_PASSWORD = "secret"

    def setUp(self) -> None:
        """Install per-test environment overrides, initialise the DB and build a client.

        Teardown is registered with ``addCleanup`` so it also runs when
        ``setUp`` fails part-way (``tearDown`` would be skipped then), and
        ``patch.dict`` restores pre-existing environment values instead of
        deleting them.
        """
        # Function-scope import: this module only imports top-level ``unittest``.
        from unittest import mock

        self.tmp_dir = tempfile.TemporaryDirectory()
        # Cleanups run last-in-first-out, so the directory is removed last.
        self.addCleanup(self.tmp_dir.cleanup)

        env_patch = mock.patch.dict(
            os.environ,
            {
                "APP_DB_PATH": str(Path(self.tmp_dir.name) / "api.db"),
                "APP_ADMIN_USERNAME": self.ADMIN_USERNAME,
                "APP_ADMIN_PASSWORD": self.ADMIN_PASSWORD,
            },
        )
        env_patch.start()
        self.addCleanup(env_patch.stop)

        # Presumably ``get_settings`` is memoised; clear it now so the overrides
        # are picked up, and again after the test (before the env is restored).
        config_module.get_settings.cache_clear()
        self.addCleanup(config_module.get_settings.cache_clear)

        init_db()
        self.client = TestClient(app)

    def _login(self):
        """Log in through ``/auth/login`` and fail the test unless it answers 200."""
        response = self.client.post(
            "/auth/login",
            json={"username": self.ADMIN_USERNAME, "password": self.ADMIN_PASSWORD},
        )
        self.assertEqual(response.status_code, 200)
        return response

    def test_login_and_protected_endpoint(self) -> None:
        """After logging in, ``/api/protected`` answers 200 with a truthy ``ok``."""
        self._login()

        protected = self.client.get("/api/protected")
        self.assertEqual(protected.status_code, 200)
        self.assertTrue(protected.json().get("ok"))

    def test_protected_requires_auth(self) -> None:
        """Without a prior login, ``/api/protected`` answers 401."""
        response = self.client.get("/api/protected")
        self.assertEqual(response.status_code, 401)

    def test_run_detail_endpoint(self) -> None:
        """A run created via ``POST /api/runs`` can be fetched by its id."""
        self._login()

        created = self.client.post("/api/runs", json={"run_type": "ingestion", "status": "running"})
        self.assertEqual(created.status_code, 200)
        run_id = created.json()["id"]

        detail = self.client.get(f"/api/runs/{run_id}")
        self.assertEqual(detail.status_code, 200)
        self.assertEqual(detail.json()["item"]["id"], run_id)

    def test_source_policy_check_endpoint(self) -> None:
        """The policy check for a ``yellow`` source reports it as not allowed, with issues."""
        self._login()

        created = self.client.post(
            "/api/sources",
            json={
                "name": "Policy Source",
                "risk_level": "yellow",
                "is_enabled": True,
            },
        )
        self.assertEqual(created.status_code, 200)
        source_id = created.json()["id"]

        check = self.client.get(f"/api/sources/{source_id}/policy-check")
        self.assertEqual(check.status_code, 200)
        verdict = check.json()
        self.assertFalse(verdict["allowed"])
        self.assertGreaterEqual(len(verdict["issues"]), 1)
|
||||
|
||||
|
||||
# Script entry point: run this module's tests when it is executed directly.
if __name__ == "__main__":
    unittest.main()
|
||||
95
backend/tests/test_article_workflow.py
Normal file
95
backend/tests/test_article_workflow.py
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from backend.app import config as config_module
|
||||
from backend.app.db import init_db
|
||||
from backend.app.main import app
|
||||
|
||||
|
||||
class TestArticleWorkflow(unittest.TestCase):
    """Exercise article status transitions and review decisions over the HTTP API."""

    # Credentials exported through the environment in ``setUp`` and used for
    # the login performed there.
    ADMIN_USERNAME = "admin"
    ADMIN_PASSWORD = "secret"

    def setUp(self) -> None:
        """Install environment overrides, initialise the DB and log a client in.

        Teardown is registered with ``addCleanup`` so it also runs when
        ``setUp`` fails part-way (``tearDown`` would be skipped then), and
        ``patch.dict`` restores pre-existing environment values instead of
        deleting them.
        """
        # Function-scope import: this module only imports top-level ``unittest``.
        from unittest import mock

        self.tmp_dir = tempfile.TemporaryDirectory()
        # Cleanups run last-in-first-out, so the directory is removed last.
        self.addCleanup(self.tmp_dir.cleanup)

        env_patch = mock.patch.dict(
            os.environ,
            {
                "APP_DB_PATH": str(Path(self.tmp_dir.name) / "workflow.db"),
                "APP_ADMIN_USERNAME": self.ADMIN_USERNAME,
                "APP_ADMIN_PASSWORD": self.ADMIN_PASSWORD,
            },
        )
        env_patch.start()
        self.addCleanup(env_patch.stop)

        # Presumably ``get_settings`` is memoised; clear it now so the overrides
        # are picked up, and again after the test (before the env is restored).
        config_module.get_settings.cache_clear()
        self.addCleanup(config_module.get_settings.cache_clear)

        init_db()
        self.client = TestClient(app)

        # Every test needs an authenticated client; report a broken login here
        # rather than as unrelated failures further down.
        login = self.client.post(
            "/auth/login",
            json={"username": self.ADMIN_USERNAME, "password": self.ADMIN_PASSWORD},
        )
        self.assertEqual(login.status_code, 200)

    def _post_ok(self, path: str, payload: dict) -> dict:
        """POST ``payload`` as JSON to ``path``, require a 2xx answer, return the parsed body."""
        response = self.client.post(path, json=payload)
        self.assertTrue(
            200 <= response.status_code < 300,
            msg=f"POST {path} failed with {response.status_code}: {response.text}",
        )
        return response.json()

    def _create_article(self) -> int:
        """Create a source, a feed on it and one article in status ``new``; return the article id.

        Each fixture request is checked for success so that a broken fixture
        is reported as such instead of as ``KeyError: 'id'``.
        """
        source = self._post_ok(
            "/api/sources",
            {
                "name": "Workflow Source",
                "base_url": "https://example.org",
                "terms_url": "https://example.org/terms",
                "license_name": "cc-by",
                "risk_level": "green",
                "is_enabled": True,
                "last_reviewed_at": "2026-02-18T00:00:00Z",
            },
        )

        feed = self._post_ok(
            "/api/feeds",
            {
                "name": "Workflow Feed",
                "url": "https://example.org/feed.xml",
                "source_id": source["id"],
                "is_enabled": True,
            },
        )

        article = self._post_ok(
            "/api/articles/upsert",
            {
                "feed_id": feed["id"],
                "source_article_id": "wf-1",
                "source_url": "https://example.org/a1",
                "title": "Workflow Artikel",
                "summary": "s",
                "content_raw": "c",
                "status": "new",
            },
        )
        return article["id"]

    def test_valid_transition_chain(self) -> None:
        """new -> review -> (approve) -> published succeeds and is persisted."""
        article_id = self._create_article()
        transition_url = f"/api/articles/{article_id}/transition"

        to_review = self.client.post(transition_url, json={"target_status": "review"})
        self.assertEqual(to_review.status_code, 200)

        approval = self.client.post(
            f"/api/articles/{article_id}/review",
            json={"decision": "approve", "note": "ok"},
        )
        self.assertEqual(approval.status_code, 200)
        self.assertEqual(approval.json()["to_status"], "approved")

        to_published = self.client.post(transition_url, json={"target_status": "published"})
        self.assertEqual(to_published.status_code, 200)

        final = self.client.get(f"/api/articles/{article_id}")
        self.assertEqual(final.status_code, 200)
        self.assertEqual(final.json()["item"]["status"], "published")

    def test_invalid_transition_rejected(self) -> None:
        """Jumping straight from ``new`` to ``published`` is answered with 400."""
        article_id = self._create_article()
        rejected = self.client.post(
            f"/api/articles/{article_id}/transition",
            json={"target_status": "published"},
        )
        self.assertEqual(rejected.status_code, 400)

    def test_review_only_allowed_in_review_status(self) -> None:
        """A review decision on an article still in ``new`` is answered with 400."""
        article_id = self._create_article()
        rejected = self.client.post(
            f"/api/articles/{article_id}/review",
            json={"decision": "approve"},
        )
        self.assertEqual(rejected.status_code, 400)
|
||||
|
||||
|
||||
# Script entry point: run this module's tests when it is executed directly.
if __name__ == "__main__":
    unittest.main()
|
||||
119
backend/tests/test_db_repositories.py
Normal file
119
backend/tests/test_db_repositories.py
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
from backend.app import config as config_module
|
||||
from backend.app.db import init_db
|
||||
from backend.app.repositories import (
|
||||
ArticleUpsert,
|
||||
FeedCreate,
|
||||
RunCreate,
|
||||
SourceCreate,
|
||||
create_feed,
|
||||
create_run,
|
||||
create_source,
|
||||
finish_run,
|
||||
list_articles,
|
||||
list_feeds,
|
||||
list_runs,
|
||||
list_sources,
|
||||
upsert_article,
|
||||
)
|
||||
|
||||
|
||||
class TestSQLiteRepositories(unittest.TestCase):
    """Round-trip a source, feed, run and article through the repository functions."""

    def setUp(self) -> None:
        """Point ``APP_DB_PATH`` at a file in a temporary directory and initialise the DB.

        Teardown is registered with ``addCleanup`` so it also runs when
        ``setUp`` fails part-way (``tearDown`` would be skipped then), and
        ``patch.dict`` restores a pre-existing ``APP_DB_PATH`` instead of
        deleting it.
        """
        # Function-scope import: this module only imports top-level ``unittest``.
        from unittest import mock

        self.tmp_dir = tempfile.TemporaryDirectory()
        # Cleanups run last-in-first-out, so the directory is removed last.
        self.addCleanup(self.tmp_dir.cleanup)

        self.db_path = str(Path(self.tmp_dir.name) / "test.db")
        env_patch = mock.patch.dict(os.environ, {"APP_DB_PATH": self.db_path})
        env_patch.start()
        self.addCleanup(env_patch.stop)

        # Presumably ``get_settings`` is memoised; clear it now so the override
        # is picked up, and again after the test (before the env is restored).
        config_module.get_settings.cache_clear()
        self.addCleanup(config_module.get_settings.cache_clear)

        init_db()

    def test_end_to_end_basic_crud(self) -> None:
        """Create one of each entity, upsert the article twice, and verify a single updated row."""
        source_id = create_source(
            SourceCreate(
                name="GovData",
                base_url="https://data.gov.de",
                terms_url="https://www.govdata.de/dl-de/by-2-0",
                license_name="dl-de/by-2-0",
                risk_level="green",
                is_enabled=True,
                notes="test source",
                last_reviewed_at="2026-02-18T00:00:00Z",
            )
        )
        self.assertGreater(source_id, 0)

        feed_id = create_feed(
            FeedCreate(
                name="GovData RSS",
                url="https://example.org/feed.xml",
                source_id=source_id,
                is_enabled=True,
            )
        )
        self.assertGreater(feed_id, 0)

        run_id = create_run(RunCreate(run_type="ingest", status="running", details="start"))
        self.assertGreater(run_id, 0)
        finish_run(run_id=run_id, status="success", details="ok")

        # Fields shared by both upserts; only the content-related fields differ.
        identity = {
            "feed_id": feed_id,
            "source_article_id": "abc-1",
            "source_hash": "hash-abc-1",
            "source_url": "https://example.org/articles/1",
            "canonical_url": "https://example.org/articles/1",
            "published_at": "2026-02-18T00:00:00Z",
            "author": "Max Mustermann",
        }

        first_id = upsert_article(
            ArticleUpsert(
                **identity,
                title="Beispielartikel",
                summary="Kurzfassung",
                content_raw="Originaltext",
                content_rewritten="Umschreibung",
                word_count=120,
                status="review",
                meta_json='{"lang":"de"}',
            )
        )
        self.assertGreater(first_id, 0)

        # Upsert with same source_url updates same row
        second_id = upsert_article(
            ArticleUpsert(
                **identity,
                title="Beispielartikel aktualisiert",
                summary="Kurzfassung 2",
                content_raw="Originaltext 2",
                content_rewritten="Umschreibung 2",
                word_count=140,
                status="approved",
                meta_json='{"lang":"de","v":2}',
            )
        )
        self.assertEqual(first_id, second_id)

        self.assertEqual(len(list_sources()), 1)
        self.assertEqual(len(list_feeds()), 1)
        self.assertEqual(len(list_runs()), 1)

        articles = list_articles()
        self.assertEqual(len(articles), 1)
        stored = articles[0]
        self.assertEqual(stored["title"], "Beispielartikel aktualisiert")
        self.assertEqual(stored["status"], "approved")
|
||||
|
||||
|
||||
# Script entry point: run this module's tests when it is executed directly.
if __name__ == "__main__":
    unittest.main()
|
||||
122
backend/tests/test_ingestion.py
Normal file
122
backend/tests/test_ingestion.py
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from backend.app import config as config_module
|
||||
from backend.app.db import init_db
|
||||
from backend.app.ingestion import run_ingestion
|
||||
from backend.app.repositories import FeedCreate, SourceCreate, create_feed, create_source, list_articles
|
||||
from backend.app.source_extraction import ExtractedArticle
|
||||
|
||||
|
||||
class TestIngestion(unittest.TestCase):
    """Run ``run_ingestion`` with the feed parser and article extractor mocked out."""

    def setUp(self) -> None:
        """Initialise a throwaway DB and create one enabled ``green`` source with a feed.

        Teardown is registered with ``addCleanup`` so it also runs when
        ``setUp`` fails part-way (``tearDown`` would be skipped then), and
        ``patch.dict`` restores a pre-existing ``APP_DB_PATH`` instead of
        deleting it.
        """
        self.tmp_dir = tempfile.TemporaryDirectory()
        # Cleanups run last-in-first-out, so the directory is removed last.
        self.addCleanup(self.tmp_dir.cleanup)

        env_patch = patch.dict(
            os.environ,
            {"APP_DB_PATH": str(Path(self.tmp_dir.name) / "ingestion.db")},
        )
        env_patch.start()
        self.addCleanup(env_patch.stop)

        # Presumably ``get_settings`` is memoised; clear it now so the override
        # is picked up, and again after the test (before the env is restored).
        config_module.get_settings.cache_clear()
        self.addCleanup(config_module.get_settings.cache_clear)

        init_db()

        self.feed_id = self._create_source_with_feed(
            label="Test",
            base_url="https://example.org",
            license_name="cc-by",
            risk_level="green",
        )

    @staticmethod
    def _create_source_with_feed(*, label: str, base_url: str, license_name: str, risk_level: str) -> int:
        """Create an enabled source plus one enabled feed on it and return the feed id."""
        source_id = create_source(
            SourceCreate(
                name=f"{label} Source",
                base_url=base_url,
                terms_url=f"{base_url}/terms",
                license_name=license_name,
                risk_level=risk_level,
                is_enabled=True,
                notes=None,
                last_reviewed_at="2026-02-18T00:00:00Z",
            )
        )
        return create_feed(
            FeedCreate(
                name=f"{label} Feed",
                url=f"{base_url}/feed.xml",
                source_id=source_id,
                is_enabled=True,
            )
        )

    # Decorators apply bottom-up, so the ``feedparser.parse`` mock is the first
    # injected argument and the ``extract_article`` mock the second.
    @patch("backend.app.ingestion.extract_article")
    @patch("backend.app.ingestion.feedparser.parse")
    def test_ingestion_deduplicates_by_feed_and_guid(self, mock_parse, mock_extract_article) -> None:
        """Two feed entries sharing one ``id`` end up as a single stored article."""
        mock_extract_article.return_value = ExtractedArticle(
            title="Artikel 1 original",
            author="Autorin A",
            canonical_url="https://example.org/article/1",
            summary="Original Summary",
            content_text="Original Volltext",
            images=["https://example.org/a.jpg"],
            press_contact="Pressekontakt: Team A",
            extraction_error=None,
        )
        mock_parse.return_value = {
            "etag": "etag-1",
            # 18 Feb 2026 falls on a Wednesday; the weekday must match the date
            # for this to be a well-formed HTTP-date.
            "modified": "Wed, 18 Feb 2026 10:00:00 GMT",
            "entries": [
                {
                    "id": "item-1",
                    "title": "Artikel 1",
                    "link": "https://example.org/article/1",
                    "summary": "A",
                },
                {
                    "id": "item-1",
                    "title": "Artikel 1 aktualisiert",
                    "link": "https://example.org/article/1-neu",
                    "summary": "B",
                },
            ],
        }

        stats = run_ingestion(feed_id=self.feed_id)
        self.assertEqual(stats.status, "success")
        self.assertEqual(stats.entries_seen, 2)

        articles = list_articles()
        self.assertEqual(len(articles), 1)
        article = articles[0]
        self.assertEqual(article["title"], "Artikel 1 original")
        self.assertEqual(article["author"], "Autorin A")
        # ``or ""`` keeps a missing value an assertion failure, not a TypeError.
        self.assertIn("Original Volltext", article["content_raw"] or "")
        self.assertIn("Pressekontakt", article["meta_json"] or "")

    @patch("backend.app.ingestion.extract_article")
    @patch("backend.app.ingestion.feedparser.parse")
    def test_ingestion_blocks_non_green_source(self, mock_parse, mock_extract_article) -> None:
        """A feed on a ``yellow`` source is neither fetched nor extracted."""
        # Re-create source/feed with yellow risk to verify enforcement
        blocked_feed_id = self._create_source_with_feed(
            label="Blocked",
            base_url="https://example.net",
            license_name="custom",
            risk_level="yellow",
        )

        stats = run_ingestion(feed_id=blocked_feed_id)
        self.assertEqual(stats.status, "success")
        self.assertEqual(stats.articles_upserted, 0)
        mock_parse.assert_not_called()
        mock_extract_article.assert_not_called()
|
||||
|
||||
|
||||
# Script entry point: run this module's tests when it is executed directly.
if __name__ == "__main__":
    unittest.main()
|
||||
69
backend/tests/test_source_extraction.py
Normal file
69
backend/tests/test_source_extraction.py
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
from backend.app.source_extraction import extract_article
|
||||
|
||||
|
||||
# Small press-release page served as the fake HTTP body in the extraction test.
# It carries one of each element the test asserts on: og:title, author and
# description meta tags, a relative og:image, a canonical link, article
# paragraphs and a "Pressekontakt" section.
SAMPLE_HTML = """
<!doctype html>
<html lang="de">
<head>
<meta charset="utf-8" />
<meta property="og:title" content="Demo Meldung von Presseportal" />
<meta name="author" content="Max Mustermann" />
<meta name="description" content="Kurzbeschreibung aus der Originalseite" />
<meta property="og:image" content="/images/demo.jpg" />
<link rel="canonical" href="https://www.presseportal.de/pm/118273/6158137" />
</head>
<body>
<article>
<p>Dies ist der vollstaendige Inhalt des Artikels.</p>
<p>Weitere relevante Informationen fuer die Meldung.</p>
<h3>Pressekontakt</h3>
<p>Musterfirma GmbH, Kontakt: presse@example.org</p>
</article>
</body>
</html>
"""
|
||||
|
||||
|
||||
class _FakeHeaders:
    """Stand-in for a response ``headers`` object that only reports a charset."""

    @staticmethod
    def get_content_charset() -> str:
        """Return the fixed charset name ``"utf-8"``."""
        charset = "utf-8"
        return charset
|
||||
|
||||
|
||||
class _FakeResponse:
    """In-memory replacement for the object the patched ``urlopen`` returns.

    Offers only what this test module relies on: a ``headers`` attribute,
    ``read()`` and the context-manager protocol.
    """

    # One shared header object is enough; it holds no per-response state.
    headers = _FakeHeaders()

    def __init__(self, body: str):
        """Keep ``body`` as UTF-8 bytes, the form ``read()`` hands back."""
        encoded = body.encode("utf-8")
        self._body = encoded

    def read(self):
        """Return the complete encoded body."""
        return self._body

    def __enter__(self):
        """Enter a ``with`` block; the response is its own context value."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave a ``with`` block without suppressing any exception."""
        # A falsy return lets an exception raised inside the block propagate.
        return False
|
||||
|
||||
|
||||
class TestSourceExtraction(unittest.TestCase):
    """Check ``extract_article`` against a canned HTML page instead of a live fetch."""

    @patch("backend.app.source_extraction.urlopen")
    def test_extract_article_parses_author_images_and_press_contact(self, mock_urlopen) -> None:
        """Title, author, canonical URL, text, summary, image and press contact are extracted."""
        article_url = "https://www.presseportal.de/pm/118273/6158137"
        mock_urlopen.return_value = _FakeResponse(SAMPLE_HTML)

        result = extract_article(article_url)

        # Exact-match metadata fields.
        self.assertEqual(result.title, "Demo Meldung von Presseportal")
        self.assertEqual(result.author, "Max Mustermann")
        self.assertEqual(result.canonical_url, "https://www.presseportal.de/pm/118273/6158137")

        # Free-text fields: ``or ""`` keeps a missing value an assertion
        # failure rather than a TypeError.
        self.assertIn("vollstaendige Inhalt", result.content_text or "")
        self.assertIn("Kurzbeschreibung", result.summary or "")

        # The page's relative og:image is expected as an absolute URL.
        self.assertIn("https://www.presseportal.de/images/demo.jpg", result.images)
        self.assertIn("Pressekontakt", result.press_contact or "")
        self.assertIsNone(result.extraction_error)
|
||||
|
||||
|
||||
# Script entry point: run this module's tests when it is executed directly.
if __name__ == "__main__":
    unittest.main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue