"""Unit tests for ``extract_article`` from ``backend.app.source_extraction``.

``urlopen`` is patched inside the module under test, so every test runs
against a canned in-memory response instead of a real HTTP request.
"""

import unittest
from unittest.mock import patch

from backend.app.source_extraction import extract_article

# Canned response body for the general extraction test.
# NOTE(review): this literal holds plain text only -- no markup, and none of
# the title / author / summary / image values asserted in the first test.
# Presumably the HTML tags were lost somewhere upstream; confirm against
# version control before relying on this fixture.
SAMPLE_HTML = """

Dies ist der vollstaendige Inhalt des Artikels.

Weitere relevante Informationen fuer die Meldung.

Pressekontakt

Musterfirma GmbH, Kontakt: presse@example.org

"""

# Canned response body whose contact section is headed "Agentur".
SAMPLE_HTML_AGENTUR = """

Inhalt der Meldung.

Agentur

Agenturname GmbH

presse@agentur.example

Original-Content von Beispiel

"""


class _FakeHeaders:
    """Stand-in for a response's ``headers`` object with a fixed charset."""

    @staticmethod
    def get_content_charset() -> str:
        """Return the charset the fake response body is encoded with."""
        return "utf-8"


class _FakeResponse:
    """Minimal context-manager response handed out by the patched ``urlopen``."""

    # Shared by all instances; the charset never varies between tests.
    headers = _FakeHeaders()

    def __init__(self, body: str) -> None:
        """Store *body* as UTF-8 bytes, matching the charset in ``headers``."""
        self._payload = body.encode("utf-8")

    def read(self) -> bytes:
        """Return the complete encoded body."""
        return self._payload

    def __enter__(self) -> "_FakeResponse":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> bool:
        # A falsy result lets exceptions raised inside the ``with`` body propagate.
        return False


class TestSourceExtraction(unittest.TestCase):
    """Exercise ``extract_article`` against the canned response bodies above."""

    @patch("backend.app.source_extraction.urlopen")
    def test_extract_article_parses_author_images_and_press_contact(self, mock_urlopen) -> None:
        """Check the extracted metadata, body text, images and press contact."""
        url = "https://www.presseportal.de/pm/118273/6158137"
        mock_urlopen.return_value = _FakeResponse(SAMPLE_HTML)

        article = extract_article(url)

        self.assertEqual(article.title, "Demo Meldung von Presseportal")
        self.assertEqual(article.author, "Max Mustermann")
        self.assertEqual(article.canonical_url, url)
        # Optional text fields may be None; fall back to "" so assertIn reports
        # a missing substring rather than raising TypeError.
        self.assertIn("vollstaendige Inhalt", article.content_text or "")
        self.assertIn("Kurzbeschreibung", article.summary or "")
        self.assertIn("https://www.presseportal.de/images/demo.jpg", article.images)
        self.assertIn("Pressekontakt", article.press_contact or "")
        self.assertIsNone(article.extraction_error)

    @patch("backend.app.source_extraction.urlopen")
    def test_extract_article_detects_agentur_block_as_press_contact(self, mock_urlopen) -> None:
        """Check that an "Agentur" section ends up in ``press_contact``."""
        mock_urlopen.return_value = _FakeResponse(SAMPLE_HTML_AGENTUR)

        article = extract_article("https://www.presseportal.de/pm/155103/6210401")

        press_contact = article.press_contact or ""
        # Checked in order; the first missing fragment fails the test.
        for fragment in ("Agentur", "Agenturname", "presse@agentur.example"):
            self.assertIn(fragment, press_contact)


if __name__ == "__main__":
    unittest.main()