From 8e65485f0c7d89dc8e225ed3add5e7b91da4ee55 Mon Sep 17 00:00:00 2001 From: OliverGiertz Date: Fri, 27 Mar 2026 08:08:07 +0000 Subject: [PATCH] fix(ingestion): strip HTML tags from feed entry titles Google Alerts wraps matched keywords in <b>...</b> tags. Strip all HTML tags from the title before storing. Co-Authored-By: Claude Sonnet 4.6 --- backend/app/ingestion.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/app/ingestion.py b/backend/app/ingestion.py index 3710276..d76f4c4 100644 --- a/backend/app/ingestion.py +++ b/backend/app/ingestion.py @@ -244,7 +244,9 @@ def run_ingestion(feed_id: int | None = None) -> IngestionStats: link = _resolve_google_redirect(link) summary, content_raw = _entry_text(entry) - title = entry.get("title") or "Ohne Titel" + # Strip HTML tags from title (Google Alerts wraps matched keywords in <b>...</b>) + raw_title = entry.get("title") or "Ohne Titel" + title = re.sub(r"<[^>]+>", "", raw_title).strip() or "Ohne Titel" extracted = extract_article(link) final_title = extracted.title or title