fix(ingestion): strip HTML tags from feed entry titles

Google Alerts wraps matched keywords in <b>...</b> tags.
Strip all HTML tags from the title before storing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
OliverGiertz 2026-03-27 08:08:07 +00:00
parent 0d07a9804d
commit 8e65485f0c

View file

@@ -244,7 +244,9 @@ def run_ingestion(feed_id: int | None = None) -> IngestionStats:
link = _resolve_google_redirect(link)
summary, content_raw = _entry_text(entry)
-title = entry.get("title") or "Ohne Titel"
+# Strip HTML tags from title (Google Alerts wraps matched keywords in <b>)
+raw_title = entry.get("title") or "Ohne Titel"
+title = re.sub(r"<[^>]+>", "", raw_title).strip() or "Ohne Titel"
extracted = extract_article(link)
final_title = extracted.title or title