fix(ingestion): strip HTML tags from feed entry titles
Google Alerts wraps matched keywords in <b>...</b> tags. Strip all HTML tags from the title before storing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
0d07a9804d
commit
8e65485f0c
1 changed file with 3 additions and 1 deletion
|
|
@@ -244,7 +244,9 @@ def run_ingestion(feed_id: int | None = None) -> IngestionStats:
|
|||
link = _resolve_google_redirect(link)
|
||||
|
||||
summary, content_raw = _entry_text(entry)
|
||||
title = entry.get("title") or "Ohne Titel"
|
||||
# Strip HTML tags from title (Google Alerts wraps matched keywords in <b>)
|
||||
raw_title = entry.get("title") or "Ohne Titel"
|
||||
title = re.sub(r"<[^>]+>", "", raw_title).strip() or "Ohne Titel"
|
||||
extracted = extract_article(link)
|
||||
|
||||
final_title = extracted.title or title
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue