From d1cb809852e16a4642af1ae2964cdbce1e37ef95 Mon Sep 17 00:00:00 2001
From: OliverGiertz <oliver@vanityontour.de>
Date: Fri, 27 Mar 2026 08:28:44 +0000
Subject: [PATCH] fix(wordpress): fix attribution block source name and image
 credit lookup

- Derive the real source name from the hostname of the canonical URL (or the
  source URL as fallback), minus any "www." prefix, when the feed name is
  empty or generic (e.g. "Google Alerts"), so the link shows "moin.de"
  instead of "Google Alerts"
- Use _get_image_meta_for_url() (fuzzy URL matching) instead of an exact-key
  lookup in image_metadata when fetching the image credit
- Prefer the caption field, falling back to credit, for the Bildnachweis
  (image credit), since the caption already contains embedded credit text

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 backend/app/wordpress.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/backend/app/wordpress.py b/backend/app/wordpress.py
index 6ae686e..313719d 100644
--- a/backend/app/wordpress.py
+++ b/backend/app/wordpress.py
@@ -322,18 +322,30 @@ def _sanitize_publish_text(text: str) -> str:
 
 def _build_attribution_block(article: dict[str, Any]) -> str:
     """Build a WP Gutenberg attribution block for the bottom of the article."""
+    from urllib.parse import urlparse
     source_url = (article.get("canonical_url") or article.get("source_url") or "").strip()
     source_name = (article.get("source_name_snapshot") or "").strip()
     author = (article.get("author") or "").strip()
 
-    # Get image credit from extraction metadata
+    # If the feed name is "Google Alerts" (or similar generic names), derive the
+    # real source name from the hostname of the canonical URL.
+    if not source_name or source_name.lower() in ("google alerts", "google"):
+        try:
+            hostname = urlparse(source_url).hostname or ""
+            source_name = hostname.removeprefix("www.")
+        except Exception:
+            pass
+
+    # Get image credit from extraction metadata (uses fuzzy URL match)
+    meta_json = article.get("meta_json")
     credit = ""
     try:
-        meta = json.loads(article.get("meta_json") or "{}")
+        meta = json.loads(meta_json or "{}")
         selected_url = (meta.get("image_review") or {}).get("selected_url") or ""
         if selected_url:
-            img_meta = (meta.get("extraction") or {}).get("image_metadata") or {}
-            credit = (img_meta.get(selected_url) or {}).get("credit") or ""
+            img_meta = _get_image_meta_for_url(meta_json, selected_url)
+            # caption already contains embedded credit text (e.g. "Foto: IMAGO/Zoonar")
+            credit = img_meta.get("caption") or img_meta.get("credit") or ""
     except Exception:
         pass