fix(wordpress): fix attribution block source name and image credit lookup
- Derive real source hostname from canonical URL when feed name is generic (e.g. "Google Alerts"), so the link shows "moin.de" instead of "Google Alerts" - Use _get_image_meta_for_url() (fuzzy URL matching) for image credit lookup - Use caption field for Bildnachweis since it already contains embedded credits Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
82f2df610d
commit
d1cb809852
1 changed files with 16 additions and 4 deletions
|
|
@ -322,18 +322,30 @@ def _sanitize_publish_text(text: str) -> str:
|
|||
|
||||
def _build_attribution_block(article: dict[str, Any]) -> str:
|
||||
"""Build a WP Gutenberg attribution block for the bottom of the article."""
|
||||
from urllib.parse import urlparse
|
||||
source_url = (article.get("canonical_url") or article.get("source_url") or "").strip()
|
||||
source_name = (article.get("source_name_snapshot") or "").strip()
|
||||
author = (article.get("author") or "").strip()
|
||||
|
||||
# Get image credit from extraction metadata
|
||||
# If the feed name is "Google Alerts" (or similar generic names), derive the
|
||||
# real source name from the hostname of the canonical URL.
|
||||
if not source_name or source_name.lower() in ("google alerts", "google"):
|
||||
try:
|
||||
hostname = urlparse(source_url).hostname or ""
|
||||
source_name = hostname.removeprefix("www.")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Get image credit from extraction metadata (uses fuzzy URL match)
|
||||
meta_json = article.get("meta_json")
|
||||
credit = ""
|
||||
try:
|
||||
meta = json.loads(article.get("meta_json") or "{}")
|
||||
meta = json.loads(meta_json or "{}")
|
||||
selected_url = (meta.get("image_review") or {}).get("selected_url") or ""
|
||||
if selected_url:
|
||||
img_meta = (meta.get("extraction") or {}).get("image_metadata") or {}
|
||||
credit = (img_meta.get(selected_url) or {}).get("credit") or ""
|
||||
img_meta = _get_image_meta_for_url(meta_json, selected_url)
|
||||
# caption already contains embedded credit text (e.g. "Foto: IMAGO/Zoonar")
|
||||
credit = img_meta.get("caption") or img_meta.get("credit") or ""
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue