fix(wordpress): fuzzy URL match for image metadata and simplify caption builder

Image metadata keys may have query params (e.g. ?w=1200) that differ from
the selected_url stored in image_review. Fall back to comparing URLs without
query string so the figcaption text is correctly found.

Also simplified _build_image_caption: figcaption text already contains the
credit info, so just use caption directly instead of appending the redundant
credit prefix marker.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
OliverGiertz 2026-03-27 08:24:40 +00:00
parent 8e65485f0c
commit 82f2df610d

View file

@ -166,25 +166,30 @@ def _get_image_meta_for_url(meta_json: str | None, image_url: str) -> dict:
if not meta_json or not image_url:
return {}
try:
from urllib.parse import urlparse
meta = json.loads(meta_json)
image_metadata = (meta.get("extraction") or {}).get("image_metadata") or {}
return image_metadata.get(image_url) or {}
# Exact match first
if image_url in image_metadata:
return image_metadata[image_url]
# Fuzzy match: compare without query string (handles ?w=1200 variants)
base_url = urlparse(image_url)._replace(query="").geturl()
for key, val in image_metadata.items():
key_base = urlparse(key)._replace(query="").geturl()
if key_base == base_url:
return val
return {}
except Exception:
return {}
def _build_image_caption(image_meta: dict, source_url: str) -> str:
"""Build a WP caption string from image metadata and source URL."""
# caption from figcaption typically already contains the credit text
caption = (image_meta.get("caption") or "").strip()
credit = (image_meta.get("credit") or "").strip()
parts = []
if caption:
parts.append(caption)
if credit:
parts.append(credit)
if not parts:
parts.append(f"Quelle: {source_url}")
return " | ".join(parts)
return caption
return f"Quelle: {source_url}"
def _upload_featured_media(