from __future__ import annotations import json import re from typing import Any from urllib.request import Request, urlopen from .config import get_settings def _sanitize_source_text(text: str) -> str: raw = (text or "").strip() if not raw: return "" lines = [ln.strip() for ln in raw.splitlines() if ln.strip()] if len(lines) > 3: lines = lines[3:] joined = "\n".join(lines) # Remove press contact block at end from "Pressekontakt" onward. joined = re.sub( r"\n?\s*Pressekontakt[\s\S]*$", "", joined, flags=re.IGNORECASE, ).strip() return joined def rewrite_article_text(article: dict[str, Any]) -> str: settings = get_settings() api_key = settings.openai_api_key if not api_key: raise RuntimeError("OPENAI_API_KEY fehlt") source_text = _sanitize_source_text(article.get("content_raw") or "") if not source_text: source_text = (article.get("summary") or "").strip() if not source_text: raise RuntimeError("Kein Quelltext für Rewrite verfügbar") title = (article.get("title") or "").strip() prompt = ( "Schreibe den folgenden News-Text neu auf Deutsch in persönlicher Du-Form. " "Stil: ausführlich, gut lesbar, ohne Einleitung mit Datum/Uhrzeit/Firma/Ort, " "ohne Pressekontakt, ohne Quellenblock. " "Nutze klare Absätze und Zwischenüberschriften in HTML (
,