from __future__ import annotations import json import re from typing import Any from urllib.request import Request, urlopen from .config import get_settings def _sanitize_source_text(text: str) -> str: raw = (text or "").strip() if not raw: return "" lines = [ln.strip() for ln in raw.splitlines() if ln.strip()] if len(lines) > 3: lines = lines[3:] joined = "\n".join(lines) # Remove press contact block at end from "Pressekontakt" onward. joined = re.sub( r"\n?\s*Pressekontakt[\s\S]*$", "", joined, flags=re.IGNORECASE, ).strip() return joined def _normalize_tags(tags: list[str], max_tags: int = 8) -> list[str]: out: list[str] = [] seen: set[str] = set() for raw in tags: value = re.sub(r"\s+", " ", str(raw or "").strip()) value = re.sub(r"^[#\-•\s]+", "", value) value = re.sub(r"[;,.:\s]+$", "", value) if not value: continue if len(value) < 2 or len(value) > 40: continue key = value.casefold() if key in seen: continue seen.add(key) out.append(value) if len(out) >= max_tags: break return out def _openai_chat(system: str, user: str, temperature: float = 0.4) -> str: settings = get_settings() api_key = settings.openai_api_key if not api_key: raise RuntimeError("OPENAI_API_KEY fehlt") payload = { "model": settings.openai_model, "temperature": temperature, "messages": [ {"role": "system", "content": system}, {"role": "user", "content": user}, ], } req = Request( url="https://api.openai.com/v1/chat/completions", method="POST", data=json.dumps(payload).encode("utf-8"), headers={ "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", "Accept": "application/json", }, ) with urlopen(req, timeout=60) as resp: raw = resp.read().decode("utf-8", errors="replace") data = json.loads(raw) choices = data.get("choices") if not isinstance(choices, list) or not choices: raise RuntimeError(f"Ungültige OpenAI-Antwort: {data}") message = choices[0].get("message", {}) content = message.get("content") if not isinstance(content, str) or not content.strip(): raise RuntimeError("OpenAI lieferte keinen Inhalt") return content.strip() def rewrite_article_text(article: dict[str, Any]) -> str: source_text = _sanitize_source_text(article.get("content_raw") or "") if not source_text: source_text = (article.get("summary") or "").strip() if not source_text: raise RuntimeError("Kein Quelltext für Rewrite verfügbar") title = (article.get("title") or "").strip() prompt = ( "Schreibe den folgenden News-Text neu auf Deutsch in persönlicher Du-Form. " "Stil: ausführlich, gut lesbar, ohne Einleitung mit Datum/Uhrzeit/Firma/Ort, " "ohne Pressekontakt, ohne Quellenblock. " "Nutze klare Absätze und Zwischenüberschriften in HTML (

,

,