feat(rewrite): add batch rewrite run, AI tags for WP, and agentur contact detection
This commit is contained in:
parent
da269d08f1
commit
b0f995d5c9
10 changed files with 374 additions and 36 deletions
|
|
@ -28,35 +28,39 @@ def _sanitize_source_text(text: str) -> str:
|
|||
return joined
|
||||
|
||||
|
||||
def rewrite_article_text(article: dict[str, Any]) -> str:
|
||||
def _normalize_tags(tags: list[str], max_tags: int = 8) -> list[str]:
|
||||
out: list[str] = []
|
||||
seen: set[str] = set()
|
||||
for raw in tags:
|
||||
value = re.sub(r"\s+", " ", str(raw or "").strip())
|
||||
value = re.sub(r"^[#\-•\s]+", "", value)
|
||||
value = re.sub(r"[;,.:\s]+$", "", value)
|
||||
if not value:
|
||||
continue
|
||||
if len(value) < 2 or len(value) > 40:
|
||||
continue
|
||||
key = value.casefold()
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
out.append(value)
|
||||
if len(out) >= max_tags:
|
||||
break
|
||||
return out
|
||||
|
||||
|
||||
def _openai_chat(system: str, user: str, temperature: float = 0.4) -> str:
|
||||
settings = get_settings()
|
||||
api_key = settings.openai_api_key
|
||||
if not api_key:
|
||||
raise RuntimeError("OPENAI_API_KEY fehlt")
|
||||
|
||||
source_text = _sanitize_source_text(article.get("content_raw") or "")
|
||||
if not source_text:
|
||||
source_text = (article.get("summary") or "").strip()
|
||||
if not source_text:
|
||||
raise RuntimeError("Kein Quelltext für Rewrite verfügbar")
|
||||
|
||||
title = (article.get("title") or "").strip()
|
||||
prompt = (
|
||||
"Schreibe den folgenden News-Text neu auf Deutsch in persönlicher Du-Form. "
|
||||
"Stil: ausführlich, gut lesbar, ohne Einleitung mit Datum/Uhrzeit/Firma/Ort, "
|
||||
"ohne Pressekontakt, ohne Quellenblock. "
|
||||
"Nutze klare Absätze und Zwischenüberschriften in HTML (<h2>, <p>, <ul><li> falls passend). "
|
||||
"Inhaltlich korrekt bleiben, nichts erfinden.\n\n"
|
||||
f"Titel: {title}\n\n"
|
||||
f"Originaltext:\n{source_text}"
|
||||
)
|
||||
|
||||
payload = {
|
||||
"model": settings.openai_model,
|
||||
"temperature": 0.4,
|
||||
"temperature": temperature,
|
||||
"messages": [
|
||||
{"role": "system", "content": "Du bist ein deutscher News-Redakteur."},
|
||||
{"role": "user", "content": prompt},
|
||||
{"role": "system", "content": system},
|
||||
{"role": "user", "content": user},
|
||||
],
|
||||
}
|
||||
req = Request(
|
||||
|
|
@ -78,6 +82,78 @@ def rewrite_article_text(article: dict[str, Any]) -> str:
|
|||
message = choices[0].get("message", {})
|
||||
content = message.get("content")
|
||||
if not isinstance(content, str) or not content.strip():
|
||||
raise RuntimeError("OpenAI lieferte keinen Rewrite-Text")
|
||||
raise RuntimeError("OpenAI lieferte keinen Inhalt")
|
||||
return content.strip()
|
||||
|
||||
|
||||
def rewrite_article_text(article: dict[str, Any]) -> str:
|
||||
source_text = _sanitize_source_text(article.get("content_raw") or "")
|
||||
if not source_text:
|
||||
source_text = (article.get("summary") or "").strip()
|
||||
if not source_text:
|
||||
raise RuntimeError("Kein Quelltext für Rewrite verfügbar")
|
||||
|
||||
title = (article.get("title") or "").strip()
|
||||
prompt = (
|
||||
"Schreibe den folgenden News-Text neu auf Deutsch in persönlicher Du-Form. "
|
||||
"Stil: ausführlich, gut lesbar, ohne Einleitung mit Datum/Uhrzeit/Firma/Ort, "
|
||||
"ohne Pressekontakt, ohne Quellenblock. "
|
||||
"Nutze klare Absätze und Zwischenüberschriften in HTML (<h2>, <p>, <ul><li> falls passend). "
|
||||
"Inhaltlich korrekt bleiben, nichts erfinden.\n\n"
|
||||
f"Titel: {title}\n\n"
|
||||
f"Originaltext:\n{source_text}"
|
||||
)
|
||||
return _openai_chat(
|
||||
"Du bist ein deutscher News-Redakteur.",
|
||||
prompt,
|
||||
temperature=0.4,
|
||||
)
|
||||
|
||||
|
||||
def generate_article_tags(article: dict[str, Any], rewritten_text: str | None = None, max_tags: int = 8) -> list[str]:
|
||||
source_text = rewritten_text or _sanitize_source_text(article.get("content_raw") or "") or (article.get("summary") or "")
|
||||
source_text = str(source_text).strip()
|
||||
if not source_text:
|
||||
return []
|
||||
title = (article.get("title") or "").strip()
|
||||
prompt = (
|
||||
"Erzeuge präzise Schlagwörter für einen deutschen News-Artikel. "
|
||||
f"Maximal {max_tags} Tags. Nur relevante Begriffe, keine allgemeinen Wörter wie News/Artikel. "
|
||||
"Gib ausschließlich ein JSON-Array mit Strings zurück, ohne Erklärung.\n\n"
|
||||
f"Titel: {title}\n\n"
|
||||
f"Text:\n{source_text[:3500]}"
|
||||
)
|
||||
raw = _openai_chat(
|
||||
"Du extrahierst präzise, kurze News-Tags auf Deutsch.",
|
||||
prompt,
|
||||
temperature=0.2,
|
||||
)
|
||||
try:
|
||||
parsed = json.loads(raw)
|
||||
if isinstance(parsed, list):
|
||||
return _normalize_tags([str(x) for x in parsed], max_tags=max_tags)
|
||||
except Exception:
|
||||
pass
|
||||
# fallback: extract first JSON-like array if model wrapped output
|
||||
match = re.search(r"\[[\s\S]*\]", raw)
|
||||
if match:
|
||||
try:
|
||||
parsed = json.loads(match.group(0))
|
||||
if isinstance(parsed, list):
|
||||
return _normalize_tags([str(x) for x in parsed], max_tags=max_tags)
|
||||
except Exception:
|
||||
return []
|
||||
return []
|
||||
|
||||
|
||||
def merge_generated_tags(meta_json: str | None, tags: list[str]) -> str:
|
||||
meta: dict[str, Any] = {}
|
||||
if meta_json:
|
||||
try:
|
||||
parsed = json.loads(meta_json)
|
||||
if isinstance(parsed, dict):
|
||||
meta = parsed
|
||||
except Exception:
|
||||
meta = {}
|
||||
meta["generated_tags"] = _normalize_tags(tags)
|
||||
return json.dumps(meta, ensure_ascii=False)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue