diff --git a/backend/app/admin_ui.py b/backend/app/admin_ui.py index d3ca53e..689efce 100644 --- a/backend/app/admin_ui.py +++ b/backend/app/admin_ui.py @@ -20,7 +20,7 @@ from .ingestion import run_ingestion from .policy import evaluate_source_policy from .publisher import enqueue_publish, run_publisher from .relevance import article_age_days, article_relevance -from .rewrite import rewrite_article_text +from .rewrite import generate_article_tags, merge_generated_tags, rewrite_article_text from .repositories import ( FeedCreate, FeedUpdate, @@ -373,6 +373,7 @@ def _upsert_article_from_existing( publish_attempts: int | object = _UNSET, publish_last_error: str | None | object = _UNSET, published_to_wp_at: str | None | object = _UNSET, + meta_json: str | None | object = _UNSET, ) -> None: rewritten = article.get("content_rewritten") if content_rewritten is None else content_rewritten upsert_article( @@ -403,7 +404,7 @@ def _upsert_article_from_existing( published_to_wp_at=article.get("published_to_wp_at") if published_to_wp_at is _UNSET else published_to_wp_at, word_count=len(str(rewritten or "").split()), status=article.get("status") if status is None else status, - meta_json=article.get("meta_json"), + meta_json=article.get("meta_json") if meta_json is _UNSET else meta_json, ) ) @@ -493,6 +494,8 @@ def admin_dashboard(request: Request): article["days_old"] = article_age_days(article.get("published_at")) article["relevance"] = article_relevance(article.get("published_at")) article["status_ui"] = internal_to_ui_status(article.get("status")) + tags = meta.get("generated_tags") if isinstance(meta.get("generated_tags"), list) else [] + article["generated_tags"] = [str(t) for t in tags if t] return templates.TemplateResponse( request, @@ -836,12 +839,40 @@ def admin_rewrite_run(request: Request, article_id: int): return _dashboard_redirect(msg=f"Rewrite nur aus new/rewrite fuer Artikel #{article_id}", msg_type="error") try: rewritten = rewrite_article_text(article) + tags = generate_article_tags(article, rewritten_text=rewritten) except Exception as exc: return _dashboard_redirect(msg=f"Rewrite fehlgeschlagen fuer Artikel #{article_id}: {exc}", msg_type="error") - _upsert_article_from_existing(article, content_rewritten=rewritten, status="approved") + merged_meta = merge_generated_tags(article.get("meta_json"), tags) + _upsert_article_from_existing(article, content_rewritten=rewritten, status="approved", meta_json=merged_meta) return _dashboard_redirect(msg=f"Rewrite fertig fuer Artikel #{article_id} -> publish") +@router.post("/admin/rewrite/run") +def admin_rewrite_run_batch(request: Request, max_jobs: str = Form("10")): + user = _admin_user(request) + if not user: + return RedirectResponse(url="/admin/login", status_code=303) + try: + limit = max(1, min(int(max_jobs), 100)) + except Exception: + limit = 10 + planned = list_articles(limit=limit, status_filter="rewrite") + processed = 0 + success = 0 + failed = 0 + for article in planned: + processed += 1 + try: + rewritten = rewrite_article_text(article) + tags = generate_article_tags(article, rewritten_text=rewritten) + merged_meta = merge_generated_tags(article.get("meta_json"), tags) + _upsert_article_from_existing(article, content_rewritten=rewritten, status="approved", meta_json=merged_meta) + success += 1 + except Exception: + failed += 1 + return _dashboard_redirect(msg=f"Rewrite-Run: processed={processed}, success={success}, failed={failed}") + + @router.post("/admin/articles/{article_id}/rewrite-save") def admin_rewrite_save(request: Request, article_id: int, content_rewritten: str = Form(...)): user = _admin_user(request) diff --git a/backend/app/main.py b/backend/app/main.py index 4dcee28..b0bcf2a 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -18,7 +18,7 @@ from .ingestion import run_ingestion from .policy import evaluate_source_policy, is_source_allowed from .publisher import enqueue_publish, run_publisher from .relevance import article_age_days, article_relevance -from .rewrite import rewrite_article_text +from .rewrite import generate_article_tags, merge_generated_tags, rewrite_article_text from .repositories import ( ArticleUpsert, FeedCreate, @@ -514,6 +514,12 @@ def api_article_rewrite_run(article_id: int, username: str = Depends(require_aut raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Rewrite nur aus Status 'new' oder 'rewrite'") rewritten = rewrite_article_text(article) + tags: list[str] = [] + try: + tags = generate_article_tags(article, rewritten_text=rewritten) + except Exception: + tags = [] + merged_meta = merge_generated_tags(article.get("meta_json"), tags) # upsert via status update + existing fields by lightweight path: repo_upsert_article( ArticleUpsert( @@ -543,10 +549,10 @@ def api_article_rewrite_run(article_id: int, username: str = Depends(require_aut published_to_wp_at=article.get("published_to_wp_at"), word_count=len(rewritten.split()), status="approved", - meta_json=article.get("meta_json"), + meta_json=merged_meta, ) ) - return {"ok": True, "id": article_id, "status": "publish"} + return {"ok": True, "id": article_id, "status": "publish", "tags": tags} @app.post("/api/articles/{article_id}/legal-review") diff --git a/backend/app/rewrite.py b/backend/app/rewrite.py index 8c313ad..759fac9 100644 --- a/backend/app/rewrite.py +++ b/backend/app/rewrite.py @@ -28,35 +28,39 @@ def _sanitize_source_text(text: str) -> str: return joined -def rewrite_article_text(article: dict[str, Any]) -> str: +def _normalize_tags(tags: list[str], max_tags: int = 8) -> list[str]: + out: list[str] = [] + seen: set[str] = set() + for raw in tags: + value = re.sub(r"\s+", " ", str(raw or "").strip()) + value = re.sub(r"^[#\-•\s]+", "", value) + value = re.sub(r"[;,.:\s]+$", "", value) + if not value: + continue + if len(value) < 2 or len(value) > 40: + continue + key = value.casefold() + if key in seen: + continue + seen.add(key) + out.append(value) + if len(out) >= max_tags: + break + return out + + +def _openai_chat(system: str, user: str, temperature: float = 0.4) -> str: settings = get_settings() api_key = settings.openai_api_key if not api_key: raise RuntimeError("OPENAI_API_KEY fehlt") - source_text = _sanitize_source_text(article.get("content_raw") or "") - if not source_text: - source_text = (article.get("summary") or "").strip() - if not source_text: - raise RuntimeError("Kein Quelltext für Rewrite verfügbar") - - title = (article.get("title") or "").strip() - prompt = ( - "Schreibe den folgenden News-Text neu auf Deutsch in persönlicher Du-Form. " - "Stil: ausführlich, gut lesbar, ohne Einleitung mit Datum/Uhrzeit/Firma/Ort, " - "ohne Pressekontakt, ohne Quellenblock. " - "Nutze klare Absätze und Zwischenüberschriften in HTML (

,

,