feat(publisher): add wordpress draft queue with retry and admin controls

This commit is contained in:
Oliver 2026-02-18 10:49:43 +01:00
parent dcdf4d954a
commit 1cee56205e
13 changed files with 719 additions and 3 deletions

View file

@ -15,6 +15,7 @@ from .auth import create_session_token, verify_credentials, verify_session_token
from .config import get_settings from .config import get_settings
from .ingestion import run_ingestion from .ingestion import run_ingestion
from .policy import evaluate_source_policy from .policy import evaluate_source_policy
from .publisher import enqueue_publish, run_publisher
from .relevance import article_age_days, article_relevance from .relevance import article_age_days, article_relevance
from .repositories import ( from .repositories import (
FeedCreate, FeedCreate,
@ -25,6 +26,7 @@ from .repositories import (
get_feed_by_id, get_feed_by_id,
list_articles, list_articles,
list_feeds, list_feeds,
list_publish_jobs,
list_runs, list_runs,
list_sources, list_sources,
set_article_image_decision, set_article_image_decision,
@ -273,6 +275,7 @@ def admin_dashboard(request: Request):
source_policy = {s["id"]: evaluate_source_policy(s) for s in sources} source_policy = {s["id"]: evaluate_source_policy(s) for s in sources}
feeds = list_feeds() feeds = list_feeds()
runs = list_runs(limit=30) runs = list_runs(limit=30)
publish_jobs = list_publish_jobs(limit=30)
status_filter = request.query_params.get("status_filter") status_filter = request.query_params.get("status_filter")
if status_filter in {"new", "rewrite", "review", "approved", "published", "error"}: if status_filter in {"new", "rewrite", "review", "approved", "published", "error"}:
articles = list_articles(limit=100, status_filter=status_filter) articles = list_articles(limit=100, status_filter=status_filter)
@ -308,6 +311,7 @@ def admin_dashboard(request: Request):
"source_policy": source_policy, "source_policy": source_policy,
"feeds": feeds, "feeds": feeds,
"runs": runs, "runs": runs,
"publish_jobs": publish_jobs,
"articles": articles, "articles": articles,
"status_options": ["new", "rewrite", "review", "approved", "published", "error"], "status_options": ["new", "rewrite", "review", "approved", "published", "error"],
"allowed_transitions": ALLOWED_TRANSITIONS, "allowed_transitions": ALLOWED_TRANSITIONS,
@ -358,6 +362,8 @@ def admin_article_detail(request: Request, article_id: int):
"feed": feed, "feed": feed,
"checklist": checklist, "checklist": checklist,
"allowed_transitions": ALLOWED_TRANSITIONS.get(article.get("status"), ()), "allowed_transitions": ALLOWED_TRANSITIONS.get(article.get("status"), ()),
"flash_msg": request.query_params.get("msg", ""),
"flash_type": request.query_params.get("type", "success"),
}, },
) )
@ -379,6 +385,32 @@ def admin_article_image_decision(
return RedirectResponse(url=f"/admin/articles/{article_id}", status_code=303) return RedirectResponse(url=f"/admin/articles/{article_id}", status_code=303)
@router.post("/admin/articles/{article_id}/publish-enqueue")
def admin_enqueue_publish(request: Request, article_id: int, max_attempts: str = Form("3")):
    """Queue a WordPress publish job for one article from the admin UI.

    Args:
        request: Incoming request; used to resolve the logged-in admin user.
        article_id: ID of the article taken from the URL path.
        max_attempts: Raw form value for the attempt budget; parsed as an
            integer and raised to at least 1.

    Returns:
        A 303 redirect to the login page when no admin user is resolved, a
        dashboard redirect carrying an error message when parsing or
        enqueueing fails, or a 303 redirect back to the article detail page
        with a success flash message.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlencode

    user = _admin_user(request)
    if not user:
        return RedirectResponse(url="/admin/login", status_code=303)
    try:
        job_id = enqueue_publish(article_id=article_id, max_attempts=max(1, int(max_attempts)))
    except Exception as exc:
        return _dashboard_redirect(msg=f"Publish Queue Fehler fuer Artikel #{article_id}: {exc}", msg_type="error")
    # The message contains '#', which would start a URL fragment if left raw
    # and swallow the rest of the query string (including `type`). Encode it.
    query = urlencode({"msg": f"Publish-Job #{job_id} erstellt", "type": "success"})
    return RedirectResponse(url=f"/admin/articles/{article_id}?{query}", status_code=303)
@router.post("/admin/publisher/run")
def admin_run_publisher(request: Request, max_jobs: str = Form("10")):
    """Run the publisher once from the admin UI and report the outcome.

    Args:
        request: Incoming request; used to resolve the logged-in admin user.
        max_jobs: Raw form value for the job limit; parsed as an integer and
            raised to at least 1.

    Returns:
        A 303 redirect to the login page when no admin user is resolved,
        otherwise a dashboard redirect carrying either the run counters or
        the error text.
    """
    if not _admin_user(request):
        return RedirectResponse(url="/admin/login", status_code=303)

    try:
        job_limit = max(1, int(max_jobs))
        stats = run_publisher(max_jobs=job_limit)
    except Exception as exc:
        return _dashboard_redirect(msg=f"Publisher Fehler: {exc}", msg_type="error")

    summary = f"Publisher: processed={stats.processed}, success={stats.success}, failed={stats.failed}, requeued={stats.requeued}"
    return _dashboard_redirect(msg=summary)
@router.get("/admin/images/proxy") @router.get("/admin/images/proxy")
def admin_image_proxy(request: Request, url: str): def admin_image_proxy(request: Request, url: str):
if not _is_http_image_url(url): if not _is_http_image_url(url):

View file

@ -23,6 +23,11 @@ class Settings(BaseSettings):
app_db_path: str = "backend/data/rss_news.db" app_db_path: str = "backend/data/rss_news.db"
wordpress_base_url: str | None = None
wordpress_username: str | None = None
wordpress_app_password: str | None = None
wordpress_default_status: str = "draft"
@lru_cache(maxsize=1) @lru_cache(maxsize=1)
def get_settings() -> Settings: def get_settings() -> Settings:

View file

@ -68,6 +68,21 @@ def init_db() -> None:
details TEXT details TEXT
); );
CREATE TABLE IF NOT EXISTS publish_jobs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
article_id INTEGER NOT NULL,
status TEXT NOT NULL CHECK (status IN ('queued', 'running', 'success', 'failed')),
attempts INTEGER NOT NULL DEFAULT 0,
max_attempts INTEGER NOT NULL DEFAULT 3,
error_message TEXT,
wp_post_id INTEGER,
wp_post_url TEXT,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
started_at TEXT,
finished_at TEXT,
FOREIGN KEY(article_id) REFERENCES articles(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS articles ( CREATE TABLE IF NOT EXISTS articles (
id INTEGER PRIMARY KEY AUTOINCREMENT, id INTEGER PRIMARY KEY AUTOINCREMENT,
feed_id INTEGER, feed_id INTEGER,
@ -89,6 +104,11 @@ def init_db() -> None:
legal_checked INTEGER NOT NULL DEFAULT 0, legal_checked INTEGER NOT NULL DEFAULT 0,
legal_checked_at TEXT, legal_checked_at TEXT,
legal_note TEXT, legal_note TEXT,
wp_post_id INTEGER,
wp_post_url TEXT,
publish_attempts INTEGER NOT NULL DEFAULT 0,
publish_last_error TEXT,
published_to_wp_at TEXT,
word_count INTEGER DEFAULT 0, word_count INTEGER DEFAULT 0,
status TEXT NOT NULL DEFAULT 'new' CHECK (status IN ('new', 'rewrite', 'review', 'approved', 'published', 'error')), status TEXT NOT NULL DEFAULT 'new' CHECK (status IN ('new', 'rewrite', 'review', 'approved', 'published', 'error')),
meta_json TEXT, meta_json TEXT,
@ -110,6 +130,7 @@ def init_db() -> None:
CREATE INDEX IF NOT EXISTS idx_feeds_source_id ON feeds(source_id); CREATE INDEX IF NOT EXISTS idx_feeds_source_id ON feeds(source_id);
CREATE INDEX IF NOT EXISTS idx_runs_started_at ON runs(started_at); CREATE INDEX IF NOT EXISTS idx_runs_started_at ON runs(started_at);
CREATE INDEX IF NOT EXISTS idx_articles_published_at ON articles(published_at); CREATE INDEX IF NOT EXISTS idx_articles_published_at ON articles(published_at);
CREATE INDEX IF NOT EXISTS idx_publish_jobs_status_created_at ON publish_jobs(status, created_at);
CREATE TRIGGER IF NOT EXISTS trg_sources_updated_at CREATE TRIGGER IF NOT EXISTS trg_sources_updated_at
AFTER UPDATE ON sources AFTER UPDATE ON sources
@ -148,11 +169,40 @@ def init_db() -> None:
"legal_checked": "ALTER TABLE articles ADD COLUMN legal_checked INTEGER NOT NULL DEFAULT 0", "legal_checked": "ALTER TABLE articles ADD COLUMN legal_checked INTEGER NOT NULL DEFAULT 0",
"legal_checked_at": "ALTER TABLE articles ADD COLUMN legal_checked_at TEXT", "legal_checked_at": "ALTER TABLE articles ADD COLUMN legal_checked_at TEXT",
"legal_note": "ALTER TABLE articles ADD COLUMN legal_note TEXT", "legal_note": "ALTER TABLE articles ADD COLUMN legal_note TEXT",
"wp_post_id": "ALTER TABLE articles ADD COLUMN wp_post_id INTEGER",
"wp_post_url": "ALTER TABLE articles ADD COLUMN wp_post_url TEXT",
"publish_attempts": "ALTER TABLE articles ADD COLUMN publish_attempts INTEGER NOT NULL DEFAULT 0",
"publish_last_error": "ALTER TABLE articles ADD COLUMN publish_last_error TEXT",
"published_to_wp_at": "ALTER TABLE articles ADD COLUMN published_to_wp_at TEXT",
} }
for column, ddl in migration_columns.items(): for column, ddl in migration_columns.items():
if column not in existing_columns: if column not in existing_columns:
conn.execute(ddl) conn.execute(ddl)
table_rows = conn.execute(
"SELECT name FROM sqlite_master WHERE type = 'table' AND name = 'publish_jobs'"
).fetchall()
if not table_rows:
conn.executescript(
"""
CREATE TABLE IF NOT EXISTS publish_jobs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
article_id INTEGER NOT NULL,
status TEXT NOT NULL CHECK (status IN ('queued', 'running', 'success', 'failed')),
attempts INTEGER NOT NULL DEFAULT 0,
max_attempts INTEGER NOT NULL DEFAULT 3,
error_message TEXT,
wp_post_id INTEGER,
wp_post_url TEXT,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
started_at TEXT,
finished_at TEXT,
FOREIGN KEY(article_id) REFERENCES articles(id) ON DELETE CASCADE
);
CREATE INDEX IF NOT EXISTS idx_publish_jobs_status_created_at ON publish_jobs(status, created_at);
"""
)
def rows_to_dicts(rows: list[sqlite3.Row]) -> list[dict[str, Any]]: def rows_to_dicts(rows: list[sqlite3.Row]) -> list[dict[str, Any]]:
return [dict(r) for r in rows] return [dict(r) for r in rows]

View file

@ -289,6 +289,11 @@ def run_ingestion(feed_id: int | None = None) -> IngestionStats:
legal_checked=False, legal_checked=False,
legal_checked_at=None, legal_checked_at=None,
legal_note=None, legal_note=None,
wp_post_id=None,
wp_post_url=None,
publish_attempts=0,
publish_last_error=None,
published_to_wp_at=None,
word_count=len((final_content_raw or "").split()), word_count=len((final_content_raw or "").split()),
status="new", status="new",
meta_json=json.dumps({"attribution": attribution, "extraction": extraction_meta}, ensure_ascii=False), meta_json=json.dumps({"attribution": attribution, "extraction": extraction_meta}, ensure_ascii=False),

View file

@ -16,6 +16,7 @@ from .config import get_settings
from .db import init_db from .db import init_db
from .ingestion import run_ingestion from .ingestion import run_ingestion
from .policy import evaluate_source_policy, is_source_allowed from .policy import evaluate_source_policy, is_source_allowed
from .publisher import enqueue_publish, run_publisher
from .relevance import article_age_days, article_relevance from .relevance import article_age_days, article_relevance
from .repositories import ( from .repositories import (
ArticleUpsert, ArticleUpsert,
@ -30,6 +31,7 @@ from .repositories import (
get_feed_by_id, get_feed_by_id,
get_run_by_id, get_run_by_id,
get_source_by_id, get_source_by_id,
list_publish_jobs,
list_articles as repo_list_articles, list_articles as repo_list_articles,
list_feeds as repo_list_feeds, list_feeds as repo_list_feeds,
list_runs, list_runs,
@ -111,6 +113,11 @@ class ArticleUpsertRequest(BaseModel):
legal_checked: bool = False legal_checked: bool = False
legal_checked_at: str | None = None legal_checked_at: str | None = None
legal_note: str | None = None legal_note: str | None = None
wp_post_id: int | None = None
wp_post_url: str | None = None
publish_attempts: int = 0
publish_last_error: str | None = None
published_to_wp_at: str | None = None
word_count: int = 0 word_count: int = 0
status: str = Field(default="new", pattern="^(new|rewrite|review|approved|published|error)$") status: str = Field(default="new", pattern="^(new|rewrite|review|approved|published|error)$")
meta_json: str | None = None meta_json: str | None = None
@ -135,6 +142,15 @@ class ArticleLegalReviewRequest(BaseModel):
note: str | None = None note: str | None = None
class PublisherEnqueueRequest(BaseModel):
    """Request body for queueing a publish job for a single article."""

    # ID of the article that should be published.
    article_id: int
    # Attempt budget handed to the queue; defaults to 3.
    max_attempts: int = Field(default=3)
class PublisherRunRequest(BaseModel):
    """Request body for triggering one publisher run."""

    # Upper bound on the number of jobs handled in this run; defaults to 10.
    max_jobs: int = Field(default=10)
ALLOWED_ARTICLE_TRANSITIONS: dict[str, set[str]] = { ALLOWED_ARTICLE_TRANSITIONS: dict[str, set[str]] = {
"new": {"review", "rewrite", "error"}, "new": {"review", "rewrite", "error"},
"rewrite": {"review", "error"}, "rewrite": {"review", "error"},
@ -446,6 +462,11 @@ def api_upsert_article(payload: ArticleUpsertRequest, username: str = Depends(re
legal_checked=payload.legal_checked, legal_checked=payload.legal_checked,
legal_checked_at=payload.legal_checked_at, legal_checked_at=payload.legal_checked_at,
legal_note=payload.legal_note, legal_note=payload.legal_note,
wp_post_id=payload.wp_post_id,
wp_post_url=payload.wp_post_url,
publish_attempts=payload.publish_attempts,
publish_last_error=payload.publish_last_error,
published_to_wp_at=payload.published_to_wp_at,
word_count=payload.word_count, word_count=payload.word_count,
status=payload.status, status=payload.status,
meta_json=payload.meta_json, meta_json=payload.meta_json,
@ -495,6 +516,35 @@ def api_article_legal_review(article_id: int, payload: ArticleLegalReviewRequest
} }
@app.get("/api/publisher/jobs")
def api_publisher_jobs(limit: int = 100, username: str = Depends(require_auth)) -> dict:
    """List publish jobs for an authenticated caller.

    Args:
        limit: Requested number of jobs, passed through to the repository.
        username: Caller identity supplied by the auth dependency.

    Returns:
        A dict with `ok`, the job rows under `items`, and `requested_by`.
    """
    jobs = list_publish_jobs(limit=limit)
    return {"ok": True, "items": jobs, "requested_by": username}
@app.post("/api/publisher/enqueue")
def api_publisher_enqueue(payload: PublisherEnqueueRequest, username: str = Depends(require_auth)) -> dict:
    """Queue a publish job for an existing article.

    Args:
        payload: Article ID and attempt budget.
        username: Caller identity supplied by the auth dependency.

    Returns:
        A dict with `ok`, the `job_id`, the `article_id` and `requested_by`.

    Raises:
        HTTPException: 404 when the article lookup returns nothing.
    """
    target_id = payload.article_id
    if not get_article_by_id(target_id):
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Artikel nicht gefunden")

    new_job_id = enqueue_publish(article_id=target_id, max_attempts=payload.max_attempts)
    return {"ok": True, "job_id": new_job_id, "article_id": target_id, "requested_by": username}
@app.post("/api/publisher/run")
def api_publisher_run(payload: PublisherRunRequest, username: str = Depends(require_auth)) -> dict:
    """Run the publisher once and return its counters.

    Args:
        payload: Carries the job limit for this run.
        username: Caller identity supplied by the auth dependency.

    Returns:
        A dict with `ok`, `requested_by`, and a `stats` dict holding the
        `processed`, `success`, `failed` and `requeued` counters.
    """
    run_stats = run_publisher(max_jobs=payload.max_jobs)
    counter_names = ("processed", "success", "failed", "requeued")
    counters = {name: getattr(run_stats, name) for name in counter_names}
    return {"ok": True, "requested_by": username, "stats": counters}
@app.post("/api/articles/{article_id}/review") @app.post("/api/articles/{article_id}/review")
def api_article_review(article_id: int, payload: ArticleReviewRequest, username: str = Depends(require_auth)) -> dict: def api_article_review(article_id: int, payload: ArticleReviewRequest, username: str = Depends(require_auth)) -> dict:
article = get_article_by_id(article_id) article = get_article_by_id(article_id)

103
backend/app/publisher.py Normal file
View file

@ -0,0 +1,103 @@
from __future__ import annotations
from dataclasses import dataclass
from .repositories import (
claim_next_publish_job,
complete_publish_job,
create_publish_job,
fail_publish_job,
get_article_by_id,
mark_article_publish_result,
PublishJobCreate,
)
from .wordpress import publish_article_draft, selected_image_exists
@dataclass(frozen=True)
class PublisherStats:
    """Immutable counters describing the outcome of one publisher run."""

    # Number of jobs claimed from the queue during the run.
    processed: int
    # Jobs that finished with a WordPress post.
    success: int
    # Jobs that ended in the terminal 'failed' state.
    failed: int
    # Jobs that failed but were put back into the queue.
    requeued: int
def enqueue_publish(article_id: int, max_attempts: int = 3) -> int:
    """Create a publish job for an article and return the job ID.

    Args:
        article_id: ID of the article to publish.
        max_attempts: Attempt budget stored with the job.

    Returns:
        The job ID reported by `create_publish_job`.
    """
    job_request = PublishJobCreate(article_id=article_id, max_attempts=max_attempts)
    return create_publish_job(job_request)
def _can_publish(article: dict) -> tuple[bool, str | None]:
if article.get("status") not in {"approved", "published"}:
return False, "Artikelstatus muss 'approved' sein"
if int(article.get("legal_checked", 0)) != 1:
return False, "Rechtsfreigabe fehlt"
if not selected_image_exists(article):
return False, "Hauptbild nicht gesetzt"
return True, None
def run_publisher(max_jobs: int = 10) -> PublisherStats:
    """Process queued publish jobs and report what happened.

    Jobs are claimed one at a time until the queue yields nothing or the
    limit is reached. A job fails without requeue when its article is missing
    or does not pass `_can_publish`. When publishing raises, the job is
    requeued while its attempt count is below its attempt budget, otherwise
    it fails.

    Args:
        max_jobs: Upper bound on claimed jobs; values below 1 are treated as 1.

    Returns:
        Counters for processed, successful, failed and requeued jobs.
    """
    processed = 0
    success = 0
    failed = 0
    requeued = 0

    for _ in range(max(1, max_jobs)):
        job = claim_next_publish_job()
        if not job:
            break

        processed += 1
        job_id = int(job["id"])
        article_id = int(job["article_id"])

        article = get_article_by_id(article_id)
        if not article:
            fail_publish_job(job_id, "Artikel nicht gefunden", requeue=False)
            failed += 1
            continue

        allowed, reason = _can_publish(article)
        if not allowed:
            fail_publish_job(job_id, reason or "Publish-Bedingungen nicht erfüllt", requeue=False)
            mark_article_publish_result(
                article_id,
                wp_post_id=article.get("wp_post_id"),
                wp_post_url=article.get("wp_post_url"),
                error=reason or "blocked",
                increment_attempts=True,
                set_published_status=False,
            )
            failed += 1
            continue

        # Start from the stored WordPress reference. It is replaced as soon as
        # WordPress answers, so the failure path below records a post that was
        # created even if the bookkeeping afterwards raised. Without this, a
        # retry would create a second post instead of updating the first.
        wp_post_id = article.get("wp_post_id")
        wp_post_url = article.get("wp_post_url")
        try:
            wp_post_id, wp_post_url = publish_article_draft(article)
            complete_publish_job(job_id, wp_post_id=wp_post_id, wp_post_url=wp_post_url)
            mark_article_publish_result(
                article_id,
                wp_post_id=wp_post_id,
                wp_post_url=wp_post_url,
                error=None,
                increment_attempts=True,
                set_published_status=True,
            )
            success += 1
        except Exception as exc:
            attempts = int(job.get("attempts", 1))
            max_attempts = int(job.get("max_attempts", 3))
            should_requeue = attempts < max_attempts
            fail_publish_job(job_id, str(exc), requeue=should_requeue)
            mark_article_publish_result(
                article_id,
                wp_post_id=wp_post_id,
                wp_post_url=wp_post_url,
                error=str(exc),
                increment_attempts=True,
                set_published_status=False,
            )
            if should_requeue:
                requeued += 1
            else:
                failed += 1

    return PublisherStats(processed=processed, success=success, failed=failed, requeued=requeued)

View file

@ -56,11 +56,22 @@ class ArticleUpsert:
legal_checked: bool legal_checked: bool
legal_checked_at: str | None legal_checked_at: str | None
legal_note: str | None legal_note: str | None
wp_post_id: int | None
wp_post_url: str | None
publish_attempts: int
publish_last_error: str | None
published_to_wp_at: str | None
word_count: int word_count: int
status: str status: str
meta_json: str | None meta_json: str | None
@dataclass(frozen=True)
class PublishJobCreate:
    """Immutable input for creating a publish job."""

    # ID of the article the job belongs to.
    article_id: int
    # Attempt budget for the job; defaults to 3.
    max_attempts: int = 3
def create_source(payload: SourceCreate) -> int: def create_source(payload: SourceCreate) -> int:
with get_conn() as conn: with get_conn() as conn:
cur = conn.execute( cur = conn.execute(
@ -235,6 +246,7 @@ def get_article_by_id(article_id: int) -> dict[str, Any] | None:
a.summary, a.content_raw, a.content_rewritten, a.image_urls_json, a.press_contact, a.summary, a.content_raw, a.content_rewritten, a.image_urls_json, a.press_contact,
a.source_name_snapshot, a.source_terms_url_snapshot, a.source_license_name_snapshot, a.source_name_snapshot, a.source_terms_url_snapshot, a.source_license_name_snapshot,
a.legal_checked, a.legal_checked_at, a.legal_note, a.legal_checked, a.legal_checked_at, a.legal_note,
a.wp_post_id, a.wp_post_url, a.publish_attempts, a.publish_last_error, a.published_to_wp_at,
a.word_count, a.status, a.meta_json, a.created_at, a.updated_at a.word_count, a.status, a.meta_json, a.created_at, a.updated_at
FROM articles a FROM articles a
WHERE a.id = ? WHERE a.id = ?
@ -375,6 +387,147 @@ def set_article_image_decision(article_id: int, image_url: str, action: str, act
return True return True
def create_publish_job(payload: PublishJobCreate) -> int:
    """Return an open publish job for the article, creating one if needed.

    If the article already has a job in state 'queued' or 'running', the ID
    of the newest such job is returned and nothing is inserted. Otherwise a
    new 'queued' job with zero attempts is inserted; its attempt budget is
    raised to at least 1.

    Args:
        payload: Article ID and attempt budget.

    Returns:
        The ID of the existing or newly inserted job.
    """
    with get_conn() as conn:
        open_job = conn.execute(
            """
            SELECT id FROM publish_jobs
            WHERE article_id = ? AND status IN ('queued', 'running')
            ORDER BY id DESC
            LIMIT 1
            """,
            (payload.article_id,),
        ).fetchone()
        if open_job:
            return int(open_job["id"])

        attempt_budget = max(1, payload.max_attempts)
        inserted = conn.execute(
            """
            INSERT INTO publish_jobs (article_id, status, attempts, max_attempts)
            VALUES (?, 'queued', 0, ?)
            """,
            (payload.article_id, attempt_budget),
        )
        return int(inserted.lastrowid)
def list_publish_jobs(limit: int = 100) -> list[dict[str, Any]]:
    """Return the newest publish jobs together with their article titles.

    Args:
        limit: Requested row count; clamped to the range 1..500.

    Returns:
        Job rows as dicts, newest first, each with an `article_title` key
        taken from a left join on the articles table.
    """
    row_cap = min(max(limit, 1), 500)
    query = """
            SELECT j.id, j.article_id, j.status, j.attempts, j.max_attempts, j.error_message, j.wp_post_id, j.wp_post_url,
            j.created_at, j.started_at, j.finished_at, a.title AS article_title
            FROM publish_jobs j
            LEFT JOIN articles a ON a.id = j.article_id
            ORDER BY j.id DESC
            LIMIT ?
            """
    with get_conn() as conn:
        job_rows = conn.execute(query, (row_cap,)).fetchall()
    return rows_to_dicts(job_rows)
def claim_next_publish_job() -> dict[str, Any] | None:
    """Claim the oldest runnable publish job and return its refreshed row.

    The candidate is the queued job with the lowest ID whose attempt count is
    still below its budget. Claiming sets the status to 'running', increments
    `attempts`, stamps `started_at` and clears `finished_at`.

    Returns:
        The claimed job as a dict (read back after the update), or None when
        no runnable job exists.
    """
    # NOTE(review): the lookup and the update are separate statements.
    # Whether two workers can claim the same job depends on how get_conn
    # handles transactions - confirm.
    with get_conn() as conn:
        candidate = conn.execute(
            """
            SELECT id, article_id, status, attempts, max_attempts, error_message, wp_post_id, wp_post_url
            FROM publish_jobs
            WHERE status = 'queued' AND attempts < max_attempts
            ORDER BY id ASC
            LIMIT 1
            """
        ).fetchone()
        if not candidate:
            return None

        claimed_id = int(candidate["id"])
        conn.execute(
            """
            UPDATE publish_jobs
            SET status = 'running',
            attempts = attempts + 1,
            started_at = datetime('now'),
            finished_at = NULL
            WHERE id = ?
            """,
            (claimed_id,),
        )

        # Read the row back so the caller sees the incremented attempt count.
        refreshed = conn.execute(
            """
            SELECT id, article_id, status, attempts, max_attempts, error_message, wp_post_id, wp_post_url
            FROM publish_jobs
            WHERE id = ?
            """,
            (claimed_id,),
        ).fetchone()
        if refreshed:
            return dict(refreshed)
        return None
def complete_publish_job(job_id: int, wp_post_id: int | None, wp_post_url: str | None) -> None:
    """Mark a publish job as successful and store its WordPress reference.

    Sets the status to 'success', writes the post ID and URL, clears the
    error message and stamps `finished_at`.

    Args:
        job_id: ID of the job to update.
        wp_post_id: WordPress post ID to store on the job.
        wp_post_url: WordPress post URL to store on the job.
    """
    statement = """
            UPDATE publish_jobs
            SET status = 'success',
            wp_post_id = ?,
            wp_post_url = ?,
            error_message = NULL,
            finished_at = datetime('now')
            WHERE id = ?
            """
    values = (wp_post_id, wp_post_url, job_id)
    with get_conn() as conn:
        conn.execute(statement, values)
def fail_publish_job(job_id: int, error_message: str, requeue: bool) -> None:
    """Record a failed attempt on a publish job.

    Args:
        job_id: ID of the job to update.
        error_message: Failure text; only the first 2000 characters are stored.
        requeue: When true the job goes back to 'queued', otherwise it ends
            in 'failed'.
    """
    if requeue:
        target_status = "queued"
    else:
        target_status = "failed"
    stored_error = error_message[:2000]

    with get_conn() as conn:
        conn.execute(
            """
            UPDATE publish_jobs
            SET status = ?,
            error_message = ?,
            finished_at = datetime('now')
            WHERE id = ?
            """,
            (target_status, stored_error, job_id),
        )
def mark_article_publish_result(
    article_id: int,
    *,
    wp_post_id: int | None,
    wp_post_url: str | None,
    error: str | None,
    increment_attempts: bool,
    set_published_status: bool,
) -> None:
    """Write the outcome of a publish attempt onto the article row.

    `wp_post_id` and `wp_post_url` are always overwritten with the given
    values. `publish_last_error` becomes the error text truncated to 2000
    characters, or NULL when no error text is given. `published_to_wp_at` is
    refreshed only for a successful publish, meaning a post ID is present and
    no error text is given.

    Args:
        article_id: ID of the article to update.
        wp_post_id: WordPress post ID to store.
        wp_post_url: WordPress post URL to store.
        error: Failure text, or None/empty for a successful attempt.
        increment_attempts: Whether to add 1 to `publish_attempts`.
        set_published_status: Whether to set the status to 'published'.
    """
    stored_error = error[:2000] if error else None
    # Failed or blocked attempts pass the article's existing post ID through
    # unchanged. Keying the timestamp on the ID alone would stamp a publish
    # time on attempts that never reached WordPress successfully.
    publish_succeeded = wp_post_id is not None and stored_error is None

    with get_conn() as conn:
        conn.execute(
            """
            UPDATE articles
            SET wp_post_id = ?,
                wp_post_url = ?,
                publish_attempts = CASE WHEN ? THEN publish_attempts + 1 ELSE publish_attempts END,
                publish_last_error = ?,
                published_to_wp_at = CASE WHEN ? THEN datetime('now') ELSE published_to_wp_at END,
                status = CASE WHEN ? THEN 'published' ELSE status END
            WHERE id = ?
            """,
            (
                wp_post_id,
                wp_post_url,
                1 if increment_attempts else 0,
                stored_error,
                1 if publish_succeeded else 0,
                1 if set_published_status else 0,
                article_id,
            ),
        )
def _resolve_existing_article_id(payload: ArticleUpsert) -> int | None: def _resolve_existing_article_id(payload: ArticleUpsert) -> int | None:
with get_conn() as conn: with get_conn() as conn:
# 1) strongest key: source_url # 1) strongest key: source_url
@ -417,8 +570,9 @@ def upsert_article(payload: ArticleUpsert) -> int:
summary, content_raw, content_rewritten, image_urls_json, press_contact, summary, content_raw, content_rewritten, image_urls_json, press_contact,
source_name_snapshot, source_terms_url_snapshot, source_license_name_snapshot, source_name_snapshot, source_terms_url_snapshot, source_license_name_snapshot,
legal_checked, legal_checked_at, legal_note, legal_checked, legal_checked_at, legal_note,
wp_post_id, wp_post_url, publish_attempts, publish_last_error, published_to_wp_at,
word_count, status, meta_json word_count, status, meta_json
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", """,
( (
payload.feed_id, payload.feed_id,
@ -440,6 +594,11 @@ def upsert_article(payload: ArticleUpsert) -> int:
1 if payload.legal_checked else 0, 1 if payload.legal_checked else 0,
payload.legal_checked_at, payload.legal_checked_at,
payload.legal_note, payload.legal_note,
payload.wp_post_id,
payload.wp_post_url,
payload.publish_attempts,
payload.publish_last_error,
payload.published_to_wp_at,
payload.word_count, payload.word_count,
payload.status, payload.status,
payload.meta_json, payload.meta_json,
@ -469,6 +628,11 @@ def upsert_article(payload: ArticleUpsert) -> int:
legal_checked = ?, legal_checked = ?,
legal_checked_at = ?, legal_checked_at = ?,
legal_note = ?, legal_note = ?,
wp_post_id = ?,
wp_post_url = ?,
publish_attempts = ?,
publish_last_error = ?,
published_to_wp_at = ?,
word_count = ?, word_count = ?,
status = ?, status = ?,
meta_json = ? meta_json = ?
@ -494,6 +658,11 @@ def upsert_article(payload: ArticleUpsert) -> int:
1 if payload.legal_checked else 0, 1 if payload.legal_checked else 0,
payload.legal_checked_at, payload.legal_checked_at,
payload.legal_note, payload.legal_note,
payload.wp_post_id,
payload.wp_post_url,
payload.publish_attempts,
payload.publish_last_error,
payload.published_to_wp_at,
payload.word_count, payload.word_count,
payload.status, payload.status,
payload.meta_json, payload.meta_json,
@ -515,7 +684,8 @@ def list_articles(limit: int = 100, status_filter: str | None = None) -> list[di
SELECT a.id, a.feed_id, a.source_article_id, a.source_hash, a.title, a.source_url, a.canonical_url, a.published_at, a.author, SELECT a.id, a.feed_id, a.source_article_id, a.source_hash, a.title, a.source_url, a.canonical_url, a.published_at, a.author,
a.summary, a.content_raw, a.word_count, a.status, a.meta_json, a.created_at, a.updated_at, f.name AS feed_name, a.summary, a.content_raw, a.word_count, a.status, a.meta_json, a.created_at, a.updated_at, f.name AS feed_name,
a.image_urls_json, a.press_contact, a.source_name_snapshot, a.source_terms_url_snapshot, a.image_urls_json, a.press_contact, a.source_name_snapshot, a.source_terms_url_snapshot,
a.source_license_name_snapshot, a.legal_checked, a.legal_checked_at, a.legal_note a.source_license_name_snapshot, a.legal_checked, a.legal_checked_at, a.legal_note,
a.wp_post_id, a.wp_post_url, a.publish_attempts, a.publish_last_error, a.published_to_wp_at
FROM articles a FROM articles a
LEFT JOIN feeds f ON f.id = a.feed_id LEFT JOIN feeds f ON f.id = a.feed_id
WHERE a.status = ? WHERE a.status = ?
@ -530,7 +700,8 @@ def list_articles(limit: int = 100, status_filter: str | None = None) -> list[di
SELECT a.id, a.feed_id, a.source_article_id, a.source_hash, a.title, a.source_url, a.canonical_url, a.published_at, a.author, SELECT a.id, a.feed_id, a.source_article_id, a.source_hash, a.title, a.source_url, a.canonical_url, a.published_at, a.author,
a.summary, a.content_raw, a.word_count, a.status, a.meta_json, a.created_at, a.updated_at, f.name AS feed_name, a.summary, a.content_raw, a.word_count, a.status, a.meta_json, a.created_at, a.updated_at, f.name AS feed_name,
a.image_urls_json, a.press_contact, a.source_name_snapshot, a.source_terms_url_snapshot, a.image_urls_json, a.press_contact, a.source_name_snapshot, a.source_terms_url_snapshot,
a.source_license_name_snapshot, a.legal_checked, a.legal_checked_at, a.legal_note a.source_license_name_snapshot, a.legal_checked, a.legal_checked_at, a.legal_note,
a.wp_post_id, a.wp_post_url, a.publish_attempts, a.publish_last_error, a.published_to_wp_at
FROM articles a FROM articles a
LEFT JOIN feeds f ON f.id = a.feed_id LEFT JOIN feeds f ON f.id = a.feed_id
ORDER BY a.id DESC ORDER BY a.id DESC

111
backend/app/wordpress.py Normal file
View file

@ -0,0 +1,111 @@
from __future__ import annotations
import base64
import json
from typing import Any
from urllib.request import Request, urlopen
from .config import get_settings
def _auth_header(username: str, app_password: str) -> str:
token = base64.b64encode(f"{username}:{app_password}".encode("utf-8")).decode("ascii")
return f"Basic {token}"
def _wp_request(
    *,
    base_url: str,
    auth_header: str,
    method: str,
    endpoint: str,
    payload: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Send one JSON request to the `wp-json/wp/v2` API and decode the reply.

    Args:
        base_url: Site root; a trailing slash is ignored.
        auth_header: Value for the `Authorization` header.
        method: HTTP method to use.
        endpoint: Path below `wp-json/wp/v2/`; a leading slash is ignored.
        payload: Optional JSON body; no body is sent when None.

    Returns:
        The decoded JSON object, or an empty dict when the response body is
        empty or is not a JSON object.
    """
    target = base_url.rstrip("/") + "/wp-json/wp/v2/" + endpoint.lstrip("/")
    body = None if payload is None else json.dumps(payload).encode("utf-8")
    request_headers = {
        "Authorization": auth_header,
        "Content-Type": "application/json; charset=utf-8",
        "Accept": "application/json",
        "User-Agent": "rss-news-publisher/1.0",
    }
    request = Request(url=target, data=body, method=method, headers=request_headers)

    with urlopen(request, timeout=20) as response:
        text = response.read().decode("utf-8", errors="replace")

    if not text:
        return {}
    decoded = json.loads(text)
    if isinstance(decoded, dict):
        return decoded
    return {}
def _selected_image_url_from_meta(meta_json: str | None) -> str | None:
if not meta_json:
return None
try:
meta = json.loads(meta_json)
except Exception:
return None
if not isinstance(meta, dict):
return None
image_review = meta.get("image_review")
if not isinstance(image_review, dict):
return None
selected = image_review.get("selected_url")
return selected if isinstance(selected, str) and selected.strip() else None
def publish_article_draft(article: dict[str, Any]) -> tuple[int, str | None]:
    """Create or update the WordPress post for an article.

    The post body is the rewritten content, falling back to the raw content
    and then the summary. A source footer is appended, plus a canonical link
    when it differs from the source URL. An article that already carries a
    `wp_post_id` updates that post; otherwise a new post is created. The post
    status comes from the `wordpress_default_status` setting.

    Args:
        article: Article row as a dict.

    Returns:
        The WordPress post ID and the post link (None when the response has
        no string `link`).

    Raises:
        RuntimeError: When the WordPress settings are incomplete or the
            response carries no positive post ID.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    settings = get_settings()
    if not settings.wordpress_base_url or not settings.wordpress_username or not settings.wordpress_app_password:
        raise RuntimeError("WordPress Konfiguration fehlt (base_url, username, app_password)")

    auth = _auth_header(settings.wordpress_username, settings.wordpress_app_password)
    source_url = article.get("source_url") or ""
    canonical_url = article.get("canonical_url") or source_url
    title = (article.get("title") or "Ohne Titel").strip()
    body = (article.get("content_rewritten") or article.get("content_raw") or "").strip()
    if not body:
        body = article.get("summary") or ""

    # The URLs originate from ingested feeds. Escape them so quotes or angle
    # brackets cannot break out of the href attribute or inject markup.
    safe_source = escape(source_url, quote=True)
    footer = "\n\n<hr />\n<p><strong>Quelle:</strong> "
    footer += f"<a href=\"{safe_source}\">{safe_source}</a></p>"
    if canonical_url and canonical_url != source_url:
        safe_canonical = escape(canonical_url, quote=True)
        footer += f"\n<p><strong>Canonical:</strong> <a href=\"{safe_canonical}\">{safe_canonical}</a></p>"

    payload = {
        "title": title,
        "content": f"{body}{footer}",
        "status": settings.wordpress_default_status,
    }

    # Both cases use POST; only the endpoint differs between create and update.
    wp_post_id = article.get("wp_post_id")
    endpoint = f"posts/{int(wp_post_id)}" if wp_post_id else "posts"
    result = _wp_request(
        base_url=settings.wordpress_base_url,
        auth_header=auth,
        method="POST",
        endpoint=endpoint,
        payload=payload,
    )

    # `or 0` turns a null id into the error below instead of a TypeError.
    post_id = int(result.get("id") or 0)
    if post_id <= 0:
        raise RuntimeError(f"WordPress Antwort ohne Post-ID: {result}")
    post_url = result.get("link")
    return post_id, post_url if isinstance(post_url, str) else None
def selected_image_exists(article: dict[str, Any]) -> bool:
    """Report whether the article's meta JSON names a selected image URL.

    Args:
        article: Article row as a dict; only `meta_json` is read.

    Returns:
        True when a selected image URL can be extracted, False otherwise.
    """
    selected_url = _selected_image_url_from_meta(article.get("meta_json"))
    return selected_url is not None

View file

@ -21,6 +21,12 @@
</header> </header>
<main class="container"> <main class="container">
{% if flash_msg %}
<section class="card flash {{ 'flash-error' if flash_type == 'error' else 'flash-success' }}">
{{ flash_msg }}
</section>
{% endif %}
<section class="card"> <section class="card">
<h2>{{ article.title }}</h2> <h2>{{ article.title }}</h2>
<div class="detail-grid"> <div class="detail-grid">
@ -39,6 +45,16 @@
{% if article.summary %} {% if article.summary %}
<p><strong>Summary:</strong> {{ article.summary }}</p> <p><strong>Summary:</strong> {{ article.summary }}</p>
{% endif %} {% endif %}
<p><strong>WordPress Post:</strong>
{% if article.wp_post_url %}
<a href="{{ article.wp_post_url }}" target="_blank" rel="noopener">#{{ article.wp_post_id }}</a>
{% elif article.wp_post_id %}
#{{ article.wp_post_id }}
{% else %}
-
{% endif %}
</p>
<p><strong>Publish Attempts:</strong> {{ article.publish_attempts or 0 }} | <strong>Letzter Fehler:</strong> {{ article.publish_last_error or "-" }}</p>
</section> </section>
<section class="card"> <section class="card">
@ -184,6 +200,15 @@
<button type="submit" class="secondary">Setzen</button> <button type="submit" class="secondary">Setzen</button>
</form> </form>
</section> </section>
<section class="card">
<h2>WordPress Publish Queue</h2>
<p class="subtle">Voraussetzungen: Status `approved`, Rechtsfreigabe aktiv, Hauptbild gesetzt.</p>
<form method="post" action="/admin/articles/{{ article.id }}/publish-enqueue" class="row">
<input name="max_attempts" value="3" />
<button type="submit">In Queue einreihen</button>
</form>
</section>
</main> </main>
</body> </body>
</html> </html>

View file

@ -91,6 +91,14 @@
</form> </form>
</section> </section>
<section class="card">
<h2>Publisher ausführen</h2>
<form method="post" action="/admin/publisher/run" class="row">
<input name="max_jobs" value="10" />
<button type="submit">Publisher Run starten</button>
</form>
</section>
<section class="card"> <section class="card">
<h2>Quellen + Policy</h2> <h2>Quellen + Policy</h2>
<table> <table>
@ -239,6 +247,35 @@
</tbody> </tbody>
</table> </table>
</section> </section>
<section class="card">
<h2>Publish Jobs</h2>
<table>
<thead>
<tr><th>ID</th><th>Artikel</th><th>Status</th><th>Attempts</th><th>WP Post</th><th>Fehler</th></tr>
</thead>
<tbody>
{% for j in publish_jobs %}
<tr>
<td>{{ j.id }}</td>
<td>#{{ j.article_id }} {{ j.article_title or "-" }}</td>
<td>{{ j.status }}</td>
<td>{{ j.attempts }}/{{ j.max_attempts }}</td>
<td>
{% if j.wp_post_url %}
<a href="{{ j.wp_post_url }}" target="_blank" rel="noopener">#{{ j.wp_post_id }}</a>
{% elif j.wp_post_id %}
#{{ j.wp_post_id }}
{% else %}
-
{% endif %}
</td>
<td>{{ j.error_message or "-" }}</td>
</tr>
{% endfor %}
</tbody>
</table>
</section>
</main> </main>
</body> </body>
</html> </html>

View file

@ -112,6 +112,11 @@ class TestAdminUi(unittest.TestCase):
legal_checked=False, legal_checked=False,
legal_checked_at=None, legal_checked_at=None,
legal_note=None, legal_note=None,
wp_post_id=None,
wp_post_url=None,
publish_attempts=0,
publish_last_error=None,
published_to_wp_at=None,
word_count=2, word_count=2,
status="new", status="new",
meta_json='{"extraction":{"images":["https://example.org/img.jpg"],"press_contact":"Kontakt"}}', meta_json='{"extraction":{"images":["https://example.org/img.jpg"],"press_contact":"Kontakt"}}',

View file

@ -85,6 +85,11 @@ class TestSQLiteRepositories(unittest.TestCase):
legal_checked=False, legal_checked=False,
legal_checked_at=None, legal_checked_at=None,
legal_note=None, legal_note=None,
wp_post_id=None,
wp_post_url=None,
publish_attempts=0,
publish_last_error=None,
published_to_wp_at=None,
word_count=120, word_count=120,
status="review", status="review",
meta_json='{"lang":"de"}', meta_json='{"lang":"de"}',
@ -114,6 +119,11 @@ class TestSQLiteRepositories(unittest.TestCase):
legal_checked=True, legal_checked=True,
legal_checked_at="2026-02-18T00:10:00Z", legal_checked_at="2026-02-18T00:10:00Z",
legal_note="ok", legal_note="ok",
wp_post_id=123,
wp_post_url="https://example.org/wp/123",
publish_attempts=1,
publish_last_error=None,
published_to_wp_at="2026-02-18T00:12:00Z",
word_count=140, word_count=140,
status="approved", status="approved",
meta_json='{"lang":"de","v":2}', meta_json='{"lang":"de","v":2}',

View file

@ -0,0 +1,112 @@
import os
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
from fastapi.testclient import TestClient
from backend.app import config as config_module
from backend.app.db import init_db
from backend.app.main import app
class TestPublisher(unittest.TestCase):
    """Exercise the WordPress publish queue end to end through the HTTP API.

    Every test runs against a fresh database file inside a temporary
    directory and a logged-in ``TestClient``. The function that talks to
    WordPress (``publish_article_draft``) is mocked by the test itself.
    """

    def setUp(self) -> None:
        """Build an isolated environment, database and authenticated client.

        All teardown work is registered through ``addCleanup`` instead of a
        ``tearDown`` method: cleanups also run when a later step of
        ``setUp`` raises, whereas ``tearDown`` would be skipped and leave the
        temporary directory and environment variables behind.
        """
        self.tmp_dir = tempfile.TemporaryDirectory()
        self.addCleanup(self.tmp_dir.cleanup)

        # patch.dict restores the previous environment on stop(), so values
        # that existed before the test are put back rather than deleted.
        env_patch = patch.dict(
            os.environ,
            {
                "APP_DB_PATH": str(Path(self.tmp_dir.name) / "publisher.db"),
                "APP_ADMIN_USERNAME": "admin",
                "APP_ADMIN_PASSWORD": "secret",
                "WORDPRESS_BASE_URL": "https://example.org",
                "WORDPRESS_USERNAME": "wp-user",
                "WORDPRESS_APP_PASSWORD": "wp-pass",
            },
        )
        env_patch.start()
        self.addCleanup(env_patch.stop)

        # Drop cached settings now so the patched environment is picked up,
        # and again on cleanup so later tests do not see this test's values.
        config_module.get_settings.cache_clear()
        self.addCleanup(config_module.get_settings.cache_clear)

        init_db()
        self.client = TestClient(app)
        login = self.client.post(
            "/auth/login",
            json={"username": "admin", "password": "secret"},
        )
        # Fail here with a clear message instead of on a later request.
        self.assertLess(login.status_code, 400, f"login failed: {login.text}")

    def _post_ok(self, url: str, payload: dict) -> dict:
        """POST *payload* as JSON to *url* and return the decoded body.

        Fails the test immediately when the response is a 4xx/5xx, so a
        broken fixture step is reported as such rather than as a ``KeyError``
        on the missing ``"id"`` field.
        """
        response = self.client.post(url, json=payload)
        self.assertLess(
            response.status_code,
            400,
            f"POST {url} failed: {response.text}",
        )
        return response.json()

    def _create_publishable_article(self) -> int:
        """Create a source, a feed and an approved article; return the article id.

        The article is stored with ``status="approved"``,
        ``legal_checked=True`` and a selected image in ``meta_json``.
        """
        source_id = self._post_ok(
            "/api/sources",
            {
                "name": "WP Source",
                "base_url": "https://example.org",
                "terms_url": "https://example.org/terms",
                "license_name": "cc-by",
                "risk_level": "green",
                "is_enabled": True,
                "last_reviewed_at": "2026-02-18T00:00:00Z",
            },
        )["id"]
        feed_id = self._post_ok(
            "/api/feeds",
            {
                "name": "WP Feed",
                "url": "https://example.org/feed.xml",
                "source_id": source_id,
                "is_enabled": True,
            },
        )["id"]
        article = self._post_ok(
            "/api/articles/upsert",
            {
                "feed_id": feed_id,
                "source_article_id": "pub-1",
                "source_hash": "pub-hash-1",
                "title": "Publish Artikel",
                "source_url": "https://example.org/article/1",
                "canonical_url": "https://example.org/article/1",
                "published_at": "2026-02-18T00:00:00Z",
                "author": "Autor",
                "summary": "Kurz",
                "content_raw": "Langtext",
                "image_urls_json": '["https://example.org/img.jpg"]',
                "press_contact": "Kontakt",
                "source_name_snapshot": "WP Source",
                "source_terms_url_snapshot": "https://example.org/terms",
                "source_license_name_snapshot": "cc-by",
                "legal_checked": True,
                "status": "approved",
                "meta_json": '{"image_review":{"selected_url":"https://example.org/img.jpg"}}',
            },
        )
        return article["id"]

    @patch("backend.app.publisher.publish_article_draft")
    def test_enqueue_and_run_publisher(self, mock_publish) -> None:
        """Enqueue an article, run the publisher and check the recorded result.

        With ``publish_article_draft`` mocked to return a post id and URL,
        one publisher run must report one success, mark the article as
        ``published`` with those WordPress values, and list at least one job.
        """
        mock_publish.return_value = (777, "https://example.org/?p=777")
        article_id = self._create_publishable_article()

        enqueue = self.client.post(
            "/api/publisher/enqueue",
            json={"article_id": article_id, "max_attempts": 3},
        )
        self.assertEqual(enqueue.status_code, 200)

        run = self.client.post("/api/publisher/run", json={"max_jobs": 5})
        self.assertEqual(run.status_code, 200)
        stats = run.json()["stats"]
        self.assertEqual(stats["success"], 1)

        article = self.client.get(f"/api/articles/{article_id}")
        self.assertEqual(article.status_code, 200)
        item = article.json()["item"]
        self.assertEqual(item["status"], "published")
        self.assertEqual(item["wp_post_id"], 777)
        # `or ""` keeps assertIn from raising TypeError when the URL is None.
        self.assertIn("?p=777", item["wp_post_url"] or "")

        jobs = self.client.get("/api/publisher/jobs")
        self.assertEqual(jobs.status_code, 200)
        self.assertGreaterEqual(len(jobs.json()["items"]), 1)
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()