feat: rebuild rss-news backend, admin ui, and legal extraction pipeline

This commit is contained in:
Oliver 2026-02-18 09:46:44 +01:00
parent d65c55d315
commit 2c331d683b
No known key found for this signature in database
43 changed files with 3463 additions and 73 deletions

404
backend/app/main.py Normal file
View file

@ -0,0 +1,404 @@
from contextlib import asynccontextmanager
from pathlib import Path
from fastapi import Depends, FastAPI, HTTPException, Request, Response, status
from pydantic import BaseModel, Field
from fastapi.staticfiles import StaticFiles
from .admin_ui import router as admin_router
from .auth import create_session_token, verify_credentials, verify_session_token
from .config import get_settings
from .db import init_db
from .ingestion import run_ingestion
from .policy import evaluate_source_policy, is_source_allowed
from .repositories import (
ArticleUpsert,
FeedCreate,
RunCreate,
SourceCreate,
create_feed as repo_create_feed,
create_run,
create_source as repo_create_source,
finish_run,
get_article_by_id,
get_feed_by_id,
get_run_by_id,
get_source_by_id,
list_articles as repo_list_articles,
list_feeds as repo_list_feeds,
list_runs,
list_sources as repo_list_sources,
update_article_status,
upsert_article as repo_upsert_article,
)
settings = get_settings()
@asynccontextmanager
async def app_lifespan(_: FastAPI):
init_db()
yield
app = FastAPI(title=settings.app_name, lifespan=app_lifespan)
app.include_router(admin_router)
app.mount(
"/admin/static",
StaticFiles(directory=str(Path(__file__).resolve().parent.parent / "static")),
name="admin-static",
)
class LoginRequest(BaseModel):
username: str
password: str
class SourceCreateRequest(BaseModel):
name: str = Field(min_length=1, max_length=200)
base_url: str | None = None
terms_url: str | None = None
license_name: str | None = None
risk_level: str = Field(default="yellow", pattern="^(green|yellow|red)$")
is_enabled: bool = False
notes: str | None = None
last_reviewed_at: str | None = None
class FeedCreateRequest(BaseModel):
name: str = Field(min_length=1, max_length=200)
url: str = Field(min_length=5, max_length=1000)
source_id: int | None = None
is_enabled: bool = True
class RunCreateRequest(BaseModel):
run_type: str = Field(min_length=2, max_length=100)
status: str = Field(default="queued", pattern="^(queued|running|success|failed)$")
details: str | None = None
class RunFinishRequest(BaseModel):
status: str = Field(pattern="^(success|failed)$")
details: str | None = None
class ArticleUpsertRequest(BaseModel):
feed_id: int | None = None
source_article_id: str | None = None
source_hash: str | None = None
title: str = Field(min_length=1, max_length=500)
source_url: str = Field(min_length=5, max_length=2000)
canonical_url: str | None = None
published_at: str | None = None
author: str | None = None
summary: str | None = None
content_raw: str | None = None
content_rewritten: str | None = None
word_count: int = 0
status: str = Field(default="new", pattern="^(new|rewrite|review|approved|published|error)$")
meta_json: str | None = None
class IngestionRunRequest(BaseModel):
feed_id: int | None = None
class ArticleTransitionRequest(BaseModel):
target_status: str = Field(pattern="^(new|rewrite|review|approved|published|error)$")
note: str | None = None
class ArticleReviewRequest(BaseModel):
decision: str = Field(pattern="^(approve|reject)$")
note: str | None = None
ALLOWED_ARTICLE_TRANSITIONS: dict[str, set[str]] = {
"new": {"review", "rewrite", "error"},
"rewrite": {"review", "error"},
"review": {"approved", "rewrite", "error"},
"approved": {"published", "error"},
"published": {"error"},
"error": {"review", "rewrite"},
}
def require_auth(request: Request) -> str:
token = request.cookies.get(settings.session_cookie_name)
if not token:
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Nicht angemeldet")
username = verify_session_token(token)
if not username:
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Session ungueltig oder abgelaufen")
return username
@app.get("/health")
def health() -> dict:
return {"status": "ok", "service": settings.app_name, "db_path": settings.app_db_path}
@app.post("/auth/login")
def login(payload: LoginRequest, response: Response) -> dict:
if not verify_credentials(payload.username, payload.password):
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Ungueltige Zugangsdaten")
token = create_session_token(payload.username)
response.set_cookie(
key=settings.session_cookie_name,
value=token,
max_age=settings.session_max_age_seconds,
httponly=True,
secure=False,
samesite="lax",
)
return {"ok": True, "username": payload.username}
@app.post("/auth/logout")
def logout(response: Response) -> dict:
response.delete_cookie(settings.session_cookie_name)
return {"ok": True}
@app.get("/auth/me")
def me(username: str = Depends(require_auth)) -> dict:
return {"authenticated": True, "username": username}
@app.get("/api/protected")
def protected(username: str = Depends(require_auth)) -> dict:
return {"ok": True, "message": "Protected endpoint", "username": username}
@app.get("/api/pipeline/status")
def pipeline_status(username: str = Depends(require_auth)) -> dict:
feeds_total = len(repo_list_feeds())
sources_total = len(repo_list_sources())
articles_total = len(repo_list_articles(limit=500))
return {
"ok": True,
"stage": "skeleton+db",
"requested_by": username,
"counts": {
"sources": sources_total,
"feeds": feeds_total,
"articles": articles_total,
},
}
@app.get("/api/sources")
def list_sources(username: str = Depends(require_auth)) -> dict:
return {"ok": True, "items": repo_list_sources(), "requested_by": username}
@app.get("/api/sources/{source_id}/policy-check")
def source_policy_check(source_id: int, username: str = Depends(require_auth)) -> dict:
source = get_source_by_id(source_id)
if not source:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Quelle nicht gefunden")
issues = evaluate_source_policy(source)
return {
"ok": True,
"source_id": source_id,
"allowed": is_source_allowed(source),
"issues": issues,
"requested_by": username,
}
@app.post("/api/sources")
def create_source(payload: SourceCreateRequest, username: str = Depends(require_auth)) -> dict:
source_id = repo_create_source(
SourceCreate(
name=payload.name,
base_url=payload.base_url,
terms_url=payload.terms_url,
license_name=payload.license_name,
risk_level=payload.risk_level,
is_enabled=payload.is_enabled,
notes=payload.notes,
last_reviewed_at=payload.last_reviewed_at,
)
)
return {"ok": True, "id": source_id, "requested_by": username}
@app.get("/api/feeds")
def list_feeds(username: str = Depends(require_auth)) -> dict:
return {"ok": True, "items": repo_list_feeds(), "requested_by": username}
@app.get("/api/feeds/{feed_id}/policy-check")
def feed_policy_check(feed_id: int, username: str = Depends(require_auth)) -> dict:
feed = get_feed_by_id(feed_id)
if not feed:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Feed nicht gefunden")
source_snapshot = {
"id": feed.get("source_id"),
"name": feed.get("source_name"),
"base_url": feed.get("source_base_url"),
"terms_url": feed.get("source_terms_url"),
"license_name": feed.get("source_license_name"),
"risk_level": feed.get("source_risk_level"),
"last_reviewed_at": feed.get("source_last_reviewed_at"),
"is_enabled": feed.get("source_is_enabled"),
}
issues = evaluate_source_policy(source_snapshot)
return {
"ok": True,
"feed_id": feed_id,
"allowed": len(issues) == 0,
"issues": issues,
"requested_by": username,
}
@app.post("/api/feeds")
def create_feed(payload: FeedCreateRequest, username: str = Depends(require_auth)) -> dict:
try:
feed_id = repo_create_feed(
FeedCreate(
name=payload.name,
url=payload.url,
source_id=payload.source_id,
is_enabled=payload.is_enabled,
)
)
except Exception as exc:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"Feed konnte nicht angelegt werden: {exc}") from exc
return {"ok": True, "id": feed_id, "requested_by": username}
@app.get("/api/runs")
def api_list_runs(limit: int = 50, username: str = Depends(require_auth)) -> dict:
return {"ok": True, "items": list_runs(limit=limit), "requested_by": username}
@app.get("/api/runs/{run_id}")
def api_get_run(run_id: int, username: str = Depends(require_auth)) -> dict:
run = get_run_by_id(run_id)
if not run:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Run nicht gefunden")
return {"ok": True, "item": run, "requested_by": username}
@app.post("/api/runs")
def api_create_run(payload: RunCreateRequest, username: str = Depends(require_auth)) -> dict:
run_id = create_run(RunCreate(run_type=payload.run_type, status=payload.status, details=payload.details))
return {"ok": True, "id": run_id, "requested_by": username}
@app.post("/api/runs/{run_id}/finish")
def api_finish_run(run_id: int, payload: RunFinishRequest, username: str = Depends(require_auth)) -> dict:
finish_run(run_id=run_id, status=payload.status, details=payload.details)
return {"ok": True, "id": run_id, "requested_by": username}
@app.get("/api/articles")
def api_list_articles(limit: int = 100, status_filter: str | None = None, username: str = Depends(require_auth)) -> dict:
return {"ok": True, "items": repo_list_articles(limit=limit, status_filter=status_filter), "requested_by": username}
@app.get("/api/articles/{article_id}")
def api_get_article(article_id: int, username: str = Depends(require_auth)) -> dict:
article = get_article_by_id(article_id)
if not article:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Artikel nicht gefunden")
return {"ok": True, "item": article, "requested_by": username}
@app.post("/api/articles/upsert")
def api_upsert_article(payload: ArticleUpsertRequest, username: str = Depends(require_auth)) -> dict:
article_id = repo_upsert_article(
ArticleUpsert(
feed_id=payload.feed_id,
source_article_id=payload.source_article_id,
source_hash=payload.source_hash,
title=payload.title,
source_url=payload.source_url,
canonical_url=payload.canonical_url,
published_at=payload.published_at,
author=payload.author,
summary=payload.summary,
content_raw=payload.content_raw,
content_rewritten=payload.content_rewritten,
word_count=payload.word_count,
status=payload.status,
meta_json=payload.meta_json,
)
)
return {"ok": True, "id": article_id, "requested_by": username}
@app.post("/api/articles/{article_id}/transition")
def api_article_transition(article_id: int, payload: ArticleTransitionRequest, username: str = Depends(require_auth)) -> dict:
article = get_article_by_id(article_id)
if not article:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Artikel nicht gefunden")
current_status = article.get("status")
allowed_targets = ALLOWED_ARTICLE_TRANSITIONS.get(current_status, set())
if payload.target_status not in allowed_targets:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Ungueltiger Statuswechsel: {current_status} -> {payload.target_status}",
)
updated = update_article_status(article_id, payload.target_status, actor=username, note=payload.note)
if not updated:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Artikel nicht gefunden")
return {"ok": True, "id": article_id, "from_status": current_status, "to_status": payload.target_status}
@app.post("/api/articles/{article_id}/review")
def api_article_review(article_id: int, payload: ArticleReviewRequest, username: str = Depends(require_auth)) -> dict:
article = get_article_by_id(article_id)
if not article:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Artikel nicht gefunden")
if article.get("status") != "review":
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Review nur fuer Status 'review' erlaubt (aktuell: {article.get('status')})",
)
target_status = "approved" if payload.decision == "approve" else "rewrite"
updated = update_article_status(
article_id,
target_status,
actor=username,
note=payload.note,
decision=payload.decision,
)
if not updated:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Artikel nicht gefunden")
return {
"ok": True,
"id": article_id,
"decision": payload.decision,
"to_status": target_status,
}
@app.post("/api/ingestion/run")
def api_run_ingestion(payload: IngestionRunRequest, username: str = Depends(require_auth)) -> dict:
stats = run_ingestion(feed_id=payload.feed_id)
return {
"ok": stats.status == "success",
"run_id": stats.run_id,
"status": stats.status,
"message": stats.message,
"stats": {
"feeds_processed": stats.feeds_processed,
"entries_seen": stats.entries_seen,
"articles_upserted": stats.articles_upserted,
},
"requested_by": username,
}