rss-news/backend/app/repositories.py

416 lines
14 KiB
Python

from __future__ import annotations
from dataclasses import dataclass
import json
from datetime import datetime, timezone
from typing import Any
from .db import get_conn, rows_to_dicts
@dataclass(frozen=True)
class SourceCreate:
    """Immutable bundle of values for inserting one row into ``sources``.

    The fields are listed in the same order as the columns written by
    ``create_source``.
    """

    name: str  # create_source strips surrounding whitespace before storing
    base_url: str | None
    terms_url: str | None
    license_name: str | None
    risk_level: str
    is_enabled: bool  # create_source persists this as 1 or 0
    notes: str | None
    last_reviewed_at: str | None
@dataclass(frozen=True)
class FeedCreate:
    """Immutable bundle of values for inserting one row into ``feeds``."""

    name: str  # create_feed strips surrounding whitespace before storing
    url: str  # create_feed strips surrounding whitespace before storing
    source_id: int | None  # id of the related ``sources`` row, if any
    is_enabled: bool  # create_feed persists this as 1 or 0
@dataclass(frozen=True)
class RunCreate:
    """Immutable bundle of values for inserting one row into ``runs``."""

    run_type: str
    status: str
    details: str | None = None  # optional; omitted details are stored as NULL
@dataclass(frozen=True)
class ArticleUpsert:
    """Immutable bundle of values for inserting or updating an ``articles`` row.

    The fields are listed in the same order as the columns written by
    ``upsert_article``.
    """

    feed_id: int | None
    source_article_id: str | None  # matched together with feed_id when resolving duplicates
    source_hash: str | None  # last-resort key when resolving duplicates
    title: str  # upsert_article strips surrounding whitespace before storing
    source_url: str  # stripped before storing; first key tried when resolving duplicates
    canonical_url: str | None
    published_at: str | None
    author: str | None
    summary: str | None
    content_raw: str | None
    content_rewritten: str | None
    word_count: int
    status: str
    meta_json: str | None
def create_source(payload: SourceCreate) -> int:
    """Insert one row into ``sources`` and return the id of the new row.

    The name is stripped of surrounding whitespace and ``is_enabled`` is
    stored as 1 or 0; every other field is written exactly as given.
    """
    insert_sql = """
        INSERT INTO sources (name, base_url, terms_url, license_name, risk_level, is_enabled, notes, last_reviewed_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """
    with get_conn() as conn:
        enabled_flag = 1 if payload.is_enabled else 0
        params = (
            payload.name.strip(),
            payload.base_url,
            payload.terms_url,
            payload.license_name,
            payload.risk_level,
            enabled_flag,
            payload.notes,
            payload.last_reviewed_at,
        )
        cursor = conn.execute(insert_sql, params)
        new_id = int(cursor.lastrowid)
    return new_id
def list_sources() -> list[dict[str, Any]]:
    """Return every row of ``sources`` as a dict, highest id first."""
    select_sql = """
        SELECT id, name, base_url, terms_url, license_name, risk_level, is_enabled, notes, last_reviewed_at, created_at, updated_at
        FROM sources
        ORDER BY id DESC
        """
    with get_conn() as conn:
        cursor = conn.execute(select_sql)
        return rows_to_dicts(cursor.fetchall())
def get_source_by_id(source_id: int) -> dict[str, Any] | None:
    """Return the ``sources`` row with the given id as a dict, or None if absent."""
    select_sql = """
        SELECT id, name, base_url, terms_url, license_name, risk_level, is_enabled, notes, last_reviewed_at, created_at, updated_at
        FROM sources
        WHERE id = ?
        """
    with get_conn() as conn:
        found = conn.execute(select_sql, (source_id,)).fetchone()
        if not found:
            return None
        return dict(found)
def create_feed(payload: FeedCreate) -> int:
    """Insert one row into ``feeds`` and return the id of the new row.

    Name and URL are stripped of surrounding whitespace; ``is_enabled`` is
    stored as 1 or 0.
    """
    insert_sql = "INSERT INTO feeds (name, url, source_id, is_enabled) VALUES (?, ?, ?, ?)"
    with get_conn() as conn:
        enabled_flag = 1 if payload.is_enabled else 0
        params = (
            payload.name.strip(),
            payload.url.strip(),
            payload.source_id,
            enabled_flag,
        )
        cursor = conn.execute(insert_sql, params)
        new_id = int(cursor.lastrowid)
    return new_id
def list_feeds() -> list[dict[str, Any]]:
    """Return every feed as a dict, highest id first.

    Each dict also carries ``source_*`` columns from the LEFT JOINed
    ``sources`` row; those are NULL when the feed has no matching source.
    """
    select_sql = """
        SELECT f.id, f.name, f.url, f.source_id, f.is_enabled, f.etag, f.last_modified, f.last_checked_at,
        f.created_at, f.updated_at, s.name AS source_name, s.license_name AS source_license_name,
        s.terms_url AS source_terms_url, s.risk_level AS source_risk_level, s.base_url AS source_base_url,
        s.last_reviewed_at AS source_last_reviewed_at, s.is_enabled AS source_is_enabled
        FROM feeds f
        LEFT JOIN sources s ON s.id = f.source_id
        ORDER BY f.id DESC
        """
    with get_conn() as conn:
        cursor = conn.execute(select_sql)
        return rows_to_dicts(cursor.fetchall())
def list_enabled_feeds() -> list[dict[str, Any]]:
    """Return feeds whose ``is_enabled`` is 1 as dicts, lowest id first.

    Only the feed's own flag is filtered on; the joined source's flag is
    returned as ``source_is_enabled`` but does not restrict the result.
    """
    select_sql = """
        SELECT f.id, f.name, f.url, f.source_id, f.is_enabled, f.etag, f.last_modified, f.last_checked_at,
        s.name AS source_name, s.license_name AS source_license_name, s.terms_url AS source_terms_url,
        s.risk_level AS source_risk_level, s.base_url AS source_base_url,
        s.last_reviewed_at AS source_last_reviewed_at, s.is_enabled AS source_is_enabled
        FROM feeds f
        LEFT JOIN sources s ON s.id = f.source_id
        WHERE f.is_enabled = 1
        ORDER BY f.id ASC
        """
    with get_conn() as conn:
        cursor = conn.execute(select_sql)
        return rows_to_dicts(cursor.fetchall())
def get_feed_by_id(feed_id: int) -> dict[str, Any] | None:
    """Return one feed (with its joined ``source_*`` columns) as a dict, or None if absent."""
    select_sql = """
        SELECT f.id, f.name, f.url, f.source_id, f.is_enabled, f.etag, f.last_modified, f.last_checked_at,
        s.name AS source_name, s.license_name AS source_license_name, s.terms_url AS source_terms_url,
        s.risk_level AS source_risk_level, s.base_url AS source_base_url,
        s.last_reviewed_at AS source_last_reviewed_at, s.is_enabled AS source_is_enabled
        FROM feeds f
        LEFT JOIN sources s ON s.id = f.source_id
        WHERE f.id = ?
        """
    with get_conn() as conn:
        found = conn.execute(select_sql, (feed_id,)).fetchone()
        if not found:
            return None
        return dict(found)
def update_feed_fetch_state(feed_id: int, etag: str | None, last_modified: str | None) -> None:
    """Store the given ``etag`` / ``last_modified`` on a feed and stamp ``last_checked_at``.

    ``last_checked_at`` is set by the database via ``datetime('now')``.
    Both values are written as given, so passing None clears the column.
    """
    update_sql = """
        UPDATE feeds
        SET etag = ?, last_modified = ?, last_checked_at = datetime('now')
        WHERE id = ?
        """
    params = (etag, last_modified, feed_id)
    with get_conn() as conn:
        conn.execute(update_sql, params)
def create_run(payload: RunCreate) -> int:
    """Insert one row into ``runs`` and return the id of the new row."""
    insert_sql = "INSERT INTO runs (run_type, status, details) VALUES (?, ?, ?)"
    with get_conn() as conn:
        params = (payload.run_type, payload.status, payload.details)
        cursor = conn.execute(insert_sql, params)
        new_id = int(cursor.lastrowid)
    return new_id
def finish_run(run_id: int, status: str, details: str | None = None) -> None:
    """Set the final status and details of a run and stamp ``finished_at``.

    ``finished_at`` is set by the database via ``datetime('now')``.
    ``details`` is always overwritten, so the default None clears it.
    """
    update_sql = """
        UPDATE runs
        SET status = ?, details = ?, finished_at = datetime('now')
        WHERE id = ?
        """
    params = (status, details, run_id)
    with get_conn() as conn:
        conn.execute(update_sql, params)
def list_runs(limit: int = 50) -> list[dict[str, Any]]:
    """Return the most recent runs as dicts, highest id first.

    ``limit`` is clamped to the range 1..500 before it reaches the query.
    """
    safe_limit = max(1, min(limit, 500))
    select_sql = """
        SELECT id, run_type, status, started_at, finished_at, details
        FROM runs
        ORDER BY id DESC
        LIMIT ?
        """
    with get_conn() as conn:
        cursor = conn.execute(select_sql, (safe_limit,))
        return rows_to_dicts(cursor.fetchall())
def get_run_by_id(run_id: int) -> dict[str, Any] | None:
    """Return the ``runs`` row with the given id as a dict, or None if absent."""
    select_sql = """
        SELECT id, run_type, status, started_at, finished_at, details
        FROM runs
        WHERE id = ?
        """
    with get_conn() as conn:
        found = conn.execute(select_sql, (run_id,)).fetchone()
        if not found:
            return None
        return dict(found)
def get_article_by_id(article_id: int) -> dict[str, Any] | None:
    """Return the full ``articles`` row (including both content columns) as a dict, or None."""
    select_sql = """
        SELECT a.id, a.feed_id, a.source_article_id, a.source_hash, a.title, a.source_url, a.canonical_url, a.published_at, a.author,
        a.summary, a.content_raw, a.content_rewritten, a.word_count, a.status, a.meta_json, a.created_at, a.updated_at
        FROM articles a
        WHERE a.id = ?
        """
    with get_conn() as conn:
        found = conn.execute(select_sql, (article_id,)).fetchone()
        if not found:
            return None
        return dict(found)
def _merge_review_event(meta_json: str | None, event: dict[str, Any]) -> str:
meta: dict[str, Any] = {}
if meta_json:
try:
meta = json.loads(meta_json)
if not isinstance(meta, dict):
meta = {}
except Exception:
meta = {}
events = meta.get("review_events")
if not isinstance(events, list):
events = []
events.append(event)
meta["review_events"] = events
return json.dumps(meta, ensure_ascii=False)
def update_article_status(
    article_id: int,
    new_status: str,
    *,
    actor: str | None = None,
    note: str | None = None,
    decision: str | None = None,
) -> bool:
    """Change an article's status and log the transition in its ``meta_json``.

    Args:
        article_id: Id of the article to update.
        new_status: Status value to store.
        actor: Who made the change; recorded as ``"system"`` when falsy.
        note: Optional free-text note recorded with the event.
        decision: Optional decision label recorded with the event.

    Returns:
        False when no article with ``article_id`` was found, True otherwise.
    """
    current = get_article_by_id(article_id)
    if not current:
        return False

    # The event timestamp is taken in UTC at the moment of the call.
    review_event = dict(
        timestamp=datetime.now(timezone.utc).isoformat(),
        from_status=current.get("status"),
        to_status=new_status,
        actor=actor or "system",
        note=note,
        decision=decision,
    )
    updated_meta = _merge_review_event(current.get("meta_json"), review_event)

    update_sql = "UPDATE articles SET status = ?, meta_json = ? WHERE id = ?"
    with get_conn() as conn:
        conn.execute(update_sql, (new_status, updated_meta, article_id))
    return True
def _resolve_existing_article_id(payload: ArticleUpsert) -> int | None:
    """Find the id of an already-stored article that matches ``payload``.

    Lookups are tried in order and the first hit wins:

    1. the stripped ``source_url``;
    2. ``feed_id`` together with ``source_article_id`` (only when the feed id
       is not None and the article id is non-empty);
    3. ``source_hash`` (only when non-empty).

    Returns:
        The matching row id, or None when no lookup matches.
    """
    with get_conn() as conn:
        lookups: list[tuple[str, tuple[Any, ...]]] = [
            # 1) strongest key: source_url
            ("SELECT id FROM articles WHERE source_url = ?", (payload.source_url.strip(),)),
        ]
        if payload.feed_id is not None and payload.source_article_id:
            # 2) stable feed+guid combo
            lookups.append(
                (
                    "SELECT id FROM articles WHERE feed_id = ? AND source_article_id = ?",
                    (payload.feed_id, payload.source_article_id),
                )
            )
        if payload.source_hash:
            # 3) content hash fallback
            lookups.append(
                ("SELECT id FROM articles WHERE source_hash = ?", (payload.source_hash,))
            )

        for sql, params in lookups:
            match = conn.execute(sql, params).fetchone()
            if match:
                return int(match["id"])
    return None
def upsert_article(payload: ArticleUpsert) -> int:
    """Insert a new article, or overwrite the stored article matching ``payload``.

    The target row is chosen by ``_resolve_existing_article_id``. When a row
    matches, every column listed below is overwritten with the payload's
    value, including ``status``, ``content_rewritten`` and ``meta_json``.
    ``title`` and ``source_url`` are stripped of surrounding whitespace
    before being stored.

    Args:
        payload: Column values for the article.

    Returns:
        The id of the row that was inserted or updated.
    """
    existing_id = _resolve_existing_article_id(payload)

    # Column values shared by the INSERT and the UPDATE, in column order.
    values = (
        payload.feed_id,
        payload.source_article_id,
        payload.source_hash,
        payload.title.strip(),
        payload.source_url.strip(),
        payload.canonical_url,
        payload.published_at,
        payload.author,
        payload.summary,
        payload.content_raw,
        payload.content_rewritten,
        payload.word_count,
        payload.status,
        payload.meta_json,
    )

    with get_conn() as conn:
        if existing_id is None:
            cur = conn.execute(
                """
                INSERT INTO articles (
                feed_id, source_article_id, source_hash, title, source_url, canonical_url, published_at, author,
                summary, content_raw, content_rewritten, word_count, status, meta_json
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                values,
            )
            # Take the id from the INSERT itself, as the other create_*
            # functions in this module do, instead of re-querying by
            # source_url. Assumes ``articles.id`` is the table's rowid.
            article_id = int(cur.lastrowid)
        else:
            conn.execute(
                """
                UPDATE articles
                SET
                feed_id = ?,
                source_article_id = ?,
                source_hash = ?,
                title = ?,
                source_url = ?,
                canonical_url = ?,
                published_at = ?,
                author = ?,
                summary = ?,
                content_raw = ?,
                content_rewritten = ?,
                word_count = ?,
                status = ?,
                meta_json = ?
                WHERE id = ?
                """,
                (*values, existing_id),
            )
            # The row that was updated is by definition the one resolved above.
            article_id = int(existing_id)
    return article_id
def list_articles(limit: int = 100, status_filter: str | None = None) -> list[dict[str, Any]]:
    """Return recent articles as dicts, highest id first, with the feed name joined in.

    Args:
        limit: Maximum number of rows; clamped to the range 1..500.
        status_filter: When non-empty, only articles with exactly this status
            are returned; None or an empty string returns all statuses.

    The result includes ``content_raw`` but not ``content_rewritten``.
    ``feed_name`` is NULL when the article has no matching feed.
    """
    safe_limit = max(1, min(limit, 500))

    params: tuple[Any, ...]
    if status_filter:
        select_sql = """
            SELECT a.id, a.feed_id, a.source_article_id, a.source_hash, a.title, a.source_url, a.canonical_url, a.published_at, a.author,
            a.summary, a.content_raw, a.word_count, a.status, a.meta_json, a.created_at, a.updated_at, f.name AS feed_name
            FROM articles a
            LEFT JOIN feeds f ON f.id = a.feed_id
            WHERE a.status = ?
            ORDER BY a.id DESC
            LIMIT ?
            """
        params = (status_filter, safe_limit)
    else:
        select_sql = """
            SELECT a.id, a.feed_id, a.source_article_id, a.source_hash, a.title, a.source_url, a.canonical_url, a.published_at, a.author,
            a.summary, a.content_raw, a.word_count, a.status, a.meta_json, a.created_at, a.updated_at, f.name AS feed_name
            FROM articles a
            LEFT JOIN feeds f ON f.id = a.feed_id
            ORDER BY a.id DESC
            LIMIT ?
            """
        params = (safe_limit,)

    with get_conn() as conn:
        cursor = conn.execute(select_sql, params)
        return rows_to_dicts(cursor.fetchall())