feat(pipeline): article age filter, image URL validation, schedule UI, retry button
1. Article age filter (ingestion.py + config.py):
- New setting pipeline_max_article_age_days=7 (0 = no limit)
- Skip RSS entries older than N days before expensive extract_article()
- Prevents old articles from Google Alerts re-entering pipeline
2. Image URL pre-validation (ingestion.py):
- HEAD request probe for each primary image candidate during ingestion
- Falls back to next-best candidate if primary returns 4xx
- Network errors treated as OK to avoid false negatives on flaky servers
3. Stale WP draft cleanup (pipeline.py):
- Quality gate rejections now delete any pre-existing WP draft (wp_post_id)
- Prevents orphaned drafts when re-running articles that previously had drafts
4. Schedule overview UI (scheduler.py + admin_ui.py + admin_schedule.html):
- New /admin/schedule page showing calendar grid of all booked slots
- Distinguishes Pipeline-DB slots from WordPress-only slots
- Link added to dashboard navigation
5. Retry for failed articles (admin_ui.py + admin_dashboard.html):
- New POST /admin/articles/{id}/retry endpoint: resets to 'new', releases slot
- '🔄 Wiederholen' button shown in dashboard for all 'close' (error) articles
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
cf2d826c8a
commit
8676ace102
7 changed files with 344 additions and 5 deletions
|
|
@ -165,6 +165,72 @@ def _find_next_free_slot(
|
|||
return tomorrow, _preferred_hours()[0] if _preferred_hours() else 9
|
||||
|
||||
|
||||
def get_schedule_overview(lookahead_days: int = 60) -> list[dict]:
    """Return all booked scheduling slots (DB + WP) for the next N days, sorted by date.

    Two sources are merged:

    * rows of the local ``articles`` table that carry a ``scheduled_publish_at``
      timestamp and whose status is neither ``'error'`` nor ``'no_image'``
      (reported with ``source == "db"``), and
    * slots returned by ``_fetch_wp_occupied_slots()`` that have no matching
      ``(date, hour)`` entry among the DB slots (``source == "wordpress"``).

    Args:
        lookahead_days: Size of the reporting window. Slots dated from today up
            to and including ``today + lookahead_days`` are returned; later
            slots are left out.

    Returns:
        A list of dicts sorted by ``(date, hour)``. Every dict has the keys
        ``date`` (ISO date string), ``hour``, ``formatted``, ``source``,
        ``article_id``, ``article_title``, ``article_status``, ``wp_post_id``
        and ``wp_post_url``. The article/WP fields are ``None`` for
        WordPress-only slots.
    """
    # Function-scope imports keep this block self-contained regardless of what
    # the module header already imports.
    import logging
    from datetime import timedelta

    log = logging.getLogger(__name__)

    # NOTE(review): assumes _today_cet() returns a datetime.date - the original
    # code already compares it against date objects; confirm.
    today = _today_cet()
    horizon = today + timedelta(days=lookahead_days)

    # Slots booked in local DB
    with get_conn() as conn:
        rows = conn.execute(
            """
            SELECT id, title, status, wp_post_id, wp_post_url, scheduled_publish_at
            FROM articles
            WHERE scheduled_publish_at IS NOT NULL
              AND scheduled_publish_at >= ?
              AND status NOT IN ('error', 'no_image')
            ORDER BY scheduled_publish_at
            """,
            (today.isoformat() + "T00:00:00",),
        ).fetchall()

    db_slots: dict[tuple[str, int], dict] = {}
    for row in rows:
        try:
            dt = datetime.fromisoformat(row["scheduled_publish_at"])
            slot_date = dt.date()
            if slot_date > horizon:
                # Beyond the requested window. The upper bound is applied on
                # the parsed value rather than in SQL so it does not depend on
                # the stored string format.
                continue
            formatted = _format_slot(slot_date, dt.hour)
        except Exception:
            # Best effort: one unusable row must not break the whole overview,
            # but leave a trace instead of dropping it silently.
            log.warning(
                "Skipping article %s: unusable scheduled_publish_at %r",
                row["id"],
                row["scheduled_publish_at"],
                exc_info=True,
            )
            continue

        # A later row with the same (date, hour) replaces the earlier one.
        db_slots[(slot_date.isoformat(), dt.hour)] = {
            "date": slot_date.isoformat(),
            "hour": dt.hour,
            "formatted": formatted,
            "source": "db",
            "article_id": row["id"],
            "article_title": row["title"],
            "article_status": row["status"],
            "wp_post_id": row["wp_post_id"],
            "wp_post_url": row["wp_post_url"],
        }

    # Slots occupied in WordPress but not in local DB
    wp_only: list[dict] = []
    for d_str, h in sorted(_fetch_wp_occupied_slots()):
        if (d_str, h) in db_slots:
            continue
        try:
            d = date.fromisoformat(d_str)
            if not (today <= d <= horizon):
                continue
            formatted = _format_slot(d, h)
        except Exception:
            log.warning(
                "Skipping unusable WordPress slot (%r, %r)", d_str, h, exc_info=True
            )
            continue

        wp_only.append(
            {
                "date": d_str,
                "hour": h,
                "formatted": formatted,
                "source": "wordpress",
                "article_id": None,
                "article_title": "(WP-Beitrag außerhalb Pipeline)",
                "article_status": None,
                "wp_post_id": None,
                "wp_post_url": None,
            }
        )

    all_slots = list(db_slots.values()) + wp_only
    all_slots.sort(key=lambda s: (s["date"], s["hour"]))
    return all_slots
|
||||
|
||||
|
||||
def release_publish_slot(article_id: int) -> None:
|
||||
"""Clear a previously reserved slot (e.g. when article is rejected after slot assignment)."""
|
||||
with get_conn() as conn:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue