fix(scheduler): query WordPress future posts to avoid double-booking slots
The scheduler previously only checked the local SQLite DB for occupied slots. Posts created outside the pipeline (e.g. recovery scripts) were invisible, causing newly scheduled articles to land on already-taken WP dates. _fetch_wp_occupied_slots() now queries WP /wp/v2/posts?status=future before each slot assignment. All scheduling functions accept a wp_occupied set. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
764e7bff6a
commit
8fa46312e8
1 changed file with 134 additions and 60 deletions
|
|
@ -3,11 +3,15 @@
|
|||
Calculates suggested publish slots for new WordPress drafts.
|
||||
Rules:
|
||||
- Maximum N drafts per day (configurable, default 2)
|
||||
- Prefer slots spread across the week for steady traffic
|
||||
- Preferred hours: configurable (default 09:00 and 14:00 CET)
|
||||
- Preferred slots: configurable hours (default 09:00 and 14:00 CET)
|
||||
- New articles queue up after the last already-scheduled article
|
||||
- Checks both local DB AND WordPress future posts to avoid double-booking
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
import urllib.request
|
||||
from datetime import date, datetime, timedelta, timezone
|
||||
from typing import Any
|
||||
|
||||
|
|
@ -15,7 +19,7 @@ from .config import get_settings
|
|||
from .db import get_conn
|
||||
|
||||
|
||||
# CET offset (UTC+1 winter / UTC+2 summer – we use a fixed +1 for simplicity)
|
||||
# CET offset (UTC+1 winter / UTC+2 summer – fixed +1 for simplicity)
|
||||
_CET_OFFSET = timedelta(hours=1)
|
||||
|
||||
|
||||
|
|
@ -31,35 +35,87 @@ def _preferred_hours() -> list[int]:
|
|||
return [9, 14]
|
||||
|
||||
|
||||
def _count_scheduled_on_day(target_date: date) -> int:
|
||||
"""Count articles already scheduled for publication on a given date."""
|
||||
date_str = target_date.isoformat()
|
||||
def _fetch_wp_occupied_slots() -> set[tuple[str, int]]:
    """Collect the (date_iso, hour) pairs of all future-scheduled WordPress posts.

    The scheduler uses this so it does not hand out a slot that is already
    taken by a WP post created outside this pipeline (e.g. manually or via
    recovery scripts).

    All result pages are fetched: the REST API caps ``per_page`` at 100, so a
    single request would miss any scheduled posts beyond the first hundred.

    Returns:
        A set of ``(YYYY-MM-DD, hour)`` tuples. The function never raises for
        network, HTTP or parsing problems; on such a failure it logs a warning
        and returns whatever was collected up to that point (an empty set if
        the very first request failed), so the scheduler degrades gracefully.
    """
    import logging

    log = logging.getLogger(__name__)
    settings = get_settings()
    occupied: set[tuple[str, int]] = set()

    try:
        auth = base64.b64encode(
            f"{settings.wordpress_username}:{settings.wordpress_password}".encode()
        ).decode()
        # Avoid "//wp-json" when the configured base URL ends with a slash.
        base_url = str(settings.wordpress_base_url).rstrip("/")

        page = 1
        total_pages = 1
        while page <= total_pages:
            url = (
                f"{base_url}/wp-json/wp/v2/posts"
                f"?status=future&per_page=100&page={page}"
                f"&orderby=date&order=asc&_fields=id,date"
            )
            req = urllib.request.Request(
                url, headers={"Authorization": f"Basic {auth}"}
            )
            with urllib.request.urlopen(req, timeout=10) as resp:
                posts = json.loads(resp.read())
                # WordPress reports the number of available pages in this header.
                total_pages = int(resp.headers.get("X-WP-TotalPages") or 1)

            if not posts:
                break

            for p in posts:
                try:
                    # NOTE(review): the `date` field is parsed as-is; it is
                    # assumed to be in the same timezone the scheduler works
                    # in - confirm against the WP site settings.
                    dt = datetime.fromisoformat(p["date"])
                except (KeyError, TypeError, ValueError):
                    # Skip malformed entries instead of discarding the batch.
                    continue
                occupied.add((dt.date().isoformat(), dt.hour))

            page += 1
    except Exception:
        # Best-effort boundary: scheduling must keep working without WP,
        # but the failure should be visible instead of silently ignored.
        log.warning(
            "Could not fetch all future posts from WordPress; "
            "continuing with %d known occupied slot(s)",
            len(occupied),
            exc_info=True,
        )

    return occupied
|
||||
|
||||
|
||||
def _get_last_future_scheduled_date(wp_occupied: set[tuple[str, int]]) -> date | None:
|
||||
"""Return the date of the latest already-scheduled slot (DB + WP)."""
|
||||
today = _today_cet()
|
||||
|
||||
# Latest from local DB
|
||||
with get_conn() as conn:
|
||||
row = conn.execute(
|
||||
"""
|
||||
SELECT COUNT(*) AS cnt
|
||||
SELECT MAX(scheduled_publish_at) AS last_slot
|
||||
FROM articles
|
||||
WHERE scheduled_publish_at >= ? AND scheduled_publish_at < ?
|
||||
AND status NOT IN ('error')
|
||||
WHERE scheduled_publish_at IS NOT NULL
|
||||
AND scheduled_publish_at >= ?
|
||||
AND status NOT IN ('error', 'no_image')
|
||||
""",
|
||||
(date_str + "T00:00:00", date_str + "T23:59:59"),
|
||||
(today.isoformat() + "T00:00:00",),
|
||||
).fetchone()
|
||||
return int(row["cnt"]) if row else 0
|
||||
db_last: date | None = None
|
||||
if row and row["last_slot"]:
|
||||
try:
|
||||
db_last = datetime.fromisoformat(row["last_slot"]).date()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Latest from WP
|
||||
wp_last: date | None = None
|
||||
for d_str, _ in wp_occupied:
|
||||
try:
|
||||
d = date.fromisoformat(d_str)
|
||||
if d >= today and (wp_last is None or d > wp_last):
|
||||
wp_last = d
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if db_last and wp_last:
|
||||
return max(db_last, wp_last)
|
||||
return db_last or wp_last
|
||||
|
||||
|
||||
def _next_free_hour(target_date: date) -> int | None:
|
||||
"""Return first preferred hour that is not yet used on target_date, or None if day is full."""
|
||||
settings = get_settings()
|
||||
max_per_day = settings.pipeline_max_drafts_per_day
|
||||
def _next_free_hour(target_date: date, wp_occupied: set[tuple[str, int]]) -> int | None:
|
||||
"""Return first preferred hour not yet used on target_date (DB + WP), or None if day is full."""
|
||||
hours = _preferred_hours()
|
||||
|
||||
date_str = target_date.isoformat()
|
||||
|
||||
# Hours used in local DB
|
||||
with get_conn() as conn:
|
||||
rows = conn.execute(
|
||||
"""
|
||||
SELECT scheduled_publish_at FROM articles
|
||||
WHERE scheduled_publish_at >= ? AND scheduled_publish_at < ?
|
||||
AND status NOT IN ('error')
|
||||
AND status NOT IN ('error', 'no_image')
|
||||
""",
|
||||
(date_str + "T00:00:00", date_str + "T23:59:59"),
|
||||
).fetchall()
|
||||
|
|
@ -72,68 +128,86 @@ def _next_free_hour(target_date: date) -> int | None:
|
|||
except Exception:
|
||||
pass
|
||||
|
||||
# Hours used in WordPress
|
||||
for d_str, h in wp_occupied:
|
||||
if d_str == date_str:
|
||||
used_hours.add(h)
|
||||
|
||||
for h in hours:
|
||||
if h not in used_hours:
|
||||
return h
|
||||
return None # day is full
|
||||
return None
|
||||
|
||||
|
||||
def suggest_publish_slot(lookahead_days: int = 14) -> str:
|
||||
"""Return a suggested publish datetime string (ISO, CET) for the next free slot.
|
||||
|
||||
Format: 'Mo, 24.03.2026 um 09:00 Uhr'
|
||||
Also updates DB so consecutive calls return different slots.
|
||||
"""
|
||||
today = _today_cet()
|
||||
def _format_slot(d: date, hour: int) -> str:
|
||||
weekday_names = ["Mo", "Di", "Mi", "Do", "Fr", "Sa", "So"]
|
||||
wd = weekday_names[d.weekday()]
|
||||
return f"{wd}, {d.strftime('%d.%m.%Y')} um {hour:02d}:00 Uhr"
|
||||
|
||||
for offset in range(1, lookahead_days + 1):
|
||||
candidate = today + timedelta(days=offset)
|
||||
hour = _next_free_hour(candidate)
|
||||
if hour is not None:
|
||||
wd = weekday_names[candidate.weekday()]
|
||||
return f"{wd}, {candidate.strftime('%d.%m.%Y')} um {hour:02d}:00 Uhr"
|
||||
|
||||
# Fallback: just tomorrow morning
|
||||
def _find_next_free_slot(
|
||||
wp_occupied: set[tuple[str, int]], lookahead_days: int = 30
|
||||
) -> tuple[date, int] | None:
|
||||
"""Find the next free (date, hour) slot, anchored after the last scheduled article."""
|
||||
today = _today_cet()
|
||||
tomorrow = today + timedelta(days=1)
|
||||
hours = _preferred_hours()
|
||||
h = hours[0] if hours else 9
|
||||
wd = weekday_names[tomorrow.weekday()]
|
||||
return f"{wd}, {tomorrow.strftime('%d.%m.%Y')} um {h:02d}:00 Uhr"
|
||||
|
||||
last_date = _get_last_future_scheduled_date(wp_occupied)
|
||||
start_date = last_date if (last_date and last_date >= tomorrow) else tomorrow
|
||||
|
||||
for offset in range(0, lookahead_days + 1):
|
||||
candidate = start_date + timedelta(days=offset)
|
||||
hour = _next_free_hour(candidate, wp_occupied)
|
||||
if hour is not None:
|
||||
return candidate, hour
|
||||
|
||||
return tomorrow, _preferred_hours()[0] if _preferred_hours() else 9
|
||||
|
||||
|
||||
def suggest_publish_slot() -> str:
    """Suggest the next free publish slot as a formatted display string.

    Fetches the slots already occupied in WordPress, asks
    ``_find_next_free_slot`` for a free (date, hour) pair and renders it with
    ``_format_slot``. Nothing is written to the database here.

    Returns:
        The formatted slot. If no slot is found, tomorrow at the first
        preferred hour (09:00 when no preferred hours are configured).
    """
    slot = _find_next_free_slot(_fetch_wp_occupied_slots())
    if not slot:
        # Fallback: tomorrow morning at the first preferred hour.
        fallback_day = _today_cet() + timedelta(days=1)
        fallback_hour = _preferred_hours()[0] if _preferred_hours() else 9
        return _format_slot(fallback_day, fallback_hour)

    slot_date, slot_hour = slot
    return _format_slot(slot_date, slot_hour)
|
||||
|
||||
|
||||
def reserve_publish_slot(article_id: int) -> str:
|
||||
"""Reserve a publish slot for an article and persist it in the DB.
|
||||
|
||||
Returns the suggested publish datetime string.
|
||||
If the article already has a scheduled_publish_at, keep it unchanged.
|
||||
Returns the formatted publish datetime string.
|
||||
"""
|
||||
today = _today_cet()
|
||||
lookahead_days = 14
|
||||
weekday_names = ["Mo", "Di", "Mi", "Do", "Fr", "Sa", "So"]
|
||||
# Check if already has a slot
|
||||
with get_conn() as conn:
|
||||
row = conn.execute(
|
||||
"SELECT scheduled_publish_at FROM articles WHERE id = ?",
|
||||
(article_id,),
|
||||
).fetchone()
|
||||
existing_slot = row["scheduled_publish_at"] if row else None
|
||||
if existing_slot:
|
||||
try:
|
||||
dt = datetime.fromisoformat(existing_slot)
|
||||
return _format_slot(dt.date(), dt.hour)
|
||||
except Exception:
|
||||
pass # invalid slot, re-assign below
|
||||
|
||||
for offset in range(1, lookahead_days + 1):
|
||||
candidate = today + timedelta(days=offset)
|
||||
hour = _next_free_hour(candidate)
|
||||
if hour is not None:
|
||||
# Reserve this slot by writing to the article
|
||||
iso_ts = f"{candidate.isoformat()}T{hour:02d}:00:00"
|
||||
with get_conn() as conn:
|
||||
conn.execute(
|
||||
"UPDATE articles SET scheduled_publish_at = ? WHERE id = ?",
|
||||
(iso_ts, article_id),
|
||||
)
|
||||
wd = weekday_names[candidate.weekday()]
|
||||
return f"{wd}, {candidate.strftime('%d.%m.%Y')} um {hour:02d}:00 Uhr"
|
||||
wp_occupied = _fetch_wp_occupied_slots()
|
||||
result = _find_next_free_slot(wp_occupied, lookahead_days=30)
|
||||
if result:
|
||||
candidate, hour = result
|
||||
else:
|
||||
candidate = _today_cet() + timedelta(days=1)
|
||||
hours = _preferred_hours()
|
||||
hour = hours[0] if hours else 9
|
||||
|
||||
# Fallback
|
||||
tomorrow = today + timedelta(days=1)
|
||||
hours = _preferred_hours()
|
||||
h = hours[0] if hours else 9
|
||||
iso_ts = f"{tomorrow.isoformat()}T{h:02d}:00:00"
|
||||
iso_ts = f"{candidate.isoformat()}T{hour:02d}:00:00"
|
||||
with get_conn() as conn:
|
||||
conn.execute(
|
||||
"UPDATE articles SET scheduled_publish_at = ? WHERE id = ?",
|
||||
(iso_ts, article_id),
|
||||
)
|
||||
wd = weekday_names[tomorrow.weekday()]
|
||||
return f"{wd}, {tomorrow.strftime('%d.%m.%Y')} um {h:02d}:00 Uhr"
|
||||
return _format_slot(candidate, hour)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue