Harden API uptime with systemd ownership guard and watchdog
This commit is contained in:
parent
ab6ad85db7
commit
b089dc1639
10 changed files with 93 additions and 7 deletions
|
|
@ -3,6 +3,16 @@ from pathlib import Path
|
|||
|
||||
BASE_DIR = Path(__file__).resolve().parent.parent
|
||||
DB_PATH = BASE_DIR / "data" / "staysense.db"
|
||||
REQUIRED_TABLES = {
|
||||
"spot",
|
||||
"community_signal",
|
||||
"osm_poi",
|
||||
"osm_zone",
|
||||
"osm_road",
|
||||
"open_data_event",
|
||||
"data_source_state",
|
||||
"admin_user",
|
||||
}
|
||||
|
||||
|
||||
def get_conn() -> sqlite3.Connection:
|
||||
|
|
@ -21,8 +31,7 @@ def init_db() -> None:
|
|||
except sqlite3.OperationalError:
|
||||
# Some deployments run with read-only db mounts; continue without WAL.
|
||||
pass
|
||||
conn.executescript(
|
||||
"""
|
||||
schema_sql = """
|
||||
CREATE TABLE IF NOT EXISTS spot (
|
||||
id TEXT PRIMARY KEY,
|
||||
lat REAL NOT NULL,
|
||||
|
|
@ -134,4 +143,14 @@ def init_db() -> None:
|
|||
CREATE INDEX IF NOT EXISTS idx_open_data_event_source
|
||||
ON open_data_event (source);
|
||||
"""
|
||||
)
|
||||
try:
|
||||
conn.executescript(schema_sql)
|
||||
except sqlite3.OperationalError as exc:
|
||||
if "readonly" not in str(exc).lower():
|
||||
raise
|
||||
# In read-only mode, continue if schema is already present.
|
||||
rows = conn.execute("SELECT name FROM sqlite_master WHERE type='table'").fetchall()
|
||||
existing = {str(row["name"]) for row in rows}
|
||||
missing = REQUIRED_TABLES - existing
|
||||
if missing:
|
||||
raise
|
||||
|
|
|
|||
10
deploy/scripts/staysense-watchdog.sh
Executable file
10
deploy/scripts/staysense-watchdog.sh
Executable file
|
|
@ -0,0 +1,10 @@
|
|||
#!/usr/bin/env bash
# StaySense API watchdog: probe the health endpoint once and restart
# staysense-api.service when the probe fails. The script performs a single
# check per invocation; periodic execution is left to the caller.
#
# Environment overrides:
#   HEALTH_URL       URL to probe (default: http://127.0.0.1:8787/health)
#   TIMEOUT_SECONDS  Total time budget for the curl request (default: 5)
set -euo pipefail

HEALTH_URL="${HEALTH_URL:-http://127.0.0.1:8787/health}"
TIMEOUT_SECONDS="${TIMEOUT_SECONDS:-5}"

# -f makes curl fail on HTTP error statuses; -sS stays quiet but still reports
# errors on stderr. The response body is discarded; only the exit status matters.
if ! curl -fsS --max-time "${TIMEOUT_SECONDS}" "${HEALTH_URL}" >/dev/null; then
  # Logging is best-effort: under `set -e` a failing `logger` would otherwise
  # abort the script before the restart below is ever attempted.
  logger -t staysense-watchdog "healthcheck failed for ${HEALTH_URL}, restarting staysense-api.service" || true
  systemctl restart staysense-api.service
fi
|
||||
|
|
@ -5,11 +5,15 @@ Wants=network-online.target
|
|||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=www-data
|
||||
Group=www-data
|
||||
User=staysense
|
||||
Group=staysense
|
||||
WorkingDirectory=/opt/staysense/backend
|
||||
Environment=STAYSENSE_SERVER_SALT=CHANGE_ME
|
||||
Environment=STAYSENSE_SIGNAL_COOLDOWN_HOURS=24
|
||||
UMask=0002
|
||||
PermissionsStartOnly=true
|
||||
ExecStartPre=/usr/bin/install -d -o staysense -g staysense -m 2775 /opt/staysense/data
|
||||
ExecStartPre=/bin/sh -c '/usr/bin/chown -f staysense:staysense /opt/staysense/data/staysense.db /opt/staysense/data/staysense.db-wal /opt/staysense/data/staysense.db-shm || true'
|
||||
ExecStart=/usr/bin/python3 /opt/staysense/backend/server.py
|
||||
Restart=always
|
||||
RestartSec=3
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@ Wants=network-online.target
|
|||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
User=www-data
|
||||
Group=www-data
|
||||
User=staysense
|
||||
Group=staysense
|
||||
WorkingDirectory=/opt/staysense/backend
|
||||
UMask=0002
|
||||
ExecStart=/usr/bin/python3 /opt/staysense/backend/run_import_jobs.py --config /opt/staysense/docs/open_data_sources_nrw_live.json --prune-legacy
|
||||
|
|
|
|||
8
deploy/systemd/staysense-watchdog.service
Normal file
8
deploy/systemd/staysense-watchdog.service
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
# Runs the StaySense health-check script once per activation.
[Unit]
|
||||
Description=StaySense API Health Watchdog
|
||||
# Order this unit after network-online.target ...
After=network-online.target
|
||||
# ... and request that target so the ordering above has something to wait for.
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
# oneshot: the script runs to completion and exits; it is not a long-running daemon.
Type=oneshot
|
||||
# No User=/Group= is set here, so the script runs with the service manager's
# default credentials. NOTE(review): presumably root, which the script's
# `systemctl restart` call needs - confirm for the target host.
ExecStart=/usr/bin/env bash /opt/staysense/deploy/scripts/staysense-watchdog.sh
|
||||
10
deploy/systemd/staysense-watchdog.timer
Normal file
10
deploy/systemd/staysense-watchdog.timer
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
# Periodically activates the StaySense API health watchdog service.
[Unit]
Description=Run StaySense API Watchdog every minute

[Timer]
# First activation 90 seconds after boot.
OnBootSec=90s
# Afterwards, fire again 60 seconds after the watchdog service was last activated.
OnUnitActiveSec=60s
# systemd's default accuracy window is one minute, which would let the gap
# between checks stretch to roughly two minutes; tighten it so the cadence
# actually matches "every minute".
AccuracySec=1s
Unit=staysense-watchdog.service

[Install]
WantedBy=timers.target
|
||||
|
|
@ -13,6 +13,7 @@ Install:
|
|||
```bash
|
||||
sudo apt update
|
||||
sudo apt install -y python3 nginx
|
||||
sudo useradd --system --create-home --shell /usr/sbin/nologin staysense || true
|
||||
```
|
||||
|
||||
## 2. Code bereitstellen
|
||||
|
|
@ -21,6 +22,9 @@ sudo apt install -y python3 nginx
|
|||
sudo mkdir -p /opt/staysense
|
||||
sudo chown -R $USER:$USER /opt/staysense
|
||||
git clone <REPO_URL> /opt/staysense
|
||||
sudo mkdir -p /opt/staysense/data
|
||||
sudo chown -R staysense:staysense /opt/staysense/data
|
||||
sudo chmod 2775 /opt/staysense/data
|
||||
```
|
||||
|
||||
## 3. Initialisierung
|
||||
|
|
@ -60,8 +64,11 @@ sudo systemctl status staysense-api.service
|
|||
```bash
|
||||
sudo cp /opt/staysense/deploy/systemd/staysense-import.service /etc/systemd/system/
|
||||
sudo cp /opt/staysense/deploy/systemd/staysense-import.timer /etc/systemd/system/
|
||||
sudo cp /opt/staysense/deploy/systemd/staysense-watchdog.service /etc/systemd/system/
|
||||
sudo cp /opt/staysense/deploy/systemd/staysense-watchdog.timer /etc/systemd/system/
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable --now staysense-import.timer
|
||||
sudo systemctl enable --now staysense-watchdog.timer
|
||||
sudo systemctl list-timers | grep staysense
|
||||
```
|
||||
|
||||
|
|
@ -99,4 +106,5 @@ sudo certbot --nginx -d staysense.example.com
|
|||
curl -s http://127.0.0.1:8787/health
|
||||
sudo journalctl -u staysense-api.service -f
|
||||
sudo journalctl -u staysense-import.service -n 100
|
||||
sudo journalctl -u staysense-watchdog.service -n 50
|
||||
```
|
||||
|
|
|
|||
|
|
@ -34,6 +34,21 @@ Health check:
|
|||
curl -s http://127.0.0.1:8787/health
|
||||
```
|
||||
|
||||
Watchdog prüfen:
|
||||
|
||||
```bash
|
||||
sudo systemctl status staysense-watchdog.timer --no-pager
|
||||
sudo journalctl -u staysense-watchdog.service -n 80 --no-pager
|
||||
```
|
||||
|
||||
## DB Read-Only Sofortfix
|
||||
|
||||
```bash
|
||||
sudo chown -R staysense:staysense /opt/staysense/data
|
||||
sudo chmod 2775 /opt/staysense/data
|
||||
sudo systemctl restart staysense-api.service
|
||||
```
|
||||
|
||||
## Backup
|
||||
|
||||
```bash
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ Zielplattform aktuell: Hetzner + CloudPanel + Nginx + systemd.
|
|||
- App-Code: `/opt/staysense`
|
||||
- API-Service: `staysense-api.service`
|
||||
- Import-Timer: `staysense-import.timer`
|
||||
- API-Watchdog: `staysense-watchdog.timer`
|
||||
- Frontend-Root: `/home/staysense-site/htdocs/staysense.vanityontour.de/`
|
||||
|
||||
## Rollout (vereinfacht)
|
||||
|
|
@ -17,12 +18,14 @@ git pull --ff-only
|
|||
rsync -a --delete /opt/staysense/src/ /home/staysense-site/htdocs/staysense.vanityontour.de/
|
||||
systemctl restart staysense-api.service
|
||||
nginx -t && systemctl reload nginx
|
||||
systemctl restart staysense-watchdog.timer
|
||||
```
|
||||
|
||||
## Pflichtchecks
|
||||
|
||||
```bash
|
||||
systemctl is-active staysense-api.service
|
||||
systemctl is-active staysense-watchdog.timer
|
||||
curl -s -L https://staysense.vanityontour.de/api/health
|
||||
```
|
||||
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
```bash
|
||||
systemctl status staysense-api.service --no-pager
|
||||
systemctl status staysense-import.timer --no-pager
|
||||
systemctl status staysense-watchdog.timer --no-pager
|
||||
curl -s https://staysense.vanityontour.de/api/health
|
||||
```
|
||||
|
||||
|
|
@ -13,6 +14,7 @@ curl -s https://staysense.vanityontour.de/api/health
|
|||
```bash
|
||||
journalctl -u staysense-api.service --no-pager -n 120
|
||||
journalctl -u staysense-import.service --no-pager -n 120
|
||||
journalctl -u staysense-watchdog.service --no-pager -n 120
|
||||
```
|
||||
|
||||
## Häufige Fehlerbilder
|
||||
|
|
@ -25,6 +27,12 @@ journalctl -u staysense-import.service --no-pager -n 120
|
|||
- Datenverzeichnisrechte prüfen
|
||||
- Service-User und Besitzrechte prüfen
|
||||
|
||||
```bash
|
||||
chown -R staysense:staysense /opt/staysense/data
|
||||
chmod 2775 /opt/staysense/data
|
||||
systemctl restart staysense-api.service
|
||||
```
|
||||
|
||||
3. Importdaten veraltet
|
||||
- Timer-Status prüfen
|
||||
- Import-Service manuell starten
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue