UI-Anpassung: Feed-Anzeige entfernt, Tabelle verbessert

This commit is contained in:
Oliver 2025-07-05 16:36:29 +02:00
parent fe2191e6c8
commit 2db9544fda
9 changed files with 438 additions and 156 deletions

219
main.py
View file

@ -1,108 +1,169 @@
# main.py
import feedparser
import json
import os
from bs4 import BeautifulSoup
from datetime import datetime
import feedparser
from utils.image_extractor import extract_images_with_metadata
from openai import OpenAI
from dotenv import load_dotenv
import logging
from utils.image_extractor import extract_images_with_metadata
import openai
load_dotenv()
# Log-Verzeichnis sicherstellen
os.makedirs("logs", exist_ok=True)
# Logging konfigurieren
log_dir = "logs"
os.makedirs(log_dir, exist_ok=True)
logging.basicConfig(
filename=os.path.join(log_dir, "rss_tool.log"),
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s"
)
openai.api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI()
# 📝 Logging konfigurieren
logging.basicConfig(filename='logs/rss_tool.log', level=logging.INFO)
ARTICLES_FILE = "data/articles.json"
FEEDS_FILE = "data/feeds.json"
ARTICLES_FILE = "processed_articles.json"
FEEDS_FILE = "feeds.json"
VALID_STATUSES = ["New", "Rewrite", "Process", "Online", "On Hold", "Trash"]
def load_articles():
if os.path.exists(ARTICLES_FILE):
with open(ARTICLES_FILE, "r") as f:
return json.load(f)
return []
def save_articles(articles):
with open(ARTICLES_FILE, "w") as f:
json.dump(articles, f, indent=2)
def load_feeds():
if os.path.exists(FEEDS_FILE):
with open(FEEDS_FILE, "r") as f:
return json.load(f)
return []
if not os.path.exists(FEEDS_FILE):
return []
with open(FEEDS_FILE, "r") as f:
return json.load(f)
def save_feeds(feeds):
with open(FEEDS_FILE, "w") as f:
json.dump(feeds, f, indent=2)
def fetch_and_process_feed(url):
logging.info(f"Abrufen von Feed: {url}")
feed = feedparser.parse(url)
articles = load_articles()
for entry in feed.entries:
if any(a["link"] == entry.link for a in articles):
continue
try:
images = extract_images_with_metadata(entry.link)
except Exception as e:
logging.warning(f"Fehler beim Bildextrakt: {e}")
images = []
article = {
"id": f"{entry.link}",
"title": entry.title,
"summary": entry.summary,
"link": entry.link,
"date": entry.get("published", datetime.now().isoformat()),
"text": entry.summary,
"status": "New",
"images": images,
"tags": []
}
articles.append(article)
save_articles(articles)
def load_articles():
if not os.path.exists(ARTICLES_FILE):
return []
with open(ARTICLES_FILE, "r") as f:
articles = json.load(f)
def process_articles():
# Sicherstellen, dass jeder Artikel einen gültigen Status hat
for article in articles:
if article.get("status") not in VALID_STATUSES:
article["status"] = "New"
return articles
def save_articles(articles):
with open(ARTICLES_FILE, "w") as f:
json.dump(articles, f, indent=2)
def fetch_and_process_feed(feed_url, existing_ids):
feed = feedparser.parse(feed_url)
new_articles = []
for entry in feed.entries:
article_id = entry.get("id") or entry.get("link")
if not article_id or article_id in existing_ids:
continue
title = entry.get("title", "Kein Titel")
date = entry.get("published", datetime.now().isoformat())
summary = entry.get("summary", "")
content = entry.get("content", [{}])[0].get("value") or entry.get("description", "")
soup = BeautifulSoup(content, "html.parser")
clean_text = soup.get_text(" ", strip=True)
images = extract_images_with_metadata(entry.link)
new_articles.append({
"id": article_id,
"title": title,
"date": date,
"summary": summary,
"text": clean_text,
"tags": [],
"status": "New",
"link": entry.get("link", ""),
"images": images
})
return new_articles
def process_articles(existing_ids):
feeds = load_feeds()
for url in feeds:
fetch_and_process_feed(url)
all_articles = load_articles()
new_entries = []
for feed in feeds:
if isinstance(feed, dict):
url = feed.get("url")
else:
url = feed
if not url:
continue
try:
logging.info(f"Lade Feed: {url}")
entries = fetch_and_process_feed(url, existing_ids)
new_entries.extend(entries)
logging.info(f"{len(entries)} neue Artikel gefunden in {url}")
except Exception as e:
logging.error(f"Fehler beim Verarbeiten von {url}: {e}")
# Nur neue Artikel speichern, deren ID noch nicht vorhanden ist
existing_article_ids = set(article["id"] for article in all_articles)
unique_new_entries = [a for a in new_entries if a["id"] not in existing_article_ids]
if unique_new_entries:
all_articles.extend(unique_new_entries)
save_articles(all_articles)
logging.info(f"{len(unique_new_entries)} neue Artikel gespeichert.")
else:
logging.info("Keine neuen Artikel gefunden.")
def rewrite_articles():
logging.info("Starte Umschreiben von Artikeln mit Status 'Rewrite' ...")
articles = load_articles()
updated = False
for article in articles:
if article["status"] != "Rewrite":
continue
try:
prompt = f"Fasse diesen Text neu und interessant zusammen:\n\n{article['summary']}"
response = client.chat.completions.create(
model="gpt-4",
messages=[{"role": "user", "content": prompt}]
)
rewritten = response.choices[0].message.content.strip()
article["text"] = rewritten
article["status"] = "Done"
if article.get("status") == "Rewrite":
try:
logging.info(f"✍️ Umschreiben von: {article['title']}")
prompt = f"Schreibe folgenden Artikel um und fasse ihn verständlich zusammen:\n\n{article['text']}"
response = openai.chat.completions.create(
model="gpt-4",
messages=[
{"role": "system", "content": "Du bist ein professioneller Redakteur."},
{"role": "user", "content": prompt}
]
)
new_text = response.choices[0].message.content.strip()
article["text"] = f"{article['title']}\n\n{new_text}"
article["status"] = "Process"
# Tags generieren
tag_prompt = f"Erstelle passende 3-5 Tags für diesen Text:\n\n{rewritten}"
tag_response = client.chat.completions.create(
model="gpt-4",
messages=[{"role": "user", "content": tag_prompt}]
)
tags = [tag.strip() for tag in tag_response.choices[0].message.content.split(",")]
article["tags"] = tags
tag_prompt = f"Erstelle 3 passende, kurze Stichwörter (Tags) für diesen Artikel:\n\n{new_text}"
tag_response = openai.chat.completions.create(
model="gpt-4",
messages=[
{"role": "system", "content": "Du bist ein Blog-Tag-Generator."},
{"role": "user", "content": tag_prompt}
]
)
tags_raw = tag_response.choices[0].message.content.strip()
tags = [tag.strip(" ,") for tag in tags_raw.replace("\n", ",").split(",") if tag.strip()]
article["tags"] = tags
updated = True
logging.info(f"✅ Artikel '{article['title']}' umgeschrieben.")
except Exception as e:
logging.error(f"❌ Fehler beim Umschreiben von '{article['title']}':\n{e}")
if updated:
save_articles(articles)
# Sicherstellen, dass Bildmetadaten vollständig sind
for img in article.get("images", []):
if "caption" not in img:
img["caption"] = "Kein Bildtitel vorhanden"
if "copyright" not in img:
img["copyright"] = "Unbekannt"
if "copyright_url" not in img:
img["copyright_url"] = "#"
logging.info(f"✅ Artikel umgeschrieben: {article['title']}")
except Exception as e:
logging.error(f"❌ Fehler beim Umschreiben von '{article['title']}': {e}")
save_articles(articles)
logging.info("Alle Artikel mit Status 'Rewrite' wurden verarbeitet.")