Bump version to v1.5.3

This commit is contained in:
Oliver 2025-07-11 09:44:31 +02:00
parent c49864c4aa
commit 4eaef89be8
No known key found for this signature in database
10 changed files with 3098 additions and 19 deletions

View file

@ -0,0 +1,27 @@
# utils/article_extractor.py
import logging

import requests
from bs4 import BeautifulSoup
def extract_full_article(url: str, *, timeout: float = 10, min_words: int = 50) -> str:
    """Download *url* and return the main article text as one string.

    The page is searched for a list of known article containers, in order.
    The first container holding more than *min_words* whitespace-separated
    words wins. If none qualifies, the text of the whole page is returned.

    Args:
        url: Address of the article page.
        timeout: Timeout in seconds passed to ``requests.get``.
        min_words: A container must contain more than this many words to be
            accepted as the article body.

    Returns:
        The extracted text with text nodes joined by single spaces, or an
        empty string if the page could not be fetched or parsed.
    """
    log = logging.getLogger(__name__)

    # (tag name, CSS class); a class of None means "any tag of that name".
    candidates = (
        ("div", "article__text"),   # Promobil
        ("div", "entry-content"),   # WordPress default
        ("article", None),          # generic
    )

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # requests falls back to ISO-8859-1 when the server sends no charset,
        # which garbles non-ASCII text. Hand the raw bytes to BeautifulSoup
        # and only force an encoding when the header actually declared one,
        # so that <meta charset> detection can work otherwise.
        content_type = response.headers.get("content-type", "").lower()
        declared_encoding = response.encoding if "charset" in content_type else None
        soup = BeautifulSoup(
            response.content, "html.parser", from_encoding=declared_encoding
        )

        for tag_name, css_class in candidates:
            # Do not pass class_=None: that can be interpreted as "tag must
            # have no class attribute" instead of "class does not matter".
            if css_class is None:
                element = soup.find(tag_name)
            else:
                element = soup.find(tag_name, class_=css_class)
            if element is None:
                continue

            # Join nodes with a space so adjacent nodes are not fused into
            # one "word" when counting.
            text = element.get_text(" ", strip=True)
            if len(text.split()) > min_words:
                return text

        # Fallback: text of the entire page.
        return soup.get_text(" ", strip=True)
    except requests.RequestException as exc:
        # Best-effort contract: network/HTTP problems yield an empty result.
        log.warning("Could not fetch article %s: %s", url, exc)
        return ""
    except Exception:
        # Keep the original "never raise" behavior, but leave a trace.
        log.exception("Unexpected error while extracting article %s", url)
        return ""

23
utils/article_utils.py Normal file
View file

@ -0,0 +1,23 @@
# utils/article_utils.py
import hashlib
def clean_text(text: str) -> str:
    """Return *text* with leading and trailing whitespace removed."""
    trimmed = text.strip()
    return trimmed
def generate_id(link: str) -> str:
    """Return the hexadecimal MD5 digest of *link* encoded as UTF-8."""
    hasher = hashlib.md5()
    hasher.update(link.encode("utf-8"))
    return hasher.hexdigest()
def categorize_article(text: str) -> str:
    """Return the category of an article.

    Placeholder implementation: *text* is not inspected and every article
    receives the same category.
    """
    # Dummy category
    category = "Allgemein"
    return category
def tag_article(text: str) -> list:
    """Return the tags for an article.

    Placeholder implementation: *text* is not inspected; a new list with
    the same two dummy tags is returned on every call.
    """
    # Dummy tags
    tags = list(("tag1", "tag2"))
    return tags
def summarize_text(text: str, max_length: int = 200) -> str:
    """Return *text* shortened to at most *max_length* characters.

    Text that already fits is returned unchanged. Longer text is cut after
    *max_length* characters and "..." is appended to mark the truncation.

    Args:
        text: The text to summarize.
        max_length: Maximum number of characters kept from *text*.

    Returns:
        The original text, or its first *max_length* characters followed
        by "...".
    """
    # Only add the ellipsis when something was actually cut off; otherwise
    # short (or empty) input would be falsely marked as truncated.
    if len(text) <= max_length:
        return text
    return text[:max_length] + "..."
def rewrite_text(text: str) -> str:
    """Return *text* unchanged.

    Placeholder, e.g. for a GPT-based rewrite to be added later.
    """
    rewritten = text
    return rewritten