67 lines
1.8 KiB
Python
67 lines
1.8 KiB
Python
import feedparser
|
|
import json
|
|
import os
|
|
from datetime import datetime
|
|
from .config import Config
|
|
|
|
|
|
def fetch_rss_news(feed_url):
    """Fetch the entries of one RSS feed as a list of article dicts.

    Args:
        feed_url: Location of the feed. It is passed unchanged to
            ``feedparser.parse`` and stored as each article's ``source``.

    Returns:
        A list with one dict per feed entry, in feed order. Each dict has
        the keys ``title``, ``content``, ``date``, ``slug``, ``categories``,
        ``tags``, ``url`` and ``source``. ``title``, ``content``, ``date``
        and ``url`` fall back to ``""`` when the entry lacks them;
        ``categories`` and ``tags`` are the same fixed values for every
        article.
    """
    feed = feedparser.parse(feed_url)

    # One-pass equivalent of .replace(" ", "-").replace(",", "").replace(".", "")
    # applied to the lower-cased title.
    slug_table = str.maketrans({" ": "-", ",": None, ".": None})

    articles = []
    for entry in feed.entries:
        # Read the title as defensively as the other optional fields: a bare
        # `entry.title` raises AttributeError for a title-less entry, which
        # would abort the loop and lose every article of this feed.
        title = getattr(entry, 'title', '') or ''

        articles.append({
            "title": title,
            "content": getattr(entry, 'summary', ''),
            "date": getattr(entry, 'published', ''),
            "slug": title.lower().translate(slug_table),
            "categories": ["Technology", "AI and Innovation"],
            "tags": ["AI", "Technology", "Innovation"],
            "url": getattr(entry, 'link', ''),
            "source": feed_url,
        })

    return articles
|
|
|
|
|
|
def fetch_all_news():
    """Gather the articles of every feed listed in ``Config.RSS_FEEDS``.

    Feeds are processed in iteration order. If fetching a feed raises, the
    error is printed and that feed is skipped; the remaining feeds are
    still processed.

    Returns:
        A single flat list containing the articles of all feeds that were
        fetched without raising.
    """
    collected = []

    for url in Config.RSS_FEEDS:
        try:
            batch = fetch_rss_news(url)
            collected += batch
        except Exception as err:
            # Best-effort: report the failing feed and carry on.
            print(f"Error fetching from {url}: {str(err)}")

    return collected
|
|
|
|
|
|
def save_raw_news(articles):
    """Write raw news articles to a timestamped JSON file.

    The directory ``Config.RAW_NEWS_PATH`` is created if it does not exist,
    and the file is written inside it as ``news_<YYYYmmdd_HHMMSS>.json``,
    using the local time of the call. The timestamp has one-second
    resolution and the file is opened in ``'w'`` mode, so two calls within
    the same second write to the same file.

    Args:
        articles: JSON-serialisable data, dumped with an indent of 2.

    Returns:
        The path of the file that was written, as a string.
    """
    os.makedirs(Config.RAW_NEWS_PATH, exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Join instead of concatenating: plain concatenation only lands inside
    # the directory created above when RAW_NEWS_PATH ends with a separator.
    # With a trailing separator the result is identical to the old one.
    filename = os.path.join(Config.RAW_NEWS_PATH, f"news_{timestamp}.json")

    # Explicit encoding so the output does not depend on the platform default.
    with open(filename, 'w', encoding="utf-8") as f:
        json.dump(articles, f, indent=2)

    return filename
|
|
|
|
|
|
def save_processed_news(articles):
    """Write processed news articles to a timestamped JSON file.

    The directory ``Config.PROCESSED_NEWS_PATH`` is created if it does not
    exist, and the file is written inside it as
    ``processed_news_<YYYYmmdd_HHMMSS>.json``, using the local time of the
    call. The timestamp has one-second resolution and the file is opened in
    ``'w'`` mode, so two calls within the same second write to the same file.

    Args:
        articles: JSON-serialisable data, dumped with an indent of 2.

    Returns:
        The path of the file that was written, as a string.
    """
    os.makedirs(Config.PROCESSED_NEWS_PATH, exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Join instead of concatenating: plain concatenation only lands inside
    # the directory created above when PROCESSED_NEWS_PATH ends with a
    # separator. With a trailing separator the result is identical to the
    # old one.
    filename = os.path.join(
        Config.PROCESSED_NEWS_PATH, f"processed_news_{timestamp}.json"
    )

    # Explicit encoding so the output does not depend on the platform default.
    with open(filename, 'w', encoding="utf-8") as f:
        json.dump(articles, f, indent=2)

    return filename
|