"""Fetch articles from the configured RSS feeds and persist them as JSON."""

import json
import os
from datetime import datetime

import feedparser

from .config import Config

# Slug rewriting rules: spaces become hyphens; commas and periods are dropped.
_SLUG_TABLE = str.maketrans({" ": "-", ",": None, ".": None})


def _slugify(title):
    """Return *title* lower-cased with the ``_SLUG_TABLE`` rewrites applied."""
    return title.lower().translate(_SLUG_TABLE)


def _entry_to_article(entry, feed_url):
    """Build the article dict for one parsed feed entry.

    Every entry field is read with a default of ``''`` so that an entry
    lacking a title, summary, publication date or link still yields an
    article instead of raising ``AttributeError``.
    """
    title = getattr(entry, "title", "")
    return {
        "title": title,
        "content": getattr(entry, "summary", ""),
        "date": getattr(entry, "published", ""),
        "slug": _slugify(title),
        # Categories and tags are fixed values, not derived from the entry.
        "categories": ["Technology", "AI and Innovation"],
        "tags": ["AI", "Technology", "Innovation"],
        "url": getattr(entry, "link", ""),
        "source": feed_url,
    }


def fetch_rss_news(feed_url):
    """Fetch news from a single RSS feed.

    Args:
        feed_url: Location handed to ``feedparser.parse``.

    Returns:
        A list with one article dict per feed entry, with the keys
        ``title``, ``content``, ``date``, ``slug``, ``categories``,
        ``tags``, ``url`` and ``source``.
    """
    feed = feedparser.parse(feed_url)
    return [_entry_to_article(entry, feed_url) for entry in feed.entries]


def fetch_all_news():
    """Fetch news from every feed listed in ``Config.RSS_FEEDS``.

    A feed that raises is reported on stdout and skipped, so one failing
    feed does not prevent the remaining feeds from being collected.

    Returns:
        A single list containing the articles of all feeds that succeeded.
    """
    all_articles = []
    for feed_url in Config.RSS_FEEDS:
        try:
            all_articles.extend(fetch_rss_news(feed_url))
        except Exception as e:  # best-effort boundary: report and continue
            print(f"Error fetching from {feed_url}: {e}")
    return all_articles


def _save_articles(directory, prefix, articles):
    """Write *articles* as indented JSON to a timestamped file in *directory*.

    The directory is created if it does not exist. The file is named
    ``<prefix>_<YYYYmmdd_HHMMSS>.json``; the timestamp has one-second
    resolution, so two saves with the same prefix within the same second
    target the same file.

    Returns:
        The path of the file that was written.
    """
    os.makedirs(directory, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # os.path.join inserts the separator only when *directory* lacks one, so
    # the file always lands inside the directory created above.
    filename = os.path.join(directory, f"{prefix}_{timestamp}.json")
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(articles, f, indent=2)
    return filename


def save_raw_news(articles):
    """Save raw news articles under ``Config.RAW_NEWS_PATH``.

    Returns:
        The path of the ``news_<timestamp>.json`` file that was written.
    """
    return _save_articles(Config.RAW_NEWS_PATH, "news", articles)


def save_processed_news(articles):
    """Save processed news articles under ``Config.PROCESSED_NEWS_PATH``.

    Returns:
        The path of the ``processed_news_<timestamp>.json`` file that was
        written.
    """
    return _save_articles(Config.PROCESSED_NEWS_PATH, "processed_news", articles)