- Created feed_parser.py module with YouTubeFeedParser and FeedEntry classes - Refactored main.py to focus on Flask routing with two endpoints: - GET / for homepage - GET /api/feed for REST API with query parameters - Updated CLAUDE.md with new architecture documentation - Implemented clean separation between core logic and web server layers 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
69 lines
1.8 KiB
Python
69 lines
1.8 KiB
Python
"""YouTube RSS feed parser module.
|
|
|
|
This module handles fetching and parsing YouTube channel RSS feeds,
|
|
with filtering capabilities to exclude unwanted content like Shorts.
|
|
"""
|
|
|
|
from typing import Dict, List, Optional
from urllib.parse import urlencode

import feedparser
|
|
|
|
|
|
class FeedEntry:
    """Represents a single entry in a YouTube RSS feed.

    Attributes:
        title: Title of the entry.
        link: URL of the entry.
    """

    def __init__(self, title: str, link: str):
        """Store the entry's title and link.

        Args:
            title: Title of the entry.
            link: URL of the entry.
        """
        self.title = title
        self.link = link

    def __repr__(self) -> str:
        """Return a debugging representation showing both fields."""
        return (
            f"{type(self).__name__}"
            f"(title={self.title!r}, link={self.link!r})"
        )

    def to_dict(self) -> Dict[str, str]:
        """Convert entry to dictionary.

        Returns:
            A dict with the keys ``"title"`` and ``"link"``.
        """
        return {
            "title": self.title,
            "link": self.link,
        }
|
|
|
|
|
|
class YouTubeFeedParser:
    """Parser for YouTube channel RSS feeds.

    Attributes:
        channel_id: The channel ID this parser was created for.
        url: The full feed URL built from ``BASE_URL`` and ``channel_id``.
    """

    BASE_URL = "https://www.youtube.com/feeds/videos.xml"

    def __init__(self, channel_id: str):
        """Initialize parser with a YouTube channel ID.

        Args:
            channel_id: The YouTube channel ID to fetch feeds from
        """
        self.channel_id = channel_id
        # Percent-encode the ID so characters such as "&" or "#" cannot add
        # extra query parameters or truncate the URL. Plain IDs are unchanged.
        query = urlencode({"channel_id": channel_id})
        self.url = f"{self.BASE_URL}?{query}"

    @staticmethod
    def _is_short(link: str) -> bool:
        """Return True if *link* contains a ``/shorts/`` path segment."""
        # Match the path segment rather than the bare word, so a video ID
        # that merely contains the letters "shorts" is not filtered out.
        return "/shorts/" in link

    def fetch_feed(self, filter_shorts: bool = True) -> Optional[Dict]:
        """Fetch and parse the RSS feed.

        Args:
            filter_shorts: If True, exclude YouTube Shorts from results

        Returns:
            Dictionary with the keys ``"feed_title"``, ``"feed_link"`` and
            ``"entries"`` (a list of ``{"title", "link"}`` dicts), or None if
            the fetch did not yield an HTTP 200 response. ``"feed_title"`` /
            ``"feed_link"`` are None when the feed does not provide them.
        """
        feed = feedparser.parse(self.url)

        # ``status`` is absent when no HTTP response was received (e.g. a
        # connection error), so read it defensively instead of letting an
        # AttributeError escape; a missing status counts as a failed fetch.
        if getattr(feed, "status", None) != 200:
            return None

        entries = []
        for entry in feed.entries:
            # Tolerate entries that lack a link or title.
            link = getattr(entry, "link", "")
            if filter_shorts and self._is_short(link):
                continue

            entries.append(FeedEntry(
                title=getattr(entry, "title", ""),
                link=link,
            ))

        channel = feed.feed
        return {
            "feed_title": getattr(channel, "title", None),
            "feed_link": getattr(channel, "link", None),
            "entries": [entry.to_dict() for entry in entries],
        }
|