- Added SQLAlchemy 2.0 and Alembic 1.13 dependencies - Created models.py with Channel and VideoEntry ORM models - Created database.py for database configuration and session management - Initialized Alembic migration system with initial migration - Updated feed_parser.py with save_to_db() method for persistence - Updated main.py with database initialization and new API routes: - /api/feed now saves to database by default - /api/channels lists all tracked channels - /api/history/<channel_id> returns video history - Updated .gitignore to exclude database files - Updated CLAUDE.md with comprehensive ORM and migration documentation Database uses SQLite (yottob.db) with upsert logic to avoid duplicates. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
130 lines
3.8 KiB
Python
130 lines
3.8 KiB
Python
"""YouTube RSS feed parser module.
|
|
|
|
This module handles fetching and parsing YouTube channel RSS feeds,
|
|
with filtering capabilities to exclude unwanted content like Shorts.
|
|
"""
|
|
|
|
from datetime import datetime
|
|
import feedparser
|
|
from typing import Dict, List, Optional
|
|
|
|
from sqlalchemy.orm import Session
|
|
from sqlalchemy.exc import IntegrityError
|
|
|
|
from models import Channel, VideoEntry
|
|
|
|
|
|
class FeedEntry:
    """One video item taken from a YouTube RSS feed.

    Holds the two string fields this module keeps for each feed item:
    the video title and the URL pointing at the video.
    """

    def __init__(self, title: str, link: str):
        """Store the entry's title and link.

        Args:
            title: Title of the feed item.
            link: URL of the feed item.
        """
        self.title, self.link = title, link

    def to_dict(self) -> Dict[str, str]:
        """Return the entry as a ``{"title": ..., "link": ...}`` mapping."""
        return dict(title=self.title, link=self.link)
|
|
|
|
|
|
class YouTubeFeedParser:
    """Parser for YouTube channel RSS feeds.

    An instance is bound to one channel ID. ``fetch_feed`` downloads and
    parses that channel's feed into a plain dictionary, and ``save_to_db``
    persists such a dictionary through a SQLAlchemy session.
    """

    BASE_URL = "https://www.youtube.com/feeds/videos.xml"

    def __init__(self, channel_id: str):
        """Initialize parser with a YouTube channel ID.

        Args:
            channel_id: The YouTube channel ID to fetch feeds from
        """
        self.channel_id = channel_id
        self.url = f"{self.BASE_URL}?channel_id={channel_id}"

    def fetch_feed(self, filter_shorts: bool = True) -> Optional[Dict]:
        """Fetch and parse the RSS feed.

        Args:
            filter_shorts: If True, exclude entries whose link contains
                "shorts" from the results

        Returns:
            Dictionary with ``feed_title``, ``feed_link`` and ``entries``
            (a list of ``{"title", "link"}`` dicts), or None if the feed
            could not be fetched with an HTTP 200 response
        """
        feed = feedparser.parse(self.url)

        # feedparser only sets ``status`` when an HTTP response was actually
        # received; on a connection failure the attribute is missing, so a
        # plain ``feed.status`` would raise instead of reporting the failure.
        if getattr(feed, "status", None) != 200:
            return None

        entries = [
            FeedEntry(title=entry.title, link=entry.link)
            for entry in feed.entries
            if not (filter_shorts and "shorts" in entry.link)
        ]

        return {
            "feed_title": feed.feed.title,
            "feed_link": feed.feed.link,
            "entries": [entry.to_dict() for entry in entries],
        }

    def save_to_db(self, db_session: "Session", feed_data: Dict) -> "Channel":
        """Save feed data to the database.

        Args:
            db_session: SQLAlchemy database session
            feed_data: Dictionary containing feed metadata and entries
                (from fetch_feed)

        Returns:
            The Channel model instance

        Raises:
            IntegrityError: If the database rejects the write; the session
                is rolled back before the exception is re-raised.

        This method uses upsert logic:
        - Updates existing channel if it exists
        - Creates new channel if it doesn't exist
        - Only inserts new video entries (ignores duplicates)
        """
        # One naive-UTC timestamp for the whole call, so the channel's
        # last_fetched and the created_at of new videos agree.
        now = datetime.utcnow()

        try:
            # Get or create channel
            channel = (
                db_session.query(Channel)
                .filter_by(channel_id=self.channel_id)
                .first()
            )

            if channel is not None:
                # Update existing channel
                channel.title = feed_data["feed_title"]
                channel.link = feed_data["feed_link"]
                channel.last_fetched = now
            else:
                # Create new channel
                channel = Channel(
                    channel_id=self.channel_id,
                    title=feed_data["feed_title"],
                    link=feed_data["feed_link"],
                    last_fetched=now,
                )
                db_session.add(channel)
                db_session.flush()  # Get the channel ID

            # Links already handled in this call. The per-link query below
            # only sees rows added earlier in this loop if the session
            # autoflushes, so repeated links are also tracked locally.
            seen_links = set()

            # Add video entries (ignore duplicates)
            for entry_data in feed_data["entries"]:
                link = entry_data["link"]
                if link in seen_links:
                    continue
                seen_links.add(link)

                # Check if video already exists
                existing = (
                    db_session.query(VideoEntry).filter_by(link=link).first()
                )
                if existing is None:
                    db_session.add(
                        VideoEntry(
                            channel_id=channel.id,
                            title=entry_data["title"],
                            link=link,
                            created_at=now,
                        )
                    )

            db_session.commit()
        except IntegrityError:
            # Leave the session usable for the caller instead of stuck in a
            # failed transaction, then surface the original error.
            db_session.rollback()
            raise

        return channel
|