Add user authentication system with schema refactor

BREAKING CHANGE: This commit introduces significant schema changes that require
a fresh database. See migration instructions below.

Changes:
- Added Flask-Login and bcrypt dependencies to pyproject.toml
- Created User model with password hashing methods
- Updated Channel model:
  - Added user_id foreign key relationship
  - Added rss_url field
  - Renamed last_fetched to last_fetched_at
  - Added composite unique index on (user_id, channel_id)
- Updated VideoEntry model:
  - Added video_id, video_url, thumbnail_url, description, published_at fields
  - Renamed link to video_url
  - Added indexes for performance
- Updated feed_parser.py:
  - Enhanced FeedEntry to extract thumbnail, description, published date
  - Added _extract_video_id() method for parsing YouTube URLs
  - Updated save_to_db() to require user_id parameter
  - Parse and store all metadata from RSS feeds
- Generated Alembic migration: a3c56d47f42a

Migration Instructions:
1. Stop all services and remove volumes (WARNING: -v deletes the database volume and all existing data): docker-compose down -v
2. Apply migrations: docker-compose up -d && docker-compose exec app alembic upgrade head
3. Or for local dev (WARNING: deletes the local SQLite database): rm yottob.db && alembic upgrade head

Next Steps (TODO):
- Configure Flask-Login in main.py
- Create login/register/logout routes
- Add @login_required decorators to protected routes
- Update all routes to filter by current_user
- Create auth templates (login.html, register.html)
- Update base.html with auth navigation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-26 14:24:47 -05:00
parent 9bcd439024
commit 403d65e4ea
5 changed files with 294 additions and 24 deletions

View File

@@ -7,6 +7,7 @@ with filtering capabilities to exclude unwanted content like Shorts.
from datetime import datetime
import feedparser
from typing import Dict, List, Optional
import re
from sqlalchemy.orm import Session
from sqlalchemy.exc import IntegrityError
@@ -17,15 +18,25 @@ from models import Channel, VideoEntry
class FeedEntry:
"""Represents a single entry in a YouTube RSS feed."""
def __init__(self, title: str, link: str):
def __init__(self, title: str, video_url: str, video_id: str,
published_at: datetime, thumbnail_url: Optional[str] = None,
description: Optional[str] = None):
self.title = title
self.link = link
self.video_url = video_url
self.video_id = video_id
self.published_at = published_at
self.thumbnail_url = thumbnail_url
self.description = description
def to_dict(self) -> Dict[str, str]:
def to_dict(self) -> Dict:
"""Convert entry to dictionary."""
return {
"title": self.title,
"link": self.link
"video_url": self.video_url,
"video_id": self.video_id,
"published_at": self.published_at.isoformat(),
"thumbnail_url": self.thumbnail_url,
"description": self.description
}
@@ -62,34 +73,81 @@ class YouTubeFeedParser:
if filter_shorts and "shorts" in entry.link:
continue
# Extract video ID from URL
video_id = self._extract_video_id(entry.link)
if not video_id:
continue
# Get thumbnail URL (YouTube provides this in media:group)
thumbnail_url = None
if hasattr(entry, 'media_thumbnail') and entry.media_thumbnail:
thumbnail_url = entry.media_thumbnail[0]['url']
# Get description
description = None
if hasattr(entry, 'summary'):
description = entry.summary
# Parse published date
published_at = datetime(*entry.published_parsed[:6])
entries.append(FeedEntry(
title=entry.title,
link=entry.link
video_url=entry.link,
video_id=video_id,
published_at=published_at,
thumbnail_url=thumbnail_url,
description=description
))
return {
"feed_title": feed.feed.title,
"feed_link": feed.feed.link,
"rss_url": self.url,
"entries": [entry.to_dict() for entry in entries]
}
def save_to_db(self, db_session: Session, feed_data: Dict) -> Channel:
@staticmethod
def _extract_video_id(url: str) -> Optional[str]:
"""Extract video ID from YouTube URL.
Args:
url: YouTube video URL
Returns:
Video ID or None if not found
"""
# Match patterns like: youtube.com/watch?v=VIDEO_ID
match = re.search(r'[?&]v=([a-zA-Z0-9_-]{11})', url)
if match:
return match.group(1)
# Match patterns like: youtu.be/VIDEO_ID
match = re.search(r'youtu\.be/([a-zA-Z0-9_-]{11})', url)
if match:
return match.group(1)
return None
def save_to_db(self, db_session: Session, feed_data: Dict, user_id: int) -> Channel:
"""Save feed data to the database.
Args:
db_session: SQLAlchemy database session
feed_data: Dictionary containing feed metadata and entries (from fetch_feed)
user_id: ID of the user subscribing to this channel
Returns:
The Channel model instance
This method uses upsert logic:
- Updates existing channel if it exists
- Updates existing channel if it exists for this user
- Creates new channel if it doesn't exist
- Only inserts new video entries (ignores duplicates)
- Only inserts new video entries (ignores duplicates based on video_id and channel_id)
"""
# Get or create channel
# Get or create channel for this user
channel = db_session.query(Channel).filter_by(
user_id=user_id,
channel_id=self.channel_id
).first()
@@ -97,30 +155,43 @@ class YouTubeFeedParser:
# Update existing channel
channel.title = feed_data["feed_title"]
channel.link = feed_data["feed_link"]
channel.last_fetched = datetime.utcnow()
channel.rss_url = feed_data["rss_url"]
channel.last_fetched_at = datetime.utcnow()
else:
# Create new channel
channel = Channel(
user_id=user_id,
channel_id=self.channel_id,
title=feed_data["feed_title"],
link=feed_data["feed_link"],
last_fetched=datetime.utcnow()
rss_url=feed_data["rss_url"],
last_fetched_at=datetime.utcnow()
)
db_session.add(channel)
db_session.flush() # Get the channel ID
# Add video entries (ignore duplicates)
for entry_data in feed_data["entries"]:
# Check if video already exists
# Check if video already exists for this channel
existing = db_session.query(VideoEntry).filter_by(
link=entry_data["link"]
channel_id=channel.id,
video_id=entry_data["video_id"]
).first()
if not existing:
# Parse published_at if it's a string
published_at = entry_data["published_at"]
if isinstance(published_at, str):
published_at = datetime.fromisoformat(published_at.replace('Z', '+00:00'))
video = VideoEntry(
channel_id=channel.id,
video_id=entry_data["video_id"],
title=entry_data["title"],
link=entry_data["link"],
video_url=entry_data["video_url"],
thumbnail_url=entry_data.get("thumbnail_url"),
description=entry_data.get("description"),
published_at=published_at,
created_at=datetime.utcnow()
)
db_session.add(video)