Add user authentication system with schema refactor

BREAKING CHANGE: This commit introduces significant schema changes that require
a fresh database. See migration instructions below.

Changes:
- Added Flask-Login and bcrypt dependencies to pyproject.toml
- Created User model with password hashing methods
- Updated Channel model:
  - Added user_id foreign key relationship
  - Added rss_url field
  - Renamed last_fetched to last_fetched_at
  - Added composite unique index on (user_id, channel_id)
- Updated VideoEntry model:
  - Added video_id, video_url, thumbnail_url, description, published_at fields
  - Renamed link to video_url
  - Added indexes for performance
- Updated feed_parser.py:
  - Enhanced FeedEntry to extract thumbnail, description, published date
  - Added _extract_video_id() method for parsing YouTube URLs
  - Updated save_to_db() to require user_id parameter
  - Parse and store all metadata from RSS feeds
- Generated Alembic migration: a3c56d47f42a

Migration Instructions:
1. Stop all services and remove volumes (WARNING: -v deletes the database volume and all existing data): docker-compose down -v
2. Apply migrations: docker-compose up -d && docker-compose exec app alembic upgrade head
3. Or for local dev (WARNING: deletes the local SQLite database): rm yottob.db && alembic upgrade head

Next Steps (TODO):
- Configure Flask-Login in main.py
- Create login/register/logout routes
- Add @login_required decorators to protected routes
- Update all routes to filter by current_user
- Create auth templates (login.html, register.html)
- Update base.html with auth navigation

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-26 14:24:47 -05:00
parent 9bcd439024
commit 403d65e4ea
5 changed files with 294 additions and 24 deletions

View File

@@ -7,6 +7,7 @@ with filtering capabilities to exclude unwanted content like Shorts.
from datetime import datetime
import feedparser
from typing import Dict, List, Optional
import re
from sqlalchemy.orm import Session
from sqlalchemy.exc import IntegrityError
@@ -17,15 +18,25 @@ from models import Channel, VideoEntry
class FeedEntry:
"""Represents a single entry in a YouTube RSS feed."""
def __init__(self, title: str, link: str):
def __init__(self, title: str, video_url: str, video_id: str,
published_at: datetime, thumbnail_url: Optional[str] = None,
description: Optional[str] = None):
self.title = title
self.link = link
self.video_url = video_url
self.video_id = video_id
self.published_at = published_at
self.thumbnail_url = thumbnail_url
self.description = description
def to_dict(self) -> Dict[str, str]:
def to_dict(self) -> Dict:
"""Convert entry to dictionary."""
return {
"title": self.title,
"link": self.link
"video_url": self.video_url,
"video_id": self.video_id,
"published_at": self.published_at.isoformat(),
"thumbnail_url": self.thumbnail_url,
"description": self.description
}
@@ -62,34 +73,81 @@ class YouTubeFeedParser:
if filter_shorts and "shorts" in entry.link:
continue
# Extract video ID from URL
video_id = self._extract_video_id(entry.link)
if not video_id:
continue
# Get thumbnail URL (YouTube provides this in media:group)
thumbnail_url = None
if hasattr(entry, 'media_thumbnail') and entry.media_thumbnail:
thumbnail_url = entry.media_thumbnail[0]['url']
# Get description
description = None
if hasattr(entry, 'summary'):
description = entry.summary
# Parse published date
published_at = datetime(*entry.published_parsed[:6])
entries.append(FeedEntry(
title=entry.title,
link=entry.link
video_url=entry.link,
video_id=video_id,
published_at=published_at,
thumbnail_url=thumbnail_url,
description=description
))
return {
"feed_title": feed.feed.title,
"feed_link": feed.feed.link,
"rss_url": self.url,
"entries": [entry.to_dict() for entry in entries]
}
def save_to_db(self, db_session: Session, feed_data: Dict) -> Channel:
@staticmethod
def _extract_video_id(url: str) -> Optional[str]:
"""Extract video ID from YouTube URL.
Args:
url: YouTube video URL
Returns:
Video ID or None if not found
"""
# Match patterns like: youtube.com/watch?v=VIDEO_ID
match = re.search(r'[?&]v=([a-zA-Z0-9_-]{11})', url)
if match:
return match.group(1)
# Match patterns like: youtu.be/VIDEO_ID
match = re.search(r'youtu\.be/([a-zA-Z0-9_-]{11})', url)
if match:
return match.group(1)
return None
def save_to_db(self, db_session: Session, feed_data: Dict, user_id: int) -> Channel:
"""Save feed data to the database.
Args:
db_session: SQLAlchemy database session
feed_data: Dictionary containing feed metadata and entries (from fetch_feed)
user_id: ID of the user subscribing to this channel
Returns:
The Channel model instance
This method uses upsert logic:
- Updates existing channel if it exists
- Updates existing channel if it exists for this user
- Creates new channel if it doesn't exist
- Only inserts new video entries (ignores duplicates)
- Only inserts new video entries (ignores duplicates based on video_id and channel_id)
"""
# Get or create channel
# Get or create channel for this user
channel = db_session.query(Channel).filter_by(
user_id=user_id,
channel_id=self.channel_id
).first()
@@ -97,30 +155,43 @@ class YouTubeFeedParser:
# Update existing channel
channel.title = feed_data["feed_title"]
channel.link = feed_data["feed_link"]
channel.last_fetched = datetime.utcnow()
channel.rss_url = feed_data["rss_url"]
channel.last_fetched_at = datetime.utcnow()
else:
# Create new channel
channel = Channel(
user_id=user_id,
channel_id=self.channel_id,
title=feed_data["feed_title"],
link=feed_data["feed_link"],
last_fetched=datetime.utcnow()
rss_url=feed_data["rss_url"],
last_fetched_at=datetime.utcnow()
)
db_session.add(channel)
db_session.flush() # Get the channel ID
# Add video entries (ignore duplicates)
for entry_data in feed_data["entries"]:
# Check if video already exists
# Check if video already exists for this channel
existing = db_session.query(VideoEntry).filter_by(
link=entry_data["link"]
channel_id=channel.id,
video_id=entry_data["video_id"]
).first()
if not existing:
# Parse published_at if it's a string
published_at = entry_data["published_at"]
if isinstance(published_at, str):
published_at = datetime.fromisoformat(published_at.replace('Z', '+00:00'))
video = VideoEntry(
channel_id=channel.id,
video_id=entry_data["video_id"],
title=entry_data["title"],
link=entry_data["link"],
video_url=entry_data["video_url"],
thumbnail_url=entry_data.get("thumbnail_url"),
description=entry_data.get("description"),
published_at=published_at,
created_at=datetime.utcnow()
)
db_session.add(video)