Add async video downloads with yt-dlp and Celery

- Added yt-dlp, celery, and redis dependencies to pyproject.toml
- Extended VideoEntry model with download tracking fields:
  - download_status (enum: pending, downloading, completed, failed)
  - download_path, download_started_at, download_completed_at
  - download_error, file_size
- Created celery_app.py with Redis broker configuration
- Created download_service.py with async download tasks:
  - download_video() task downloads as MP4 format
  - Configured yt-dlp for best MP4 quality with fallback
  - Automatic retries on failure (max 3 attempts)
  - Progress tracking and database updates
- Added Flask API endpoints in main.py:
  - POST /api/download/<video_id> to trigger download
  - GET /api/download/status/<video_id> to check status
  - POST /api/download/batch for bulk downloads
- Generated and applied Alembic migration for new fields
- Created downloads/ directory for video storage
- Updated .gitignore to exclude downloads/ directory
- Updated CLAUDE.md with comprehensive documentation:
  - Redis and Celery setup instructions
  - Download workflow and architecture
  - yt-dlp configuration details
  - New API endpoint examples

Videos are downloaded as MP4 files using Celery workers.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-26 14:04:30 -05:00
parent 4892bec986
commit 2305dfddb1
9 changed files with 674 additions and 13 deletions

121
main.py
View File

@@ -3,7 +3,8 @@
from flask import Flask, render_template, request, jsonify
from feed_parser import YouTubeFeedParser
from database import init_db, get_db_session
from models import Channel, VideoEntry
from models import Channel, VideoEntry, DownloadStatus
from download_service import download_video, download_videos_batch
app = Flask(__name__)
@@ -129,6 +130,124 @@ def get_history(channel_id: str):
return jsonify({"error": f"Failed to fetch history: {str(e)}"}), 500
@app.route("/api/download/<int:video_id>", methods=["POST"])
def trigger_download(video_id: int):
    """Queue an asynchronous download task for a single video.

    Args:
        video_id: Database ID of the VideoEntry row to download.

    Returns:
        JSON with the Celery task id and a queued confirmation, or a
        404 if no such video exists, or a 500 on unexpected failure.
    """
    try:
        with get_db_session() as session:
            entry = session.query(VideoEntry).filter_by(id=video_id).first()
            # Guard clause: nothing to queue for an unknown video id.
            if entry is None:
                return jsonify({"error": "Video not found"}), 404

            # Hand the work off to a Celery worker; only the id crosses
            # the process boundary, not the ORM object.
            queued = download_video.delay(video_id)
            payload = {
                "video_id": video_id,
                "task_id": queued.id,
                "status": "queued",
                "message": "Download task queued successfully",
            }
            return jsonify(payload)
    except Exception as e:
        return jsonify({"error": f"Failed to queue download: {str(e)}"}), 500
@app.route("/api/download/status/<int:video_id>", methods=["GET"])
def get_download_status(video_id: int):
    """Report the persisted download state of a single video.

    Args:
        video_id: Database ID of the VideoEntry row to inspect.

    Returns:
        JSON with the download status, file path, timestamps, last
        error, and file size, or a 404 if the video does not exist,
        or a 500 on unexpected failure.
    """
    try:
        with get_db_session() as session:
            entry = session.query(VideoEntry).filter_by(id=video_id).first()
            if entry is None:
                return jsonify({"error": "Video not found"}), 404

            # Timestamps may be NULL until a download has started/finished,
            # so serialize them to ISO-8601 only when present.
            started = entry.download_started_at
            finished = entry.download_completed_at
            payload = {
                "video_id": video_id,
                "title": entry.title,
                "download_status": entry.download_status.value,
                "download_path": entry.download_path,
                "download_started_at": started.isoformat() if started else None,
                "download_completed_at": finished.isoformat() if finished else None,
                "download_error": entry.download_error,
                "file_size": entry.file_size,
            }
            return jsonify(payload)
    except Exception as e:
        return jsonify({"error": f"Failed to fetch download status: {str(e)}"}), 500
@app.route("/api/download/batch", methods=["POST"])
def trigger_batch_download():
    """Trigger batch download for multiple videos.

    Query parameters:
        channel_id: Download all matching videos for this channel (optional).
        status: Filter by download status name (default: "pending").

    Request body (alternative to query params):
        video_ids: List of integer video IDs to download.

    Returns:
        JSON with the batch task id and the queued video ids; 400 on an
        invalid status or malformed video_ids; 404 for an unknown channel;
        500 on unexpected failure.
    """
    try:
        with get_db_session() as session:
            # Explicit video_ids in the body take precedence over filters.
            data = request.get_json(silent=True)
            if data and 'video_ids' in data:
                video_ids = data['video_ids']
                # Robustness fix: previously an arbitrary body value was
                # passed straight to the Celery task, surfacing later as a
                # 500 or a broken worker task. Reject malformed input here.
                if not isinstance(video_ids, list) or not all(
                    isinstance(v, int) and not isinstance(v, bool) for v in video_ids
                ):
                    return jsonify({"error": "video_ids must be a list of integers"}), 400
            else:
                # Filter by channel and/or status
                channel_id = request.args.get("channel_id")
                status_str = request.args.get("status", "pending")

                try:
                    status = DownloadStatus(status_str)
                except ValueError:
                    return jsonify({"error": f"Invalid status: {status_str}"}), 400

                query = session.query(VideoEntry).filter_by(download_status=status)

                if channel_id:
                    channel = session.query(Channel).filter_by(
                        channel_id=channel_id
                    ).first()
                    if not channel:
                        return jsonify({"error": "Channel not found"}), 404
                    # Note: VideoEntry.channel_id stores the Channel PK,
                    # not the YouTube channel_id string.
                    query = query.filter_by(channel_id=channel.id)

                videos = query.all()
                video_ids = [v.id for v in videos]

            if not video_ids:
                return jsonify({"message": "No videos to download", "total_queued": 0})

            # Queue one batch task covering every selected video.
            task = download_videos_batch.delay(video_ids)

            return jsonify({
                "task_id": task.id,
                "total_queued": len(video_ids),
                "video_ids": video_ids,
                "message": "Batch download queued successfully"
            })
    except Exception as e:
        return jsonify({"error": f"Failed to queue batch download: {str(e)}"}), 500
def main():
"""CLI entry point for testing feed parser."""
parser = YouTubeFeedParser(DEFAULT_CHANNEL_ID)