Add async video downloads with yt-dlp and Celery

- Added yt-dlp, celery, and redis dependencies to pyproject.toml
- Extended VideoEntry model with download tracking fields:
  - download_status (enum: pending, downloading, completed, failed)
  - download_path, download_started_at, download_completed_at
  - download_error, file_size
- Created celery_app.py with Redis broker configuration
- Created download_service.py with async download tasks:
  - download_video() task downloads as MP4 format
  - Configured yt-dlp for best MP4 quality with fallback
  - Automatic retries on failure (max 3 attempts)
  - Progress tracking and database updates
- Added Flask API endpoints in main.py:
  - POST /api/download/<video_id> to trigger download
  - GET /api/download/status/<video_id> to check status
  - POST /api/download/batch for bulk downloads
- Generated and applied Alembic migration for new fields
- Created downloads/ directory for video storage
- Updated .gitignore to exclude downloads/ directory
- Updated CLAUDE.md with comprehensive documentation:
  - Redis and Celery setup instructions
  - Download workflow and architecture
  - yt-dlp configuration details
  - New API endpoint examples

Videos are downloaded as MP4 files using Celery workers.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2025-11-26 14:04:30 -05:00
parent 4892bec986
commit 2305dfddb1
9 changed files with 674 additions and 13 deletions

121
main.py
View File

@@ -3,7 +3,8 @@
from flask import Flask, render_template, request, jsonify
from feed_parser import YouTubeFeedParser
from database import init_db, get_db_session
from models import Channel, VideoEntry
from models import Channel, VideoEntry, DownloadStatus
from download_service import download_video, download_videos_batch
app = Flask(__name__)
@@ -129,6 +130,124 @@ def get_history(channel_id: str):
return jsonify({"error": f"Failed to fetch history: {str(e)}"}), 500
@app.route("/api/download/<int:video_id>", methods=["POST"])
def trigger_download(video_id: int):
    """Queue an asynchronous download task for a single video.

    Args:
        video_id: Database ID of the VideoEntry row to download.

    Returns:
        JSON with the Celery task id and a queued confirmation, or a
        404 if no such video exists, or a 500 on unexpected failure.
    """
    try:
        with get_db_session() as session:
            entry = session.query(VideoEntry).filter_by(id=video_id).first()
            # Guard clause: nothing to queue for an unknown video id.
            if entry is None:
                return jsonify({"error": "Video not found"}), 404

            # Hand the work off to a Celery worker; only the id crosses
            # the process boundary, not the ORM object.
            queued = download_video.delay(video_id)
            payload = {
                "video_id": video_id,
                "task_id": queued.id,
                "status": "queued",
                "message": "Download task queued successfully",
            }
            return jsonify(payload)
    except Exception as e:
        return jsonify({"error": f"Failed to queue download: {str(e)}"}), 500
@app.route("/api/download/status/<int:video_id>", methods=["GET"])
def get_download_status(video_id: int):
    """Report the persisted download state of a single video.

    Args:
        video_id: Database ID of the VideoEntry row to inspect.

    Returns:
        JSON with the download status, file path, timestamps, last
        error, and file size, or a 404 if the video does not exist,
        or a 500 on unexpected failure.
    """
    try:
        with get_db_session() as session:
            entry = session.query(VideoEntry).filter_by(id=video_id).first()
            if entry is None:
                return jsonify({"error": "Video not found"}), 404

            # Timestamps may be NULL until a download has started/finished,
            # so serialize them to ISO-8601 only when present.
            started = entry.download_started_at
            finished = entry.download_completed_at
            payload = {
                "video_id": video_id,
                "title": entry.title,
                "download_status": entry.download_status.value,
                "download_path": entry.download_path,
                "download_started_at": started.isoformat() if started else None,
                "download_completed_at": finished.isoformat() if finished else None,
                "download_error": entry.download_error,
                "file_size": entry.file_size,
            }
            return jsonify(payload)
    except Exception as e:
        return jsonify({"error": f"Failed to fetch download status: {str(e)}"}), 500
@app.route("/api/download/batch", methods=["POST"])
def trigger_batch_download():
    """Trigger batch download for multiple videos.

    Query parameters:
        channel_id: Download all matching videos for this channel (optional).
        status: Filter by download status name (default: "pending").

    Request body (alternative to query params):
        video_ids: List of integer video IDs to download.

    Returns:
        JSON with the batch task id and the queued video ids; 400 on an
        invalid status or malformed video_ids; 404 for an unknown channel;
        500 on unexpected failure.
    """
    try:
        with get_db_session() as session:
            # Explicit video_ids in the body take precedence over filters.
            data = request.get_json(silent=True)
            if data and 'video_ids' in data:
                video_ids = data['video_ids']
                # Robustness fix: previously an arbitrary body value was
                # passed straight to the Celery task, surfacing later as a
                # 500 or a broken worker task. Reject malformed input here.
                if not isinstance(video_ids, list) or not all(
                    isinstance(v, int) and not isinstance(v, bool) for v in video_ids
                ):
                    return jsonify({"error": "video_ids must be a list of integers"}), 400
            else:
                # Filter by channel and/or status
                channel_id = request.args.get("channel_id")
                status_str = request.args.get("status", "pending")

                try:
                    status = DownloadStatus(status_str)
                except ValueError:
                    return jsonify({"error": f"Invalid status: {status_str}"}), 400

                query = session.query(VideoEntry).filter_by(download_status=status)

                if channel_id:
                    channel = session.query(Channel).filter_by(
                        channel_id=channel_id
                    ).first()
                    if not channel:
                        return jsonify({"error": "Channel not found"}), 404
                    # Note: VideoEntry.channel_id stores the Channel PK,
                    # not the YouTube channel_id string.
                    query = query.filter_by(channel_id=channel.id)

                videos = query.all()
                video_ids = [v.id for v in videos]

            if not video_ids:
                return jsonify({"message": "No videos to download", "total_queued": 0})

            # Queue one batch task covering every selected video.
            task = download_videos_batch.delay(video_ids)

            return jsonify({
                "task_id": task.id,
                "total_queued": len(video_ids),
                "video_ids": video_ids,
                "message": "Batch download queued successfully"
            })
    except Exception as e:
        return jsonify({"error": f"Failed to queue batch download: {str(e)}"}), 500
def main():
"""CLI entry point for testing feed parser."""
parser = YouTubeFeedParser(DEFAULT_CHANNEL_ID)