Files
yottob/main.py
Ryan Chen 2305dfddb1 Add async video downloads with yt-dlp and Celery
- Added yt-dlp, celery, and redis dependencies to pyproject.toml
- Extended VideoEntry model with download tracking fields:
  - download_status (enum: pending, downloading, completed, failed)
  - download_path, download_started_at, download_completed_at
  - download_error, file_size
- Created celery_app.py with Redis broker configuration
- Created download_service.py with async download tasks:
  - download_video() task downloads videos in MP4 format
  - Configured yt-dlp for best MP4 quality with fallback
  - Automatic retries on failure (max 3 attempts)
  - Progress tracking and database updates
- Added Flask API endpoints in main.py:
  - POST /api/download/<video_id> to trigger download
  - GET /api/download/status/<video_id> to check status
  - POST /api/download/batch for bulk downloads
- Generated and applied Alembic migration for new fields
- Created downloads/ directory for video storage
- Updated .gitignore to exclude downloads/ directory
- Updated CLAUDE.md with comprehensive documentation:
  - Redis and Celery setup instructions
  - Download workflow and architecture
  - yt-dlp configuration details
  - New API endpoint examples

Videos are downloaded as MP4 files using Celery workers.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-26 14:04:30 -05:00

270 lines
8.7 KiB
Python

"""Flask web application for YouTube RSS feed parsing."""
from flask import Flask, render_template, request, jsonify
from feed_parser import YouTubeFeedParser
from database import init_db, get_db_session
from models import Channel, VideoEntry, DownloadStatus
from download_service import download_video, download_videos_batch
# Flask application object; the route decorators below register handlers on it.
app = Flask(__name__)

# Default channel ID for demonstration
DEFAULT_CHANNEL_ID = "UCtTWOND3uyl4tVc_FarDmpw"

# Initialize database on app startup.
# NOTE: this runs at import time, so importing this module calls init_db().
with app.app_context():
    init_db()
@app.route("/", methods=["GET"])
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    page = render_template("index.html")
    return page
@app.route("/api/feed", methods=["GET"])
def get_feed():
    """Fetch a YouTube channel feed and optionally persist it.

    Query parameters:
        channel_id: YouTube channel ID (falls back to DEFAULT_CHANNEL_ID)
        filter_shorts: "true" to filter out Shorts (default: true)
        save: "true" to save the fetched feed to the database (default: true)

    Returns:
        JSON response with the feed data, or a JSON error with status 500
        when the fetch returns nothing or saving raises.
    """
    params = request.args
    channel_id = params.get("channel_id", DEFAULT_CHANNEL_ID)
    # Flags are only on when the value is literally "true" (case-insensitive).
    filter_shorts = params.get("filter_shorts", "true").lower() == "true"
    save_to_db = params.get("save", "true").lower() == "true"

    feed_parser = YouTubeFeedParser(channel_id)
    feed = feed_parser.fetch_feed(filter_shorts=filter_shorts)
    if feed is None:
        return jsonify({"error": "Failed to fetch feed"}), 500

    # Nothing to persist: hand the feed straight back.
    if not save_to_db:
        return jsonify(feed)

    try:
        with get_db_session() as session:
            feed_parser.save_to_db(session, feed)
    except Exception as exc:
        return jsonify({"error": f"Failed to save to database: {exc!s}"}), 500

    return jsonify(feed)
@app.route("/api/channels", methods=["GET"])
def get_channels():
    """List every tracked channel.

    Returns:
        JSON response with a "channels" list, one summary dict per Channel
        row, or a JSON error with status 500 if the query raises.
    """

    def summarize(ch):
        # Shape of a single channel entry in the response.
        return {
            "id": ch.id,
            "channel_id": ch.channel_id,
            "title": ch.title,
            "link": ch.link,
            "last_fetched": ch.last_fetched.isoformat(),
            "video_count": len(ch.videos),
        }

    try:
        with get_db_session() as session:
            rows = session.query(Channel).all()
            # Build the payload while the session is still open, since
            # summarize() reads attributes of the loaded rows.
            summaries = [summarize(row) for row in rows]
            return jsonify({"channels": summaries})
    except Exception as exc:
        return jsonify({"error": f"Failed to fetch channels: {exc!s}"}), 500
@app.route("/api/history/<channel_id>", methods=["GET"])
def get_history(channel_id: str):
    """API endpoint to get video history for a specific channel.

    Args:
        channel_id: YouTube channel ID

    Query parameters:
        limit: Maximum number of videos to return (default: 50). Values that
            are not integers, or that are negative, fall back to the default.

    Returns:
        JSON response with channel info and video history (newest
        ``created_at`` first); 404 if the channel is not in the database,
        500 if the lookup raises.
    """
    default_limit = 50
    try:
        limit = int(request.args.get("limit", default_limit))
    except ValueError:
        limit = default_limit
    # A negative page size is never meaningful, and passing it through to the
    # query is backend-dependent (rejected by some databases, treated as
    # "no limit" by others). Treat it like any other invalid value.
    if limit < 0:
        limit = default_limit

    try:
        with get_db_session() as session:
            channel = session.query(Channel).filter_by(
                channel_id=channel_id
            ).first()
            if not channel:
                return jsonify({"error": "Channel not found"}), 404

            # Videos are keyed by the channel's database id, not the
            # YouTube channel ID string used in the URL.
            videos = session.query(VideoEntry).filter_by(
                channel_id=channel.id
            ).order_by(VideoEntry.created_at.desc()).limit(limit).all()

            return jsonify({
                "channel": {
                    "channel_id": channel.channel_id,
                    "title": channel.title,
                    "link": channel.link,
                    "last_fetched": channel.last_fetched.isoformat()
                },
                "videos": [video.to_dict() for video in videos],
                # Count of all videos on the channel, independent of `limit`.
                "total_videos": len(channel.videos)
            })
    except Exception as e:
        return jsonify({"error": f"Failed to fetch history: {str(e)}"}), 500
@app.route("/api/download/<int:video_id>", methods=["POST"])
def trigger_download(video_id: int):
    """Queue a download task for one video.

    Args:
        video_id: Database ID of the VideoEntry

    Returns:
        JSON response with the queued task's id; 404 if no VideoEntry has
        this id, 500 if the lookup or queueing raises.
    """
    try:
        with get_db_session() as session:
            entry = session.query(VideoEntry).filter_by(id=video_id).first()
            if not entry:
                return jsonify({"error": "Video not found"}), 404

            # Hand the work to the download task; only its id is reported back.
            queued = download_video.delay(video_id)
            response = {
                "video_id": video_id,
                "task_id": queued.id,
                "status": "queued",
                "message": "Download task queued successfully",
            }
            return jsonify(response)
    except Exception as exc:
        return jsonify({"error": f"Failed to queue download: {exc!s}"}), 500
@app.route("/api/download/status/<int:video_id>", methods=["GET"])
def get_download_status(video_id: int):
    """Report the download state stored on a video.

    Args:
        video_id: Database ID of the VideoEntry

    Returns:
        JSON response with the video's download fields; 404 if no VideoEntry
        has this id, 500 if the lookup raises.
    """

    def iso_or_none(stamp):
        # Timestamps may be unset; serialize those as null.
        return stamp.isoformat() if stamp else None

    try:
        with get_db_session() as session:
            entry = session.query(VideoEntry).filter_by(id=video_id).first()
            if not entry:
                return jsonify({"error": "Video not found"}), 404

            report = {
                "video_id": video_id,
                "title": entry.title,
                "download_status": entry.download_status.value,
                "download_path": entry.download_path,
                "download_started_at": iso_or_none(entry.download_started_at),
                "download_completed_at": iso_or_none(entry.download_completed_at),
                "download_error": entry.download_error,
                "file_size": entry.file_size,
            }
            return jsonify(report)
    except Exception as exc:
        return jsonify({"error": f"Failed to fetch download status: {exc!s}"}), 500
def _is_valid_video_id_list(value):
    """Return True when *value* is a list made up solely of integers."""
    # bool is a subclass of int, so exclude it explicitly.
    return isinstance(value, list) and all(
        isinstance(item, int) and not isinstance(item, bool) for item in value
    )


@app.route("/api/download/batch", methods=["POST"])
def trigger_batch_download():
    """Trigger batch download for multiple videos.

    Query parameters:
        channel_id: Download all pending videos for this channel (optional)
        status: Filter by download status (default: pending)

    Request body (alternative to query params):
        video_ids: List of integer video IDs to download. When the JSON body
            is an object containing this key, the query parameters are
            ignored.

    Returns:
        JSON response with batch task information. 400 if ``video_ids`` is
        not a list of integers or ``status`` is not a valid download status,
        404 if ``channel_id`` is given but unknown, 500 on unexpected errors.
    """
    try:
        with get_db_session() as session:
            # Check if video_ids provided in request body. Only a JSON object
            # can carry the key; other JSON values (string, list, number)
            # are treated as "no body" and fall through to the query params.
            data = request.get_json(silent=True)
            if isinstance(data, dict) and 'video_ids' in data:
                video_ids = data['video_ids']
                # The body is untrusted input that is forwarded to the
                # download task, so reject anything but a list of ints.
                if not _is_valid_video_id_list(video_ids):
                    return jsonify({
                        "error": "video_ids must be a list of integers"
                    }), 400
            else:
                # Filter by channel and/or status
                channel_id = request.args.get("channel_id")
                status_str = request.args.get("status", "pending")
                try:
                    status = DownloadStatus(status_str)
                except ValueError:
                    return jsonify({"error": f"Invalid status: {status_str}"}), 400

                query = session.query(VideoEntry).filter_by(download_status=status)
                if channel_id:
                    channel = session.query(Channel).filter_by(
                        channel_id=channel_id
                    ).first()
                    if not channel:
                        return jsonify({"error": "Channel not found"}), 404
                    # Videos reference the channel's database id.
                    query = query.filter_by(channel_id=channel.id)

                video_ids = [v.id for v in query.all()]

            if not video_ids:
                return jsonify({"message": "No videos to download", "total_queued": 0})

            # Queue batch download task
            task = download_videos_batch.delay(video_ids)
            return jsonify({
                "task_id": task.id,
                "total_queued": len(video_ids),
                "video_ids": video_ids,
                "message": "Batch download queued successfully"
            })
    except Exception as e:
        return jsonify({"error": f"Failed to queue batch download: {str(e)}"}), 500
def main():
    """Fetch the default channel's feed and print its title, link and entries."""
    feed = YouTubeFeedParser(DEFAULT_CHANNEL_ID).fetch_feed()
    if feed is None:
        print("Failed to retrieve RSS feed")
        return

    feed_title = feed['feed_title']
    print(f"Feed Title: {feed_title}")
    feed_link = feed['feed_link']
    print(f"Feed Link: {feed_link}")

    for item in feed['entries']:
        item_title = item['title']
        print(f"\nEntry Title: {item_title}")
        item_link = item['link']
        print(f"Entry Link: {item_link}")
# When executed as a script, run the CLI feed check in main(); this does not
# start the Flask server.
if __name__ == "__main__":
    main()