- Added yt-dlp, celery, and redis dependencies to pyproject.toml
- Extended VideoEntry model with download tracking fields:
  - download_status (enum: pending, downloading, completed, failed)
  - download_path, download_started_at, download_completed_at
  - download_error, file_size
- Created celery_app.py with Redis broker configuration
- Created download_service.py with async download tasks:
  - download_video() task downloads videos in MP4 format
  - Configured yt-dlp for best MP4 quality with fallback
  - Automatic retries on failure (max 3 attempts)
  - Progress tracking and database updates
- Added Flask API endpoints in main.py:
  - POST /api/download/<video_id> to trigger a download
  - GET /api/download/status/<video_id> to check status
  - POST /api/download/batch for bulk downloads
- Generated and applied Alembic migration for new fields
- Created downloads/ directory for video storage
- Updated .gitignore to exclude downloads/ directory
- Updated CLAUDE.md with comprehensive documentation:
  - Redis and Celery setup instructions
  - Download workflow and architecture
  - yt-dlp configuration details
  - New API endpoint examples

Videos are downloaded as MP4 files using Celery workers.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
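As a quick illustration of the new endpoints, here is a minimal client-side sketch in Python. It is illustrative only: it assumes the Flask dev server on localhost:5000, an existing VideoEntry with database ID 1, and the third-party requests package, none of which are part of this change.

import time

import requests  # illustrative HTTP client, not a project dependency

BASE = "http://localhost:5000"  # assumed dev-server address
VIDEO_ID = 1                    # assumed database ID of an existing VideoEntry

# Queue a single download; the endpoint responds with the Celery task ID.
resp = requests.post(f"{BASE}/api/download/{VIDEO_ID}")
print(resp.json())  # {"video_id": 1, "task_id": "...", "status": "queued", ...}

# Poll the per-video status until the worker reports a terminal state.
while True:
    status = requests.get(f"{BASE}/api/download/status/{VIDEO_ID}").json()
    if status.get("download_status") in ("completed", "failed"):
        break
    time.sleep(2)

print(status["download_path"], status["file_size"], status["download_error"])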
270 lines · 8.7 KiB · Python
"""Flask web application for YouTube RSS feed parsing."""
|
|
|
|
from flask import Flask, render_template, request, jsonify
|
|
from feed_parser import YouTubeFeedParser
|
|
from database import init_db, get_db_session
|
|
from models import Channel, VideoEntry, DownloadStatus
|
|
from download_service import download_video, download_videos_batch
|
|
|
|
|
|
app = Flask(__name__)
|
|
|
|
# Default channel ID for demonstration
|
|
DEFAULT_CHANNEL_ID = "UCtTWOND3uyl4tVc_FarDmpw"
|
|
|
|
|
|
# Initialize database on app startup
|
|
with app.app_context():
|
|
init_db()
|
|
|
|
|
|
@app.route("/", methods=["GET"])
|
|
def index():
|
|
"""Render the main page."""
|
|
return render_template("index.html")
|
|
|
|
|
|
@app.route("/api/feed", methods=["GET"])
|
|
def get_feed():
|
|
"""API endpoint to fetch YouTube channel feed and save to database.
|
|
|
|
Query parameters:
|
|
channel_id: YouTube channel ID (optional, uses default if not provided)
|
|
filter_shorts: Whether to filter out Shorts (default: true)
|
|
save: Whether to save to database (default: true)
|
|
|
|
Returns:
|
|
JSON response with feed data or error message
|
|
"""
|
|
channel_id = request.args.get("channel_id", DEFAULT_CHANNEL_ID)
|
|
filter_shorts = request.args.get("filter_shorts", "true").lower() == "true"
|
|
save_to_db = request.args.get("save", "true").lower() == "true"
|
|
|
|
parser = YouTubeFeedParser(channel_id)
|
|
result = parser.fetch_feed(filter_shorts=filter_shorts)
|
|
|
|
if result is None:
|
|
return jsonify({"error": "Failed to fetch feed"}), 500
|
|
|
|
# Save to database if requested
|
|
if save_to_db:
|
|
try:
|
|
with get_db_session() as session:
|
|
parser.save_to_db(session, result)
|
|
except Exception as e:
|
|
return jsonify({"error": f"Failed to save to database: {str(e)}"}), 500
|
|
|
|
return jsonify(result)
|
|
|
|
|
|
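# Usage sketch for the endpoint above (dev-server address assumed; not part
# of the original file):
#
#   curl "http://localhost:5000/api/feed?channel_id=UCtTWOND3uyl4tVc_FarDmpw&filter_shorts=true&save=true"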
@app.route("/api/channels", methods=["GET"])
|
|
def get_channels():
|
|
"""API endpoint to list all tracked channels.
|
|
|
|
Returns:
|
|
JSON response with list of channels
|
|
"""
|
|
try:
|
|
with get_db_session() as session:
|
|
channels = session.query(Channel).all()
|
|
return jsonify({
|
|
"channels": [
|
|
{
|
|
"id": ch.id,
|
|
"channel_id": ch.channel_id,
|
|
"title": ch.title,
|
|
"link": ch.link,
|
|
"last_fetched": ch.last_fetched.isoformat(),
|
|
"video_count": len(ch.videos)
|
|
}
|
|
for ch in channels
|
|
]
|
|
})
|
|
except Exception as e:
|
|
return jsonify({"error": f"Failed to fetch channels: {str(e)}"}), 500
|
|
|
|
|
|
@app.route("/api/history/<channel_id>", methods=["GET"])
|
|
def get_history(channel_id: str):
|
|
"""API endpoint to get video history for a specific channel.
|
|
|
|
Args:
|
|
channel_id: YouTube channel ID
|
|
|
|
Query parameters:
|
|
limit: Maximum number of videos to return (default: 50)
|
|
|
|
Returns:
|
|
JSON response with channel info and video history
|
|
"""
|
|
limit = request.args.get("limit", "50")
|
|
try:
|
|
limit = int(limit)
|
|
except ValueError:
|
|
limit = 50
|
|
|
|
try:
|
|
with get_db_session() as session:
|
|
channel = session.query(Channel).filter_by(
|
|
channel_id=channel_id
|
|
).first()
|
|
|
|
if not channel:
|
|
return jsonify({"error": "Channel not found"}), 404
|
|
|
|
videos = session.query(VideoEntry).filter_by(
|
|
channel_id=channel.id
|
|
).order_by(VideoEntry.created_at.desc()).limit(limit).all()
|
|
|
|
return jsonify({
|
|
"channel": {
|
|
"channel_id": channel.channel_id,
|
|
"title": channel.title,
|
|
"link": channel.link,
|
|
"last_fetched": channel.last_fetched.isoformat()
|
|
},
|
|
"videos": [video.to_dict() for video in videos],
|
|
"total_videos": len(channel.videos)
|
|
})
|
|
except Exception as e:
|
|
return jsonify({"error": f"Failed to fetch history: {str(e)}"}), 500
|
|
|
|
|
|
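# Usage sketch for the endpoint above (dev-server address assumed; not part
# of the original file):
#
#   curl "http://localhost:5000/api/history/UCtTWOND3uyl4tVc_FarDmpw?limit=10"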
@app.route("/api/download/<int:video_id>", methods=["POST"])
|
|
def trigger_download(video_id: int):
|
|
"""Trigger video download for a specific video.
|
|
|
|
Args:
|
|
video_id: Database ID of the VideoEntry
|
|
|
|
Returns:
|
|
JSON response with task information
|
|
"""
|
|
try:
|
|
with get_db_session() as session:
|
|
video = session.query(VideoEntry).filter_by(id=video_id).first()
|
|
if not video:
|
|
return jsonify({"error": "Video not found"}), 404
|
|
|
|
# Queue download task
|
|
task = download_video.delay(video_id)
|
|
|
|
return jsonify({
|
|
"video_id": video_id,
|
|
"task_id": task.id,
|
|
"status": "queued",
|
|
"message": "Download task queued successfully"
|
|
})
|
|
except Exception as e:
|
|
return jsonify({"error": f"Failed to queue download: {str(e)}"}), 500
|
|
|
|
|
|
@app.route("/api/download/status/<int:video_id>", methods=["GET"])
|
|
def get_download_status(video_id: int):
|
|
"""Get download status for a specific video.
|
|
|
|
Args:
|
|
video_id: Database ID of the VideoEntry
|
|
|
|
Returns:
|
|
JSON response with download status
|
|
"""
|
|
try:
|
|
with get_db_session() as session:
|
|
video = session.query(VideoEntry).filter_by(id=video_id).first()
|
|
if not video:
|
|
return jsonify({"error": "Video not found"}), 404
|
|
|
|
return jsonify({
|
|
"video_id": video_id,
|
|
"title": video.title,
|
|
"download_status": video.download_status.value,
|
|
"download_path": video.download_path,
|
|
"download_started_at": video.download_started_at.isoformat() if video.download_started_at else None,
|
|
"download_completed_at": video.download_completed_at.isoformat() if video.download_completed_at else None,
|
|
"download_error": video.download_error,
|
|
"file_size": video.file_size
|
|
})
|
|
except Exception as e:
|
|
return jsonify({"error": f"Failed to fetch download status: {str(e)}"}), 500
|
|
|
|
|
|
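# Usage sketch for the two endpoints above (dev-server address and video ID
# assumed; not part of the original file):
#
#   curl -X POST http://localhost:5000/api/download/1
#   curl http://localhost:5000/api/download/status/1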
@app.route("/api/download/batch", methods=["POST"])
|
|
def trigger_batch_download():
|
|
"""Trigger batch download for multiple videos.
|
|
|
|
Query parameters:
|
|
channel_id: Download all pending videos for this channel (optional)
|
|
status: Filter by download status (default: pending)
|
|
|
|
Request body (alternative to query params):
|
|
video_ids: List of video IDs to download
|
|
|
|
Returns:
|
|
JSON response with batch task information
|
|
"""
|
|
try:
|
|
with get_db_session() as session:
|
|
# Check if video_ids provided in request body
|
|
data = request.get_json(silent=True)
|
|
if data and 'video_ids' in data:
|
|
video_ids = data['video_ids']
|
|
else:
|
|
# Filter by channel and/or status
|
|
channel_id = request.args.get("channel_id")
|
|
status_str = request.args.get("status", "pending")
|
|
|
|
try:
|
|
status = DownloadStatus(status_str)
|
|
except ValueError:
|
|
return jsonify({"error": f"Invalid status: {status_str}"}), 400
|
|
|
|
query = session.query(VideoEntry).filter_by(download_status=status)
|
|
|
|
if channel_id:
|
|
channel = session.query(Channel).filter_by(
|
|
channel_id=channel_id
|
|
).first()
|
|
if not channel:
|
|
return jsonify({"error": "Channel not found"}), 404
|
|
query = query.filter_by(channel_id=channel.id)
|
|
|
|
videos = query.all()
|
|
video_ids = [v.id for v in videos]
|
|
|
|
if not video_ids:
|
|
return jsonify({"message": "No videos to download", "total_queued": 0})
|
|
|
|
# Queue batch download task
|
|
task = download_videos_batch.delay(video_ids)
|
|
|
|
return jsonify({
|
|
"task_id": task.id,
|
|
"total_queued": len(video_ids),
|
|
"video_ids": video_ids,
|
|
"message": "Batch download queued successfully"
|
|
})
|
|
except Exception as e:
|
|
return jsonify({"error": f"Failed to queue batch download: {str(e)}"}), 500
|
|
|
|
|
|
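# Usage sketch for the endpoint above (dev-server address and IDs assumed;
# not part of the original file). The first form queues every pending video
# for one channel; the second queues an explicit list of VideoEntry IDs:
#
#   curl -X POST "http://localhost:5000/api/download/batch?channel_id=UCtTWOND3uyl4tVc_FarDmpw"
#   curl -X POST http://localhost:5000/api/download/batch \
#        -H "Content-Type: application/json" \
#        -d '{"video_ids": [1, 2, 3]}'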


def main():
    """CLI entry point for testing feed parser."""
    parser = YouTubeFeedParser(DEFAULT_CHANNEL_ID)
    result = parser.fetch_feed()

    if result is None:
        print("Failed to retrieve RSS feed")
        return

    print(f"Feed Title: {result['feed_title']}")
    print(f"Feed Link: {result['feed_link']}")

    for entry in result['entries']:
        print(f"\nEntry Title: {entry['title']}")
        print(f"Entry Link: {entry['link']}")


if __name__ == "__main__":
    main()
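The commit message references celery_app.py and download_service.py, which this page does not show. Below is a minimal sketch of what that wiring could look like, based only on the commit description: the app name, broker/backend URLs, yt-dlp options, and the resolve_video_url helper are all assumptions, not the repository's actual code.

# celery_app.py (sketch) -- broker/backend URLs are assumptions
from celery import Celery

celery_app = Celery(
    "youtube_rss",                        # assumed app name
    broker="redis://localhost:6379/0",    # assumed Redis broker URL
    backend="redis://localhost:6379/1",   # assumed result backend
)
# Run a worker with: celery -A celery_app worker --loglevel=info


# download_service.py (sketch)
from yt_dlp import YoutubeDL


def resolve_video_url(video_id: int) -> str:
    """Hypothetical helper: map a VideoEntry database ID to its YouTube URL."""
    raise NotImplementedError  # would query the VideoEntry row in practice


@celery_app.task(bind=True, max_retries=3)
def download_video(self, video_id: int):
    """Download one video as MP4, retrying up to three times on failure."""
    url = resolve_video_url(video_id)
    ydl_opts = {
        # Best MP4 video+audio, with progressively simpler fallbacks
        "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
        "outtmpl": "downloads/%(id)s.%(ext)s",
        "merge_output_format": "mp4",
    }
    try:
        with YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except Exception as exc:
        # Let Celery re-queue the task; it stops after max_retries attempts
        raise self.retry(exc=exc, countdown=30)
    # Status-tracking updates (download_status, timestamps, file_size) elided.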