Files
yottob/download_service.py
Ryan Chen be76f0a610 Add comprehensive deletion functionality and scheduled cleanup
Features:
- Delete entire channels with all videos and downloaded files
- Delete individual video files while keeping database entries
- Scheduled automatic cleanup of videos older than 7 days
- Proper cascading deletes with file cleanup

Channel Deletion:
- New DELETE endpoint at /api/channels/<id>
- Removes channel, all video entries, and downloaded files
- User ownership verification
- Returns count of deleted files
- UI button on channels page with detailed confirmation dialog

Video File Deletion:
- New DELETE endpoint at /api/videos/<id>/file
- Celery async task to remove file from disk
- Resets download status to pending (allows re-download)
- UI button on watch page for completed videos
- Confirmation dialog with clear warnings

Scheduled Cleanup:
- Celery beat configuration for periodic tasks
- cleanup_old_videos task runs daily at midnight
- Automatically deletes videos completed more than 7 days ago
- Removes files and resets database status
- scheduled_tasks.py for beat schedule configuration
- verify_schedule.py helper to check task scheduling

UI Improvements:
- Added .btn-danger CSS class (black/white theme)
- Delete buttons with loading states
- Detailed confirmation dialogs warning about permanent deletion
- Dashboard now filters to show only completed videos

Bug Fixes:
- Fixed navbar alignment issues
- Added proper error handling for file deletion

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-26 20:55:43 -05:00

275 lines
8.1 KiB
Python

"""Video download service using yt-dlp and Celery."""
import os
from datetime import datetime
from pathlib import Path
import yt_dlp
from celery import Task
from celery_app import celery_app
from database import SessionLocal
from models import VideoEntry, DownloadStatus
# Download configuration
# Directory that downloaded files are written to. The path is relative, so it
# resolves against the working directory of the importing process.
DOWNLOAD_DIR = Path("downloads")
# Runs at import time; exist_ok=True keeps this from raising when the
# directory is already there.
DOWNLOAD_DIR.mkdir(exist_ok=True)
class DatabaseTask(Task):
    """Base task with database session management.

    The session is created lazily on first access to ``session`` and is
    closed and discarded in ``after_return``, so the next access opens a
    new session instead of handing back one that was already closed.
    """

    # Session currently held by this task object; None when there is none.
    _session = None

    def after_return(self, *args, **kwargs):
        """Close and discard the database session after task completion."""
        session = self._session
        if session is None:
            return
        # Drop the reference before closing: even if close() raises, the
        # next access to ``session`` starts from a clean state.
        self._session = None
        session.close()

    @property
    def session(self):
        """Get or create database session."""
        if self._session is None:
            self._session = SessionLocal()
        return self._session
@celery_app.task(base=DatabaseTask, bind=True, max_retries=3)
def download_video(self, video_id: int) -> dict:
    """Download a video using yt-dlp.

    Marks the VideoEntry as downloading, runs yt-dlp, and records the
    outcome on the entry: path, size and completion time on success, or
    the error text on failure.

    Args:
        video_id: Database ID of the VideoEntry to download

    Returns:
        Dictionary with download result information:
        ``video_id``, ``status``, ``path`` and ``file_size`` on success;
        ``video_id``, ``status`` and ``error`` once retries are used up;
        only ``error`` when no entry with that ID exists.

    Raises:
        Whatever ``self.retry`` produces: a failed download is retried
        with a 60 second countdown while ``self.request.retries`` is
        below ``self.max_retries``.
    """
    session = self.session

    # Get video entry from database
    video = session.query(VideoEntry).filter_by(id=video_id).first()
    if not video:
        return {"error": f"Video ID {video_id} not found"}

    # Update status to downloading
    video.download_status = DownloadStatus.DOWNLOADING
    video.download_started_at = datetime.utcnow()
    session.commit()

    try:
        # Get video URL from database
        youtube_url = video.video_url

        # Configure yt-dlp options for MP4 output
        ydl_opts = {
            'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
            'outtmpl': str(DOWNLOAD_DIR / f'{video_id}_%(title)s.%(ext)s'),
            'merge_output_format': 'mp4',  # Ensure output is MP4
            'postprocessors': [{
                'key': 'FFmpegVideoConvertor',
                'preferedformat': 'mp4',  # Convert to MP4 if needed
            }],
            'quiet': False,
            'no_warnings': False,
            'progress_hooks': [lambda d: _progress_hook(d, video_id, session)],
        }

        # Download the video
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(youtube_url, download=True)
            filename = ydl.prepare_filename(info)

        # Handle cases where extension might change: the prepared name can
        # carry the source extension while the file on disk is .mp4.
        if not filename.endswith('.mp4'):
            base = os.path.splitext(filename)[0]
            mp4_file = f"{base}.mp4"
            if os.path.exists(mp4_file):
                filename = mp4_file

        # Get file size
        file_size = os.path.getsize(filename) if os.path.exists(filename) else None

        # Update video entry with success
        video.download_status = DownloadStatus.COMPLETED
        video.download_path = filename
        video.download_completed_at = datetime.utcnow()
        video.file_size = file_size
        video.download_error = None
        session.commit()

        return {
            "video_id": video_id,
            "status": "completed",
            "path": filename,
            "file_size": file_size
        }
    except Exception as e:
        # The failure may have come from the database itself (e.g. the
        # commit above). Roll back first so the session is usable again and
        # no half-applied success fields get committed with the failure.
        session.rollback()

        # Update video entry with error
        video.download_status = DownloadStatus.FAILED
        video.download_error = str(e)
        video.download_completed_at = datetime.utcnow()
        session.commit()

        # Retry if we haven't exceeded max retries
        if self.request.retries < self.max_retries:
            raise self.retry(exc=e, countdown=60)  # Retry after 60 seconds

        return {
            "video_id": video_id,
            "status": "failed",
            "error": str(e)
        }
def _progress_hook(d: dict, video_id: int, session) -> None:
    """Progress hook for yt-dlp downloads.

    Prints one line when the download has finished and a percentage line
    while it is running. Percentage lines are skipped when the byte
    counts needed to compute them are missing, ``None`` or zero.

    Args:
        d: Progress dictionary from yt-dlp
        video_id: Database ID of the video
        session: Database session (not used by this hook)
    """
    status = d.get('status')
    if status == 'finished':
        print(f"Download finished for video {video_id}, now converting...")
    elif status == 'downloading':
        downloaded = d.get('downloaded_bytes')
        # The exact size can be absent or None; fall back to the estimate.
        total = d.get('total_bytes') or d.get('total_bytes_estimate')
        # A missing or zero total would make the division fail, so skip it.
        if downloaded is not None and total:
            percent = downloaded / total * 100
            print(f"Downloading video {video_id}: {percent:.1f}%")
@celery_app.task
def download_videos_batch(video_ids: list[int]) -> dict:
    """Queue a download task for every video in a batch.

    Args:
        video_ids: List of VideoEntry IDs to download

    Returns:
        Dictionary with the number of queued downloads and, for each one,
        the video ID together with the ID of the task queued for it
    """
    # Every video gets its own download_video task, queued in input order.
    queued = [
        {"video_id": vid, "task_id": download_video.delay(vid).id}
        for vid in video_ids
    ]
    return {"total_queued": len(queued), "tasks": queued}
@celery_app.task(base=DatabaseTask, bind=True)
def delete_video_file(self, video_id: int) -> dict:
    """Delete a downloaded video file and reset its download status.

    The entry is reset to pending whether the file was removed here or
    was already missing. It is left untouched when the removal fails for
    any other reason.

    Args:
        video_id: Database ID of the VideoEntry

    Returns:
        Dictionary with deletion result information: ``video_id``,
        ``status`` ("deleted" or "reset") and ``message`` on success, or
        a dictionary containing ``error`` when the entry is unknown, has
        no download path, or the file could not be removed.
    """
    session = self.session

    # Get video entry from database
    video = session.query(VideoEntry).filter_by(id=video_id).first()
    if not video:
        return {"error": f"Video ID {video_id} not found"}

    # Check if video has a download path
    if not video.download_path:
        return {"error": "Video has no download path", "video_id": video_id}

    # Remove the file directly instead of checking for it first: a file
    # that disappears between a check and the removal (e.g. taken by
    # another deletion running at the same time) then counts as "not
    # found" rather than as a failure.
    try:
        os.remove(video.download_path)
    except FileNotFoundError:
        deleted = False
    except OSError as e:
        return {
            "error": f"Failed to delete file: {str(e)}",
            "video_id": video_id,
            "path": video.download_path
        }
    else:
        deleted = True

    # Reset download status and metadata
    video.download_status = DownloadStatus.PENDING
    video.download_path = None
    video.download_completed_at = None
    video.file_size = None
    video.download_error = None
    session.commit()

    return {
        "video_id": video_id,
        "status": "deleted" if deleted else "reset",
        "message": "File deleted and status reset" if deleted else "Status reset (file not found)"
    }
@celery_app.task(base=DatabaseTask, bind=True)
def cleanup_old_videos(self, max_age_days: int = 7) -> dict:
    """Clean up videos older than ``max_age_days`` days (7 by default).

    For every completed video whose completion time is before the cutoff,
    the downloaded file is removed and the entry is reset to pending.
    Entries whose file is already missing (or that have no path) are
    reset as well, matching ``delete_video_file``; otherwise they would
    stay completed and be picked up again on every run. Entries whose
    file cannot be removed are left unchanged and reported as failed.

    Args:
        max_age_days: Age in days after which a completed download is
            cleaned up

    Returns:
        Dictionary with cleanup results: ``total_processed``,
        ``deleted_count``, ``reset_count``, ``failed_count``,
        ``cutoff_date`` (ISO format) and a per-video ``results`` list
    """
    from datetime import timedelta

    session = self.session

    # Calculate cutoff date
    cutoff_date = datetime.utcnow() - timedelta(days=max_age_days)

    # Query videos that are completed and older than the cutoff
    old_videos = session.query(VideoEntry).filter(
        VideoEntry.download_status == DownloadStatus.COMPLETED,
        VideoEntry.download_completed_at < cutoff_date
    ).all()

    deleted_count = 0
    reset_count = 0
    failed_count = 0
    results = []

    for video in old_videos:
        # "reset" unless a file is actually removed below.
        outcome = "reset"
        if video.download_path:
            # Remove directly rather than checking first, so a file that
            # vanished in the meantime is treated as already cleaned up.
            try:
                os.remove(video.download_path)
            except FileNotFoundError:
                pass
            except OSError as e:
                failed_count += 1
                results.append({
                    "video_id": video.id,
                    "title": video.title,
                    "status": "failed",
                    "error": str(e)
                })
                continue
            else:
                outcome = "deleted"

        # Reset download status
        video.download_status = DownloadStatus.PENDING
        video.download_path = None
        video.download_completed_at = None
        video.file_size = None
        video.download_error = None

        if outcome == "deleted":
            deleted_count += 1
        else:
            reset_count += 1
        results.append({
            "video_id": video.id,
            "title": video.title,
            "status": outcome
        })

    session.commit()

    return {
        "total_processed": len(old_videos),
        "deleted_count": deleted_count,
        "reset_count": reset_count,
        "failed_count": failed_count,
        "cutoff_date": cutoff_date.isoformat(),
        "results": results
    }