import glob
import logging
import os
import re
from datetime import datetime

import yt_dlp
from pydub import AudioSegment

from backend.celery_app import celery
from backend.models import db, Question, DownloadJob, DownloadJobStatus
from backend.services.audio_service import generate_audio_filename, get_audio_path

logger = logging.getLogger(__name__)

# ANSI colour sequences that may be embedded in yt-dlp's display strings.
_ANSI_ESCAPE_RE = re.compile(r'\x1b\[[0-9;]*m')


def _download_percent(d):
    """Return the download completion in the range 0-100, or None if unknown.

    Prefers the numeric byte counters from the yt-dlp progress dict and only
    falls back to parsing the human-readable ``_percent_str`` field.
    """
    downloaded = d.get('downloaded_bytes')
    total = d.get('total_bytes') or d.get('total_bytes_estimate')
    if downloaded is not None and total:
        return max(0.0, min(100.0, downloaded * 100.0 / total))

    percent_str = d.get('_percent_str')
    if not percent_str:
        return None
    cleaned = _ANSI_ESCAPE_RE.sub('', percent_str).strip().rstrip('%')
    try:
        return max(0.0, min(100.0, float(cleaned)))
    except ValueError:
        return None


def _remove_files(paths):
    """Delete every path in *paths*, logging (not raising) on failure."""
    for path in paths:
        try:
            os.remove(path)
        except FileNotFoundError:
            pass
        except OSError:
            logger.warning("Could not remove file %s", path, exc_info=True)


@celery.task(bind=True)
def download_youtube_audio(self, question_id, youtube_url, start_time, end_time):
    """
    Download and trim YouTube audio clip

    The job row matching this Celery task id is moved through
    PROCESSING -> COMPLETED (or FAILED), with ``progress`` updated along
    the way: 10 at start, 10-60 while downloading, 80 after trimming and
    100 on completion.

    Args:
        question_id: Question ID to update
        youtube_url: YouTube video URL
        start_time: Start time in seconds
        end_time: End time in seconds

    Returns:
        dict: ``{'success': True, 'audio_path': ..., 'question_id': ...}``
        on success, otherwise ``{'success': False, 'error': <message>}``.
        Failures are reported through the return value and the job row;
        the task itself does not raise for them.
    """
    # Imported inside the task rather than at module level -- presumably to
    # avoid a circular import with backend.app (TODO confirm).
    from backend.app import create_app
    app = create_app()

    with app.app_context():
        job = DownloadJob.query.filter_by(celery_task_id=self.request.id).first()
        question = Question.query.get(question_id)

        if not job or not question:
            return {'success': False, 'error': 'Job or question not found'}

        temp_path = None
        final_path = None

        try:
            # Reject an unusable clip window before spending time on the
            # download; the error is recorded on the job by the handler below.
            if start_time is None or end_time is None:
                raise ValueError('start_time and end_time are required')
            if start_time < 0 or end_time <= start_time:
                raise ValueError(
                    f'Invalid clip range: start_time={start_time}, end_time={end_time}'
                )

            # Update status to processing
            job.status = DownloadJobStatus.PROCESSING
            job.progress = 10
            db.session.commit()

            # Create temp and final directories
            audio_folder = app.config['AUDIO_FOLDER']
            os.makedirs(audio_folder, exist_ok=True)
            temp_dir = os.path.join(audio_folder, 'temp')
            os.makedirs(temp_dir, exist_ok=True)

            # Download full audio; the task id keeps the temp name unique.
            temp_filename = f"{self.request.id}_full"
            temp_path = os.path.join(temp_dir, temp_filename)

            last_progress = 10

            def progress_hook(d):
                """Update progress during download"""
                nonlocal last_progress
                if d.get('status') != 'downloading':
                    return
                percent = _download_percent(d)
                if percent is None:
                    return
                # Map download progress to 10-60% range
                progress = 10 + int(percent * 0.5)
                # The hook fires very frequently; only hit the database
                # when the stored value would actually change.
                if progress <= last_progress:
                    return
                try:
                    job.progress = progress
                    db.session.commit()
                    last_progress = progress
                except Exception:
                    # Progress reporting is best-effort and must not abort
                    # the download, but the session has to be rolled back
                    # to stay usable for the later commits.
                    logger.warning(
                        "Could not persist download progress for task %s",
                        self.request.id,
                        exc_info=True,
                    )
                    db.session.rollback()

            ydl_opts = {
                'format': app.config['YTDLP_FORMAT'],
                'outtmpl': temp_path + '.%(ext)s',
                'postprocessors': [{
                    'key': 'FFmpegExtractAudio',
                    'preferredcodec': 'mp3',
                    'preferredquality': app.config['YTDLP_QUALITY'],
                }],
                'progress_hooks': [progress_hook],
                'quiet': True,
                'no_warnings': True,
            }

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([youtube_url])

            job.progress = 60
            db.session.commit()

            # The FFmpegExtractAudio postprocessor is configured for mp3,
            # so the converted file is expected at <temp_path>.mp3.
            full_audio_path = temp_path + '.mp3'

            # Trim audio using pydub
            audio = AudioSegment.from_mp3(full_audio_path)
            # pydub uses milliseconds
            clip = audio[int(start_time * 1000):int(end_time * 1000)]
            if len(clip) == 0:
                raise ValueError(
                    'Requested clip range lies outside the downloaded audio'
                )

            job.progress = 80
            db.session.commit()

            # Save trimmed clip
            final_filename = generate_audio_filename('mp3')
            final_path = os.path.join(audio_folder, final_filename)
            clip.export(final_path, format='mp3', bitrate='192k')

            # Update question with audio path
            audio_url = get_audio_path(final_filename)
            question.audio_path = audio_url

            # Update job status
            job.status = DownloadJobStatus.COMPLETED
            job.progress = 100
            job.completed_at = datetime.utcnow()
            db.session.commit()

            return {
                'success': True,
                'audio_path': audio_url,
                'question_id': question_id
            }

        except Exception as e:
            logger.exception(
                "Audio download failed for question %s (task %s)",
                question_id,
                self.request.id,
            )
            # Discard any half-applied changes first; committing on a
            # session that is in a failed state would raise again.
            db.session.rollback()
            # A clip written before the failure is not referenced by any
            # committed row, so do not leave it behind.
            if final_path:
                _remove_files([final_path])
            job.status = DownloadJobStatus.FAILED
            job.error_message = str(e)
            db.session.commit()
            return {'success': False, 'error': str(e)}

        finally:
            # Clean up every temp artefact of this task (converted mp3,
            # pre-conversion download, partial files), on success or failure.
            if temp_path:
                _remove_files(glob.glob(glob.escape(temp_path) + '.*'))