# triviathang/backend/tasks/youtube_tasks.py

import logging
import os
from datetime import datetime

import yt_dlp
from pydub import AudioSegment

from backend.celery_app import celery
from backend.models import db, Question, DownloadJob, DownloadJobStatus
from backend.services.audio_service import generate_audio_filename, get_audio_path


logger = logging.getLogger(__name__)


def _download_percent(d):
    """
    Extract the completion percentage from a yt-dlp progress-hook dictionary

    Args:
        d: Dictionary yt-dlp passes to a progress hook

    Returns:
        Percentage as a float clamped to 0-100, or None when it cannot be determined
    """
    downloaded = d.get('downloaded_bytes')
    total = d.get('total_bytes') or d.get('total_bytes_estimate')
    if downloaded is not None and total:
        return max(0.0, min(100.0, downloaded * 100.0 / total))

    # Fall back to the human-readable string (e.g. " 50.5%"). It is meant for
    # display and may not always be parseable, hence the guarded conversion.
    percent_str = d.get('_percent_str') or ''
    try:
        return max(0.0, min(100.0, float(percent_str.strip().rstrip('%'))))
    except (TypeError, ValueError):
        return None


def _remove_temp_files(temp_dir, prefix):
    """
    Best-effort removal of every file in temp_dir named "<prefix>.<anything>"

    Args:
        temp_dir: Directory holding the temporary downloads
        prefix: File name stem (without extension) used for this task's downloads
    """
    try:
        names = os.listdir(temp_dir)
    except OSError:
        return

    for name in names:
        if not name.startswith(prefix + '.'):
            continue
        try:
            os.remove(os.path.join(temp_dir, name))
        except OSError:
            logger.warning("Could not remove temp file %s", name, exc_info=True)


@celery.task(bind=True)
def download_youtube_audio(self, question_id, youtube_url, start_time, end_time):
    """
    Download and trim YouTube audio clip

    Downloads the audio track as MP3 into AUDIO_FOLDER/temp, slices out the
    requested range, exports the clip into AUDIO_FOLDER, stores its path on the
    question and records progress/status on the DownloadJob whose
    celery_task_id matches this task's id.

    Args:
        question_id: Question ID to update
        youtube_url: YouTube video URL
        start_time: Start time in seconds
        end_time: End time in seconds

    Returns:
        dict with 'success': True, 'audio_path' and 'question_id' on success,
        or 'success': False and 'error' on failure. Exceptions raised while
        processing are caught and reported through the returned dict.
    """
    from backend.app import create_app
    app = create_app()

    with app.app_context():
        job = DownloadJob.query.filter_by(celery_task_id=self.request.id).first()
        question = Question.query.get(question_id)

        if not job or not question:
            return {'success': False, 'error': 'Job or question not found'}

        temp_filename = f"{self.request.id}_full"
        temp_dir = None      # set once the directory exists; drives cleanup in finally
        final_path = None    # set once the clip location is chosen; removed on failure

        try:
            # Update status to processing
            job.status = DownloadJobStatus.PROCESSING
            job.progress = 10
            db.session.commit()

            # Create temp and final directories
            audio_folder = app.config['AUDIO_FOLDER']
            os.makedirs(audio_folder, exist_ok=True)
            temp_dir = os.path.join(audio_folder, 'temp')
            os.makedirs(temp_dir, exist_ok=True)

            # Download full audio
            temp_path = os.path.join(temp_dir, temp_filename)
            last_progress = 10

            def progress_hook(d):
                """Update progress during download"""
                nonlocal last_progress
                if d.get('status') != 'downloading':
                    return

                percent = _download_percent(d)
                if percent is None:
                    return

                # Map download progress to 10-60% range
                progress = 10 + int(percent * 0.5)
                # Skip redundant commits and never move the progress bar backwards
                if progress <= last_progress:
                    return

                try:
                    job.progress = progress
                    db.session.commit()
                    last_progress = progress
                except Exception:
                    # Progress reporting is best-effort: keep downloading, but
                    # reset the session so later commits are not poisoned.
                    db.session.rollback()
                    logger.warning("Could not persist download progress", exc_info=True)

            ydl_opts = {
                'format': app.config['YTDLP_FORMAT'],
                'outtmpl': temp_path + '.%(ext)s',
                'postprocessors': [{
                    'key': 'FFmpegExtractAudio',
                    'preferredcodec': 'mp3',
                    'preferredquality': app.config['YTDLP_QUALITY'],
                }],
                'progress_hooks': [progress_hook],
                'quiet': True,
                'no_warnings': True,
            }

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([youtube_url])

            job.progress = 60
            db.session.commit()

            # The FFmpegExtractAudio postprocessor writes the result as <outtmpl stem>.mp3
            full_audio_path = temp_path + '.mp3'

            # Trim audio using pydub
            audio = AudioSegment.from_mp3(full_audio_path)
            clip = audio[start_time * 1000:end_time * 1000]  # pydub uses milliseconds
            if len(clip) == 0:
                # Range lies outside the audio or end <= start; do not publish silence
                raise ValueError(
                    f"Requested range {start_time}s-{end_time}s produced an empty clip"
                )

            job.progress = 80
            db.session.commit()

            # Save trimmed clip
            final_filename = generate_audio_filename('mp3')
            final_path = os.path.join(audio_folder, final_filename)
            clip.export(final_path, format='mp3', bitrate='192k')

            # Update question with audio path
            audio_url = get_audio_path(final_filename)
            question.audio_path = audio_url

            # Update job status
            job.status = DownloadJobStatus.COMPLETED
            job.progress = 100
            job.completed_at = datetime.utcnow()
            db.session.commit()

            return {
                'success': True,
                'audio_path': audio_url,
                'question_id': question_id
            }

        except Exception as e:
            logger.exception("YouTube audio download failed for question %s", question_id)
            error_message = str(e)

            # Discard any half-applied changes (including a pending audio_path)
            # so the failure status below can be committed cleanly.
            db.session.rollback()

            # Do not leave an exported clip behind that no question points to
            if final_path is not None and os.path.exists(final_path):
                try:
                    os.remove(final_path)
                except OSError:
                    logger.warning("Could not remove orphaned clip %s", final_path, exc_info=True)

            try:
                job.status = DownloadJobStatus.FAILED
                job.error_message = error_message
                db.session.commit()
            except Exception:
                db.session.rollback()
                logger.exception("Could not record failure on download job")

            return {'success': False, 'error': error_message}

        finally:
            # Clean up the full download and any partial files, on success or failure
            if temp_dir is not None:
                _remove_temp_files(temp_dir, temp_filename)