# conversion_project/core/process_manager.py

# core/process_manager.py
"""Main processing logic for batch transcoding."""

import csv
import os
import shutil
import subprocess
import time
from pathlib import Path

from core.audio_handler import get_audio_streams
from core.encode_engine import run_ffmpeg
from core.logger_helper import setup_logger, setup_failure_logger
from core.video_handler import get_source_resolution, determine_target_resolution

# Both loggers are configured with the same "logs" directory, located two
# levels above this module file. `logger` carries the general processing log;
# `failure_logger` receives the per-file failure records.
_LOG_DIR = Path(__file__).parent.parent / "logs"
logger = setup_logger(_LOG_DIR)
failure_logger = setup_failure_logger(_LOG_DIR)


def _cleanup_temp_files(temp_input: Path, temp_output: Path):
    """Best-effort removal of a job's temporary input copy and encoded output.

    Each path is handled independently: a missing file is ignored, a
    successful delete is logged at debug level, and any error is logged as a
    warning instead of being raised, so a failure on one path never prevents
    the attempt on the other.
    """
    for label, leftover in (("input", temp_input), ("output", temp_output)):
        try:
            if not leftover.exists():
                continue
            leftover.unlink()
            logger.debug(f"Cleaned up temp {label}: {leftover.name}")
        except Exception as exc:
            logger.warning(f"Could not delete temp {label} {leftover.name}: {exc}")


def _wait_until_readable(path: Path, attempts: int = 3, delay: float = 0.5) -> bool:
    """Return True once *path* exists and is readable, polling up to *attempts* times."""
    for attempt in range(attempts):
        if path.exists() and os.access(path, os.R_OK):
            return True
        if attempt < attempts - 1:
            time.sleep(delay)
    return False


def _find_subtitle_file(file: Path, config: dict):
    """Return the sidecar subtitle next to *file*, or None if none is found or lookup is disabled."""
    # Function-scope import so the module's top-level import block stays untouched.
    from glob import escape as glob_escape

    subtitle_config = config.get("general", {}).get("subtitles", {})
    if not subtitle_config.get("enabled", True):
        return None

    raw_extensions = subtitle_config.get("extensions", ".vtt,.srt,.ass,.ssa,.sub").split(",")
    # Media file names routinely contain "[...]"; unescaped, those characters
    # would be read as glob character classes and the pattern would not match.
    escaped_stem = glob_escape(file.stem)

    for ext in raw_extensions:
        ext = ext.strip()
        if not ext:
            continue  # tolerate stray commas in the configured list
        if not ext.startswith("."):
            ext = f".{ext}"

        # Exact match first (movie.srt), then language-tagged variants
        # (movie.en.srt, movie.eng.srt, ...).
        exact_match = file.with_suffix(ext)
        if exact_match.exists():
            subtitle_file = exact_match
        else:
            subtitle_file = next(iter(file.parent.glob(f"{escaped_stem}.*{ext}")), None)

        if subtitle_file is not None:
            print(f"📝 Found subtitle: {subtitle_file.name}")
            logger.info(f"Found subtitle file: {subtitle_file.name}")
            return subtitle_file

    return None


def _announce_resolution_choice(src_width, src_height, res_width, res_height, explicit_resolution):
    """Print/log which target resolution was chosen for the current file and why."""
    if explicit_resolution:
        logger.info(f"Using explicitly specified resolution: {res_width}x{res_height}")
    elif src_height > 1080:
        print(f"⚠️  Source {src_width}x{src_height} is above 1080p. Scaling down to 1080p.")
        logger.info(f"Source {src_width}x{src_height} detected. Scaling to 1080p.")
    elif src_height <= 720:
        print(f"ℹ️  Source {src_width}x{src_height} is 720p or lower. Preserving resolution.")
        logger.info(f"Source {src_width}x{src_height} (<=720p). Preserving source resolution.")
    else:
        print(f"ℹ️  Source {src_width}x{src_height} is at or below 1080p. Preserving resolution.")
        logger.info(f"Source {src_width}x{src_height} (<=1080p). Preserving source resolution.")


def process_folder(folder: Path, cq: int, transcode_mode: str, resolution: str, config: dict, tracker_file: Path, test_mode: bool = False, audio_language: str = None):
    """
    Process all video files in folder with appropriate encoding settings.

    Phase 1 walks the folder recursively, copies each matching file into the
    processing folder and encodes it with the initial method (Bitrate when
    forced, otherwise CQ). An encode counts as failed when ffmpeg raises
    ``subprocess.CalledProcessError`` or when the returned reduction ratio is
    greater than or equal to ``config["reduction_ratio_threshold"]``
    (presumably output size / original size - confirm against run_ffmpeg).

    In smart mode (``transcode_mode`` is neither "cq" nor "bitrate") failed
    files are queued and retried with Bitrate in Phase 2. Queued files release
    their temp copy immediately and are copied again for the retry: temp paths
    are derived from the file name only, so keeping them around would let
    same-named files from different subfolders clobber each other.

    Three consecutive failures stop the current phase.

    Args:
        folder: Input folder path
        cq: CQ override value (None to use the per-content value from config)
        transcode_mode: "cq" or "bitrate"; anything else selects smart mode
        resolution: Explicit resolution override ("480", "720", "1080", or None for smart)
        config: Configuration dictionary
        tracker_file: Path to CSV tracker file
        test_mode: If True, stop after the first successful encode and skip the
            final move/cleanup - in Phase 1 and in Phase 2 alike
        audio_language: Optional language code to tag audio (e.g., 'eng', 'spa'). If None, no tagging applied.
    """
    if not folder.exists():
        print(f"❌ Folder not found: {folder}")
        logger.error(f"Folder not found: {folder}")
        return

    audio_config = config["audio"]
    bitrate_config = config["encode"]["fallback"]
    filters_config = config["encode"]["filters"]
    suffix = config["suffix"]
    extensions = config["extensions"]
    ignore_tags = config["ignore_tags"]
    reduction_ratio_threshold = config["reduction_ratio_threshold"]

    # Resolution logic: explicit arg takes precedence, else use smart defaults
    explicit_resolution = resolution  # Will be None if not specified

    # TV content is detected from a "tv" path component. The folder itself
    # being the "tv" directory counts too, so this agrees with how
    # _save_successful_encoding classifies rows from folder.parts.
    folder_lower = str(folder).lower()
    is_tv = (
        "\\tv\\" in folder_lower
        or "/tv/" in folder_lower
        or folder.name.lower() == "tv"
    )
    if is_tv:
        filter_flags = filters_config.get("tv", "bicubic")
    else:
        filter_flags = filters_config.get("default", "lanczos")

    processing_folder = Path(config["processing_folder"])
    processing_folder.mkdir(parents=True, exist_ok=True)

    is_forced_cq = transcode_mode == "cq"
    is_forced_bitrate = transcode_mode == "bitrate"
    is_smart_mode = not (is_forced_cq or is_forced_bitrate)
    # Smart mode always tries CQ first; only forced-bitrate starts with Bitrate.
    method = "Bitrate" if is_forced_bitrate else "CQ"

    pending_retries = []  # smart-mode CQ failures awaiting the Phase 2 Bitrate retry
    consecutive_failures = 0
    max_consecutive = 3
    test_complete = False  # set once test mode has produced its single result

    # Phase 1: Process files with initial mode strategy
    print(f"\n{'='*60}")
    if is_smart_mode:
        print("📋 MODE: Smart (Try CQ first, retry with Bitrate if needed)")
    elif is_forced_cq:
        print("📋 MODE: Forced CQ (skip failures, log them)")
    else:
        print("📋 MODE: Forced Bitrate (skip failures, log them)")
    print(f"{'='*60}\n")

    skipped_count = 0
    for file in folder.rglob("*"):
        if file.suffix.lower() not in extensions or not file.is_file():
            continue
        if any(tag.lower() in file.name.lower() for tag in ignore_tags):
            skipped_count += 1
            continue

        if skipped_count > 0:
            print(f"⏭️ Skipped {skipped_count} file(s)")
            logger.info(f"Skipped {skipped_count} file(s)")
            skipped_count = 0

        print("="*60)
        logger.info(f"Processing: {file.name}")
        print(f"📁 Processing: {file.name}")

        # Both temp paths are fixed before the try block so the error handler
        # below can always clean up this file's paths (and never a stale path
        # left over from a previous iteration).
        # Always output as .mkv (AV1 video codec) with [EHX] suffix
        temp_input = (processing_folder / file.name).resolve()
        temp_output = (processing_folder / f"{file.stem}{suffix}.mkv").resolve()

        try:
            shutil.copy2(file, temp_input)
            logger.info(f"Copied {file.name} → {temp_input.name}")
            if not _wait_until_readable(temp_input):
                logger.warning(f"Processing copy is not readable: {temp_input.name}")

            subtitle_file = _find_subtitle_file(file, config)

            # Detect source resolution and determine target resolution
            src_width, src_height = get_source_resolution(temp_input)
            res_width, res_height, target_resolution = determine_target_resolution(
                src_width, src_height, explicit_resolution
            )
            _announce_resolution_choice(src_width, src_height, res_width, res_height, explicit_resolution)

            # Set CQ based on content type and target resolution
            cq_key = f"tv_{target_resolution}" if is_tv else f"movie_{target_resolution}"
            content_cq = config["encode"]["cq"].get(cq_key, 32)
            file_cq = cq if cq is not None else content_cq

            # Attempt encoding
            missed_size_target = False
            error_msg = None
            try:
                orig_size, out_size, reduction_ratio = run_ffmpeg(
                    temp_input, temp_output, file_cq, res_width, res_height, src_width, src_height,
                    filter_flags, audio_config, method, bitrate_config, subtitle_file, audio_language
                )
                missed_size_target = reduction_ratio >= reduction_ratio_threshold
            except subprocess.CalledProcessError as e:
                error_msg = str(e).split('\n')[0][:100]  # First 100 chars of error

            if missed_size_target or error_msg is not None:
                if is_smart_mode:
                    # Smart mode: queue the file for the Phase 2 Bitrate retry.
                    if missed_size_target:
                        print(f"⚠️  CQ failed size target ({reduction_ratio:.1%}). Will retry with Bitrate.")
                        failure_logger.warning(f"{file.name} | CQ failed size target ({reduction_ratio:.1%})")
                    else:
                        print(f"❌ CQ encode error. Will retry with Bitrate.")
                        failure_logger.warning(f"{file.name} | CQ error: {error_msg}")
                    pending_retries.append({
                        'file': file,
                        'temp_input': temp_input,
                        'temp_output': temp_output,
                        'src_width': src_width,
                        'src_height': src_height,
                        'res_width': res_width,
                        'res_height': res_height,
                        'target_resolution': target_resolution,
                        'file_cq': file_cq,
                        'is_tv': is_tv,
                        'subtitle_file': subtitle_file
                    })
                else:
                    # Forced mode: log the failure and skip the file.
                    if missed_size_target:
                        error_msg = f"Size threshold not met ({reduction_ratio:.1%})"
                        print(f"❌ {method} failed: {error_msg}")
                        failure_logger.warning(f"{file.name} | {method} failed: {error_msg}")
                    else:
                        print(f"❌ {method} encode failed: {error_msg}")
                        failure_logger.warning(f"{file.name} | {method} error: {error_msg}")

                consecutive_failures += 1
                # Phase 2 makes its own fresh copy, so the temp files can be
                # released right away in every mode.
                _cleanup_temp_files(temp_input, temp_output)

                if consecutive_failures >= max_consecutive:
                    if is_smart_mode:
                        print(f"\n⚠️  {max_consecutive} consecutive CQ failures. Moving to Phase 2: Bitrate retry.")
                        logger.warning(f"{max_consecutive} consecutive CQ failures. Moving to Phase 2.")
                    else:
                        print(f"\n❌ {max_consecutive} consecutive failures in forced {method} mode. Stopping.")
                        logger.error(f"{max_consecutive} consecutive failures. Stopping process.")
                    break
                continue

            # Encoding succeeded - reset failure counter, save file and log
            consecutive_failures = 0
            _save_successful_encoding(
                file, temp_input, temp_output, orig_size, out_size,
                reduction_ratio, method, src_width, src_height, res_width, res_height,
                file_cq, tracker_file, folder, is_tv, config, test_mode, subtitle_file
            )

            # In test mode, stop after first successful file
            if test_mode:
                test_complete = True
                print(f"\n✅ TEST MODE: File processed. Encoded file is in temp folder for inspection.")
                break

        except Exception as e:
            # Unexpected error (copy, probing, saving, ...): count it, clean up
            # and move on to the next file.
            error_msg = str(e)[:100]
            print(f"❌ Unexpected error: {error_msg}")
            failure_logger.warning(f"{file.name} | Unexpected error: {error_msg}")
            consecutive_failures += 1
            logger.error(f"Unexpected error processing {file.name}: {e}")
            _cleanup_temp_files(temp_input, temp_output)

            if consecutive_failures >= max_consecutive:
                if is_smart_mode:
                    print(f"\n⚠️  {max_consecutive} consecutive failures. Moving to Phase 2.")
                else:
                    print(f"\n❌ {max_consecutive} consecutive failures. Stopping.")
                break

    # Report skips that were not followed by another processed file.
    if skipped_count > 0:
        print(f"⏭️ Skipped {skipped_count} file(s)")
        logger.info(f"Skipped {skipped_count} file(s)")

    # Phase 2: Retry failed CQ files with Bitrate mode (smart mode only).
    # Skipped when test mode already produced its single result in Phase 1.
    if is_smart_mode and pending_retries and not test_complete:
        print(f"\n{'='*60}")
        print(f"📋 PHASE 2: Retrying {len(pending_retries)} failed files with Bitrate mode")
        print(f"{'='*60}\n")

        consecutive_failures = 0

        for file_data in pending_retries:
            file = file_data['file']
            temp_input = file_data['temp_input']
            temp_output = file_data['temp_output']

            try:
                print(f"🔄 Retrying: {file.name} with Bitrate")
                logger.info(f"Phase 2 Retry: {file.name} with Bitrate mode")

                # Clean up old output if it exists
                if temp_output.exists():
                    temp_output.unlink()

                # Fresh copy of the source: the Phase 1 copy was released, and
                # its path may have been reused by a same-named file since.
                shutil.copy2(file, temp_input)
                if not _wait_until_readable(temp_input):
                    logger.warning(f"Processing copy is not readable: {temp_input.name}")

                # Retry with bitrate
                orig_size, out_size, reduction_ratio = run_ffmpeg(
                    temp_input, temp_output, file_data['file_cq'],
                    file_data['res_width'], file_data['res_height'],
                    file_data['src_width'], file_data['src_height'],
                    filter_flags, audio_config, "Bitrate", bitrate_config,
                    file_data.get('subtitle_file'), audio_language
                )

                # Check if bitrate also failed
                if reduction_ratio >= reduction_ratio_threshold:
                    print(f"⚠️  Bitrate also failed size target ({reduction_ratio:.1%}). Skipping.")
                    failure_logger.warning(f"{file.name} | Bitrate retry also failed ({reduction_ratio:.1%})")
                    consecutive_failures += 1
                    _cleanup_temp_files(temp_input, temp_output)
                    if consecutive_failures >= max_consecutive:
                        print(f"\n⚠️  {max_consecutive} consecutive Phase 2 failures. Stopping retries.")
                        break
                    continue

                # Bitrate succeeded. test_mode is forwarded so a test run never
                # moves the output or deletes the original.
                consecutive_failures = 0
                _save_successful_encoding(
                    file, temp_input, temp_output,
                    orig_size, out_size, reduction_ratio, "Bitrate",
                    file_data['src_width'], file_data['src_height'],
                    file_data['res_width'], file_data['res_height'],
                    file_data['file_cq'], tracker_file,
                    folder, file_data['is_tv'], config, test_mode,
                    file_data.get('subtitle_file')
                )

                if test_mode:
                    print(f"\n✅ TEST MODE: File processed. Encoded file is in temp folder for inspection.")
                    break

            except subprocess.CalledProcessError as e:
                error_msg = str(e).split('\n')[0][:100]
                print(f"❌ Bitrate retry failed: {error_msg}")
                failure_logger.warning(f"{file.name} | Bitrate retry error: {error_msg}")
                consecutive_failures += 1
                logger.error(f"Bitrate retry failed for {file.name}: {e}")
                _cleanup_temp_files(temp_input, temp_output)

                if consecutive_failures >= max_consecutive:
                    print(f"\n⚠️  {max_consecutive} consecutive Phase 2 failures. Stopping retries.")
                    break
            except Exception as e:
                error_msg = str(e)[:100]
                print(f"❌ Unexpected error in Phase 2: {error_msg}")
                failure_logger.warning(f"{file.name} | Phase 2 error: {error_msg}")
                consecutive_failures += 1
                _cleanup_temp_files(temp_input, temp_output)
                if consecutive_failures >= max_consecutive:
                    print(f"\n⚠️  {max_consecutive} consecutive Phase 2 failures. Stopping retries.")
                    break

    print(f"\n{'='*60}")
    print("✅ Batch processing complete")
    logger.info("Batch processing complete")


def _save_successful_encoding(file, temp_input, temp_output, orig_size, out_size,
                              reduction_ratio, method, src_width, src_height, res_width, res_height,
                              file_cq, tracker_file, folder, is_tv, config=None, test_mode=False, subtitle_file=None):
    """Helper function to save successfully encoded files with [EHX] tag and clean up subtitle files.

    In test mode only a size summary is printed/logged; nothing is moved or
    deleted. Otherwise the encoded file is moved next to the original, one row
    is appended to the CSV tracker, and the processing copy, the original file
    and any subtitle file passed in are deleted (deletion is best-effort: a
    failure is reported as a warning, not raised).

    Args:
        file: Original source file.
        temp_input: Processing copy of the source inside the temp folder.
        temp_output: Encoded file inside the temp folder.
        orig_size: Original size in bytes.
        out_size: Encoded size in bytes.
        reduction_ratio: Ratio reported by the encoder, only used for display.
        method: "CQ" or "Bitrate".
        src_width, src_height: Source resolution.
        res_width, res_height: Target resolution.
        file_cq: CQ value used (recorded only when method is "CQ").
        tracker_file: CSV file that receives one row per conversion.
        folder: Input folder; its path components decide the tv/anime/movie type.
        is_tv: Not used by this function; kept for call compatibility.
        config: Not used by this function; kept for call compatibility.
        test_mode: If True, report sizes and return without touching any file.
        subtitle_file: Subtitle file to delete after a successful save, if any.
    """
    orig_size_mb = round(orig_size / 1e6, 2)
    proc_size_mb = round(out_size / 1e6, 2)
    # An empty or tiny source rounds to 0.0 MB; report 0% instead of dividing by zero.
    percentage = round(proc_size_mb / orig_size_mb * 100, 1) if orig_size_mb else 0.0

    # In test mode, show ratio and skip file move/cleanup
    if test_mode:
        print(f"\n{'='*60}")
        print(f"📊 TEST MODE RESULTS:")
        print(f"{'='*60}")
        print(f"Original:  {orig_size_mb} MB")
        print(f"Encoded:   {proc_size_mb} MB")
        print(f"Ratio:     {percentage}% ({reduction_ratio:.1%} reduction)")
        print(f"Method:    {method} (CQ={file_cq if method == 'CQ' else 'N/A'})")
        print(f"{'='*60}")
        print(f"📁 Encoded file location: {temp_output}")
        logger.info(f"TEST MODE - File: {file.name} | Ratio: {percentage}% | Method: {method}")
        return

    dest_file = file.parent / temp_output.name
    shutil.move(temp_output, dest_file)
    print(f"🚚 Moved {temp_output.name} → {dest_file.name}")
    logger.info(f"Moved {temp_output.name} → {dest_file.name}")

    # Classify file type based on folder: the first of "tv" / "anime" found
    # among the path components wins, and the component right after it is
    # recorded as the show name.
    folder_parts = [p.lower() for p in folder.parts]
    f_type, show = "movie", "N/A"
    for category in ("tv", "anime"):
        if category in folder_parts:
            category_index = folder_parts.index(category)
            f_type = category
            if len(folder.parts) > category_index + 1:
                show = folder.parts[category_index + 1]
            else:
                show = "Unknown"
            break

    # Get audio stream count for tracking (0 when the probe fails)
    try:
        audio_stream_count = len(get_audio_streams(temp_input))
    except Exception as e:
        logger.warning(f"Could not read audio streams for {temp_input.name}: {e}")
        audio_stream_count = 0

    # Format resolutions for tracking
    src_resolution = f"{src_width}x{src_height}"
    target_res = f"{res_width}x{res_height}"
    cq_str = str(file_cq) if method == "CQ" else "N/A"

    with open(tracker_file, "a", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow([
            f_type, show, dest_file.name, orig_size_mb, proc_size_mb, percentage,
            src_resolution, target_res, audio_stream_count, cq_str, method
        ])

    # Enhanced logging with all conversion details
    logger.info(f"\n✅ CONVERSION COMPLETE: {dest_file.name}")
    logger.info(f"  Type: {f_type.upper()} | Show: {show}")
    logger.info(f"  Size: {orig_size_mb}MB → {proc_size_mb}MB ({percentage}% of original, {100-percentage:.1f}% reduction)")
    logger.info(f"  Method: {method} | Status: SUCCESS")
    print(f"📝 Logged conversion: {dest_file.name} ({percentage}%), method={method}")

    try:
        temp_input.unlink()
        file.unlink()
        logger.info(f"Deleted original and processing copy for {file.name}")

        # Clean up subtitle file if it was embedded
        if subtitle_file and subtitle_file.exists():
            try:
                subtitle_file.unlink()
                print(f"🗑️  Removed embedded subtitle: {subtitle_file.name}")
                logger.info(f"Removed embedded subtitle: {subtitle_file.name}")
            except Exception as e:
                logger.warning(f"Could not delete subtitle file {subtitle_file.name}: {e}")
    except Exception as e:
        print(f"⚠️ Could not delete files: {e}")
        logger.warning(f"Could not delete files: {e}")