# conversion_project/core/process_manager.py

# core/process_manager.py
"""Main processing logic for batch transcoding."""

import csv
import glob
import os
import shutil
import subprocess
import time
from pathlib import Path

from core.audio_handler import get_audio_streams
from core.encode_engine import run_ffmpeg
from core.logger_helper import setup_logger, setup_failure_logger
from core.video_handler import get_source_resolution, get_source_bit_depth, determine_target_resolution

# Module-level loggers shared by every function in this file. Both helpers are
# handed the "logs" directory located two levels above this file (i.e. next to
# the "core" package). `logger` carries general progress messages;
# `failure_logger` receives one line per file that failed to encode.
logger = setup_logger(Path(__file__).parent.parent / "logs")
failure_logger = setup_failure_logger(Path(__file__).parent.parent / "logs")


def _cleanup_temp_files(temp_input: Path, temp_output: Path):
    """Best-effort removal of the staged input copy and the encoded output.

    Each path is handled independently: a failure to delete one of them is
    logged as a warning and never prevents the attempt on the other, and no
    exception from the deletion itself propagates to the caller.
    """
    for label, path in (("input", temp_input), ("output", temp_output)):
        try:
            if path.exists():
                path.unlink()
                logger.debug(f"Cleaned up temp {label}: {path.name}")
        except Exception as exc:
            logger.warning(f"Could not delete temp {label} {path.name}: {exc}")


def process_folder(folder: Path, cq: int, transcode_mode: str, resolution: str, config: dict, tracker_file: Path, test_mode: bool = False, audio_language: str = None, filter_audio: bool = None, audio_select: str = None, encoder: str = "hevc", strip_all_titles: bool = False):
    """
    Process all video files in folder with appropriate encoding settings.

    Phase 1 walks ``folder`` recursively, stages every matching file in the
    configured processing folder and encodes it. In "compression" mode a file
    whose CQ encode errors out or misses the size target is queued and retried
    with Bitrate mode in Phase 2. Each phase stops after three consecutive
    failures.

    Args:
        folder: Input folder path
        cq: CQ override value. If None, the per-content value from config["encode"]["cq"] is used.
        transcode_mode: "compression" (try CQ, retry failures with Bitrate), "cq" or "bitrate"
        resolution: Explicit resolution override ("480", "720", "1080", or None for smart)
        config: Configuration dictionary
        tracker_file: Path to CSV tracker file
        test_mode: If True, only encode first file and skip final move/cleanup
        audio_language: Optional language code to tag audio (e.g., 'eng', 'spa'). If None, no tagging applied.
        filter_audio: If True, show interactive audio selection prompt. If None, use config setting.
        audio_select: Pre-selected audio streams (comma-separated, e.g., "1,2"). Skips interactive prompt.
        encoder: Requested video encoder ("hevc" or "av1"). NOTE: the encoder is currently always
            auto-selected per file from the source bit depth (10-bit or higher -> "hevc",
            otherwise "av1"), so this value is not consulted.
        strip_all_titles: If True, strip all title metadata from all audio tracks.
    """
    if not folder.exists():
        print(f"❌ Folder not found: {folder}")
        logger.error(f"Folder not found: {folder}")
        return

    audio_config = config["audio"]
    bitrate_config = config["encode"]["fallback"]
    filters_config = config["encode"]["filters"]
    suffix = config["suffix"]
    extensions = config["extensions"]
    ignore_tags = config["ignore_tags"]
    reduction_ratio_threshold = config["reduction_ratio_threshold"]

    # Resolution logic: explicit arg takes precedence, else use smart defaults
    explicit_resolution = resolution  # Will be None if not specified

    # The scaling filter depends on which library (tv / anime / other) the folder lives in.
    filter_flags = filters_config.get("default", "lanczos")
    folder_lower = str(folder).lower()
    is_tv = "\\tv\\" in folder_lower or "/tv/" in folder_lower
    is_anime = "\\anime\\" in folder_lower or "/anime/" in folder_lower
    if is_tv:
        filter_flags = filters_config.get("tv", "bicubic")
    elif is_anime:
        filter_flags = filters_config.get("anime", filters_config.get("default", "lanczos"))

    processing_folder = Path(config["processing_folder"])
    processing_folder.mkdir(parents=True, exist_ok=True)

    # Subtitle settings do not change per file, so resolve them once.
    subtitle_config = config.get("general", {}).get("subtitles", {})
    subtitle_exts = []
    if subtitle_config.get("enabled", True):
        raw_exts = subtitle_config.get("extensions", ".vtt,.srt,.ass,.ssa,.sub").split(",")
        # Drop empty entries (an empty extension would glob "<stem>.*" and match
        # every sibling file) and make sure each one starts with a dot, which
        # Path.with_suffix() requires.
        subtitle_exts = [
            ext if ext.startswith(".") else f".{ext}"
            for ext in (raw.strip() for raw in raw_exts)
            if ext
        ]

    # Determine encoding mode
    is_smart_mode = transcode_mode == "compression"  # Try CQ first, then bitrate fallback
    is_forced_cq = transcode_mode == "cq"
    is_forced_bitrate = transcode_mode == "bitrate"

    # Track files for potential retry in smart mode
    failed_cq_files = []  # One retry record (dict) per CQ failure in compression mode
    consecutive_failures = 0
    max_consecutive = 3

    # Phase 1: Process files with initial mode strategy
    print(f"\n{'='*60}")
    if is_smart_mode:
        print("📋 MODE: Compression (Try CQ first, retry with Bitrate if needed)")
    elif is_forced_cq:
        print("📋 MODE: CQ (constant quality, skip failures, log them)")
    else:
        print("📋 MODE: Bitrate (bitrate mode only, skip failures, log them)")
    print(f"{'='*60}\n")

    skipped_count = 0
    for file in folder.rglob("*"):
        if file.suffix.lower() not in extensions:
            continue
        if any(tag.lower() in file.name.lower() for tag in ignore_tags):
            skipped_count += 1
            continue

        if skipped_count > 0:
            print(f"⏭️ Skipped {skipped_count} file(s)")
            logger.info(f"Skipped {skipped_count} file(s)")
            skipped_count = 0

        print("="*60)
        logger.info(f"Processing: {file.name}")
        print(f"📁 Processing: {file.name}")

        temp_input = (processing_folder / file.name).resolve()
        # Always output as .mkv carrying the configured suffix. Bound before the
        # try block so the error handler below always cleans up THIS file's
        # output path instead of an unbound or stale one.
        temp_output = (processing_folder / f"{file.stem}{suffix}.mkv").resolve()

        _stage_source_copy(file, temp_input)
        subtitle_files = _find_subtitle_files(file, subtitle_exts)

        try:
            # Detect source resolution and determine target resolution
            src_width, src_height = get_source_resolution(temp_input)
            res_width, res_height, target_resolution = determine_target_resolution(
                src_width, src_height, explicit_resolution
            )

            # Auto-select encoder based on source bit depth:
            # 10-bit or higher (including 12-bit) -> HEVC, 8-bit -> AV1.
            source_bit_depth = get_source_bit_depth(temp_input)
            if source_bit_depth >= 10:
                selected_encoder = "hevc"
                encoder_note = "auto-selected (10+ bit source)"
            else:
                selected_encoder = "av1"
                encoder_note = "auto-selected (8-bit source)"

            print(f"ℹ️  Encoder: {selected_encoder} ({encoder_note})")
            logger.info(f"Selected encoder: {selected_encoder} - Source bit depth: {source_bit_depth}-bit")

            # Log resolution decision
            if explicit_resolution:
                logger.info(f"Using explicitly specified resolution: {res_width}x{res_height}")
            else:
                if src_height > 1080:
                    print(f"⚠️  Source {src_width}x{src_height} is above 1080p. Scaling down to 1080p.")
                    logger.info(f"Source {src_width}x{src_height} detected. Scaling to 1080p.")
                elif src_height <= 720:
                    print(f"ℹ️  Source {src_width}x{src_height} is 720p or lower. Preserving resolution.")
                    logger.info(f"Source {src_width}x{src_height} (<=720p). Preserving source resolution.")
                else:
                    print(f"ℹ️  Source {src_width}x{src_height} is at or below 1080p. Preserving resolution.")
                    logger.info(f"Source {src_width}x{src_height} (<=1080p). Preserving source resolution.")

            # Set CQ based on content type, target resolution, and encoder
            if is_anime:
                cq_key = f"anime_{target_resolution}"
            elif is_tv:
                cq_key = f"tv_{target_resolution}"
            else:
                cq_key = f"movie_{target_resolution}"
            # Look up CQ from encoder-specific section; an explicit override wins.
            encoder_cq_config = config["encode"]["cq"].get(selected_encoder, {})
            content_cq = encoder_cq_config.get(cq_key, 32)
            file_cq = cq if cq is not None else content_cq

            # Only forced bitrate mode starts with Bitrate; everything else tries CQ first.
            method = "Bitrate" if is_forced_bitrate else "CQ"

            # Everything Phase 2 needs to retry this file, including the encoder
            # chosen for THIS file (it can differ from file to file).
            retry_record = {
                'file': file,
                'temp_input': temp_input,
                'temp_output': temp_output,
                'src_width': src_width,
                'src_height': src_height,
                'res_width': res_width,
                'res_height': res_height,
                'target_resolution': target_resolution,
                'file_cq': file_cq,
                'is_tv': is_tv,
                'subtitle_files': subtitle_files,
                'selected_encoder': selected_encoder,
            }

            # Determine audio_filter config (CLI arg overrides config file).
            # Built per file so each run_ffmpeg call gets its own dict in the CLI case.
            if filter_audio:
                # --filter-audio flag means: show interactive prompt
                audio_filter_config = {"enabled": True, "interactive": True}
                # If --audio-select provided, skip interactive and use pre-selected streams
                if audio_select:
                    audio_filter_config["preselected"] = audio_select
            else:
                # Use config file setting (if present)
                audio_filter_config = config.get("general", {}).get("audio_filter", {})

            # Attempt encoding
            try:
                orig_size, out_size, reduction_ratio = run_ffmpeg(
                    temp_input, temp_output, file_cq, res_width, res_height, src_width, src_height,
                    filter_flags, audio_config, method, bitrate_config, selected_encoder, subtitle_files, audio_language,
                    audio_filter_config, test_mode, strip_all_titles
                )

                # A ratio at or above the threshold means the encode missed the size target.
                if reduction_ratio >= reduction_ratio_threshold:
                    if is_smart_mode:
                        # Smart mode always starts with CQ: keep the temp files and queue a bitrate retry.
                        print(f"⚠️  CQ failed size target ({reduction_ratio:.1%}). Will retry with Bitrate.")
                        failure_logger.warning(f"{file.name} | CQ failed size target ({reduction_ratio:.1%})")
                        failed_cq_files.append(retry_record)
                        consecutive_failures += 1
                        if consecutive_failures >= max_consecutive:
                            print(f"\n⚠️  {max_consecutive} consecutive CQ failures. Moving to Phase 2: Bitrate retry.")
                            logger.warning(f"{max_consecutive} consecutive CQ failures. Moving to Phase 2.")
                            break  # Move to Phase 2
                        continue
                    else:
                        # Forced (or unrecognised) mode: skip the file. Using `else`
                        # guarantees a failed encode never falls through to the save step.
                        error_msg = f"Size threshold not met ({reduction_ratio:.1%})"
                        print(f"❌ {method} failed: {error_msg}")
                        failure_logger.warning(f"{file.name} | {method} failed: {error_msg}")
                        _cleanup_temp_files(temp_input, temp_output)
                        consecutive_failures += 1
                        if consecutive_failures >= max_consecutive:
                            print(f"\n❌ {max_consecutive} consecutive failures in forced {method} mode. Stopping.")
                            logger.error(f"{max_consecutive} consecutive failures. Stopping process.")
                            break
                        continue

                # Encoding succeeded - reset failure counter
                consecutive_failures = 0

            except subprocess.CalledProcessError as e:
                # FFmpeg execution failed
                error_msg = str(e).split('\n')[0][:100]  # First 100 chars of error

                if is_smart_mode:
                    # In smart mode, log and retry with bitrate
                    print("❌ CQ encode error. Will retry with Bitrate.")
                    failure_logger.warning(f"{file.name} | CQ error: {error_msg}")
                    failed_cq_files.append(retry_record)
                    consecutive_failures += 1
                    if consecutive_failures >= max_consecutive:
                        print(f"\n⚠️  {max_consecutive} consecutive CQ failures. Moving to Phase 2: Bitrate retry.")
                        logger.warning(f"{max_consecutive} consecutive CQ failures. Moving to Phase 2.")
                        break
                    continue
                else:
                    # Forced (or unrecognised) mode: skip and log
                    print(f"❌ {method} encode failed: {error_msg}")
                    failure_logger.warning(f"{file.name} | {method} error: {error_msg}")
                    _cleanup_temp_files(temp_input, temp_output)
                    consecutive_failures += 1
                    if consecutive_failures >= max_consecutive:
                        print(f"\n❌ {max_consecutive} consecutive failures in forced {method} mode. Stopping.")
                        logger.error(f"{max_consecutive} consecutive failures. Stopping process.")
                        break
                    continue

            # If we get here, encoding succeeded - save file and log
            _save_successful_encoding(
                file, temp_input, temp_output, orig_size, out_size,
                reduction_ratio, method, src_width, src_height, res_width, res_height,
                file_cq, tracker_file, folder, is_tv, suffix, config, test_mode, subtitle_files
            )

            # In test mode, stop after first successful file
            if test_mode:
                print("\n✅ TEST MODE: File processed. Encoded file is in temp folder for inspection.")
                break

        except Exception as e:
            # Unexpected error (probing, encoding or saving): log it, drop this
            # file's temp files and move on unless failures keep piling up.
            error_msg = str(e)[:100]
            print(f"❌ Unexpected error: {error_msg}")
            failure_logger.warning(f"{file.name} | Unexpected error: {error_msg}")
            consecutive_failures += 1
            logger.error(f"Unexpected error processing {file.name}: {e}")
            _cleanup_temp_files(temp_input, temp_output)

            if consecutive_failures >= max_consecutive:
                if is_forced_cq or is_forced_bitrate:
                    print(f"\n❌ {max_consecutive} consecutive failures. Stopping.")
                else:
                    print(f"\n⚠️  {max_consecutive} consecutive failures. Moving to Phase 2.")
                break

    # Report ignored files that came after the last processed one (or were the
    # only files found); inside the loop they are only reported once another
    # file gets processed.
    if skipped_count > 0:
        print(f"⏭️ Skipped {skipped_count} file(s)")
        logger.info(f"Skipped {skipped_count} file(s)")

    # Phase 2: Retry failed CQ files with Bitrate mode (smart mode only)
    if is_smart_mode and failed_cq_files:
        print(f"\n{'='*60}")
        print(f"📋 PHASE 2: Retrying {len(failed_cq_files)} failed files with Bitrate mode")
        print(f"{'='*60}\n")

        consecutive_failures = 0

        for file_data in failed_cq_files:
            file = file_data['file']
            temp_input = file_data['temp_input']
            temp_output = file_data['temp_output']

            try:
                print(f"🔄 Retrying: {file.name} with Bitrate")
                logger.info(f"Phase 2 Retry: {file.name} with Bitrate mode")

                # Clean up old output if it exists
                if temp_output.exists():
                    temp_output.unlink()

                # Retry with bitrate, using the encoder selected for this file in Phase 1.
                # NOTE(review): the audio filter config is passed as None here, so an
                # audio selection made in Phase 1 is presumably not re-applied - confirm
                # against run_ffmpeg.
                orig_size, out_size, reduction_ratio = run_ffmpeg(
                    temp_input, temp_output, file_data['file_cq'],
                    file_data['res_width'], file_data['res_height'],
                    file_data['src_width'], file_data['src_height'],
                    filter_flags, audio_config, "Bitrate", bitrate_config, file_data['selected_encoder'],
                    file_data.get('subtitle_files'), audio_language, None, test_mode, strip_all_titles
                )

                # Check if bitrate also failed
                if reduction_ratio >= reduction_ratio_threshold:
                    print(f"⚠️  Bitrate also failed size target ({reduction_ratio:.1%}). Skipping.")
                    failure_logger.warning(f"{file.name} | Bitrate retry also failed ({reduction_ratio:.1%})")
                    consecutive_failures += 1
                    _cleanup_temp_files(temp_input, temp_output)
                    if consecutive_failures >= max_consecutive:
                        print(f"\n⚠️  {max_consecutive} consecutive Phase 2 failures. Stopping retries.")
                        break
                    continue

                # Bitrate succeeded. test_mode is forwarded so a test run never
                # moves the output over the library or deletes the original.
                consecutive_failures = 0
                _save_successful_encoding(
                    file, temp_input, temp_output,
                    orig_size, out_size, reduction_ratio, "Bitrate",
                    file_data['src_width'], file_data['src_height'],
                    file_data['res_width'], file_data['res_height'],
                    file_data['file_cq'], tracker_file,
                    folder, file_data['is_tv'], suffix, config, test_mode,
                    file_data.get('subtitle_files')
                )

            except subprocess.CalledProcessError as e:
                error_msg = str(e).split('\n')[0][:100]
                print(f"❌ Bitrate retry failed: {error_msg}")
                failure_logger.warning(f"{file.name} | Bitrate retry error: {error_msg}")
                consecutive_failures += 1
                logger.error(f"Bitrate retry failed for {file.name}: {e}")
                _cleanup_temp_files(temp_input, temp_output)

                if consecutive_failures >= max_consecutive:
                    print(f"\n⚠️  {max_consecutive} consecutive Phase 2 failures. Stopping retries.")
                    break
            except Exception as e:
                error_msg = str(e)[:100]
                print(f"❌ Unexpected error in Phase 2: {error_msg}")
                failure_logger.warning(f"{file.name} | Phase 2 error: {error_msg}")
                consecutive_failures += 1
                _cleanup_temp_files(temp_input, temp_output)
                if consecutive_failures >= max_consecutive:
                    print(f"\n⚠️  {max_consecutive} consecutive Phase 2 failures. Stopping retries.")
                    break

    print(f"\n{'='*60}")
    print("✅ Batch processing complete")
    logger.info("Batch processing complete")


def _stage_source_copy(file: Path, temp_input: Path) -> None:
    """Make sure a complete copy of *file* exists at *temp_input*, reusing a same-size copy if present."""
    if temp_input.exists() and os.access(temp_input, os.R_OK):
        source_size = file.stat().st_size
        temp_size = temp_input.stat().st_size

        # A copy of identical size is treated as complete and reused.
        if source_size == temp_size:
            print("✓ Found existing copy in processing folder (verified complete)")
            logger.info(f"File already in processing: {file.name} ({temp_size/1e6:.2f} MB verified complete)")
        else:
            # File exists but incomplete - recopy
            print(f"⚠️  Existing copy incomplete ({temp_size/1e6:.2f} MB vs {source_size/1e6:.2f} MB source). Re-copying...")
            logger.warning(f"Incomplete copy detected for {file.name}. Re-copying.")
            shutil.copy2(file, temp_input)
            logger.info(f"Re-copied {file.name} → {temp_input.name}")
    else:
        # File doesn't exist or not accessible - copy it
        shutil.copy2(file, temp_input)
        logger.info(f"Copied {file.name} → {temp_input.name}")

    # Give the copy a moment to become readable; wait between checks so the
    # retries are not three identical back-to-back tests.
    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        if temp_input.exists() and os.access(temp_input, os.R_OK):
            return
        if attempt < max_attempts:
            time.sleep(1)
    # Processing continues regardless; the caller's error handling deals with
    # an input that really cannot be read.
    logger.warning(f"Processing copy not readable after {max_attempts} checks: {temp_input.name}")


def _find_subtitle_files(file: Path, subtitle_exts: list) -> list:
    """Return subtitle files next to *file* that share its base name, without duplicates."""
    found = []
    seen = set()  # str(path) of subtitles already collected
    # Escape glob metacharacters so names such as "[Group] Show - 01" are
    # matched literally instead of being read as a character class.
    escaped_stem = glob.escape(file.stem)

    for ext in subtitle_exts:
        candidates = []
        # Exact match first (movie.vtt) ...
        exact = file.with_suffix(ext)
        if exact.exists():
            candidates.append(exact)
        # ... then language/flag variants (movie.en.vtt, movie.en.forced.srt, ...)
        candidates.extend(sorted(file.parent.glob(f"{escaped_stem}.*{ext}")))

        for candidate in candidates:
            key = str(candidate)
            if key in seen:
                continue
            seen.add(key)
            found.append(candidate)
            print(f"📝 Found subtitle: {candidate.name}")
            logger.info(f"Found subtitle file: {candidate.name}")

    return found


def _save_successful_encoding(file, temp_input, temp_output, orig_size, out_size,
                              reduction_ratio, method, src_width, src_height, res_width, res_height,
                              file_cq, tracker_file, folder, is_tv, suffix, config=None, test_mode=False, subtitle_files=None):
    """Finalize a successful encode: move it next to the source, record it, and clean up.

    In test mode only a size summary is printed and logged; nothing is moved,
    recorded or deleted. Otherwise the encoded file is moved into the source
    file's directory, a row is appended to the tracker CSV, and the processing
    copy, the original (unless it lives under a "Featurettes" folder) and any
    subtitle files passed in are deleted.

    Args:
        file: Original source file.
        temp_input: Copy of the source in the processing folder.
        temp_output: Encoded file in the processing folder.
        orig_size: Source size in bytes.
        out_size: Encoded size in bytes.
        reduction_ratio: Size ratio reported by the encoder step (only displayed here).
        method: "CQ" or "Bitrate".
        src_width, src_height: Source resolution.
        res_width, res_height: Target resolution.
        file_cq: CQ value used (recorded only when method is "CQ").
        tracker_file: CSV file the result row is appended to.
        folder: Batch root folder (kept for interface compatibility; not used).
        is_tv: Kept for interface compatibility; not used.
        suffix: Tag appended to encoded file names; removed again for Featurettes.
        config: Kept for interface compatibility; not used.
        test_mode: If True, report sizes only and return.
        subtitle_files: Subtitle files to delete after a successful save.

    Raises:
        OSError: If moving the encoded file or writing the tracker row fails.
    """
    orig_size_mb = round(orig_size / 1e6, 2)
    proc_size_mb = round(out_size / 1e6, 2)
    # A source small enough to round to 0.0 MB must not abort the save with a ZeroDivisionError.
    percentage = round(proc_size_mb / orig_size_mb * 100, 1) if orig_size_mb else 0.0

    # In test mode, show ratio and skip file move/cleanup
    if test_mode:
        print(f"\n{'='*60}")
        print("📊 TEST MODE RESULTS:")
        print(f"{'='*60}")
        print(f"Original:  {orig_size_mb} MB")
        print(f"Encoded:   {proc_size_mb} MB")
        print(f"Ratio:     {percentage}% ({reduction_ratio:.1%} reduction)")
        print(f"Method:    {method} (CQ={file_cq if method == 'CQ' else 'N/A'})")
        print(f"{'='*60}")
        print(f"📁 Encoded file location: {temp_output}")
        logger.info(f"TEST MODE - File: {file.name} | Ratio: {percentage}% | Method: {method}")
        return

    # Check if file is in a Featurettes folder - if so, remove suffix from destination filename
    path_parts = file.parent.parts
    folder_parts = [p.lower() for p in path_parts]
    is_featurette = "featurettes" in folder_parts

    if is_featurette:
        # Remove suffix from temp_output.name for Featurettes
        output_name = temp_output.name
        if suffix in output_name:
            output_name = output_name.replace(suffix, "")
        dest_file = file.parent / output_name
    else:
        dest_file = file.parent / temp_output.name

    shutil.move(temp_output, dest_file)
    print(f"🚚 Moved {temp_output.name} → {dest_file.name}")
    logger.info(f"Moved {temp_output.name} → {dest_file.name}")

    # Classify file type from the source file's own directory. The show name is
    # the path component right after "tv"/"anime"; it is read from the same
    # parts tuple the index was computed on, so it is found even when the batch
    # folder sits at or above the library root.
    if "tv" in folder_parts:
        f_type = "tv"
        tv_index = folder_parts.index("tv")
        show = path_parts[tv_index + 1] if len(path_parts) > tv_index + 1 else "Unknown"
    elif "anime" in folder_parts:
        f_type = "anime"
        anime_index = folder_parts.index("anime")
        show = path_parts[anime_index + 1] if len(path_parts) > anime_index + 1 else "Unknown"
    else:
        f_type = "movie"
        show = "N/A"

    # Get audio stream count for tracking (best effort; 0 when probing fails)
    try:
        audio_streams = get_audio_streams(temp_input)
        audio_stream_count = len(audio_streams)
    except Exception as e:
        # Narrowed from a bare except so Ctrl-C / SystemExit still propagate.
        logger.warning(f"Could not count audio streams for {file.name}: {e}")
        audio_stream_count = 0

    # Format resolutions for tracking
    src_resolution = f"{src_width}x{src_height}"
    target_res = f"{res_width}x{res_height}"
    cq_str = str(file_cq) if method == "CQ" else "N/A"

    with open(tracker_file, "a", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow([
            f_type, show, dest_file.name, orig_size_mb, proc_size_mb, percentage,
            src_resolution, target_res, audio_stream_count, cq_str, method
        ])

    # Enhanced logging with all conversion details
    logger.info(f"\n✅ CONVERSION COMPLETE: {dest_file.name}")
    logger.info(f"  Type: {f_type.upper()} | Show: {show}")
    logger.info(f"  Size: {orig_size_mb}MB → {proc_size_mb}MB ({percentage}% of original, {100-percentage:.1f}% reduction)")
    logger.info(f"  Method: {method} | Status: SUCCESS")
    print(f"📝 Logged conversion: {dest_file.name} ({percentage}%), method={method}")

    try:
        temp_input.unlink()

        # Only delete original file if NOT in Featurettes folder (Featurettes are re-encoded in place)
        if not is_featurette:
            file.unlink()
            logger.info(f"Deleted original and processing copy for {file.name}")
        else:
            logger.info(f"Featurettes file preserved at origin: {file.name}")

        # Clean up subtitle files if they exist; one failure does not stop the rest.
        if subtitle_files:
            for sub_file in subtitle_files:
                if sub_file.exists():
                    try:
                        sub_file.unlink()
                        print(f"🗑️  Removed subtitle: {sub_file.name}")
                        logger.info(f"Removed subtitle: {sub_file.name}")
                    except Exception as e:
                        logger.warning(f"Could not delete subtitle file {sub_file.name}: {e}")
    except Exception as e:
        print(f"⚠️ Could not delete files: {e}")
        logger.warning(f"Could not delete files: {e}")