# conversion_project/main.py

#!/usr/bin/env python3
import argparse
import csv
import json
import os
import re
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path

from core.config_helper import load_config_xml
from core.logger_helper import setup_logger

# =============================
# Setup logger
# =============================
# Log output is directed to a "logs" directory that sits beside this script.
LOG_FOLDER = Path(__file__).parent / "logs"
logger = setup_logger(LOG_FOLDER)

# =============================
# Tracker CSV
# =============================
# The tracker lives next to this script. It is seeded with a header row the
# first time the module is loaded; existing files are left untouched.
TRACKER_FILE = Path(__file__).parent / "conversion_tracker.csv"
if not TRACKER_FILE.exists():
    with TRACKER_FILE.open("w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(
            [
                "type",
                "show",
                "filename",
                "original_size_MB",
                "processed_size_MB",
                "percentage",
                "method",
            ]
        )

# =============================
# AUDIO BUCKET LOGIC
# =============================
def calculate_stream_bitrate(input_file: Path, stream_index: int) -> int:
    """
    Measure the real bitrate of one audio stream by stream-copying it to a temp file.

    The stream is extracted with ``-c copy`` and the bitrate is read from the
    final progress sample ffmpeg prints on stderr. If no usable sample is
    found, the bitrate is computed from the extracted file's size and the
    duration reported by ffprobe.

    Args:
        input_file: Media file to inspect.
        stream_index: Zero-based position among the file's audio streams
            (used as ``0:a:<stream_index>``), not the absolute stream index.

    Returns:
        Bitrate in kbps, or 0 when extraction or measurement fails so the
        caller can fall back to container metadata.
    """
    # Matroska audio accepts practically any codec. An ".aac" (ADTS) target
    # makes "-c copy" fail for AC-3, E-AC-3, DTS, Opus, ... source streams.
    temp_fd, temp_audio_path = tempfile.mkstemp(suffix=".mka")
    os.close(temp_fd)

    try:
        # Step 1: Extract audio stream with -c copy (no re-encode).
        extract_cmd = [
            "ffmpeg", "-y", "-i", str(input_file),
            "-map", f"0:a:{stream_index}",
            "-c", "copy",
            temp_audio_path
        ]
        logger.debug(f"Extracting audio stream {stream_index} to temporary file for bitrate calculation...")
        result = subprocess.run(extract_cmd, capture_output=True, text=True, check=True)

        # Step 2: Parse bitrate from ffmpeg's stderr, e.g.
        # "size=  352162KiB time=01:45:03.05 bitrate= 457.7kbits/s".
        # ffmpeg prints this progress line repeatedly while it runs; only the
        # last sample covers the whole stream, so earlier ones are ignored.
        bitrate_kbps = 0
        samples = re.findall(r"bitrate=\s*(\d+(?:\.\d+)?)kbits/s", result.stderr)
        if samples:
            bitrate_kbps = int(float(samples[-1]))
            logger.debug(f"Stream {stream_index}: Extracted bitrate from ffmpeg output: {bitrate_kbps} kbps")

        # If we couldn't parse bitrate from output, fall back to calculation
        if bitrate_kbps == 0:
            logger.debug(f"Stream {stream_index}: Could not parse bitrate from ffmpeg output, calculating from file size...")
            file_size_bytes = os.path.getsize(temp_audio_path)

            # Get duration using ffprobe
            duration_cmd = [
                "ffprobe", "-v", "error",
                "-show_entries", "format=duration",
                "-of", "default=noprint_wrappers=1:nokey=1",
                temp_audio_path
            ]
            duration_result = subprocess.run(duration_cmd, capture_output=True, text=True, check=True)
            duration_seconds = float(duration_result.stdout.strip())
            if duration_seconds <= 0:
                # Avoid a division by zero; handled by the generic fallback below.
                raise ValueError(f"non-positive duration reported: {duration_seconds}")

            bitrate_kbps = int((file_size_bytes * 8) / duration_seconds / 1000)
            logger.debug(f"Stream {stream_index}: Calculated bitrate from file: {bitrate_kbps} kbps")

        return bitrate_kbps

    except Exception as e:
        # Deliberate best-effort: any failure here must not abort the encode.
        logger.warning(f"Failed to calculate bitrate for stream {stream_index}: {e}. Will fall back to metadata.")
        return 0

    finally:
        # Clean up temporary audio file
        try:
            if os.path.exists(temp_audio_path):
                os.remove(temp_audio_path)
                logger.debug(f"Deleted temporary audio file: {temp_audio_path}")
        except OSError as e:
            logger.warning(f"Could not delete temporary file {temp_audio_path}: {e}")


def choose_audio_bitrate(channels: int, bitrate_kbps: int, audio_config: dict, is_1080_class: bool) -> tuple:
    """
    Decide how an audio stream should be handled.

    Returns a ``(codec, target_bitrate_bps)`` pair where codec is either
    ``"aac"`` (re-encode at the returned bits/sec) or ``"copy"`` (keep the
    original stream; the bitrate is then 0).

    Decision table (thresholds come from ``audio_config``, stored in bits/sec):
      Six or more channels:
        - detected bitrate below ``multi_channel.medium`` → AAC at ``multi_channel.low``
        - otherwise → AAC at ``multi_channel.medium``

      Fewer than six channels (treated as stereo):
        - 1080-class target: compare against ``stereo.high``
        - otherwise: compare against ``stereo.medium``
        - detected bitrate above that ceiling → AAC at the ceiling
        - at or below the ceiling → copy
    """
    if channels >= 6:
        surround = audio_config["multi_channel"]
        low_bps = surround["low"]
        medium_bps = surround["medium"]
        target_bps = low_bps if bitrate_kbps < (medium_bps / 1000) else medium_bps
        return ("aac", target_bps)

    # Everything else is handled with the stereo ceiling for the resolution class.
    tier = "high" if is_1080_class else "medium"
    ceiling_bps = audio_config["stereo"][tier]
    if bitrate_kbps > (ceiling_bps / 1000):
        return ("aac", ceiling_bps)
    return ("copy", 0)

# =============================
# PATH NORMALIZATION
# =============================
def normalize_path_for_service(local_path: str, path_mappings: dict) -> str:
    """
    Translate a local (Windows-style) path into the form used by the service.

    The first mapping whose key is a case-insensitive prefix of ``local_path``
    is applied: only that leading prefix is swapped for the mapped value, and
    all backslashes in the result become forward slashes. When no mapping
    matches, only the slash conversion is performed.

    Args:
        local_path: Path as seen on the local machine.
        path_mappings: Mapping of local prefix -> service prefix, tried in
            iteration order. Empty keys are ignored.

    Returns:
        The converted path using forward slashes.
    """
    for win_path, linux_path in path_mappings.items():
        if not win_path:
            # An empty prefix would match every path; skip it.
            continue
        # Compare the same number of characters on both sides so the slice
        # below always removes exactly the matched prefix, whatever its case.
        prefix = local_path[:len(win_path)]
        if prefix.lower() == win_path.lower():
            remainder = local_path[len(win_path):]
            return (linux_path + remainder).replace("\\", "/")
    return local_path.replace("\\", "/")

# =============================
# AUDIO STREAMS DETECTION
# =============================
def get_source_resolution(input_file: Path) -> tuple:
    """
    Get the resolution of the first video stream via ffprobe.

    The probe output is requested as JSON and read by key, so width and
    height cannot be swapped or shifted by unexpected extra output lines.

    Args:
        input_file: Media file to probe.

    Returns:
        ``(width, height)`` in pixels. Falls back to ``(1920, 1080)`` when
        ffprobe fails or reports no usable video stream.
    """
    try:
        cmd = [
            "ffprobe", "-v", "error",
            "-select_streams", "v:0",
            "-show_entries", "stream=width,height",
            "-of", "json",
            str(input_file)
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        video = json.loads(result.stdout)["streams"][0]
        width = int(video["width"])
        height = int(video["height"])
        logger.info(f"Source resolution detected: {width}x{height}")
        return (width, height)
    except Exception as e:
        # Best-effort probe: a missing ffprobe, a failed run, or a file
        # without a video stream all end up on the same default.
        logger.warning(f"Failed to detect source resolution: {e}. Defaulting to 1920x1080")
        return (1920, 1080)


def get_audio_streams(input_file: Path):
    """
    Detect audio streams and measure a bitrate for each one.

    Every audio stream is probed with ffprobe, then its bitrate is measured
    with ``calculate_stream_bitrate``. When that measurement returns 0, the
    container's ``bit_rate`` metadata is used, and 160 kbps if that is
    missing too.

    Args:
        input_file: Media file to inspect.

    Returns:
        List of tuples
        ``(index, channels, bitrate_kbps, language, metadata_bitrate_kbps)``
        where ``index`` is the stream index reported by ffprobe and
        ``metadata_bitrate_kbps`` is 0 when the container carries no value.

    Raises:
        subprocess.CalledProcessError: If ffprobe exits with an error.
    """
    cmd = [
        "ffprobe","-v","error","-select_streams","a",
        "-show_entries","stream=index,channels,bit_rate,tags=language",
        "-of","json", str(input_file)
    ]
    # check=True: a failed probe is reported as a CalledProcessError instead
    # of an opaque JSON decode error on empty output.
    result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    data = json.loads(result.stdout)
    streams = []

    # stream_num is the position among audio streams (what "0:a:N" expects);
    # s["index"] is the stream index reported by ffprobe.
    for stream_num, s in enumerate(data.get("streams", [])):
        index = s["index"]
        channels = s.get("channels", 2)
        src_lang = s.get("tags", {}).get("language", "und")
        try:
            bit_rate_meta = int(s.get("bit_rate") or 0)
        except (TypeError, ValueError):
            # Non-numeric metadata is treated the same as missing metadata.
            bit_rate_meta = 0
        meta_bitrate_kbps = bit_rate_meta // 1000

        # Calculate robust bitrate by extracting the audio stream
        calculated_bitrate_kbps = calculate_stream_bitrate(input_file, stream_num)

        # If calculation failed, fall back to metadata
        if calculated_bitrate_kbps == 0:
            calculated_bitrate_kbps = meta_bitrate_kbps if bit_rate_meta else 160
            logger.info(f"Stream {index}: Using fallback bitrate {calculated_bitrate_kbps} kbps")

        streams.append((index, channels, calculated_bitrate_kbps, src_lang, meta_bitrate_kbps))

    return streams


# =============================
# FFmpeg ENCODE
# =============================
def run_ffmpeg(input_file: Path, output_file: Path, cq: int, scale_width: int, scale_height: int,
               filter_flags: str, audio_config: dict, method: str, bitrate_config: dict):
    """
    Encode one file with av1_nvenc, handling each audio stream individually.

    Video is scaled to fit inside ``scale_width`` x ``scale_height`` and
    encoded either in constant-quality mode (``method == "CQ"``) or with the
    bitrate/maxrate/bufsize values from ``bitrate_config`` (any other method).
    Each audio stream is copied or re-encoded according to
    ``choose_audio_bitrate``; subtitles are copied.

    Args:
        input_file: Source media file.
        output_file: Destination file written by ffmpeg (overwritten if present).
        cq: Constant-quality value, used only when ``method`` is ``"CQ"``.
        scale_width: Target width of the scaling box.
        scale_height: Target height of the scaling box.
        filter_flags: Scaler algorithm passed as ``flags=`` to the scale filter.
        audio_config: Audio bitrate thresholds passed to ``choose_audio_bitrate``.
        method: ``"CQ"`` for constant quality; anything else selects bitrate mode.
        bitrate_config: Per-resolution ``bitrate_*``/``maxrate_*``/``bufsize_*`` values.

    Returns:
        ``(orig_size, out_size, reduction_ratio)`` with sizes in bytes and
        ``reduction_ratio = out_size / orig_size``.

    Raises:
        subprocess.CalledProcessError: If the ffmpeg encode exits with an error.
    """
    # Get source resolution
    src_width, src_height = get_source_resolution(input_file)

    streams = get_audio_streams(input_file)

    # The resolution class drives both the fallback video bitrate keys and
    # the audio decisions, so compute it once.
    is_1080_class = scale_height >= 1080 or scale_width >= 1920
    res_key = "1080" if is_1080_class else "720"
    use_cq = method == "CQ"
    if not use_cq:
        vb = bitrate_config.get(f"bitrate_{res_key}", "900k")
        maxrate = bitrate_config.get(f"maxrate_{res_key}", "1250k")
        bufsize = bitrate_config.get(f"bufsize_{res_key}", "1800k")

    # Log comprehensive encode settings
    header = "\n🧩 ENCODE SETTINGS"
    logger.info(header)
    print(" ")

    logger.info("  Video:")
    logger.info(f"    • Source Resolution: {src_width}x{src_height}")
    logger.info(f"    • Target Resolution: {scale_width}x{scale_height}")
    logger.info("    • Encoder: av1_nvenc (preset p1, pix_fmt p010le)")
    logger.info(f"    • Scale Filter: {filter_flags}")
    logger.info(f"    • Encode Method: {method}")
    if use_cq:
        logger.info(f"    • CQ Value: {cq}")
    else:
        logger.info(f"    • Bitrate: {vb}, Max: {maxrate}")

    logger.info(f"  Audio Streams ({len(streams)} detected):")
    print(" ")

    # Decide once per stream; the same decision is reported and then turned
    # into ffmpeg options, so the log cannot drift from the command line.
    audio_plan = []
    for (index, channels, avg_bitrate, src_lang, _meta_bitrate) in streams:
        # Normalize to 2ch or 6ch output
        output_channels = 6 if is_1080_class and channels >= 6 else 2
        codec, br = choose_audio_bitrate(output_channels, avg_bitrate, audio_config, is_1080_class)
        audio_plan.append((codec, br, output_channels))

        if codec == "copy":
            action = "COPY (preserve)"
            bitrate_display = f"{avg_bitrate}kbps"
        else:
            action = "ENCODE"
            bitrate_display = f"{br/1000:.0f}kbps"

        line = f"    - Stream #{index}: {channels}ch→{output_channels}ch | Lang: {src_lang} | Detected: {avg_bitrate}kbps | Action: {action} | Target: {bitrate_display}"
        print(line)
        logger.info(line)

    cmd = ["ffmpeg","-y","-i",str(input_file),
           "-vf",f"scale={scale_width}:{scale_height}:flags={filter_flags}:force_original_aspect_ratio=decrease",
           "-map","0:v","-map","0:a","-map","0:s?",
           "-c:v","av1_nvenc","-preset","p1","-pix_fmt","p010le"]

    if use_cq:
        cmd += ["-cq", str(cq)]
    else:
        # Use bitrate config (fallback mode)
        cmd += ["-b:v", vb, "-maxrate", maxrate, "-bufsize", bufsize]

    for i, (codec, br, output_channels) in enumerate(audio_plan):
        if codec == "copy":
            # Preserve original audio
            # NOTE(review): a copied stream keeps its source channel count even
            # when the log line above shows a different output count — confirm
            # whether such streams should be re-encoded instead.
            cmd += [f"-c:a:{i}", "copy"]
        else:
            # Re-encode with target bitrate. Every option uses the "a:<i>"
            # specifier: a bare "-ac:<i>" addresses the i-th output stream of
            # ANY type, which here is the video stream or the wrong audio stream.
            cmd += [
                f"-c:a:{i}", codec,
                f"-b:a:{i}", str(br),
                f"-ac:a:{i}", str(output_channels),
                f"-channel_layout:a:{i}", "5.1" if output_channels == 6 else "stereo"
            ]

    cmd += ["-c:s","copy",str(output_file)]

    print(f"\n🎬 Running {method} encode: {output_file.name}")
    logger.info(f"Running {method} encode: {output_file.name}")

    subprocess.run(cmd, check=True)

    orig_size = input_file.stat().st_size
    out_size = output_file.stat().st_size
    reduction_ratio = out_size / orig_size

    # Log comprehensive results
    logger.info("\n📊 ENCODE RESULTS:")
    logger.info(f"  Original Size: {orig_size/1e6:.2f} MB")
    logger.info(f"  Encoded Size:  {out_size/1e6:.2f} MB")
    logger.info(f"  Reduction:     {reduction_ratio:.1%} of original ({(1-reduction_ratio):.1%} saved)")
    logger.info(f"  Resolution:    {src_width}x{src_height} → {scale_width}x{scale_height}")
    logger.info(f"  Audio Streams: {len(streams)} streams processed")

    msg = f"📦 Original: {orig_size/1e6:.2f} MB → Encoded: {out_size/1e6:.2f} MB ({reduction_ratio:.1%} of original)"
    print(msg)

    return orig_size, out_size, reduction_ratio

# =============================
# PROCESS FOLDER
# =============================
def _resolve_target_resolution(explicit_resolution, src_width: int, src_height: int) -> tuple:
    """Return ``(resolution_label, width, height)`` for the encode target."""
    if explicit_resolution:
        # User explicitly specified resolution - always use it
        forced = {"1080": (1920, 1080), "720": (1280, 720)}
        res_width, res_height = forced.get(explicit_resolution, (854, 480))  # anything else is 480
        logger.info(f"Using explicitly specified resolution: {res_width}x{res_height}")
        return explicit_resolution, res_width, res_height

    if src_height > 1080:
        # Scale down anything above 1080p to 1080p
        print(f"⚠️  Source {src_width}x{src_height} is above 1080p. Scaling down to 1080p.")
        logger.info(f"Source {src_width}x{src_height} detected. Scaling to 1080p.")
        return "1080", 1920, 1080

    # Preserve source resolution; the label only selects the quality settings.
    if src_height <= 720:
        print(f"ℹ️  Source {src_width}x{src_height} is 720p or lower. Preserving resolution.")
        logger.info(f"Source {src_width}x{src_height} (<=720p). Preserving source resolution.")
        return "720", src_width, src_height

    print(f"ℹ️  Source {src_width}x{src_height} is at or below 1080p. Preserving resolution.")
    logger.info(f"Source {src_width}x{src_height} (<=1080p). Preserving source resolution.")
    return "1080", src_width, src_height


def _classify_folder(folder: Path) -> tuple:
    """Return ``(type, show)`` for the tracker based on the folder's path parts."""
    parts = folder.parts
    lowered = [p.lower() for p in parts]
    for label in ("tv", "anime"):
        if label in lowered:
            pos = lowered.index(label)
            show = parts[pos + 1] if len(parts) > pos + 1 else "Unknown"
            return label, show
    return "movie", "N/A"


def process_folder(folder: Path, cq: int, transcode_mode: str, resolution: str, config: dict):
    """
    Encode every matching video under ``folder`` and record the results.

    For each file whose extension is listed in ``config["extensions"]`` and
    whose name contains none of ``config["ignore_tags"]``:
    copy it to the processing folder, encode it, move the result next to the
    original, append a row to the tracker CSV, then delete the original and
    the working copy.

    A CQ encode that does not get below ``reduction_ratio_threshold`` is
    retried in Bitrate mode, and all later files use Bitrate mode as well.
    The batch stops at the first ffmpeg failure or when a Bitrate encode
    also misses the size target.

    Args:
        folder: Root folder that is searched recursively.
        cq: CQ override, or None to use the configured per-content value.
        transcode_mode: ``"bitrate"`` starts in Bitrate mode; anything else starts in CQ mode.
        resolution: ``"480"``, ``"720"``, ``"1080"`` to force a target, or None to derive it from the source.
        config: Parsed configuration (audio, encode, suffix, extensions, ignore_tags, ...).
    """
    if not folder.exists():
        print(f"❌ Folder not found: {folder}")
        logger.error(f"Folder not found: {folder}")
        return

    audio_config = config["audio"]
    bitrate_config = config["encode"]["fallback"]
    filters_config = config["encode"]["filters"]
    suffix = config["suffix"]
    extensions = config["extensions"]
    ignore_tags = config["ignore_tags"]
    reduction_ratio_threshold = config["reduction_ratio_threshold"]

    # Resolution logic: explicit arg takes precedence, else use smart defaults
    explicit_resolution = resolution  # Will be None if not specified

    filter_flags = filters_config.get("default","lanczos")
    folder_lower = str(folder).lower()
    is_tv = "\\tv\\" in folder_lower or "/tv/" in folder_lower
    if is_tv:
        filter_flags = filters_config.get("tv","bicubic")

    processing_folder = Path(config["processing_folder"])
    processing_folder.mkdir(parents=True, exist_ok=True)

    # Tracker type/show depend only on the root folder, not on each file.
    f_type, show = _classify_folder(folder)

    # Track if we switch to bitrate mode
    use_bitrate = transcode_mode == "bitrate"

    # Snapshot the work list first: encoded outputs are moved back into this
    # same tree, and a lazy directory walk could pick them up and re-encode them.
    candidates = sorted(
        p for p in folder.rglob("*")
        if p.suffix.lower() in extensions and p.is_file()
    )

    for file in candidates:
        if any(tag.lower() in file.name.lower() for tag in ignore_tags):
            print(f"⏭️ Skipping: {file.name}")
            logger.info(f"Skipping: {file.name}")
            continue

        print("="*60)
        logger.info(f"Processing: {file.name}")
        print(f"📁 Processing: {file.name}")

        temp_input = processing_folder / file.name
        shutil.copy2(file, temp_input)
        logger.info(f"Copied {file.name} → {temp_input.name}")

        # Detect source resolution and determine target resolution
        src_width, src_height = get_source_resolution(temp_input)
        target_resolution, res_width, res_height = _resolve_target_resolution(
            explicit_resolution, src_width, src_height
        )

        # Set CQ based on content type and target resolution
        content_cq = config["encode"]["cq"].get(f"tv_{target_resolution}" if is_tv else f"movie_{target_resolution}", 32)
        file_cq = cq if cq is not None else content_cq

        temp_output = processing_folder / f"{file.stem}{suffix}{file.suffix}"

        method = "Bitrate" if use_bitrate else "CQ"
        try:
            orig_size, out_size, reduction_ratio = run_ffmpeg(temp_input, temp_output, file_cq, res_width, res_height, filter_flags, audio_config, method, bitrate_config)
        except subprocess.CalledProcessError as e:
            print(f"❌ FFmpeg failed: {e}")
            logger.error(f"FFmpeg failed: {e}")
            # Remove the working copy and any partial output.
            temp_input.unlink(missing_ok=True)
            temp_output.unlink(missing_ok=True)
            break

        if method=="CQ" and reduction_ratio>=reduction_ratio_threshold:
            print(f"⚠️ CQ encode did not achieve target size ({reduction_ratio:.1%} >= {reduction_ratio_threshold:.1%}). Switching all remaining files to Bitrate.")
            logger.warning(f"CQ encode failed target ({reduction_ratio:.1%}). Switching to Bitrate for remaining files.")
            use_bitrate = True
            # From here on the kept file is a Bitrate encode; the tracker row
            # and the completion log must say so.
            method = "Bitrate"
            try:
                # Retry current file using bitrate
                temp_output.unlink(missing_ok=True)
                orig_size, out_size, reduction_ratio = run_ffmpeg(temp_input, temp_output, file_cq, res_width, res_height, filter_flags, audio_config, "Bitrate", bitrate_config)
            except subprocess.CalledProcessError as e:
                print(f"❌ Bitrate retry failed: {e}")
                logger.error(f"Bitrate retry failed: {e}")
                temp_input.unlink(missing_ok=True)
                temp_output.unlink(missing_ok=True)
                break
            if reduction_ratio>=reduction_ratio_threshold:
                print(f"❌ Bitrate encode also failed target ({reduction_ratio:.1%}). Stopping process.")
                logger.error(f"Bitrate encode failed target ({reduction_ratio:.1%}). Stopping process.")
                temp_input.unlink(missing_ok=True)
                temp_output.unlink(missing_ok=True)
                break
        elif method=="Bitrate" and reduction_ratio>=reduction_ratio_threshold:
            print(f"❌ Bitrate encode failed target ({reduction_ratio:.1%}). Stopping process.")
            logger.error(f"Bitrate encode failed target ({reduction_ratio:.1%}). Stopping process.")
            temp_input.unlink(missing_ok=True)
            temp_output.unlink(missing_ok=True)
            break

        dest_file = file.parent / temp_output.name
        shutil.move(temp_output, dest_file)
        print(f"🚚 Moved {temp_output.name} → {dest_file.name}")
        logger.info(f"Moved {temp_output.name} → {dest_file.name}")

        orig_size_mb = round(orig_size / 1e6, 2)
        proc_size_mb = round(out_size / 1e6, 2)
        # Use the exact byte ratio: dividing the rounded MB values loses
        # precision and fails when the original rounds to 0.0 MB.
        percentage = round(reduction_ratio * 100, 1)

        with open(TRACKER_FILE, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow([f_type, show, dest_file.name, orig_size_mb, proc_size_mb, percentage, method])

        # Enhanced logging with all conversion details
        logger.info(f"\n✅ CONVERSION COMPLETE: {dest_file.name}")
        logger.info(f"  Type: {f_type.upper()} | Show: {show}")
        logger.info(f"  Size: {orig_size_mb}MB → {proc_size_mb}MB ({percentage}% of original, {100-percentage:.1f}% reduction)")
        logger.info(f"  Method: {method} | Status: SUCCESS")
        print(f"📝 Logged conversion: {dest_file.name} ({percentage}%), method={method}")

        try:
            temp_input.unlink()
            if dest_file != file:
                # Never delete the file that was just written as the result.
                file.unlink()
            logger.info(f"Deleted original and processing copy for {file.name}")
        except OSError as e:
            print(f"⚠️ Could not delete files: {e}")
            logger.warning(f"Could not delete files: {e}")

# =============================
# MAIN
# =============================
def main():
    """Parse the command line, load ``config.xml`` from beside this script, and process the folder."""
    cli = argparse.ArgumentParser(description="Batch encode videos with logging and tracker")
    cli.add_argument("folder", help="Path to folder containing videos")
    cli.add_argument("--cq", type=int, help="Override default CQ")
    cli.add_argument(
        "--m", "--mode",
        dest="transcode_mode",
        default="cq",
        choices=["cq","bitrate"],
        help="Encode mode (cq or bitrate)",
    )
    cli.add_argument(
        "--r", "--resolution",
        dest="resolution",
        default=None,
        choices=["480","720","1080"],
        help="Force target resolution (if not specified, preserves source if <=1080p, else scales to 1080p)",
    )
    opts = cli.parse_args()

    # The configuration file is expected next to this script.
    script_dir = Path(__file__).parent
    config = load_config_xml(script_dir / "config.xml")

    process_folder(
        Path(opts.folder),
        opts.cq,
        opts.transcode_mode,
        opts.resolution,
        config,
    )


if __name__ == "__main__":
    main()