# conversion_project/main.py

#!/usr/bin/env python3
import argparse
import csv
import json
import os
import shutil
import subprocess
from pathlib import Path

from core.config_helper import load_config_xml
from core.logger_helper import setup_logger

# =============================
# Setup logger
# =============================
# Directory named "logs" beside this script; it is handed to setup_logger.
# NOTE(review): presumably setup_logger creates the folder and writes log
# files into it -- confirm in core.logger_helper.
LOG_FOLDER = Path(__file__).parent.joinpath("logs")
logger = setup_logger(LOG_FOLDER)

# =============================
# Tracker CSV
# =============================
# CSV file beside this script that receives one row per converted file.
TRACKER_FILE = Path(__file__).with_name("conversion_tracker.csv")

# On first use the tracker does not exist yet: create it with only the
# header row so later appends line up with named columns.
if not TRACKER_FILE.exists():
    with TRACKER_FILE.open("w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(
            (
                "type",
                "show",
                "filename",
                "original_size_MB",
                "processed_size_MB",
                "percentage",
                "method",
            )
        )

# =============================
# AUDIO BUCKET LOGIC
# =============================
def choose_audio_bitrate(channels: int, bitrate_kbps: int, audio_config: dict) -> int:
    """Pick the target audio bitrate bucket for one stream.

    Args:
        channels: Number of audio channels in the source stream.
        bitrate_kbps: Source bitrate of the stream in kbit/s.
        audio_config: Mapping with a ``"stereo"`` entry (keys ``"low"``,
            ``"medium"``, ``"high"``) and a ``"multi_channel"`` entry (keys
            ``"low"``, ``"high"``).

    Returns:
        The configured value of the selected bucket, returned verbatim
        (run_ffmpeg passes it straight to ffmpeg's ``-b:a``).

    Raises:
        KeyError: If ``audio_config`` lacks the selected bucket.
    """
    # Mono sources share the stereo buckets. Previously only an exact
    # channel count of 2 did, so mono audio was sent to the multi-channel
    # buckets and given surround-sized bitrates.
    if channels <= 2:
        stereo = audio_config["stereo"]
        if bitrate_kbps < 80:
            return stereo["low"]
        if bitrate_kbps < 112:
            return stereo["medium"]
        return stereo["high"]

    multi = audio_config["multi_channel"]
    if bitrate_kbps < 176:
        return multi["low"]
    return multi["high"]

# =============================
# PATH NORMALIZATION
# =============================
def normalize_path_for_service(local_path: str, path_mappings: dict) -> str:
    """Translate a local (Windows-style) path into its service-side form.

    The first mapping whose key is a case-insensitive prefix of
    ``local_path`` wins: that prefix is swapped for the mapped value and all
    backslashes become forward slashes. With no matching mapping only the
    slash conversion is applied.

    Args:
        local_path: Path as seen on the local machine.
        path_mappings: ``{local_prefix: service_prefix}`` pairs, tried in
            the dict's iteration order.

    Returns:
        The rewritten path using ``/`` separators.
    """
    lowered = local_path.lower()
    for win_path, linux_path in path_mappings.items():
        # An empty prefix would match every path; ignore it.
        if not win_path:
            continue
        if lowered.startswith(win_path.lower()):
            # Slice the prefix off by length rather than using str.replace:
            # replace is case-sensitive (so "d:\\media" never matched a
            # "D:\\Media" mapping) and would also rewrite later occurrences
            # of the prefix text inside the path.
            remainder = local_path[len(win_path):]
            return (linux_path + remainder).replace("\\", "/")
    return local_path.replace("\\", "/")

# =============================
# AUDIO STREAMS DETECTION
# =============================
def _to_number(value, cast, default=0):
    """Convert an ffprobe field with *cast*; return *default* if missing or non-numeric (e.g. "N/A")."""
    try:
        return cast(value)
    except (TypeError, ValueError):
        return default


def _probe_container(input_file: Path):
    """Return ``(size_bytes, duration_seconds)`` of the container, zeros where unknown."""
    fmt_cmd = [
        "ffprobe", "-v", "error", "-show_entries", "format=size,duration",
        "-of", "json", str(input_file)
    ]
    fmt_result = subprocess.run(fmt_cmd, capture_output=True, text=True)
    try:
        fmt = json.loads(fmt_result.stdout).get("format", {})
    except (json.JSONDecodeError, AttributeError):
        # Best effort only: the caller falls back to a default bitrate.
        return 0, 0.0
    return _to_number(fmt.get("size"), int), _to_number(fmt.get("duration"), float, 0.0)


def get_audio_streams(input_file: Path) -> list:
    """Describe every audio stream of *input_file* using ffprobe.

    Args:
        input_file: Media file to inspect.

    Returns:
        One tuple per audio stream, in ffprobe order:
        ``(index, channels, avg_bitrate_kbps, language, metadata_kbps)``.
        ``channels`` defaults to 2 and ``language`` to ``"und"`` when ffprobe
        omits them. ``metadata_kbps`` is the stream's tagged bitrate (0 when
        absent). ``avg_bitrate_kbps`` is that tagged bitrate when present;
        otherwise, if the stream reports a duration, an estimate from the
        container size; otherwise 128.

    Raises:
        subprocess.CalledProcessError: If the ffprobe stream query exits
            with a non-zero status.
    """
    fallback_kbps = 128
    cmd = [
        "ffprobe", "-v", "error", "-select_streams", "a",
        "-show_entries", "stream=index,channels,duration,bit_rate,tags=language",
        "-of", "json", str(input_file)
    ]
    # check=True: a failed probe used to be ignored and showed up later as a
    # JSON decoding error or as a silently empty stream list.
    result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    data = json.loads(result.stdout) if result.stdout.strip() else {}
    audio_streams = data.get("streams", [])

    container = None  # probed lazily, at most once per file
    streams = []
    for s in audio_streams:
        index = s["index"]
        channels = s.get("channels", 2)
        src_lang = s.get("tags", {}).get("language", "und")
        bit_rate_meta = max(0, _to_number(s.get("bit_rate"), int))
        duration = _to_number(s.get("duration"), float, 0.0)

        if bit_rate_meta:
            # A tagged bitrate is authoritative on its own; it must not be
            # discarded just because the stream reports no duration.
            avg_bitrate_kbps = bit_rate_meta // 1000
        elif duration > 0:
            if container is None:
                container = _probe_container(input_file)
            size_bytes, container_duration = container
            total_duration = container_duration or duration
            if size_bytes > 0 and total_duration > 0:
                # NOTE: the whole file size (video included) is split evenly
                # across the audio streams, so this is an upper bound rather
                # than a measurement of the audio payload.
                avg_bitrate_kbps = int(
                    (size_bytes * 8 / len(audio_streams)) / total_duration / 1000
                )
            else:
                avg_bitrate_kbps = fallback_kbps
        else:
            avg_bitrate_kbps = fallback_kbps

        streams.append((index, channels, avg_bitrate_kbps, src_lang, bit_rate_meta // 1000))
    return streams

# =============================
# FFmpeg ENCODE
# =============================
def _build_ffmpeg_command(input_file: Path, output_file: Path, cq: int, scale_width: int,
                          scale_height: int, filter_flags: str, method: str,
                          audio_targets: list) -> list:
    """Assemble the ffmpeg argument list for one encode; *audio_targets* holds (channels, bitrate) per audio stream."""
    cmd = ["ffmpeg", "-y", "-i", str(input_file),
           "-vf", f"scale={scale_width}:{scale_height}:flags={filter_flags}:force_original_aspect_ratio=decrease",
           "-map", "0:v", "-map", "0:a", "-map", "0:s?",
           "-c:v", "av1_nvenc", "-preset", "p1", "-pix_fmt", "p010le"]

    if method == "CQ":
        cmd += ["-cq", str(cq)]
    else:
        # Fixed bitrate ladder, chosen by target height.
        if scale_height >= 1080:
            vb, maxrate, bufsize = "1500k", "1750k", "2250k"
        else:
            vb, maxrate, bufsize = "900k", "1250k", "1600k"
        cmd += ["-b:v", vb, "-maxrate", maxrate, "-bufsize", bufsize]

    for i, (channels, br) in enumerate(audio_targets):
        # "-ac:a:{i}" addresses the i-th *audio* stream. The former
        # "-ac:{i}" addressed absolute output stream i; with video mapped
        # first that shifted every channel count onto the wrong stream.
        cmd += [f"-c:a:{i}", "aac", f"-b:a:{i}", str(br), f"-ac:a:{i}", str(channels)]

    cmd += ["-c:s", "copy", str(output_file)]
    return cmd


def run_ffmpeg(input_file: Path, output_file: Path, cq: int, scale_width: int, scale_height: int,
               filter_flags: str, audio_config: dict, method: str):
    """Encode *input_file* to *output_file* with av1_nvenc video and AAC audio.

    Args:
        input_file: Source media file.
        output_file: Destination file; overwritten if present (``-y``).
        cq: Constant-quality value, used only when ``method == "CQ"``.
        scale_width: Target width for the scale filter.
        scale_height: Target height; also selects the bitrate ladder when
            ``method`` is not ``"CQ"``.
        filter_flags: Scaler algorithm passed as ``flags=`` to the filter.
        audio_config: Bucket configuration forwarded to
            ``choose_audio_bitrate``.
        method: ``"CQ"`` for constant quality; any other value selects the
            fixed video bitrate settings.

    Returns:
        ``(orig_size, out_size, reduction_ratio)``: both sizes in bytes and
        the output size as a fraction of the input size.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    streams = get_audio_streams(input_file)
    header = f"\n🧩 ENCODE SETTINGS\n • Resolution: {scale_width}x{scale_height}\n • Scale Filter: {filter_flags}\n • CQ: {cq if method=='CQ' else 'N/A'}\n • Video Encoder: av1_nvenc (preset p1, pix_fmt p010le)\n • Audio Streams:"
    logger.info(header)
    print(" ")

    # Resolve each stream's bucket once; it is both reported and encoded.
    audio_targets = []
    for (index, channels, avg_bitrate, src_lang, meta_bitrate) in streams:
        br = choose_audio_bitrate(channels, avg_bitrate, audio_config)
        audio_targets.append((channels, br))
        line = f"    - Stream #{index}: {channels}ch, src={src_lang}, avg_bitrate={avg_bitrate}kbps, metadata={meta_bitrate}kbps, bucket_target={br/1000:.1f}kbps"
        print(line)
        logger.info(line)

    cmd = _build_ffmpeg_command(input_file, output_file, cq, scale_width, scale_height,
                                filter_flags, method, audio_targets)

    print(f"\n🎬 Running {method} encode: {output_file.name}")
    logger.info(f"Running {method} encode: {output_file.name}")

    subprocess.run(cmd, check=True)

    orig_size = input_file.stat().st_size
    out_size = output_file.stat().st_size
    reduction_ratio = out_size / orig_size
    msg = f"📦 Original: {orig_size/1e6:.2f} MB → Encoded: {out_size/1e6:.2f} MB ({reduction_ratio:.1%} of original)"
    print(msg)
    logger.info(msg)

    return orig_size, out_size, reduction_ratio

# =============================
# PROCESS FOLDER
# =============================
def _classify_folder(folder: Path):
    """Derive the tracker's ``(type, show)`` pair from the path components of *folder*."""
    parts_lower = [p.lower() for p in folder.parts]
    for kind in ("tv", "anime"):
        if kind in parts_lower:
            pos = parts_lower.index(kind)
            # The show is the component right after the "tv"/"anime" folder.
            show = folder.parts[pos + 1] if len(folder.parts) > pos + 1 else "Unknown"
            return kind, show
    return "movie", "N/A"


def _discard_temp_files(*paths: Path) -> None:
    """Delete processing-folder leftovers, ignoring files that are already gone."""
    for path in paths:
        path.unlink(missing_ok=True)


def process_folder(folder: Path, cq: int, resolution: str, config: dict):
    """Re-encode every matching video under *folder* and record the results.

    Each file is copied to the processing folder, encoded there, moved back
    next to the original under ``<stem><suffix><ext>``, appended to the
    tracker CSV, and the original is then deleted. An encode must come out
    below 50% of the original size: the first CQ encode that misses this is
    retried with the Bitrate method, which is then used for all remaining
    files; a Bitrate miss or any ffmpeg failure stops the whole run.

    Args:
        folder: Root folder, searched recursively.
        cq: Constant-quality override; ``None`` selects the configured
            default for the folder type and resolution.
        resolution: ``"1080"`` for 1920x1080; any other value gives 1280x720.
        config: Parsed configuration. Keys read here: ``"audio"``,
            ``"encode"`` (``"filters"``, ``"cq"``), ``"suffix"``,
            ``"extensions"``, ``"processing_folder"``.
    """
    if not folder.exists():
        print(f"❌ Folder not found: {folder}")
        logger.error(f"Folder not found: {folder}")
        return

    audio_config = config["audio"]
    filters_config = config["encode"]["filters"]
    suffix = config["suffix"]
    extensions = config["extensions"]
    res_height = 1080 if resolution == "1080" else 720
    res_width = 1920 if resolution == "1080" else 1280
    filter_flags = filters_config.get("default", "lanczos")
    folder_lower = str(folder).lower()
    if "\\tv\\" in folder_lower or "/tv/" in folder_lower:
        filter_flags = filters_config.get("tv", "bicubic")
        cq_default = config["encode"]["cq"].get(f"tv_{resolution}", 32)
    else:
        cq_default = config["encode"]["cq"].get(f"movie_{resolution}", 32)
    if cq is None:
        cq = cq_default

    processing_folder = Path(config["processing_folder"])
    processing_folder.mkdir(parents=True, exist_ok=True)

    # Depends only on the root folder, so resolve it once up front.
    f_type, show = _classify_folder(folder)

    # Becomes True once a CQ encode misses the size target.
    use_bitrate = False

    # Snapshot the tree first: encoded files are moved back into it while
    # the loop runs, and a lazy rglob could pick them up and encode them
    # a second time.
    for file in list(folder.rglob("*")):
        if not file.is_file() or file.suffix.lower() not in extensions:
            continue
        if any(tag.lower() in file.name.lower() for tag in ["ehx", "megusta"]):
            print(f"⏭️ Skipping: {file.name}")
            logger.info(f"Skipping: {file.name}")
            continue

        print("="*60)
        logger.info(f"Processing: {file.name}")
        print(f"📁 Processing: {file.name}")

        temp_input = processing_folder / file.name
        shutil.copy2(file, temp_input)
        logger.info(f"Copied {file.name} → {temp_input.name}")
        temp_output = processing_folder / f"{file.stem}{suffix}{file.suffix}"

        method = "Bitrate" if use_bitrate else "CQ"
        try:
            orig_size, out_size, reduction_ratio = run_ffmpeg(
                temp_input, temp_output, cq, res_width, res_height,
                filter_flags, audio_config, method)
        except subprocess.CalledProcessError as e:
            print(f"❌ FFmpeg failed: {e}")
            logger.error(f"FFmpeg failed: {e}")
            # Also drop the partial output, not just the input copy.
            _discard_temp_files(temp_input, temp_output)
            break

        if method == "CQ" and reduction_ratio >= 0.5:
            print("⚠️ CQ encode did not achieve target size. Switching all remaining files to Bitrate.")
            logger.warning("CQ encode failed target. Switching to Bitrate for remaining files.")
            use_bitrate = True
            # The file is re-encoded with Bitrate below; record that method
            # in the tracker instead of the abandoned CQ attempt.
            method = "Bitrate"
            try:
                # Retry current file using bitrate
                temp_output.unlink(missing_ok=True)
                orig_size, out_size, reduction_ratio = run_ffmpeg(
                    temp_input, temp_output, cq, res_width, res_height,
                    filter_flags, audio_config, "Bitrate")
            except subprocess.CalledProcessError as e:
                print(f"❌ Bitrate retry failed: {e}")
                logger.error(f"Bitrate retry failed: {e}")
                _discard_temp_files(temp_input, temp_output)
                break
            if reduction_ratio >= 0.5:
                print("❌ Bitrate encode also failed target. Stopping process.")
                logger.error("Bitrate encode failed target. Stopping process.")
                _discard_temp_files(temp_input, temp_output)
                break
        elif method == "Bitrate" and reduction_ratio >= 0.5:
            print("❌ Bitrate encode failed target. Stopping process.")
            logger.error("Bitrate encode failed target. Stopping process.")
            _discard_temp_files(temp_input, temp_output)
            break

        dest_file = file.parent / temp_output.name
        shutil.move(temp_output, dest_file)
        print(f"🚚 Moved {temp_output.name} → {dest_file.name}")
        logger.info(f"Moved {temp_output.name} → {dest_file.name}")

        orig_size_mb = round(orig_size / 1e6, 2)
        proc_size_mb = round(out_size / 1e6, 2)
        # Files below ~5 KB round to 0.0 MB; avoid dividing by zero.
        percentage = round(proc_size_mb / orig_size_mb * 100, 1) if orig_size_mb else 0.0

        with open(TRACKER_FILE, "a", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerow([f_type, show, dest_file.name, orig_size_mb, proc_size_mb, percentage, method])

        logger.info(f"Tracked conversion: {dest_file.name}, {orig_size_mb}MB → {proc_size_mb}MB ({percentage}%), method={method}")
        print(f"📝 Logged conversion: {dest_file.name} ({percentage}%), method={method}")

        try:
            temp_input.unlink()
            # With an empty suffix the encode has already replaced the
            # original in place; unlinking it would delete the new file.
            if dest_file != file:
                file.unlink()
            logger.info(f"Deleted original and processing copy for {file.name}")
        except Exception as e:
            print(f"⚠️ Could not delete files: {e}")
            logger.warning(f"Could not delete files: {e}")

# =============================
# MAIN
# =============================
def main():
    """Command-line entry point.

    Parses the folder, optional CQ override and target resolution from the
    command line, loads ``config.xml`` from the directory containing this
    script, and hands everything to ``process_folder``.
    """
    cli = argparse.ArgumentParser(description="Batch encode videos with logging and tracker")
    cli.add_argument("folder", help="Path to folder containing videos")
    cli.add_argument("--cq", type=int, help="Override default CQ")
    cli.add_argument(
        "--r",
        "--resolution",
        dest="resolution",
        choices=["720", "1080"],
        default="1080",
        help="Target resolution",
    )
    options = cli.parse_args()

    # The configuration file is expected beside this script.
    config = load_config_xml(Path(__file__).parent / "config.xml")

    process_folder(Path(options.folder), options.cq, options.resolution, config)


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()