#!/usr/bin/env python3
"""Batch re-encode a folder of videos to AV1 (GPU av1_nvenc with libsvtav1 CPU
fallback), downmixing/re-encoding audio to AAC, logging results to a CSV tracker.

Relies on ffmpeg/ffprobe being on PATH and on project-local helpers
``core.config_helper.load_config_xml`` and ``core.logger_helper.setup_logger``.
"""
import argparse
import csv
import json
import os
import shutil
import subprocess
import threading
from pathlib import Path
from functools import lru_cache
from concurrent.futures import ThreadPoolExecutor, as_completed

from core.config_helper import load_config_xml
from core.logger_helper import setup_logger

# =============================
# Setup logger
# =============================
LOG_FOLDER = Path(__file__).parent / "logs"
logger = setup_logger(LOG_FOLDER)

# =============================
# Tracker CSV
# =============================
TRACKER_FILE = Path(__file__).parent / "conversion_tracker.csv"
if not TRACKER_FILE.exists():
    with open(TRACKER_FILE, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow([
            "type","show","filename","original_size_MB","processed_size_MB","percentage","method"
        ])

# Serializes appends to TRACKER_FILE when --parallel > 1; concurrent writers
# from ThreadPoolExecutor workers would otherwise interleave CSV rows.
_TRACKER_LOCK = threading.Lock()


# =============================
# FFPROBE CACHING
# =============================
@lru_cache(maxsize=256)
def get_audio_streams_cached(input_file_str: str):
    """Cached ffprobe call to avoid redundant queries.

    Takes the path as a string (Path is not a stable lru_cache key across
    callers) and returns the parsed ffprobe JSON dict for the audio streams.
    Returns an empty dict if ffprobe produced no output (e.g. unreadable file)
    instead of raising JSONDecodeError.
    """
    input_file = Path(input_file_str)
    cmd = [
        "ffprobe", "-v", "error", "-select_streams", "a",
        "-show_entries", "stream=index,channels,duration,bit_rate,tags=language",
        "-of", "json", str(input_file)
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if not result.stdout.strip():
        return {}
    return json.loads(result.stdout)


# =============================
# AUDIO BUCKET LOGIC
# =============================
def choose_audio_bitrate(channels: int, bitrate_kbps: int, audio_config: dict) -> int:
    """Map (channel count, measured bitrate) to a target bitrate from config.

    ``channels == 2`` selects the stereo buckets; anything else uses the
    multi-channel buckets. Bucket thresholds (100/130 kbps stereo,
    390/515 kbps multi-channel) pick low/medium/high from ``audio_config``.
    Returned value is whatever unit the config stores (passed straight to
    ffmpeg ``-b:a``).
    """
    if channels == 2:
        if bitrate_kbps < 100:
            return audio_config["stereo"]["low"]
        elif bitrate_kbps < 130:
            return audio_config["stereo"]["medium"]
        else:
            return audio_config["stereo"]["high"]
    else:
        if bitrate_kbps < 390:
            return audio_config["multi_channel"]["low"]
        elif bitrate_kbps < 515:
            return audio_config["multi_channel"]["medium"]
        else:
            return audio_config["multi_channel"]["high"]


# =============================
# PATH NORMALIZATION
# =============================
def normalize_path_for_service(local_path: str, path_mappings: dict) -> str:
    """Translate a local (Windows-style) path to a service (Linux-style) path.

    ``path_mappings`` maps Windows path prefixes to Linux ones. Matching is
    case-insensitive; only the matched PREFIX is swapped (a case-sensitive
    str.replace over the whole string would miss mappings that differ in case
    and could touch later occurrences). Backslashes are normalized either way.
    """
    for win_path, linux_path in path_mappings.items():
        if local_path.lower().startswith(win_path.lower()):
            # Slice instead of str.replace: works regardless of the prefix's
            # actual casing and cannot rewrite a later occurrence.
            return (linux_path + local_path[len(win_path):]).replace("\\", "/")
    return local_path.replace("\\", "/")


# =============================
# AUDIO STREAMS DETECTION
# =============================
def get_audio_streams(input_file: Path):
    """Probe audio streams and estimate a usable bitrate for each.

    Returns a list of tuples:
        (index, channels, avg_bitrate_kbps, language, metadata_bitrate_kbps)

    Bitrate resolution order per stream:
      1. container metadata ``bit_rate`` when present,
      2. file_size * 8 / n_streams / duration when metadata is missing,
      3. 128 kbps as a last-resort default.
    """
    # Reuse the lru_cache'd probe so repeated calls on the same file
    # (e.g. CQ encode followed by a Bitrate retry) hit ffprobe only once.
    data = get_audio_streams_cached(str(input_file))
    streams = []
    # Lazily-computed format-level probe, shared by all streams that lack
    # bit_rate metadata (the original re-ran ffprobe once per such stream).
    fmt_data = None

    def _format_info():
        nonlocal fmt_data
        if fmt_data is None:
            fmt_cmd = [
                "ffprobe", "-v", "error",
                "-show_entries", "format=size,duration",
                "-of", "json", str(input_file)
            ]
            fmt_result = subprocess.run(fmt_cmd, capture_output=True, text=True)
            fmt_data = json.loads(fmt_result.stdout)
        return fmt_data

    for s in data.get("streams", []):
        index = s["index"]
        channels = s.get("channels", 2)
        src_lang = s.get("tags", {}).get("language", "und")
        bit_rate_meta = int(s.get("bit_rate", 0)) if s.get("bit_rate") else 0
        try:
            duration = float(s.get("duration", 0))
            if duration and bit_rate_meta == 0:
                # No per-stream bitrate: approximate by splitting the whole
                # file's bitrate evenly across the probed streams.
                info = _format_info()
                size_bytes = int(info.get("format", {}).get("size", 0))
                total_duration = float(info.get("format", {}).get("duration", duration))
                n_streams = len(data.get("streams", []))
                avg_bitrate_kbps = int((size_bytes * 8 / n_streams) / total_duration / 1000)
            elif duration and bit_rate_meta:
                avg_bitrate_kbps = int(bit_rate_meta / 1000)
            else:
                avg_bitrate_kbps = 128
        except Exception:
            # Any probe/parse hiccup falls back to a safe default bitrate.
            avg_bitrate_kbps = 128
        streams.append((index, channels, avg_bitrate_kbps, src_lang, int(bit_rate_meta / 1000)))
    return streams


# =============================
# OUTPUT VALIDATION
# =============================
def validate_output(input_file: Path, output_file: Path, expected_width: int, expected_height: int) -> bool:
    """Validate that output file has correct resolution and audio tracks.

    Checks only the first video stream's width/height against the expected
    target (±10 px tolerance for scaler rounding). Probe errors are treated
    as a pass so a flaky ffprobe never discards a good encode.
    """
    try:
        cmd = [
            "ffprobe", "-v", "error",
            "-select_streams", "v:0",
            "-show_entries", "stream=width,height",
            "-of", "json", str(output_file)
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
        data = json.loads(result.stdout)
        if not data.get("streams"):
            logger.warning(f"❌ Validation failed: No video stream in {output_file.name}")
            return False
        width = data["streams"][0].get("width", 0)
        height = data["streams"][0].get("height", 0)
        # Allow small variance for scaling
        if abs(width - expected_width) > 10 or abs(height - expected_height) > 10:
            logger.warning(f"❌ Validation failed: Resolution {width}x{height}, expected ~{expected_width}x{expected_height}")
            return False
        logger.info(f"✅ Validation passed: {output_file.name} ({width}x{height})")
        return True
    except Exception as e:
        logger.warning(f"⚠️ Validation skipped (probe error): {e}")
        return True  # Don't fail on validation errors


# =============================
# FFmpeg ENCODE (GPU + CPU fallback, per-resolution CPU preset)
# =============================
def run_ffmpeg(input_file: Path, output_file: Path, cq: int, scale_width: int, scale_height: int,
               filter_flags: str, audio_config: dict, method: str, crf_cpu: int, verbose: bool = False):
    """Encode one file with av1_nvenc, falling back to libsvtav1 on failure.

    ``method`` selects constant-quality ("CQ", uses ``cq``/``crf_cpu``) or a
    capped-bitrate ladder ("Bitrate"). Audio is re-encoded to AAC; 720p output
    is always downmixed to stereo, 1080p keeps 5.1 when the source has >= 6
    channels. Subtitles are stream-copied.

    Returns (orig_size_bytes, out_size_bytes, reduction_ratio).
    Raises subprocess.CalledProcessError if both GPU and CPU encodes fail.
    """
    streams = get_audio_streams(input_file)
    encoder_name = "av1_nvenc"
    pix_fmt = "p010le"

    header = (
        f"\n🧩 ENCODE SETTINGS\n"
        f" • Resolution: {scale_width}x{scale_height}\n"
        f" • Scale Filter: {filter_flags}\n"
        f" • CQ: {cq if method=='CQ' else 'N/A'}\n"
        f" • CPU CRF: {crf_cpu}\n"
        f" • Video Encoder: {encoder_name} (preset p1, pix_fmt {pix_fmt})\n"
        f" • Audio Streams:"
    )
    logger.info(header)
    print(header)

    for (index, channels, avg_bitrate, src_lang, meta_bitrate) in streams:
        # 720p -> stereo; 1080p keeps 5.1 only when the source is >= 6ch.
        output_channels = 2 if scale_height <= 720 else (6 if channels >= 6 else 2)
        br = choose_audio_bitrate(output_channels, avg_bitrate, audio_config)
        line = (
            f" - Stream #{index}: {channels}ch→{output_channels}ch, src={src_lang}, "
            f"avg_bitrate={avg_bitrate}kbps, metadata={meta_bitrate}kbps, bucket_target={br/1000:.1f}kbps"
        )
        print(line)
        logger.info(line)

    cmd = [
        "ffmpeg", "-y",
        "-i", str(input_file),
        "-vf", f"scale={scale_width}:{scale_height}:flags={filter_flags}:force_original_aspect_ratio=decrease",
        "-map", "0:v", "-map", "0:a", "-map", "0:s?",
        "-c:v", encoder_name,
        "-preset", "p1",
        "-pix_fmt", pix_fmt
    ]

    # Video quality
    if method == "CQ":
        cmd += ["-cq", str(cq)]
    else:
        if scale_height >= 1080:
            vb, maxrate, bufsize = "1500k", "1750k", "2250k"
        else:
            vb, maxrate, bufsize = "900k", "1250k", "1600k"
        cmd += ["-b:v", vb, "-maxrate", maxrate, "-bufsize", bufsize]

    # Audio streams
    for i, (index, channels, avg_bitrate, src_lang, meta_bitrate) in enumerate(streams):
        # Determine output channels: 720p -> 2ch, 1080p -> 6ch if input>=6 else 2ch
        output_channels = 2 if scale_height <= 720 else (6 if channels >= 6 else 2)
        # Choose bitrate based on OUTPUT channels, not input
        br = choose_audio_bitrate(output_channels, avg_bitrate, audio_config)
        # NOTE: "-ac:a:{i}" targets the i-th AUDIO stream. The previous
        # "-ac:{i}" addressed output stream #i overall, where stream 0 is the
        # video stream, so the downmix never applied to the intended track.
        cmd += [f"-c:a:{i}", "aac", f"-b:a:{i}", str(br), f"-ac:a:{i}", str(output_channels)]

    cmd += ["-c:s", "copy", str(output_file)]

    print(f"\n🎬 Running {method} encode: {output_file.name}")
    logger.info(f"Running {method} encode: {output_file.name}")
    if verbose:
        logger.info(f"FFmpeg command: {' '.join(cmd)}")

    # Try GPU encoder first
    try:
        if verbose:
            subprocess.run(cmd, check=True)
        else:
            subprocess.run(cmd, check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        print(f"❌ FFmpeg failed with GPU encoder on {input_file.name}: {e}")
        logger.error(f"GPU encode failed for {input_file.name}. Command: {' '.join(cmd)}")

        # CPU fallback: swap encoder/preset/pix_fmt in-place. Locate each
        # value via its option flag rather than by value (a filename could
        # coincidentally equal "p1" or the encoder name).
        cmd_cpu = cmd.copy()
        cmd_cpu[cmd_cpu.index("-c:v") + 1] = "libsvtav1"
        # CPU preset based on resolution: faster for 720p, slower for 1080p
        cpu_preset = "8" if scale_height <= 720 else "6"
        cmd_cpu[cmd_cpu.index("-preset") + 1] = cpu_preset
        # libsvtav1 does not accept the semi-planar p010le used by NVENC;
        # use the planar 10-bit equivalent.
        cmd_cpu[cmd_cpu.index("-pix_fmt") + 1] = "yuv420p10le"
        # Replace -cq with -crf (only present in CQ mode)
        if "-cq" in cmd_cpu:
            cq_idx = cmd_cpu.index("-cq")
            cmd_cpu[cq_idx] = "-crf"
            cmd_cpu[cq_idx + 1] = str(crf_cpu)

        try:
            if verbose:
                subprocess.run(cmd_cpu, check=True)
            else:
                subprocess.run(cmd_cpu, check=True, capture_output=True)
            print("✅ CPU fallback succeeded")
            logger.info("CPU fallback succeeded")
        except subprocess.CalledProcessError as e_cpu:
            print(f"❌ CPU fallback also failed for {input_file.name}: {e_cpu}")
            logger.error(f"CPU fallback failed for {input_file.name}. Command: {' '.join(cmd_cpu)}")
            raise e_cpu

    orig_size = input_file.stat().st_size
    out_size = output_file.stat().st_size
    reduction_ratio = out_size / orig_size
    msg = f"📦 Original: {orig_size/1e6:.2f} MB → Encoded: {out_size/1e6:.2f} MB ({reduction_ratio:.1%} of original)"
    print(msg)
    logger.info(msg)
    return orig_size, out_size, reduction_ratio


# =============================
# PROCESS FOLDER
# =============================
def process_folder(folder: Path, cq: int, resolution: str, config: dict,
                   dry_run: bool = False, verbose: bool = False, backup: bool = False,
                   cleanup: bool = False, parallel: int = 1):
    """Encode every eligible video under ``folder`` (recursively).

    Files are copied to a scratch ``processing_folder``, encoded (CQ first,
    Bitrate retry if the size target is missed), validated, moved back next to
    the original, recorded in the tracker CSV, and the original deleted.
    ``dry_run`` previews without touching anything; ``parallel`` fans out
    encodes over a thread pool.
    """
    if not folder.exists():
        print(f"❌ Folder not found: {folder}")
        logger.error(f"Folder not found: {folder}")
        return

    audio_config = config["audio"]
    filters_config = config["encode"]["filters"]
    suffix = config["suffix"]
    extensions = config["extensions"]
    ignore_tags = config["ignore_tags"]
    reduction_ratio_threshold = config["reduction_ratio_threshold"]

    res_height = 1080 if resolution == "1080" else 720
    res_width = 1920 if resolution == "1080" else 1280

    # Determine type and resolution keys (config only distinguishes tv/movie;
    # the tracker below additionally recognizes "anime" folders).
    folder_lower = str(folder).lower()
    if "\\tv\\" in folder_lower or "/tv/" in folder_lower:
        type_key = "tv"
        filter_flags = filters_config.get("tv", "bicubic")
    else:
        type_key = "movie"
        filter_flags = filters_config.get("default", "lanczos")
    res_key = "1080" if resolution == "1080" else "720"

    # Get CQ and CRF from config
    cq_default = config["encode"]["cq"].get(f"{type_key}_{res_key}", 32)
    crf_cpu = config["encode"]["crf"].get(f"{type_key}_{res_key}", 32)
    if cq is None:
        cq = cq_default

    processing_folder = Path(config["processing_folder"])
    processing_folder.mkdir(parents=True, exist_ok=True)

    # Cleanup old processing folder if requested
    if cleanup and processing_folder.exists():
        print(f"🧹 Cleaning up old processing folder: {processing_folder}")
        logger.info(f"Cleaning up old processing folder: {processing_folder}")
        shutil.rmtree(processing_folder, ignore_errors=True)
        processing_folder.mkdir(parents=True, exist_ok=True)

    # Backup folder setup
    backup_folder = None
    if backup:
        backup_folder = folder.parent / f"{folder.name}_backup"
        backup_folder.mkdir(parents=True, exist_ok=True)
        print(f"💾 Backup enabled: {backup_folder}")
        logger.info(f"Backup folder: {backup_folder}")

    # Dry-run message
    if dry_run:
        print("🔍 DRY-RUN MODE: No files will be encoded or deleted")
        logger.info("DRY-RUN MODE: No files will be encoded or deleted")

    # Track if we switch to bitrate mode.
    # NOTE(review): never set to True anywhere — every file starts in CQ mode
    # and falls back per-file; kept for interface stability, confirm intent.
    use_bitrate = False

    # Collect all files to process first
    files_to_process = []
    for file in folder.rglob("*"):
        if file.suffix.lower() not in extensions:
            continue
        if any(tag.lower() in file.name.lower() for tag in ignore_tags):
            print(f"⏭️ Skipping: {file.name}")
            logger.info(f"Skipping: {file.name}")
            continue
        files_to_process.append(file)

    if not files_to_process:
        print("❌ No files found to process")
        logger.info("No files found to process")
        return

    print(f"📋 Found {len(files_to_process)} file(s) to process")

    # Classify the run's folder once for the tracker CSV — it is the same for
    # every file, so there is no need to recompute it per file.
    folder_parts = [p.lower() for p in folder.parts]
    if "tv" in folder_parts:
        f_type = "tv"
        tv_index = folder_parts.index("tv")
        show = folder.parts[tv_index + 1] if len(folder.parts) > tv_index + 1 else "Unknown"
    elif "anime" in folder_parts:
        f_type = "anime"
        anime_index = folder_parts.index("anime")
        show = folder.parts[anime_index + 1] if len(folder.parts) > anime_index + 1 else "Unknown"
    else:
        f_type = "movie"
        show = "N/A"

    # Define the encoding task
    def encode_file(file: Path):
        """Encodes a single file - used for parallel processing"""
        try:
            print("="*60)
            logger.info(f"Processing: {file.name}")
            print(f"📁 Processing: {file.name}")

            temp_input = processing_folder / file.name
            temp_output = processing_folder / f"{file.stem}{suffix}{file.suffix}"
            method = "Bitrate" if use_bitrate else "CQ"

            # Bail before copying: dry-run should not shuttle gigabytes of
            # media into the processing folder.
            if dry_run:
                print(f"🔍 [DRY-RUN] Would encode: {temp_output}")
                logger.info(f"[DRY-RUN] Would encode: {temp_output}")
                return None

            shutil.copy2(file, temp_input)
            logger.info(f"Copied {file.name} → {temp_input.name}")

            try:
                orig_size, out_size, reduction_ratio = run_ffmpeg(
                    temp_input, temp_output, cq, res_width, res_height,
                    filter_flags, audio_config, method, crf_cpu, verbose
                )
            except subprocess.CalledProcessError as e:
                print(f"❌ FFmpeg failed: {e}")
                logger.error(f"FFmpeg failed: {e}")
                temp_input.unlink(missing_ok=True)
                temp_output.unlink(missing_ok=True)
                return None

            # Validate output
            if not validate_output(temp_input, temp_output, res_width, res_height):
                print(f"⚠️ Validation failed for {temp_output.name}, keeping original")
                logger.warning(f"Validation failed for {temp_output.name}")
                temp_input.unlink(missing_ok=True)
                temp_output.unlink(missing_ok=True)
                return None

            # Handle fallback if CQ/Bitrate didn't reach target
            if method == "CQ" and reduction_ratio >= reduction_ratio_threshold:
                print(f"⚠️ CQ encode did not achieve target size ({reduction_ratio:.1%} >= {reduction_ratio_threshold:.1%}). Retrying with Bitrate.")
                logger.warning(f"CQ encode failed target ({reduction_ratio:.1%}). Retrying with Bitrate.")
                try:
                    temp_output.unlink(missing_ok=True)
                    orig_size, out_size, reduction_ratio = run_ffmpeg(
                        temp_input, temp_output, cq, res_width, res_height,
                        filter_flags, audio_config, "Bitrate", crf_cpu, verbose
                    )
                    # Record the method that actually produced the kept file
                    # (previously the tracker still logged "CQ" after a retry).
                    method = "Bitrate"
                    if reduction_ratio >= reduction_ratio_threshold:
                        print("❌ Bitrate encode also failed target.")
                        logger.error("Bitrate encode failed target.")
                        temp_input.unlink(missing_ok=True)
                        temp_output.unlink(missing_ok=True)
                        return None
                except subprocess.CalledProcessError as e:
                    print(f"❌ Bitrate retry failed: {e}")
                    logger.error(f"Bitrate retry failed: {e}")
                    temp_input.unlink(missing_ok=True)
                    temp_output.unlink(missing_ok=True)
                    return None
            elif reduction_ratio >= reduction_ratio_threshold:
                print("❌ Encode failed target. Stopping.")
                logger.error("Encode failed target.")
                temp_input.unlink(missing_ok=True)
                temp_output.unlink(missing_ok=True)
                return None

            # Move final file back to original folder
            dest_file = file.parent / temp_output.name
            shutil.move(temp_output, dest_file)
            print(f"🚚 Moved {temp_output.name} → {dest_file.name}")
            logger.info(f"Moved {temp_output.name} → {dest_file.name}")

            # Backup original if requested (before the original is deleted below)
            if backup:
                backup_dest = backup_folder / file.name
                shutil.copy2(file, backup_dest)
                logger.info(f"Backed up original to {backup_dest}")

            orig_size_mb = round(orig_size / 1e6, 2)
            proc_size_mb = round(out_size / 1e6, 2)
            # Guard against a 0.00 MB rounding result on pathologically small files.
            percentage = round(proc_size_mb / orig_size_mb * 100, 1) if orig_size_mb else 0.0

            # Log conversion in tracker CSV — lock held so parallel workers
            # never interleave rows.
            with _TRACKER_LOCK:
                with open(TRACKER_FILE, "a", newline="", encoding="utf-8") as f:
                    writer = csv.writer(f)
                    writer.writerow([f_type, show, dest_file.name, orig_size_mb, proc_size_mb, percentage, method])
            logger.info(f"Tracked conversion: {dest_file.name}, {orig_size_mb}MB → {proc_size_mb}MB ({percentage}%), method={method}")
            print(f"📝 Logged conversion: {dest_file.name} ({percentage}%), method={method}")

            # Delete temporary and original files
            try:
                temp_input.unlink()
                file.unlink()
                logger.info(f"Deleted original and processing copy for {file.name}")
            except Exception as e:
                print(f"⚠️ Could not delete files: {e}")
                logger.warning(f"Could not delete files: {e}")

            return {"file": file.name, "orig": orig_size_mb, "proc": proc_size_mb, "pct": percentage}
        except Exception as e:
            # Last-resort guard so one bad file never kills the whole batch.
            logger.error(f"Unexpected error processing {file.name}: {e}", exc_info=True)
            return None

    # Process files sequentially or in parallel
    if parallel > 1:
        with ThreadPoolExecutor(max_workers=parallel) as executor:
            futures = [executor.submit(encode_file, f) for f in files_to_process]
            for future in as_completed(futures):
                result = future.result()
    else:
        for file in files_to_process:
            encode_file(file)

    if dry_run:
        print("🔍 DRY-RUN COMPLETE: No actual changes made")
    else:
        print("✅ Processing complete!")


# =============================
# MAIN
# =============================
def main():
    """CLI entry point: parse arguments, load config.xml, run process_folder."""
    parser = argparse.ArgumentParser(description="Batch encode videos with logging and tracker")
    parser.add_argument("folder", help="Path to folder containing videos")
    parser.add_argument("--cq", type=int, help="Override default CQ")
    parser.add_argument("--r", "--resolution", dest="resolution", default="1080",
                        choices=["720", "1080"], help="Target resolution")
    parser.add_argument("--dry-run", action="store_true", help="Preview files without encoding")
    parser.add_argument("--verbose", "-v", action="store_true", help="Show FFmpeg output")
    parser.add_argument("--backup", action="store_true", help="Backup original files before encoding")
    parser.add_argument("--cleanup", action="store_true", help="Clean old processing folder on startup")
    parser.add_argument("--parallel", type=int, default=1, metavar="N",
                        help="Encode N files in parallel (experimental)")
    parser.add_argument("--ratio", type=float, help="Reduction ratio threshold (default 0.5 from config)")
    args = parser.parse_args()

    config_path = Path(__file__).parent / "config.xml"
    config = load_config_xml(config_path)

    # Override reduction ratio if provided. Compare against None so an
    # explicit "--ratio 0" is honored rather than silently dropped as falsy.
    if args.ratio is not None:
        config["reduction_ratio_threshold"] = args.ratio

    process_folder(Path(args.folder), args.cq, args.resolution, config,
                   dry_run=args.dry_run, verbose=args.verbose, backup=args.backup,
                   cleanup=args.cleanup, parallel=args.parallel)


if __name__ == "__main__":
    main()