#!/usr/bin/env python3
|
|
import argparse
|
|
import csv
|
|
import json
|
|
import os
|
|
import shutil
|
|
import subprocess
|
|
from pathlib import Path
|
|
from functools import lru_cache
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
|
|
from core.config_helper import load_config_xml
|
|
from core.logger_helper import setup_logger
|
|
|
|
# =============================
# Setup logger
# =============================
# Log files go into a "logs" folder next to this script; setup_logger comes
# from the project's core.logger_helper.
LOG_FOLDER = Path(__file__).parent / "logs"
logger = setup_logger(LOG_FOLDER)

# =============================
# Tracker CSV
# =============================
# Append-only CSV recording one row per completed conversion.  Created with a
# header row on first import; later rows are appended by process_folder().
TRACKER_FILE = Path(__file__).parent / "conversion_tracker.csv"
if not TRACKER_FILE.exists():
    with open(TRACKER_FILE, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow([
            "type","show","filename","original_size_MB","processed_size_MB","percentage","method"
        ])
|
# =============================
# FFPROBE CACHING
# =============================
@lru_cache(maxsize=256)
def get_audio_streams_cached(input_file_str: str):
    """Cached ffprobe call to avoid redundant queries.

    Takes the path as a string (not Path) so lru_cache can hash the argument.
    Returns the parsed ffprobe JSON dict.

    Fix: the original ran json.loads(result.stdout) unconditionally, which
    raised json.JSONDecodeError whenever ffprobe failed (missing/unreadable
    file leaves stdout empty).  A failed probe now returns {"streams": []}
    so callers see "no audio streams" instead of crashing.
    """
    input_file = Path(input_file_str)
    cmd = [
        "ffprobe","-v","error","-select_streams","a",
        "-show_entries","stream=index,channels,duration,bit_rate,tags=language",
        "-of","json", str(input_file)
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    # Guard the parse: a failed probe leaves stdout empty or non-JSON.
    if result.returncode != 0 or not result.stdout.strip():
        logger.warning(f"ffprobe failed for {input_file.name}; treating as no audio streams")
        return {"streams": []}
    try:
        return json.loads(result.stdout)
    except json.JSONDecodeError:
        logger.warning(f"ffprobe returned unparseable output for {input_file.name}")
        return {"streams": []}
|
|
|
|
# =============================
# AUDIO BUCKET LOGIC
# =============================
def choose_audio_bitrate(channels: int, bitrate_kbps: int, audio_config: dict) -> int:
    """Map a source audio bitrate onto a configured low/medium/high bucket.

    Stereo (exactly 2 channels) and everything else ("multi_channel") use
    different cut-off points; the returned value is taken straight from
    audio_config[<layout>][<bucket>].
    """
    if channels == 2:
        layout, (low_cap, medium_cap) = "stereo", (100, 130)
    else:
        layout, (low_cap, medium_cap) = "multi_channel", (390, 515)

    if bitrate_kbps < low_cap:
        bucket = "low"
    elif bitrate_kbps < medium_cap:
        bucket = "medium"
    else:
        bucket = "high"
    return audio_config[layout][bucket]
|
# =============================
# PATH NORMALIZATION
# =============================
def normalize_path_for_service(local_path: str, path_mappings: dict) -> str:
    """Translate a local Windows-style path into the service's Linux-style path.

    path_mappings maps Windows prefixes to Linux prefixes.  The prefix match
    is case-insensitive and the first matching mapping wins.  Backslashes are
    always converted to forward slashes, even when no mapping applies.

    Fix: the original used local_path.replace(win_path, linux_path), which is
    case-SENSITIVE even though the startswith check is case-insensitive — a
    path whose case differed from the mapping key passed the check but was
    never remapped (and replace() could also hit the prefix string anywhere
    in the path, not just at the start).  We now splice the mapped prefix
    onto the original path's tail instead.
    """
    for win_path, linux_path in path_mappings.items():
        if local_path.lower().startswith(win_path.lower()):
            # Swap only the (case-insensitively matched) prefix; keep the tail.
            return (linux_path + local_path[len(win_path):]).replace("\\", "/")
    return local_path.replace("\\", "/")
|
|
# =============================
# AUDIO STREAMS DETECTION
# =============================
def get_audio_streams(input_file: Path):
    """Probe *input_file* with ffprobe and return one tuple per audio stream.

    Each tuple is (index, channels, avg_bitrate_kbps, language, metadata_kbps):
    avg_bitrate_kbps comes from stream metadata when present, otherwise from a
    container-size estimate, otherwise falls back to 128; metadata_kbps is the
    raw bit_rate tag in kbps (0 when absent).

    NOTE(review): the size-based estimate divides the WHOLE file size by the
    number of AUDIO streams, so video bytes inflate it heavily — treat it as a
    rough upper bound and confirm whether that is intentional.
    """
    cmd = [
        "ffprobe","-v","error","-select_streams","a",
        "-show_entries","stream=index,channels,duration,bit_rate,tags=language",
        "-of","json", str(input_file)
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    # NOTE(review): raises json.JSONDecodeError if ffprobe produced no output
    # (bad path / unreadable file) — callers do not guard this.
    data = json.loads(result.stdout)
    streams = []
    for s in data.get("streams", []):
        index = s["index"]
        channels = s.get("channels", 2)  # assume stereo when ffprobe omits it
        src_lang = s.get("tags", {}).get("language", "und")
        bit_rate_meta = int(s.get("bit_rate", 0)) if s.get("bit_rate") else 0
        try:
            duration = float(s.get("duration", 0))
            if duration and bit_rate_meta == 0:
                # No per-stream bitrate in metadata: probe the container once
                # and spread the total file size over the audio streams.
                fmt_cmd = [
                    "ffprobe","-v","error","-show_entries","format=size,duration",
                    "-of","json", str(input_file)
                ]
                fmt_result = subprocess.run(fmt_cmd, capture_output=True, text=True)
                fmt_data = json.loads(fmt_result.stdout)
                size_bytes = int(fmt_data.get("format", {}).get("size", 0))
                total_duration = float(fmt_data.get("format", {}).get("duration", duration))
                n_streams = len(data.get("streams", []))
                avg_bitrate_kbps = int((size_bytes*8/n_streams)/total_duration/1000)
            elif duration and bit_rate_meta:
                avg_bitrate_kbps = int(bit_rate_meta / 1000)
            else:
                # No usable duration: conservative default.
                avg_bitrate_kbps = 128
        except Exception:
            # Any probe/parse hiccup degrades to the default rather than failing.
            avg_bitrate_kbps = 128
        streams.append((index, channels, avg_bitrate_kbps, src_lang, int(bit_rate_meta / 1000)))
    return streams
|
|
# =============================
# OUTPUT VALIDATION
# =============================
def validate_output(input_file: Path, output_file: Path, expected_width: int, expected_height: int) -> bool:
    """Probe the encoded file and confirm its video resolution is near the target.

    Returns False when there is no video stream or the resolution is off by
    more than 10 px in either dimension; returns True otherwise — including
    when the probe itself errors out, since validation is best-effort and must
    not fail an otherwise-successful encode.
    """
    probe_cmd = [
        "ffprobe", "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream=width,height",
        "-of", "json", str(output_file)
    ]
    try:
        probe = subprocess.run(probe_cmd, capture_output=True, text=True, timeout=10)
        info = json.loads(probe.stdout)

        video_streams = info.get("streams")
        if not video_streams:
            logger.warning(f"❌ Validation failed: No video stream in {output_file.name}")
            return False

        first = video_streams[0]
        width = first.get("width", 0)
        height = first.get("height", 0)

        # Scaling with force_original_aspect_ratio may land slightly off target,
        # so accept a +/-10 px tolerance on each axis.
        within_tolerance = abs(width - expected_width) <= 10 and abs(height - expected_height) <= 10
        if not within_tolerance:
            logger.warning(f"❌ Validation failed: Resolution {width}x{height}, expected ~{expected_width}x{expected_height}")
            return False

        logger.info(f"✅ Validation passed: {output_file.name} ({width}x{height})")
        return True
    except Exception as e:
        logger.warning(f"⚠️ Validation skipped (probe error): {e}")
        return True  # Don't fail on validation errors
|
# =============================
# FFmpeg ENCODE (GPU + CPU fallback, per-resolution CPU preset)
# =============================
def run_ffmpeg(input_file: Path, output_file: Path, cq: int, scale_width: int, scale_height: int,
               filter_flags: str, audio_config: dict, method: str, crf_cpu: int, verbose: bool = False):
    """Encode *input_file* to *output_file* with av1_nvenc, falling back to libsvtav1.

    method selects rate control: "CQ" uses NVENC constant quality (-cq);
    anything else uses a capped bitrate chosen by target height.  Audio is
    re-encoded to AAC with per-stream bucketed bitrates (720p is always
    downmixed to stereo); subtitles are stream-copied.

    Returns (orig_size_bytes, out_size_bytes, reduction_ratio).
    Raises subprocess.CalledProcessError if both GPU and CPU encodes fail.
    """
    streams = get_audio_streams(input_file)
    encoder_name = "av1_nvenc"
    pix_fmt = "p010le"  # 10-bit pixel format for the NVENC AV1 path
    header = (
        f"\n🧩 ENCODE SETTINGS\n"
        f" • Resolution: {scale_width}x{scale_height}\n"
        f" • Scale Filter: {filter_flags}\n"
        f" • CQ: {cq if method=='CQ' else 'N/A'}\n"
        f" • CPU CRF: {crf_cpu}\n"
        f" • Video Encoder: {encoder_name} (preset p1, pix_fmt {pix_fmt})\n"
        f" • Audio Streams:"
    )
    logger.info(header)
    print(header)

    # First pass over the streams is purely informational (console + log).
    for (index, channels, avg_bitrate, src_lang, meta_bitrate) in streams:
        output_channels = 2 if scale_height <= 720 else (6 if channels >= 6 else 2)
        br = choose_audio_bitrate(output_channels, avg_bitrate, audio_config)
        line = (
            f" - Stream #{index}: {channels}ch→{output_channels}ch, src={src_lang}, "
            f"avg_bitrate={avg_bitrate}kbps, metadata={meta_bitrate}kbps, bucket_target={br/1000:.1f}kbps"
        )
        print(line)
        logger.info(line)

    # Base command: scale (preserving aspect ratio), map all video/audio and
    # any subtitles, GPU AV1 encoder.
    cmd = [
        "ffmpeg", "-y", "-i", str(input_file),
        "-vf", f"scale={scale_width}:{scale_height}:flags={filter_flags}:force_original_aspect_ratio=decrease",
        "-map", "0:v", "-map", "0:a", "-map", "0:s?",
        "-c:v", encoder_name, "-preset", "p1", "-pix_fmt", pix_fmt
    ]

    # Video quality
    if method == "CQ":
        cmd += ["-cq", str(cq)]
    else:
        # Capped-bitrate mode: targets depend on output height.
        if scale_height >= 1080:
            vb, maxrate, bufsize = "1500k", "1750k", "2250k"
        else:
            vb, maxrate, bufsize = "900k", "1250k", "1600k"
        cmd += ["-b:v", vb, "-maxrate", maxrate, "-bufsize", bufsize]

    # Audio streams
    for i, (index, channels, avg_bitrate, src_lang, meta_bitrate) in enumerate(streams):
        # Determine output channels: 720p -> 2ch, 1080p -> 6ch if input>=6 else 2ch
        output_channels = 2 if scale_height <= 720 else (6 if channels >= 6 else 2)
        # Choose bitrate based on OUTPUT channels, not input
        br = choose_audio_bitrate(output_channels, avg_bitrate, audio_config)
        # NOTE(review): "-ac:{i}" addresses OUTPUT stream #i overall (which
        # includes the video stream), not audio stream #i; "-ac:a:{i}" looks
        # like the intended specifier — confirm against ffmpeg docs.
        cmd += [f"-c:a:{i}", "aac", f"-b:a:{i}", str(br), f"-ac:{i}", str(output_channels)]

    cmd += ["-c:s", "copy", str(output_file)]

    print(f"\n🎬 Running {method} encode: {output_file.name}")
    logger.info(f"Running {method} encode: {output_file.name}")
    if verbose:
        logger.info(f"FFmpeg command: {' '.join(cmd)}")

    # Try GPU encoder first
    try:
        if verbose:
            subprocess.run(cmd, check=True)
        else:
            # Quiet mode: swallow ffmpeg's stdout/stderr.
            subprocess.run(cmd, check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        print(f"❌ FFmpeg failed with GPU encoder on {input_file.name}: {e}")
        logger.error(f"GPU encode failed for {input_file.name}. Command: {' '.join(cmd)}")

        # CPU fallback: clone the command and swap in the software encoder.
        cmd_cpu = cmd.copy()
        idx = cmd_cpu.index(encoder_name)
        cmd_cpu[idx] = "libsvtav1"

        # CPU preset based on resolution
        cpu_preset = "8" if scale_height <= 720 else "6"  # faster for 720p, slower for 1080p
        preset_idx = cmd_cpu.index("p1")
        cmd_cpu[preset_idx] = cpu_preset

        # Replace -cq with -crf (NVENC-only flag -> SVT-AV1 equivalent)
        if "-cq" in cmd_cpu:
            cq_idx = cmd_cpu.index("-cq")
            cmd_cpu[cq_idx] = "-crf"
            cmd_cpu[cq_idx + 1] = str(crf_cpu)

        try:
            if verbose:
                subprocess.run(cmd_cpu, check=True)
            else:
                subprocess.run(cmd_cpu, check=True, capture_output=True)
            print("✅ CPU fallback succeeded")
            logger.info("CPU fallback succeeded")
        except subprocess.CalledProcessError as e_cpu:
            # Both encoders failed — propagate so the caller can clean up.
            print(f"❌ CPU fallback also failed for {input_file.name}: {e_cpu}")
            logger.error(f"CPU fallback failed for {input_file.name}. Command: {' '.join(cmd_cpu)}")
            raise e_cpu

    # Report size change (reached via GPU success or CPU fallback success).
    orig_size = input_file.stat().st_size
    out_size = output_file.stat().st_size
    reduction_ratio = out_size / orig_size
    msg = f"📦 Original: {orig_size/1e6:.2f} MB → Encoded: {out_size/1e6:.2f} MB ({reduction_ratio:.1%} of original)"
    print(msg)
    logger.info(msg)

    return orig_size, out_size, reduction_ratio
|
|
# =============================
# PROCESS FOLDER
# =============================
def process_folder(folder: Path, cq: int, resolution: str, config: dict, dry_run: bool = False,
                   verbose: bool = False, backup: bool = False, cleanup: bool = False, parallel: int = 1):
    """Scan *folder* recursively and re-encode every eligible video file.

    Per file: copy into a scratch "processing" folder, encode via run_ffmpeg
    (CQ first, bitrate retry when the size target is missed), validate the
    output resolution, move the result back next to the original, append a
    row to the tracker CSV, then delete the original and the scratch copy.

    cq may be None, in which case a per-type/per-resolution default is read
    from config.  With dry_run=True nothing is encoded or deleted.
    """
    if not folder.exists():
        print(f"❌ Folder not found: {folder}")
        logger.error(f"Folder not found: {folder}")
        return

    audio_config = config["audio"]
    filters_config = config["encode"]["filters"]
    suffix = config["suffix"]            # appended to output filename stems
    extensions = config["extensions"]    # eligible file suffixes
    ignore_tags = config["ignore_tags"]  # filename substrings marking files to skip
    reduction_ratio_threshold = config["reduction_ratio_threshold"]
    res_height = 1080 if resolution == "1080" else 720
    res_width = 1920 if resolution == "1080" else 1280

    # Determine type and resolution keys (TV libraries use a different scaler)
    folder_lower = str(folder).lower()
    if "\\tv\\" in folder_lower or "/tv/" in folder_lower:
        type_key = "tv"
        filter_flags = filters_config.get("tv", "bicubic")
    else:
        type_key = "movie"
        filter_flags = filters_config.get("default", "lanczos")

    res_key = "1080" if resolution == "1080" else "720"

    # Get CQ and CRF from config (keyed like "tv_1080"); an explicit cq wins
    cq_default = config["encode"]["cq"].get(f"{type_key}_{res_key}", 32)
    crf_cpu = config["encode"]["crf"].get(f"{type_key}_{res_key}", 32)
    if cq is None:
        cq = cq_default

    processing_folder = Path(config["processing_folder"])
    processing_folder.mkdir(parents=True, exist_ok=True)

    # Cleanup old processing folder if requested
    if cleanup and processing_folder.exists():
        print(f"🧹 Cleaning up old processing folder: {processing_folder}")
        logger.info(f"Cleaning up old processing folder: {processing_folder}")
        shutil.rmtree(processing_folder, ignore_errors=True)
        processing_folder.mkdir(parents=True, exist_ok=True)

    # Backup folder setup
    backup_folder = None
    if backup:
        backup_folder = folder.parent / f"{folder.name}_backup"
        backup_folder.mkdir(parents=True, exist_ok=True)
        print(f"💾 Backup enabled: {backup_folder}")
        logger.info(f"Backup folder: {backup_folder}")

    # Dry-run message
    if dry_run:
        print("🔍 DRY-RUN MODE: No files will be encoded or deleted")
        logger.info("DRY-RUN MODE: No files will be encoded or deleted")

    # Track if we switch to bitrate mode
    # NOTE(review): never assigned True anywhere in this function, so every
    # file starts in CQ mode and only the per-file retry below uses Bitrate —
    # confirm whether a global switch was intended.
    use_bitrate = False

    # Collect all files to process first
    files_to_process = []
    for file in folder.rglob("*"):
        if file.suffix.lower() not in extensions:
            continue
        if any(tag.lower() in file.name.lower() for tag in ignore_tags):
            print(f"⏭️ Skipping: {file.name}")
            logger.info(f"Skipping: {file.name}")
            continue
        files_to_process.append(file)

    if not files_to_process:
        print("❌ No files found to process")
        logger.info("No files found to process")
        return

    print(f"📋 Found {len(files_to_process)} file(s) to process")

    # Define the encoding task
    def encode_file(file: Path):
        """Encodes a single file - used for parallel processing"""
        try:
            print("="*60)
            logger.info(f"Processing: {file.name}")
            print(f"📁 Processing: {file.name}")

            # Work on a scratch copy so the library file stays untouched
            # until the encode is validated.
            temp_input = processing_folder / file.name
            shutil.copy2(file, temp_input)
            logger.info(f"Copied {file.name} → {temp_input.name}")
            temp_output = processing_folder / f"{file.stem}{suffix}{file.suffix}"

            method = "Bitrate" if use_bitrate else "CQ"

            if dry_run:
                print(f"🔍 [DRY-RUN] Would encode: {temp_output}")
                logger.info(f"[DRY-RUN] Would encode: {temp_output}")
                return None

            try:
                orig_size, out_size, reduction_ratio = run_ffmpeg(
                    temp_input, temp_output, cq, res_width, res_height, filter_flags,
                    audio_config, method, crf_cpu, verbose
                )
            except subprocess.CalledProcessError as e:
                # Both GPU and CPU encodes failed: drop scratch files, keep original.
                print(f"❌ FFmpeg failed: {e}")
                logger.error(f"FFmpeg failed: {e}")
                temp_input.unlink(missing_ok=True)
                temp_output.unlink(missing_ok=True)
                return None

            # Validate output
            if not validate_output(temp_input, temp_output, res_width, res_height):
                print(f"⚠️ Validation failed for {temp_output.name}, keeping original")
                logger.warning(f"Validation failed for {temp_output.name}")
                temp_input.unlink(missing_ok=True)
                temp_output.unlink(missing_ok=True)
                return None

            # Handle fallback if CQ/Bitrate didn't reach target
            if method == "CQ" and reduction_ratio >= reduction_ratio_threshold:
                print(f"⚠️ CQ encode did not achieve target size ({reduction_ratio:.1%} >= {reduction_ratio_threshold:.1%}). Retrying with Bitrate.")
                logger.warning(f"CQ encode failed target ({reduction_ratio:.1%}). Retrying with Bitrate.")
                try:
                    temp_output.unlink(missing_ok=True)
                    orig_size, out_size, reduction_ratio = run_ffmpeg(
                        temp_input, temp_output, cq, res_width, res_height, filter_flags,
                        audio_config, "Bitrate", crf_cpu, verbose
                    )
                    if reduction_ratio >= reduction_ratio_threshold:
                        # Even the bitrate pass missed the target: give up on this file.
                        print("❌ Bitrate encode also failed target.")
                        logger.error("Bitrate encode failed target.")
                        temp_input.unlink(missing_ok=True)
                        temp_output.unlink(missing_ok=True)
                        return None
                except subprocess.CalledProcessError as e:
                    print(f"❌ Bitrate retry failed: {e}")
                    logger.error(f"Bitrate retry failed: {e}")
                    temp_input.unlink(missing_ok=True)
                    temp_output.unlink(missing_ok=True)
                    return None
            elif reduction_ratio >= reduction_ratio_threshold:
                # Already in Bitrate mode and still missed the target.
                print("❌ Encode failed target. Stopping.")
                logger.error("Encode failed target.")
                temp_input.unlink(missing_ok=True)
                temp_output.unlink(missing_ok=True)
                return None

            # Move final file back to original folder
            dest_file = file.parent / temp_output.name
            if not dry_run:
                shutil.move(temp_output, dest_file)
                print(f"🚚 Moved {temp_output.name} → {dest_file.name}")
                logger.info(f"Moved {temp_output.name} → {dest_file.name}")

            # Backup original if requested
            if backup and not dry_run:
                backup_dest = backup_folder / file.name
                shutil.copy2(file, backup_dest)
                logger.info(f"Backed up original to {backup_dest}")

            # Determine folder type and show (for the tracker CSV row)
            folder_parts = [p.lower() for p in folder.parts]
            if "tv" in folder_parts:
                f_type = "tv"
                tv_index = folder_parts.index("tv")
                show = folder.parts[tv_index + 1] if len(folder.parts) > tv_index + 1 else "Unknown"
            elif "anime" in folder_parts:
                f_type = "anime"
                anime_index = folder_parts.index("anime")
                show = folder.parts[anime_index + 1] if len(folder.parts) > anime_index + 1 else "Unknown"
            else:
                f_type = "movie"
                show = "N/A"

            orig_size_mb = round(orig_size / 1e6, 2)
            proc_size_mb = round(out_size / 1e6, 2)
            # NOTE(review): an original under ~5 kB rounds to 0.0 MB and would
            # raise ZeroDivisionError here — unlikely for video, but unguarded.
            percentage = round(proc_size_mb / orig_size_mb * 100, 1)

            # Log conversion in tracker CSV (skip in dry-run)
            if not dry_run:
                with open(TRACKER_FILE, "a", newline="", encoding="utf-8") as f:
                    writer = csv.writer(f)
                    writer.writerow([f_type, show, dest_file.name, orig_size_mb, proc_size_mb, percentage, method])

                logger.info(f"Tracked conversion: {dest_file.name}, {orig_size_mb}MB → {proc_size_mb}MB ({percentage}%), method={method}")
                print(f"📝 Logged conversion: {dest_file.name} ({percentage}%), method={method}")

            # Delete temporary and original files
            if not dry_run:
                try:
                    temp_input.unlink()
                    file.unlink()
                    logger.info(f"Deleted original and processing copy for {file.name}")
                except Exception as e:
                    # Best-effort cleanup: a locked/missing file is logged, not fatal.
                    print(f"⚠️ Could not delete files: {e}")
                    logger.warning(f"Could not delete files: {e}")

            return {"file": file.name, "orig": orig_size_mb, "proc": proc_size_mb, "pct": percentage}

        except Exception as e:
            # Catch-all so one bad file never aborts the whole batch.
            logger.error(f"Unexpected error processing {file.name}: {e}", exc_info=True)
            return None

    # Process files sequentially or in parallel
    # NOTE(review): per-file result dicts are collected but never aggregated
    # or reported in either branch.
    if parallel > 1:
        with ThreadPoolExecutor(max_workers=parallel) as executor:
            futures = [executor.submit(encode_file, f) for f in files_to_process]
            for future in as_completed(futures):
                result = future.result()
    else:
        for file in files_to_process:
            encode_file(file)

    if dry_run:
        print("🔍 DRY-RUN COMPLETE: No actual changes made")
    else:
        print("✅ Processing complete!")
|
|
|
# =============================
# MAIN
# =============================
def main():
    """CLI entry point: parse arguments, load the XML config, run the batch."""
    arg_parser = argparse.ArgumentParser(description="Batch encode videos with logging and tracker")
    arg_parser.add_argument("folder", help="Path to folder containing videos")
    arg_parser.add_argument("--cq", type=int, help="Override default CQ")
    arg_parser.add_argument("--r", "--resolution", dest="resolution", default="1080", choices=["720","1080"], help="Target resolution")
    arg_parser.add_argument("--dry-run", action="store_true", help="Preview files without encoding")
    arg_parser.add_argument("--verbose", "-v", action="store_true", help="Show FFmpeg output")
    arg_parser.add_argument("--backup", action="store_true", help="Backup original files before encoding")
    arg_parser.add_argument("--cleanup", action="store_true", help="Clean old processing folder on startup")
    arg_parser.add_argument("--parallel", type=int, default=1, metavar="N", help="Encode N files in parallel (experimental)")
    arg_parser.add_argument("--ratio", type=float, help="Reduction ratio threshold (default 0.5 from config)")
    opts = arg_parser.parse_args()

    # Config lives next to this script.
    cfg = load_config_xml(Path(__file__).parent / "config.xml")

    # CLI override for the size-reduction target
    # (truthiness check: an explicit --ratio 0.0 is ignored)
    if opts.ratio:
        cfg["reduction_ratio_threshold"] = opts.ratio

    process_folder(
        Path(opts.folder), opts.cq, opts.resolution, cfg,
        dry_run=opts.dry_run, verbose=opts.verbose, backup=opts.backup,
        cleanup=opts.cleanup, parallel=opts.parallel,
    )
|
|
|
|
# Standard script entry guard: run the CLI only when executed directly.
if __name__ == "__main__":
    main()