conversion_project/core/process_manager.py
2026-01-10 12:53:01 -05:00

565 lines
28 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# core/process_manager.py
"""Main processing logic for batch transcoding."""
import csv
import os
import shutil
import subprocess
import time
from pathlib import Path
from core.audio_handler import get_audio_streams
from core.encode_engine import run_ffmpeg
from core.logger_helper import setup_logger, setup_failure_logger
from core.video_handler import get_source_resolution, get_source_bit_depth, determine_target_resolution
# Shared module-level loggers, created at import time. Both are handed the "logs"
# directory that sits two levels above this file. In this module `logger` carries
# general progress/diagnostic messages and `failure_logger` carries the per-file
# failure records.
logger = setup_logger(Path(__file__).parent.parent / "logs")
failure_logger = setup_failure_logger(Path(__file__).parent.parent / "logs")
def _cleanup_temp_files(temp_input: Path, temp_output: Path):
    """Best-effort removal of the staged input copy and the encoded temp output.

    Each path is handled independently: a failure while removing one is logged
    as a warning and never prevents the attempt on the other. Nothing is raised.
    """

    def _discard(path, report_removed, report_failure):
        # Existence check, unlink and the debug log all share one guard so any
        # error along the way is reported instead of propagating.
        try:
            if not path.exists():
                return
            path.unlink()
            report_removed()
        except Exception as exc:
            report_failure(exc)

    _discard(
        temp_input,
        lambda: logger.debug(f"Cleaned up temp input: {temp_input.name}"),
        lambda exc: logger.warning(f"Could not delete temp input {temp_input.name}: {exc}"),
    )
    _discard(
        temp_output,
        lambda: logger.debug(f"Cleaned up temp output: {temp_output.name}"),
        lambda exc: logger.warning(f"Could not delete temp output {temp_output.name}: {exc}"),
    )
def _stage_input_copy(file: Path, temp_input: Path) -> None:
    """Ensure a complete, readable copy of *file* exists at *temp_input*.

    An existing copy is reused only when its size matches the source; otherwise
    the file is (re-)copied. Copy errors propagate to the caller.
    """
    if temp_input.exists() and os.access(temp_input, os.R_OK):
        source_size = file.stat().st_size
        temp_size = temp_input.stat().st_size
        if source_size == temp_size:
            # Same size as the source: treat the earlier copy as complete.
            print(f"✓ Found existing copy in processing folder (verified complete)")
            logger.info(f"File already in processing: {file.name} ({temp_size/1e6:.2f} MB verified complete)")
        else:
            print(f"⚠️ Existing copy incomplete ({temp_size/1e6:.2f} MB vs {source_size/1e6:.2f} MB source). Re-copying...")
            logger.warning(f"Incomplete copy detected for {file.name}. Re-copying.")
            shutil.copy2(file, temp_input)
            logger.info(f"Re-copied {file.name} → {temp_input.name}")
    else:
        shutil.copy2(file, temp_input)
        logger.info(f"Copied {file.name} → {temp_input.name}")

    # Give slow/remote storage a moment to expose the new file. Without a pause
    # between checks, repeating the check could never change the outcome.
    checks = 3
    for attempt in range(checks):
        if temp_input.exists() and os.access(temp_input, os.R_OK):
            break
        if attempt < checks - 1:
            time.sleep(1)
    else:
        logger.warning(f"Staged copy is still not readable after {checks} checks: {temp_input.name}")


def _find_subtitle_files(file: Path, config: dict) -> list:
    """Return the sidecar subtitle files that sit next to *file*.

    For every configured extension this looks for an exact-stem match
    (``movie.srt``) and then for tagged variants (``movie.en.srt``,
    ``movie.en.forced.srt``). Results are de-duplicated and keep discovery
    order. Returns an empty list when subtitle handling is disabled in config.
    """
    import glob  # local import: only needed for escaping the stem below

    subtitles_cfg = config.get("general", {}).get("subtitles", {})
    if not subtitles_cfg.get("enabled", True):
        return []

    extensions = subtitles_cfg.get("extensions", ".vtt,.srt,.ass,.ssa,.sub").split(",")
    # Media names routinely contain "[" / "]"; unescaped they are parsed as a
    # glob character class and the pattern silently matches nothing (or the
    # wrong files).
    escaped_stem = glob.escape(file.stem)

    found = []
    seen = set()
    for ext in extensions:
        ext = ext.strip()
        if not ext:
            # A blank entry (e.g. trailing comma) would make the pattern
            # "<stem>.*" and match every sibling, including the video itself.
            continue
        exact = file.with_suffix(ext)
        candidates = [exact] if exact.exists() else []
        candidates.extend(sorted(file.parent.glob(f"{escaped_stem}.*{ext}")))
        for candidate in candidates:
            key = str(candidate)
            if key not in seen:
                seen.add(key)
                found.append(candidate)
    return found


def process_folder(folder: Path, cq: int, transcode_mode: str, resolution: str, config: dict, tracker_file: Path, test_mode: bool = False, audio_language: str = None, filter_audio: bool = None, audio_select: str = None, encoder: str = "hevc", strip_all_titles: bool = False):
    """
    Process all video files in folder with appropriate encoding settings.

    Phase 1 walks the folder recursively and encodes each matching file. In
    "compression" mode files whose CQ encode fails (ffmpeg error or size target
    missed) are queued and retried with Bitrate in Phase 2. Three consecutive
    failures end the current phase.

    Args:
        folder: Input folder path
        cq: CQ override value (None to use the per-content value from config)
        transcode_mode: "compression" (CQ first, Bitrate retry), "cq" or "bitrate".
            Any other value is handled like "cq".
        resolution: Explicit resolution override ("480", "720", "1080", or None for smart)
        config: Configuration dictionary
        tracker_file: Path to CSV tracker file
        test_mode: If True, stop after the first successfully encoded file and skip final move/cleanup
        audio_language: Optional language code to tag audio (e.g., 'eng', 'spa'). If None, no tagging applied.
        filter_audio: If True, show interactive audio selection prompt. If None, use config setting.
        audio_select: Pre-selected audio streams (comma-separated, e.g., "1,2"). Skips interactive prompt.
        encoder: Accepted for interface compatibility. NOTE: the value is currently
            ignored - the encoder is always auto-selected per file from the source
            bit depth ("hevc" for 10-bit and above, "av1" otherwise).
        strip_all_titles: If True, strip all title metadata from all audio tracks.
    """
    if not folder.exists():
        print(f"❌ Folder not found: {folder}")
        logger.error(f"Folder not found: {folder}")
        return

    audio_config = config["audio"]
    bitrate_config = config["encode"]["fallback"]
    filters_config = config["encode"]["filters"]
    suffix = config["suffix"]
    extensions = config["extensions"]
    ignore_tags = config["ignore_tags"]
    reduction_ratio_threshold = config["reduction_ratio_threshold"]

    # Resolution logic: explicit arg takes precedence, else use smart defaults
    explicit_resolution = resolution  # Will be None if not specified

    # Content type is inferred from a "tv" / "anime" directory in the input path.
    folder_lower = str(folder).lower()
    is_tv = "\\tv\\" in folder_lower or "/tv/" in folder_lower
    is_anime = "\\anime\\" in folder_lower or "/anime/" in folder_lower
    filter_flags = filters_config.get("default", "lanczos")
    if is_tv:
        filter_flags = filters_config.get("tv", "bicubic")
    elif is_anime:
        filter_flags = filters_config.get("anime", filters_config.get("default", "lanczos"))

    processing_folder = Path(config["processing_folder"])
    processing_folder.mkdir(parents=True, exist_ok=True)

    # Determine encoding mode. Smart mode always starts with CQ; everything that
    # is neither smart nor forced bitrate runs as forced CQ.
    is_smart_mode = transcode_mode == "compression"
    is_forced_bitrate = transcode_mode == "bitrate"
    method = "Bitrate" if is_forced_bitrate else "CQ"
    if not is_smart_mode and not is_forced_bitrate and transcode_mode != "cq":
        logger.warning(f"Unknown transcode_mode {transcode_mode!r}; treating it as 'cq'")

    failed_cq_files = []  # Retry records for CQ failures in compression mode
    consecutive_failures = 0
    max_consecutive = 3

    # Phase 1: Process files with initial mode strategy
    print(f"\n{'='*60}")
    if is_smart_mode:
        print("📋 MODE: Compression (Try CQ first, retry with Bitrate if needed)")
    elif is_forced_bitrate:
        print("📋 MODE: Bitrate (bitrate mode only, skip failures, log them)")
    else:
        print("📋 MODE: CQ (constant quality, skip failures, log them)")
    print(f"{'='*60}\n")

    skipped_count = 0
    for file in folder.rglob("*"):
        if file.suffix.lower() not in extensions:
            continue
        if any(tag.lower() in file.name.lower() for tag in ignore_tags):
            skipped_count += 1
            continue
        if skipped_count > 0:
            print(f"⏭️ Skipped {skipped_count} file(s)")
            logger.info(f"Skipped {skipped_count} file(s)")
            skipped_count = 0

        print("="*60)
        logger.info(f"Processing: {file.name}")
        print(f"📁 Processing: {file.name}")

        temp_input = (processing_folder / file.name).resolve()
        # Output is always an .mkv container carrying the configured suffix.
        # Computed before the try block so the error handler below never sees
        # an undefined path or one left over from the previous file.
        temp_output = (processing_folder / f"{file.stem}{suffix}.mkv").resolve()

        _stage_input_copy(file, temp_input)

        subtitle_files = _find_subtitle_files(file, config)
        for sub in subtitle_files:
            print(f"📝 Found subtitle: {sub.name}")
            logger.info(f"Found subtitle file: {sub.name}")

        try:
            # Detect source resolution and determine target resolution
            src_width, src_height = get_source_resolution(temp_input)
            res_width, res_height, target_resolution = determine_target_resolution(
                src_width, src_height, explicit_resolution
            )

            # Encoder is auto-selected from the source bit depth:
            # 10-bit or higher -> HEVC, 8-bit -> AV1. The `encoder` argument
            # cannot be told apart from its default here, so it is not consulted.
            source_bit_depth = get_source_bit_depth(temp_input)
            if source_bit_depth >= 10:
                selected_encoder = "hevc"
                encoder_note = "auto-selected (10+ bit source)"
            else:
                selected_encoder = "av1"
                encoder_note = "auto-selected (8-bit source)"
            print(f" Encoder: {selected_encoder} ({encoder_note})")
            logger.info(f"Selected encoder: {selected_encoder} - Source bit depth: {source_bit_depth}-bit")

            # Log resolution decision
            if explicit_resolution:
                logger.info(f"Using explicitly specified resolution: {res_width}x{res_height}")
            elif src_height > 1080:
                print(f"⚠️ Source {src_width}x{src_height} is above 1080p. Scaling down to 1080p.")
                logger.info(f"Source {src_width}x{src_height} detected. Scaling to 1080p.")
            elif src_height <= 720:
                print(f" Source {src_width}x{src_height} is 720p or lower. Preserving resolution.")
                logger.info(f"Source {src_width}x{src_height} (<=720p). Preserving source resolution.")
            else:
                print(f" Source {src_width}x{src_height} is at or below 1080p. Preserving resolution.")
                logger.info(f"Source {src_width}x{src_height} (<=1080p). Preserving source resolution.")

            # Set CQ based on content type, target resolution, and encoder
            if is_anime:
                cq_key = f"anime_{target_resolution}"
            elif is_tv:
                cq_key = f"tv_{target_resolution}"
            else:
                cq_key = f"movie_{target_resolution}"
            encoder_cq_config = config["encode"]["cq"].get(selected_encoder, {})
            content_cq = encoder_cq_config.get(cq_key, 32)
            file_cq = cq if cq is not None else content_cq

            # Determine audio_filter config (CLI arg overrides config file).
            # --filter-audio means: show interactive prompt, unless
            # --audio-select already names the streams.
            if filter_audio:
                audio_filter_config = {"enabled": True, "interactive": True}
                if audio_select:
                    audio_filter_config["preselected"] = audio_select
            else:
                audio_filter_config = config.get("general", {}).get("audio_filter", {})

            # Everything Phase 2 needs to retry this exact file, including the
            # encoder chosen for it.
            retry_record = {
                'file': file,
                'temp_input': temp_input,
                'temp_output': temp_output,
                'src_width': src_width,
                'src_height': src_height,
                'res_width': res_width,
                'res_height': res_height,
                'target_resolution': target_resolution,
                'file_cq': file_cq,
                'is_tv': is_tv,
                'subtitle_files': subtitle_files,
                'selected_encoder': selected_encoder,
            }

            # Attempt encoding
            attempt_failed = False
            try:
                orig_size, out_size, reduction_ratio = run_ffmpeg(
                    temp_input, temp_output, file_cq, res_width, res_height, src_width, src_height,
                    filter_flags, audio_config, method, bitrate_config, selected_encoder, subtitle_files, audio_language,
                    audio_filter_config, test_mode, strip_all_titles
                )
            except subprocess.CalledProcessError as e:
                # FFmpeg execution failed
                attempt_failed = True
                error_msg = str(e).split('\n')[0][:100]  # First 100 chars of error
                if is_smart_mode:
                    print(f"❌ CQ encode error. Will retry with Bitrate.")
                    failure_logger.warning(f"{file.name} | CQ error: {error_msg}")
                else:
                    print(f"{method} encode failed: {error_msg}")
                    failure_logger.warning(f"{file.name} | {method} error: {error_msg}")
            else:
                # The encode ran; it still counts as failed when the size target is missed.
                if reduction_ratio >= reduction_ratio_threshold:
                    attempt_failed = True
                    if is_smart_mode:
                        print(f"⚠️ CQ failed size target ({reduction_ratio:.1%}). Will retry with Bitrate.")
                        failure_logger.warning(f"{file.name} | CQ failed size target ({reduction_ratio:.1%})")
                    else:
                        error_msg = f"Size threshold not met ({reduction_ratio:.1%})"
                        print(f"{method} failed: {error_msg}")
                        failure_logger.warning(f"{file.name} | {method} failed: {error_msg}")

            if attempt_failed:
                consecutive_failures += 1
                limit_hit = consecutive_failures >= max_consecutive
                if is_smart_mode:
                    # Keep the temp files: Phase 2 re-encodes from the staged copy.
                    failed_cq_files.append(retry_record)
                    if limit_hit:
                        print(f"\n⚠️ {max_consecutive} consecutive CQ failures. Moving to Phase 2: Bitrate retry.")
                        logger.warning(f"{max_consecutive} consecutive CQ failures. Moving to Phase 2.")
                        break
                    continue
                # Forced modes: skip the file and discard its temp files.
                if limit_hit:
                    print(f"\n{max_consecutive} consecutive failures in forced {method} mode. Stopping.")
                    logger.error(f"{max_consecutive} consecutive failures. Stopping process.")
                _cleanup_temp_files(temp_input, temp_output)
                if limit_hit:
                    break
                continue

            # Encoding succeeded - reset failure counter, save file and log
            consecutive_failures = 0
            _save_successful_encoding(
                file, temp_input, temp_output, orig_size, out_size,
                reduction_ratio, method, src_width, src_height, res_width, res_height,
                file_cq, tracker_file, folder, is_tv, suffix, config, test_mode, subtitle_files
            )

            # In test mode, stop after first successful file
            if test_mode:
                print(f"\n✅ TEST MODE: File processed. Encoded file is in temp folder for inspection.")
                break
        except Exception as e:
            # Unexpected error (probing, encoding or saving)
            error_msg = str(e)[:100]
            print(f"❌ Unexpected error: {error_msg}")
            failure_logger.warning(f"{file.name} | Unexpected error: {error_msg}")
            consecutive_failures += 1
            logger.error(f"Unexpected error processing {file.name}: {e}")
            _cleanup_temp_files(temp_input, temp_output)
            if consecutive_failures >= max_consecutive:
                if is_smart_mode:
                    print(f"\n⚠️ {max_consecutive} consecutive failures. Moving to Phase 2.")
                else:
                    print(f"\n{max_consecutive} consecutive failures. Stopping.")
                break

    # Files skipped at the very end of the walk would otherwise never be reported.
    if skipped_count > 0:
        print(f"⏭️ Skipped {skipped_count} file(s)")
        logger.info(f"Skipped {skipped_count} file(s)")

    # Phase 2: Retry failed CQ files with Bitrate mode (smart mode only)
    if is_smart_mode and failed_cq_files:
        print(f"\n{'='*60}")
        print(f"📋 PHASE 2: Retrying {len(failed_cq_files)} failed files with Bitrate mode")
        print(f"{'='*60}\n")
        consecutive_failures = 0
        for file_data in failed_cq_files:
            file = file_data['file']
            temp_input = file_data['temp_input']
            temp_output = file_data['temp_output']
            try:
                print(f"🔄 Retrying: {file.name} with Bitrate")
                logger.info(f"Phase 2 Retry: {file.name} with Bitrate mode")

                # Clean up old output if it exists
                if temp_output.exists():
                    temp_output.unlink()

                # Retry with bitrate, using the encoder chosen for THIS file in Phase 1.
                # NOTE(review): the audio filter config is passed as None here, so the
                # retry may not honour the Phase 1 audio selection - confirm intent.
                orig_size, out_size, reduction_ratio = run_ffmpeg(
                    temp_input, temp_output, file_data['file_cq'],
                    file_data['res_width'], file_data['res_height'],
                    file_data['src_width'], file_data['src_height'],
                    filter_flags, audio_config, "Bitrate", bitrate_config, file_data['selected_encoder'],
                    file_data.get('subtitle_files'), audio_language, None, test_mode, strip_all_titles
                )

                # Check if bitrate also failed
                if reduction_ratio >= reduction_ratio_threshold:
                    print(f"⚠️ Bitrate also failed size target ({reduction_ratio:.1%}). Skipping.")
                    failure_logger.warning(f"{file.name} | Bitrate retry also failed ({reduction_ratio:.1%})")
                    consecutive_failures += 1
                    _cleanup_temp_files(temp_input, temp_output)
                    if consecutive_failures >= max_consecutive:
                        print(f"\n⚠️ {max_consecutive} consecutive Phase 2 failures. Stopping retries.")
                        break
                    continue

                # Bitrate succeeded. test_mode is forwarded so a test run never
                # moves the output or deletes the original.
                consecutive_failures = 0
                _save_successful_encoding(
                    file, temp_input, temp_output,
                    orig_size, out_size, reduction_ratio, "Bitrate",
                    file_data['src_width'], file_data['src_height'],
                    file_data['res_width'], file_data['res_height'],
                    file_data['file_cq'], tracker_file,
                    folder, file_data['is_tv'], suffix, config, test_mode,
                    file_data.get('subtitle_files')
                )
                if test_mode:
                    print(f"\n✅ TEST MODE: File processed. Encoded file is in temp folder for inspection.")
                    break
            except subprocess.CalledProcessError as e:
                error_msg = str(e).split('\n')[0][:100]
                print(f"❌ Bitrate retry failed: {error_msg}")
                failure_logger.warning(f"{file.name} | Bitrate retry error: {error_msg}")
                consecutive_failures += 1
                logger.error(f"Bitrate retry failed for {file.name}: {e}")
                _cleanup_temp_files(temp_input, temp_output)
                if consecutive_failures >= max_consecutive:
                    print(f"\n⚠️ {max_consecutive} consecutive Phase 2 failures. Stopping retries.")
                    break
            except Exception as e:
                error_msg = str(e)[:100]
                print(f"❌ Unexpected error in Phase 2: {error_msg}")
                failure_logger.warning(f"{file.name} | Phase 2 error: {error_msg}")
                consecutive_failures += 1
                _cleanup_temp_files(temp_input, temp_output)
                if consecutive_failures >= max_consecutive:
                    print(f"\n⚠️ {max_consecutive} consecutive Phase 2 failures. Stopping retries.")
                    break

    print(f"\n{'='*60}")
    print("✅ Batch processing complete")
    logger.info("Batch processing complete")
def _save_successful_encoding(file, temp_input, temp_output, orig_size, out_size,
                              reduction_ratio, method, src_width, src_height, res_width, res_height,
                              file_cq, tracker_file, folder, is_tv, suffix, config=None, test_mode=False, subtitle_files=None):
    """Finalize a successful encode: move it next to the source, record it, clean up.

    In test mode only a size summary is printed/logged and nothing is moved or
    deleted. Otherwise the encoded file is moved into the source file's
    directory, a row is appended to the CSV tracker, and the staged copy, the
    original (except for files under a "Featurettes" folder) and any sidecar
    subtitle files are removed.

    Args:
        file: Original source file.
        temp_input: Staged copy of the source in the processing folder.
        temp_output: Encoded file in the processing folder.
        orig_size, out_size: Sizes in bytes before/after encoding.
        reduction_ratio: Ratio reported by the encoder step (printed in test mode).
        method: "CQ" or "Bitrate".
        src_width, src_height, res_width, res_height: Source/target resolution for the tracker.
        file_cq: CQ value used (recorded only when method is "CQ").
        tracker_file: CSV file that receives one row per conversion.
        folder, is_tv, config: Accepted for interface compatibility; not used here.
        suffix: Output filename tag; stripped again for Featurettes.
        test_mode: If True, report only.
        subtitle_files: Sidecar subtitle files to delete after a successful save.

    Raises:
        OSError: If moving the output or writing the tracker fails. Deletion
            failures are logged, not raised.
    """
    orig_size_mb = round(orig_size / 1e6, 2)
    proc_size_mb = round(out_size / 1e6, 2)
    # A source under ~5 KB rounds to 0.0 MB; avoid dividing by zero.
    percentage = round(proc_size_mb / orig_size_mb * 100, 1) if orig_size_mb else 0.0

    # In test mode, show ratio and skip file move/cleanup
    if test_mode:
        print(f"\n{'='*60}")
        print(f"📊 TEST MODE RESULTS:")
        print(f"{'='*60}")
        print(f"Original: {orig_size_mb} MB")
        print(f"Encoded: {proc_size_mb} MB")
        print(f"Ratio: {percentage}% ({reduction_ratio:.1%} reduction)")
        print(f"Method: {method} (CQ={file_cq if method == 'CQ' else 'N/A'})")
        print(f"{'='*60}")
        print(f"📁 Encoded file location: {temp_output}")
        logger.info(f"TEST MODE - File: {file.name} | Ratio: {percentage}% | Method: {method}")
        return

    parent_parts = file.parent.parts
    folder_parts = [p.lower() for p in parent_parts]

    # Files under a Featurettes folder keep their plain name (no suffix tag).
    is_featurette = "featurettes" in folder_parts
    output_name = temp_output.name
    if is_featurette and suffix in output_name:
        output_name = output_name.replace(suffix, "")
    dest_file = file.parent / output_name

    shutil.move(temp_output, dest_file)
    print(f"🚚 Moved {temp_output.name} → {dest_file.name}")
    logger.info(f"Moved {temp_output.name} → {dest_file.name}")

    def _name_after(marker):
        # The show is the directory right below the "tv"/"anime" directory. The
        # index comes from the file's own path, so it must be applied to that
        # same path (not to the shorter input-folder path).
        idx = folder_parts.index(marker) + 1
        return parent_parts[idx] if idx < len(parent_parts) else "Unknown"

    # Classify file type based on the directories above the file
    if "tv" in folder_parts:
        f_type = "tv"
        show = _name_after("tv")
    elif "anime" in folder_parts:
        f_type = "anime"
        show = _name_after("anime")
    else:
        f_type = "movie"
        show = "N/A"

    # Get audio stream count for tracking; a probe failure must not lose the row.
    try:
        audio_stream_count = len(get_audio_streams(temp_input))
    except Exception:
        audio_stream_count = 0

    # Format resolutions for tracking
    src_resolution = f"{src_width}x{src_height}"
    target_res = f"{res_width}x{res_height}"
    cq_str = str(file_cq) if method == "CQ" else "N/A"

    with open(tracker_file, "a", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow([
            f_type, show, dest_file.name, orig_size_mb, proc_size_mb, percentage,
            src_resolution, target_res, audio_stream_count, cq_str, method
        ])

    # Enhanced logging with all conversion details
    logger.info(f"\n✅ CONVERSION COMPLETE: {dest_file.name}")
    logger.info(f" Type: {f_type.upper()} | Show: {show}")
    logger.info(f" Size: {orig_size_mb}MB → {proc_size_mb}MB ({percentage}% of original, {100-percentage:.1f}% reduction)")
    logger.info(f" Method: {method} | Status: SUCCESS")
    print(f"📝 Logged conversion: {dest_file.name} ({percentage}%), method={method}")

    try:
        temp_input.unlink()
        if is_featurette:
            # Featurettes are re-encoded in place; the original is not deleted.
            logger.info(f"Featurettes file preserved at origin: {file.name}")
        elif dest_file == file:
            # The move already replaced the original; deleting `file` now
            # would delete the freshly encoded output.
            logger.info(f"Original replaced in place by encoded output: {file.name}")
        else:
            file.unlink()
            logger.info(f"Deleted original and processing copy for {file.name}")

        # Clean up subtitle files if they exist
        for sub_file in subtitle_files or []:
            if not sub_file.exists():
                continue
            try:
                sub_file.unlink()
                print(f"🗑️ Removed subtitle: {sub_file.name}")
                logger.info(f"Removed subtitle: {sub_file.name}")
            except Exception as e:
                logger.warning(f"Could not delete subtitle file {sub_file.name}: {e}")
    except Exception as e:
        print(f"⚠️ Could not delete files: {e}")
        logger.warning(f"Could not delete files: {e}")