conversion_project/core/process_manager.py
2026-01-01 15:37:38 -05:00

479 lines
22 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# core/process_manager.py
"""Main processing logic for batch transcoding."""
import csv
import os
import shutil
import subprocess
import time
from pathlib import Path
from core.audio_handler import get_audio_streams
from core.encode_engine import run_ffmpeg
from core.logger_helper import setup_logger, setup_failure_logger
from core.video_handler import get_source_resolution, determine_target_resolution
# Module-wide loggers shared by every function in this file. Both setup helpers
# are handed the "logs" directory located two levels above this file.
# `logger` carries the general processing log; `failure_logger` receives one
# line per file that failed to encode or missed the size target.
logger = setup_logger(Path(__file__).parent.parent / "logs")
failure_logger = setup_failure_logger(Path(__file__).parent.parent / "logs")
def _cleanup_temp_files(temp_input: Path, temp_output: Path):
    """Best-effort removal of the staged input copy and the encoder output.

    Args:
        temp_input: Path of the copy placed in the processing folder, or None.
        temp_output: Path of the encoded file in the processing folder, or None.

    Either argument may be ``None`` (for example when an error occurred before
    an output path was produced); such entries are skipped. A path that does
    not exist is silently ignored. Deletion problems are logged as warnings
    and never raised, so this is safe to call from error handlers.
    """
    for label, path in (("input", temp_input), ("output", temp_output)):
        if path is None:
            continue
        try:
            if path.exists():
                path.unlink()
                logger.debug(f"Cleaned up temp {label}: {path.name}")
        except Exception as e:
            # Cleanup must never mask the error that triggered it.
            logger.warning(f"Could not delete temp {label} {path.name}: {e}")
def _wait_until_readable(path: Path, attempts: int = 3, delay: float = 0.5) -> bool:
    """Poll until *path* exists and is readable; return False if it never is."""
    for attempt in range(attempts):
        if path.exists() and os.access(path, os.R_OK):
            return True
        if attempt < attempts - 1:
            time.sleep(delay)
    return False


def _find_subtitle_file(file: Path, config: dict):
    """Return the sidecar subtitle belonging to *file*, or None if there is none.

    For each configured extension an exact match (movie.vtt) is preferred,
    then the first language-tagged variant (movie.en.vtt, movie.eng.vtt, ...).
    """
    # Local import keeps this block self-contained; the module does not import glob.
    from glob import escape as glob_escape

    subtitle_config = config.get("general", {}).get("subtitles", {})
    if not subtitle_config.get("enabled", True):
        return None

    subtitle_exts = subtitle_config.get("extensions", ".vtt,.srt,.ass,.ssa,.sub").split(",")
    # Media names routinely contain "[...]" tags, which glob would otherwise
    # read as character classes and therefore never match.
    escaped_stem = glob_escape(file.stem)
    for ext in subtitle_exts:
        ext = ext.strip()
        if not ext:
            # An empty entry (e.g. trailing comma) would turn the pattern into
            # "stem.*" and match the video file itself.
            continue
        exact_match = file.with_suffix(ext)
        if exact_match.exists():
            return exact_match
        tagged_match = next(file.parent.glob(f"{escaped_stem}.*{ext}"), None)
        if tagged_match is not None:
            return tagged_match
    return None


def process_folder(folder: Path, cq: int, transcode_mode: str, resolution: str, config: dict, tracker_file: Path, test_mode: bool = False, audio_language: str = None):
    """
    Process all video files in folder with appropriate encoding settings.

    Phase 1 walks the folder recursively, stages each matching file in the
    configured processing folder and encodes it. An encode whose
    reduction ratio is at or above ``config["reduction_ratio_threshold"]``
    counts as a failure. In smart mode such files (and files whose CQ encode
    raised ``CalledProcessError``) are queued and retried with Bitrate in
    Phase 2; in forced modes they are logged and skipped. Either phase stops
    after three consecutive failures.

    Args:
        folder: Input folder path
        cq: CQ override value (None to use the per-content value from config)
        transcode_mode: "cq" or "bitrate"; any other value selects smart mode
        resolution: Explicit resolution override ("480", "720", "1080", or None for smart)
        config: Configuration dictionary
        tracker_file: Path to CSV tracker file
        test_mode: If True, stop after the first successful encode and skip final
            move/cleanup (honoured in both phases)
        audio_language: Optional language code to tag audio (e.g., 'eng', 'spa'). If None, no tagging applied.
    """
    if not folder.exists():
        print(f"❌ Folder not found: {folder}")
        logger.error(f"Folder not found: {folder}")
        return

    audio_config = config["audio"]
    bitrate_config = config["encode"]["fallback"]
    filters_config = config["encode"]["filters"]
    suffix = config["suffix"]
    extensions = config["extensions"]
    ignore_tags = config["ignore_tags"]
    reduction_ratio_threshold = config["reduction_ratio_threshold"]

    # Resolution logic: explicit arg takes precedence, else use smart defaults
    explicit_resolution = resolution  # Will be None if not specified

    filter_flags = filters_config.get("default","lanczos")
    folder_lower = str(folder).lower()
    is_tv = "\\tv\\" in folder_lower or "/tv/" in folder_lower
    if is_tv:
        filter_flags = filters_config.get("tv","bicubic")

    processing_folder = Path(config["processing_folder"])
    processing_folder.mkdir(parents=True, exist_ok=True)

    # Determine if we're in smart mode (no explicit mode specified)
    is_smart_mode = transcode_mode not in ["cq", "bitrate"]  # Default/smart mode
    is_forced_cq = transcode_mode == "cq"
    is_forced_bitrate = transcode_mode == "bitrate"

    # Track files for potential retry in smart mode
    failed_cq_files = []  # One dict of metadata per CQ failure in smart mode
    consecutive_failures = 0
    max_consecutive = 3
    test_completed = False  # Set once test mode has produced its one encode

    # Phase 1: Process files with initial mode strategy
    print(f"\n{'='*60}")
    if is_smart_mode:
        print("📋 MODE: Smart (Try CQ first, retry with Bitrate if needed)")
    elif is_forced_cq:
        print("📋 MODE: Forced CQ (skip failures, log them)")
    else:
        print("📋 MODE: Forced Bitrate (skip failures, log them)")
    print(f"{'='*60}\n")

    skipped_count = 0
    for file in folder.rglob("*"):
        if file.suffix.lower() not in extensions:
            continue
        if any(tag.lower() in file.name.lower() for tag in ignore_tags):
            skipped_count += 1
            continue
        if skipped_count > 0:
            print(f"⏭️ Skipped {skipped_count} file(s)")
            logger.info(f"Skipped {skipped_count} file(s)")
            skipped_count = 0

        print("="*60)
        logger.info(f"Processing: {file.name}")
        print(f"📁 Processing: {file.name}")

        temp_input = (processing_folder / file.name).resolve()
        # Always output as .mkv (AV1 video codec) with [EHX] suffix.
        # Resolved up front so every error handler below can clean it up.
        temp_output = (processing_folder / f"{file.stem}{suffix}.mkv").resolve()

        shutil.copy2(file, temp_input)
        logger.info(f"Copied {file.name} → {temp_input.name}")

        # Give the filesystem a moment to expose the copy. If it never becomes
        # readable we only warn: the encode step below will fail and be counted
        # through the normal failure path.
        if not _wait_until_readable(temp_input):
            logger.warning(f"Copied file is not readable yet: {temp_input}")

        # Check for matching subtitle file
        subtitle_file = _find_subtitle_file(file, config)
        if subtitle_file:
            print(f"📝 Found subtitle: {subtitle_file.name}")
            logger.info(f"Found subtitle file: {subtitle_file.name}")

        try:
            # Detect source resolution and determine target resolution
            src_width, src_height = get_source_resolution(temp_input)
            res_width, res_height, target_resolution = determine_target_resolution(
                src_width, src_height, explicit_resolution
            )

            # Log resolution decision
            if explicit_resolution:
                logger.info(f"Using explicitly specified resolution: {res_width}x{res_height}")
            elif src_height > 1080:
                print(f"⚠️ Source {src_width}x{src_height} is above 1080p. Scaling down to 1080p.")
                logger.info(f"Source {src_width}x{src_height} detected. Scaling to 1080p.")
            elif src_height <= 720:
                print(f" Source {src_width}x{src_height} is 720p or lower. Preserving resolution.")
                logger.info(f"Source {src_width}x{src_height} (<=720p). Preserving source resolution.")
            else:
                print(f" Source {src_width}x{src_height} is at or below 1080p. Preserving resolution.")
                logger.info(f"Source {src_width}x{src_height} (<=1080p). Preserving source resolution.")

            # Set CQ based on content type and target resolution
            content_cq = config["encode"]["cq"].get(f"tv_{target_resolution}" if is_tv else f"movie_{target_resolution}", 32)
            file_cq = cq if cq is not None else content_cq

            # Smart mode always tries CQ first; only forced bitrate starts with Bitrate.
            method = "Bitrate" if is_forced_bitrate else "CQ"

            # Everything Phase 2 needs to retry this file with Bitrate.
            retry_entry = {
                'file': file,
                'temp_input': temp_input,
                'temp_output': temp_output,
                'src_width': src_width,
                'src_height': src_height,
                'res_width': res_width,
                'res_height': res_height,
                'target_resolution': target_resolution,
                'file_cq': file_cq,
                'is_tv': is_tv,
                'subtitle_file': subtitle_file
            }

            # Attempt encoding. `failure` stays None on success, otherwise it
            # holds (console message, failure-log message).
            failure = None
            try:
                orig_size, out_size, reduction_ratio = run_ffmpeg(
                    temp_input, temp_output, file_cq, res_width, res_height, src_width, src_height,
                    filter_flags, audio_config, method, bitrate_config, subtitle_file, audio_language
                )
            except subprocess.CalledProcessError as e:
                # FFmpeg execution failed
                error_msg = str(e).split('\n')[0][:100]  # First 100 chars of error
                if is_smart_mode:
                    failure = (
                        f"❌ CQ encode error. Will retry with Bitrate.",
                        f"{file.name} | CQ error: {error_msg}",
                    )
                else:
                    failure = (
                        f"{method} encode failed: {error_msg}",
                        f"{file.name} | {method} error: {error_msg}",
                    )
            else:
                # Size threshold not met
                if reduction_ratio >= reduction_ratio_threshold:
                    if is_smart_mode:
                        failure = (
                            f"⚠️ CQ failed size target ({reduction_ratio:.1%}). Will retry with Bitrate.",
                            f"{file.name} | CQ failed size target ({reduction_ratio:.1%})",
                        )
                    else:
                        error_msg = f"Size threshold not met ({reduction_ratio:.1%})"
                        failure = (
                            f"{method} failed: {error_msg}",
                            f"{file.name} | {method} failed: {error_msg}",
                        )

            if failure is not None:
                console_msg, failure_msg = failure
                print(console_msg)
                failure_logger.warning(failure_msg)
                consecutive_failures += 1
                limit_reached = consecutive_failures >= max_consecutive

                if is_smart_mode:
                    # Keep the staged copy: Phase 2 re-encodes from it.
                    failed_cq_files.append(retry_entry)
                    if limit_reached:
                        print(f"\n⚠️ {max_consecutive} consecutive CQ failures. Moving to Phase 2: Bitrate retry.")
                        logger.warning(f"{max_consecutive} consecutive CQ failures. Moving to Phase 2.")
                        break  # Move to Phase 2
                    continue

                # Forced mode: skip the file, nothing will retry it.
                if limit_reached:
                    print(f"\n{max_consecutive} consecutive failures in forced {method} mode. Stopping.")
                    logger.error(f"{max_consecutive} consecutive failures. Stopping process.")
                _cleanup_temp_files(temp_input, temp_output)
                if limit_reached:
                    break
                continue

            # Encoding succeeded - reset failure counter, save file and log
            consecutive_failures = 0
            _save_successful_encoding(
                file, temp_input, temp_output, orig_size, out_size,
                reduction_ratio, method, src_width, src_height, res_width, res_height,
                file_cq, tracker_file, folder, is_tv, config, test_mode, subtitle_file
            )

            # In test mode, stop after first successful file
            if test_mode:
                print(f"\n✅ TEST MODE: File processed. Encoded file is in temp folder for inspection.")
                test_completed = True
                break

        except Exception as e:
            # Unexpected error
            error_msg = str(e)[:100]
            print(f"❌ Unexpected error: {error_msg}")
            failure_logger.warning(f"{file.name} | Unexpected error: {error_msg}")
            consecutive_failures += 1
            logger.error(f"Unexpected error processing {file.name}: {e}")
            _cleanup_temp_files(temp_input, temp_output)
            if consecutive_failures >= max_consecutive:
                if is_smart_mode:
                    print(f"\n⚠️ {max_consecutive} consecutive failures. Moving to Phase 2.")
                else:
                    print(f"\n{max_consecutive} consecutive failures. Stopping.")
                break

    # Ignored files at the very end of the walk have not been reported yet.
    if skipped_count > 0:
        print(f"⏭️ Skipped {skipped_count} file(s)")
        logger.info(f"Skipped {skipped_count} file(s)")

    # Phase 2: Retry failed CQ files with Bitrate mode (smart mode only).
    # Skipped when test mode already produced its file in Phase 1.
    retried = 0
    if is_smart_mode and failed_cq_files and not test_completed:
        print(f"\n{'='*60}")
        print(f"📋 PHASE 2: Retrying {len(failed_cq_files)} failed files with Bitrate mode")
        print(f"{'='*60}\n")
        consecutive_failures = 0

        for file_data in failed_cq_files:
            retried += 1
            file = file_data['file']
            temp_input = file_data['temp_input']
            temp_output = file_data['temp_output']
            try:
                print(f"🔄 Retrying: {file.name} with Bitrate")
                logger.info(f"Phase 2 Retry: {file.name} with Bitrate mode")

                # Clean up old output if it exists
                if temp_output.exists():
                    temp_output.unlink()

                # Retry with bitrate
                orig_size, out_size, reduction_ratio = run_ffmpeg(
                    temp_input, temp_output, file_data['file_cq'],
                    file_data['res_width'], file_data['res_height'],
                    file_data['src_width'], file_data['src_height'],
                    filter_flags, audio_config, "Bitrate", bitrate_config,
                    file_data.get('subtitle_file'), audio_language
                )

                # Check if bitrate also failed
                if reduction_ratio >= reduction_ratio_threshold:
                    print(f"⚠️ Bitrate also failed size target ({reduction_ratio:.1%}). Skipping.")
                    failure_logger.warning(f"{file.name} | Bitrate retry also failed ({reduction_ratio:.1%})")
                    consecutive_failures += 1
                    _cleanup_temp_files(temp_input, temp_output)
                    if consecutive_failures >= max_consecutive:
                        print(f"\n⚠️ {max_consecutive} consecutive Phase 2 failures. Stopping retries.")
                        break
                    continue

                # Bitrate succeeded
                consecutive_failures = 0
                _save_successful_encoding(
                    file, temp_input, temp_output,
                    orig_size, out_size, reduction_ratio, "Bitrate",
                    file_data['src_width'], file_data['src_height'],
                    file_data['res_width'], file_data['res_height'],
                    file_data['file_cq'], tracker_file,
                    folder, file_data['is_tv'], config, test_mode,
                    file_data.get('subtitle_file')
                )

                # Test mode promises a single encode and no move/delete.
                if test_mode:
                    print(f"\n✅ TEST MODE: File processed. Encoded file is in temp folder for inspection.")
                    break

            except subprocess.CalledProcessError as e:
                error_msg = str(e).split('\n')[0][:100]
                print(f"❌ Bitrate retry failed: {error_msg}")
                failure_logger.warning(f"{file.name} | Bitrate retry error: {error_msg}")
                consecutive_failures += 1
                logger.error(f"Bitrate retry failed for {file.name}: {e}")
                _cleanup_temp_files(temp_input, temp_output)
                if consecutive_failures >= max_consecutive:
                    print(f"\n⚠️ {max_consecutive} consecutive Phase 2 failures. Stopping retries.")
                    break
            except Exception as e:
                error_msg = str(e)[:100]
                print(f"❌ Unexpected error in Phase 2: {error_msg}")
                failure_logger.warning(f"{file.name} | Phase 2 error: {error_msg}")
                consecutive_failures += 1
                _cleanup_temp_files(temp_input, temp_output)
                if consecutive_failures >= max_consecutive:
                    print(f"\n⚠️ {max_consecutive} consecutive Phase 2 failures. Stopping retries.")
                    break

    # Files queued for retry but never reached still have staged copies (and
    # possibly a rejected CQ output) in the processing folder; remove them.
    for file_data in failed_cq_files[retried:]:
        _cleanup_temp_files(file_data['temp_input'], file_data['temp_output'])

    print(f"\n{'='*60}")
    print("✅ Batch processing complete")
    logger.info("Batch processing complete")
def _save_successful_encoding(file, temp_input, temp_output, orig_size, out_size,
                              reduction_ratio, method, src_width, src_height, res_width, res_height,
                              file_cq, tracker_file, folder, is_tv, config=None, test_mode=False, subtitle_file=None):
    """Helper function to save successfully encoded files with [EHX] tag and clean up subtitle files.

    In test mode only a size summary is printed and logged; nothing is moved
    or deleted. Otherwise the encoded file is moved next to the original, a
    row is appended to the CSV tracker, and the staged copy, the original and
    any embedded subtitle file are deleted.

    Args:
        file: Original source video path.
        temp_input: Staged copy of the source in the processing folder.
        temp_output: Encoded file in the processing folder.
        orig_size: Original size in bytes (divided by 1e6 to report MB).
        out_size: Encoded size in bytes (divided by 1e6 to report MB).
        reduction_ratio: Ratio reported by the encoder, printed in test mode.
        method: "CQ" or "Bitrate".
        src_width, src_height: Source resolution.
        res_width, res_height: Target resolution.
        file_cq: CQ value used (only recorded when method is "CQ").
        tracker_file: CSV file that receives one row per conversion.
        folder: Batch root folder; its path parts decide tv/anime/movie.
        is_tv: Accepted for interface compatibility; not read here.
        config: Accepted for interface compatibility; not read here.
        test_mode: If True, report only and return early.
        subtitle_file: Sidecar subtitle that was embedded, or None.
    """
    orig_size_mb = round(orig_size / 1e6, 2)
    proc_size_mb = round(out_size / 1e6, 2)
    # A source that rounds to 0.0 MB would otherwise raise ZeroDivisionError
    # after the encode has already succeeded.
    percentage = round(proc_size_mb / orig_size_mb * 100, 1) if orig_size_mb else 0.0

    # In test mode, show ratio and skip file move/cleanup
    if test_mode:
        print(f"\n{'='*60}")
        print(f"📊 TEST MODE RESULTS:")
        print(f"{'='*60}")
        print(f"Original: {orig_size_mb} MB")
        print(f"Encoded: {proc_size_mb} MB")
        print(f"Ratio: {percentage}% ({reduction_ratio:.1%} reduction)")
        print(f"Method: {method} (CQ={file_cq if method == 'CQ' else 'N/A'})")
        print(f"{'='*60}")
        print(f"📁 Encoded file location: {temp_output}")
        logger.info(f"TEST MODE - File: {file.name} | Ratio: {percentage}% | Method: {method}")
        return

    dest_file = file.parent / temp_output.name
    shutil.move(temp_output, dest_file)
    print(f"🚚 Moved {temp_output.name} → {dest_file.name}")
    logger.info(f"Moved {temp_output.name} → {dest_file.name}")

    # Classify file type based on folder ("tv" takes precedence over "anime");
    # the show name is the path component right after the category folder.
    folder_parts = [p.lower() for p in folder.parts]
    f_type = "movie"
    show = "N/A"
    for category in ("tv", "anime"):
        if category in folder_parts:
            category_index = folder_parts.index(category)
            f_type = category
            show = folder.parts[category_index + 1] if len(folder.parts) > category_index + 1 else "Unknown"
            break

    # Get audio stream count for tracking; the count is informational only,
    # so a probe failure must not undo a finished conversion.
    try:
        audio_streams = get_audio_streams(temp_input)
        audio_stream_count = len(audio_streams)
    except Exception:
        audio_stream_count = 0

    # Format resolutions for tracking
    src_resolution = f"{src_width}x{src_height}"
    target_res = f"{res_width}x{res_height}"
    cq_str = str(file_cq) if method == "CQ" else "N/A"

    with open(tracker_file, "a", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow([
            f_type, show, dest_file.name, orig_size_mb, proc_size_mb, percentage,
            src_resolution, target_res, audio_stream_count, cq_str, method
        ])

    # Enhanced logging with all conversion details
    logger.info(f"\n✅ CONVERSION COMPLETE: {dest_file.name}")
    logger.info(f" Type: {f_type.upper()} | Show: {show}")
    logger.info(f" Size: {orig_size_mb}MB → {proc_size_mb}MB ({percentage}% of original, {100-percentage:.1f}% reduction)")
    logger.info(f" Method: {method} | Status: SUCCESS")
    print(f"📝 Logged conversion: {dest_file.name} ({percentage}%), method={method}")

    try:
        # Order matters: if the staged copy cannot be removed, the original
        # and the subtitle are left in place.
        temp_input.unlink()
        file.unlink()
        logger.info(f"Deleted original and processing copy for {file.name}")
        # Clean up subtitle file if it was embedded
        if subtitle_file and subtitle_file.exists():
            try:
                subtitle_file.unlink()
                print(f"🗑️ Removed embedded subtitle: {subtitle_file.name}")
                logger.info(f"Removed embedded subtitle: {subtitle_file.name}")
            except Exception as e:
                logger.warning(f"Could not delete subtitle file {subtitle_file.name}: {e}")
    except Exception as e:
        print(f"⚠️ Could not delete files: {e}")
        logger.warning(f"Could not delete files: {e}")