220 lines
8.9 KiB
Python
220 lines
8.9 KiB
Python
# core/audio_handler.py
|
|
"""Audio stream detection, bitrate calculation, and codec selection."""
|
|
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import tempfile
|
|
from pathlib import Path
|
|
|
|
from core.logger_helper import setup_logger
|
|
|
|
# Module-level logger shared by every function in this file. setup_logger is
# handed the "logs" directory that sits two levels above this file
# (presumably where log files are written -- confirm in core.logger_helper).
logger = setup_logger(Path(__file__).parent.parent / "logs")
|
|
|
|
|
|
def calculate_stream_bitrate(input_file: Path, stream_index: int) -> int:
    """
    Measure the real bitrate of one audio stream by stream-copying it to a temp file.

    The stream is extracted with ``ffmpeg -c copy`` (no re-encoding) into a
    temporary ``.mka`` file inside the project's ``processing`` directory.
    The bitrate is read from the *last* progress line ffmpeg prints to stderr
    (the cumulative average over the whole stream). If no progress line can be
    parsed, the bitrate is computed from the temp file's size and duration.
    The temporary file is always removed before returning.

    Args:
        input_file: Path of the media file to inspect.
        stream_index: Zero-based position among the file's *audio* streams
            (passed to ffmpeg as ``0:a:<stream_index>``).

    Returns:
        Bitrate in kbps, or 0 when it could not be determined. A return of 0
        signals the caller to fall back to container metadata.
    """
    # Ensure input file exists and is readable
    input_file = Path(input_file)
    if not input_file.exists():
        logger.error(f"Input file does not exist: {input_file}")
        return 0

    if not os.access(input_file, os.R_OK):
        logger.error(f"Input file is not readable (permission denied): {input_file}")
        return 0

    # Use project processing directory for temp files. MKA (Matroska Audio)
    # is used because it can hold any codec (AC3, AAC, FLAC, DTS, Opus, ...).
    processing_dir = Path(__file__).parent.parent / "processing"
    try:
        processing_dir.mkdir(parents=True, exist_ok=True)
        temp_fd, temp_audio_path = tempfile.mkstemp(suffix=".mka", dir=str(processing_dir))
        os.close(temp_fd)
    except OSError as e:
        # Honour the "returns 0 on failure" contract instead of raising.
        logger.warning(f"Stream {stream_index}: could not create temporary file in {processing_dir}: {e}")
        return 0

    try:
        # The codec name is informational only (used in the debug log below).
        codec_name = _probe_audio_codec_name(input_file, stream_index)

        # Step 1: Extract audio stream with -c copy (lossless extraction).
        # ffmpeg writes its progress/bitrate info to stderr.
        extract_cmd = [
            "ffmpeg", "-y", "-i", str(input_file),
            "-map", f"0:a:{stream_index}",
            "-c", "copy",
            temp_audio_path,
        ]
        logger.debug(f"Extracting audio stream {stream_index} ({codec_name}) to temporary file for bitrate calculation...")
        # errors="replace": ffmpeg echoes file metadata that is not always
        # valid UTF-8; a decode error must not abort the measurement.
        result = subprocess.run(
            extract_cmd, capture_output=True, text=True, errors="replace", check=False
        )

        # Check if extraction succeeded
        if result.returncode != 0:
            logger.warning(f"Stream {stream_index}: ffmpeg extraction failed (return code {result.returncode})")
            if result.stderr:
                logger.debug(f"ffmpeg stderr: {result.stderr[:300]}")
            return 0

        # Step 2: Parse bitrate from ffmpeg's output (stderr)
        bitrate_kbps = _parse_reported_bitrate_kbps(result.stderr)
        if bitrate_kbps > 0:
            logger.debug(f"Stream {stream_index}: Extracted bitrate from ffmpeg output: {bitrate_kbps} kbps")
            return bitrate_kbps

        # Step 3: Nothing usable in the output, fall back to size / duration
        logger.debug(f"Stream {stream_index}: Could not parse bitrate from ffmpeg output, calculating from file size...")
        bitrate_kbps = _bitrate_from_file_kbps(temp_audio_path)
        logger.debug(f"Stream {stream_index}: Calculated bitrate from file: {bitrate_kbps} kbps")
        return bitrate_kbps

    except Exception as e:
        # Best-effort measurement: any failure degrades to the metadata fallback.
        logger.warning(f"Failed to calculate bitrate for stream {stream_index}: {e}. Will fall back to metadata.")
        return 0

    finally:
        # Clean up temporary audio file
        try:
            if os.path.exists(temp_audio_path):
                os.remove(temp_audio_path)
                logger.debug(f"Deleted temporary audio file: {temp_audio_path}")
        except OSError as e:
            logger.warning(f"Could not delete temporary file {temp_audio_path}: {e}")


def _probe_audio_codec_name(input_file: Path, stream_index: int) -> str:
    """Return the lower-cased codec name of an audio stream, or "unknown" if ffprobe cannot tell."""
    probe_cmd = [
        "ffprobe", "-v", "error",
        "-select_streams", f"a:{stream_index}",
        "-show_entries", "stream=codec_name",
        "-of", "default=noprint_wrappers=1:nokey=1",
        str(input_file),
    ]
    try:
        probe_result = subprocess.run(
            probe_cmd, capture_output=True, text=True, errors="replace", check=False
        )
    except (OSError, subprocess.SubprocessError):
        # e.g. ffprobe is not installed; the codec name is only used for logging.
        return "unknown"
    if probe_result.returncode != 0:
        return "unknown"
    return probe_result.stdout.strip().lower() or "unknown"


def _parse_reported_bitrate_kbps(ffmpeg_stderr: str) -> int:
    """Return the last bitrate (kbps) reported in ffmpeg progress output, or 0 if none parses."""
    # Progress lines look like:
    #   "size= 352162KiB time=01:45:03.05 bitrate= 457.7kbits/s speed=..."
    # Each one reports the running average so far, so only the final line
    # describes the whole stream. Scan from the end and take the first hit.
    for line in reversed((ffmpeg_stderr or "").splitlines()):
        if "bitrate=" not in line:
            continue
        value_text = line.rsplit("bitrate=", 1)[1].split("kbits/s")[0].strip()
        try:
            bitrate_kbps = int(float(value_text))
        except (ValueError, OverflowError):
            # "N/A" or otherwise unparseable -- try an earlier progress line.
            continue
        if bitrate_kbps > 0:
            return bitrate_kbps
    return 0


def _bitrate_from_file_kbps(audio_path: str) -> int:
    """Compute bitrate (kbps) of an extracted audio file from its size and ffprobe-reported duration."""
    file_size_bytes = os.path.getsize(audio_path)

    # Get duration using ffprobe
    duration_cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        audio_path,
    ]
    duration_result = subprocess.run(
        duration_cmd, capture_output=True, text=True, errors="replace", check=True
    )
    duration_seconds = float(duration_result.stdout.strip())
    if duration_seconds <= 0:
        raise ValueError(f"non-positive duration reported: {duration_seconds}")

    return int((file_size_bytes * 8) / duration_seconds / 1000)
|
|
|
|
|
|
def get_audio_streams(input_file: Path):
    """
    Detect audio streams and determine a robust bitrate for each one.

    ffprobe lists the audio streams; each stream is then measured with
    calculate_stream_bitrate(). When that measurement returns 0, the bitrate
    from container metadata is used, and if that is missing too, 160 kbps.

    Args:
        input_file: Path of the media file to inspect.

    Returns:
        A list with one tuple per audio stream:
        (index, channels, calculated_bitrate_kbps, language, metadata_bitrate_kbps)
        - index: the stream's index as reported by ffprobe
        - channels: channel count (2 if ffprobe does not report it)
        - calculated_bitrate_kbps: measured bitrate, or the fallback value
        - language: the stream's language tag ("und" if absent)
        - metadata_bitrate_kbps: bitrate from metadata in kbps (0 if absent)

    Raises:
        json.JSONDecodeError: if ffprobe produced no parseable JSON on stdout.
    """
    cmd = [
        "ffprobe", "-v", "error", "-select_streams", "a",
        "-show_entries", "stream=index,channels,bit_rate,tags=language",
        "-of", "json", str(input_file),
    ]
    # errors="replace": tag values are not guaranteed to be valid UTF-8.
    result = subprocess.run(cmd, capture_output=True, text=True, errors="replace")
    if result.returncode != 0:
        # Make the failure visible; without this a failed probe can look
        # exactly like a file that simply has no audio streams.
        logger.error(
            f"ffprobe failed for {input_file} (return code {result.returncode}): "
            f"{(result.stderr or '').strip()[:300]}"
        )
    data = json.loads(result.stdout)
    streams = []

    # stream_num counts audio streams only (what ffmpeg's "0:a:N" expects),
    # whereas s["index"] is the index ffprobe reports for the stream.
    for stream_num, s in enumerate(data.get("streams", [])):
        index = s["index"]
        channels = s.get("channels", 2)
        src_lang = s.get("tags", {}).get("language", "und")

        # bit_rate is optional and may be non-numeric; treat both as "unknown".
        try:
            bit_rate_meta = int(s.get("bit_rate") or 0)
        except (TypeError, ValueError):
            bit_rate_meta = 0
        metadata_bitrate_kbps = int(bit_rate_meta / 1000) if bit_rate_meta else 0

        # Calculate robust bitrate by extracting the audio stream
        calculated_bitrate_kbps = calculate_stream_bitrate(input_file, stream_num)

        # If calculation failed, fall back to metadata, then to 160 kbps
        if calculated_bitrate_kbps == 0:
            calculated_bitrate_kbps = metadata_bitrate_kbps or 160
            logger.info(f"Stream {index}: Using fallback bitrate {calculated_bitrate_kbps} kbps")

        streams.append((index, channels, calculated_bitrate_kbps, src_lang, metadata_bitrate_kbps))

    return streams
|
|
|
|
|
|
def choose_audio_bitrate(channels: int, bitrate_kbps: int, audio_config: dict, is_1080_class: bool) -> tuple:
    """
    Pick the audio codec and target bitrate for one stream.

    Inputs with 6 or more channels are handled as multi-channel; everything
    else is handled as stereo. Thresholds come from ``audio_config`` (values
    in bits/sec) and are compared against ``bitrate_kbps`` after dividing
    by 1000.

    Returns a tuple ``(codec, target_bitrate_bps)``:
      - codec: "aac" to re-encode, or "copy" to keep the original stream
      - target_bitrate_bps: target in bits/sec (0 when codec is "copy")

    Decision table:
      Multi-channel (audio_config["multi_channel"]):
        - source below "low"             -> ("copy", 0)
        - source below "medium"          -> ("aac", low)
        - otherwise                      -> ("aac", medium)
      Stereo, 1080-class video (audio_config["stereo"]["high"]):
        - source above "high"            -> ("aac", high)
        - otherwise                      -> ("copy", 0)
      Stereo, other video (audio_config["stereo"]["medium"]):
        - source above "medium"          -> ("aac", medium)
        - otherwise                      -> ("copy", 0)
    """
    # --- Multi-channel (6ch+) ---------------------------------------------
    if channels >= 6:
        surround_cfg = audio_config["multi_channel"]
        low_br = surround_cfg["low"]
        medium_br = surround_cfg["medium"]

        # Re-encoding a source that is already under the lowest tier would
        # only inflate it, so keep the original stream.
        if bitrate_kbps < (low_br / 1000):
            logger.info(f"Multi-channel audio {bitrate_kbps}kbps < {low_br/1000:.0f}k minimum - copying original to avoid artifical inflation")
            return ("copy", 0)

        target_bps = low_br if bitrate_kbps < (medium_br / 1000) else medium_br
        return ("aac", target_bps)

    # --- Stereo, 1080-class video -----------------------------------------
    if is_1080_class:
        high_br = audio_config["stereo"]["high"]
        if bitrate_kbps > (high_br / 1000):
            return ("aac", high_br)
        # At or below the cap: preserve the original
        logger.info(f"Stereo audio {bitrate_kbps}kbps ≤ {high_br/1000:.0f}k threshold - copying original")
        return ("copy", 0)

    # --- Stereo, lower-resolution video -----------------------------------
    medium_br = audio_config["stereo"]["medium"]
    if bitrate_kbps > (medium_br / 1000):
        return ("aac", medium_br)
    # At or below the cap: preserve the original
    logger.info(f"Stereo audio {bitrate_kbps}kbps ≤ {medium_br/1000:.0f}k threshold - copying original")
    return ("copy", 0)
|