From 5b784b5ac2e88df314be4554dcc9f0ff37a664e1 Mon Sep 17 00:00:00 2001 From: TylerCG <117808427+TylerCG@users.noreply.github.com> Date: Wed, 22 Apr 2026 21:52:09 -0400 Subject: [PATCH] commit --- README.md | 64 ++++++++++++++-- audio_extractor/cli.py | 98 ++++++++++++++++++++++++- audio_extractor/extractor.py | 137 +++++++++++++++++++++++++++++++++-- 3 files changed, 284 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 2823cbc..a6fddb7 100644 --- a/README.md +++ b/README.md @@ -5,16 +5,13 @@ A Python tool for extracting and managing audio tracks from video files using FF ## Features - **Extract Audio**: Extract all audio channels from video files as individual files +- **Add Tracks**: Add individual audio files as new tracks to video files - **Preserve Quality**: Maintains original bitrate and codec without re-encoding - **Batch Processing**: Process multiple video files from a folder - **Multi-track Support**: Automatically handles videos with multiple audio tracks -- **Flexible Output**: Specify custom output folder - -## Future Features - -- **Add Tracks**: Add individual audio files as new tracks to video files - **Track Titles**: Assign custom titles/names to audio tracks -- **Batch Operations**: Apply operations to multiple files with matching base names +- **Flexible Output**: Specify custom output folder +- **Smart Matching**: Automatically matches audio files to videos by base name ## Installation @@ -57,6 +54,20 @@ python main.py extract "path/to/video.mp4" -o ./audio_output python main.py extract "./videos_folder" -o ./audio_output ``` +### Add Audio Tracks to Videos + +```bash +# Add audio files from one folder to matching video files in another folder +python main.py add "./audio_files" -i "./videos_folder" -o ./output_videos +``` + +### Add Audio with Track Titles + +```bash +# Add audio tracks with a custom title (e.g., "Commentary") +python main.py add "./audio_files" -i "./videos_folder" -o ./output_videos --title 
"Commentary" +``` + ### Legacy Command Format The tool also supports the original command format: @@ -82,21 +93,62 @@ python main.py extract "./my_videos" -o "./audio_tracks" python main.py extract "video.mkv" ``` +**Add audio files to matching videos:** +```bash +python main.py add "./commentary_tracks" -i "./videos" -o "./videos_with_commentary" +``` + +**Add audio with custom track title:** +```bash +python main.py add "./audio_files" -i "./videos" -o "./output" --title "English Commentary" +``` + +**Batch add multiple audio files to the same video:** +```bash +# Create audio files named like: video_name_01.aac, video_name_02.aac +# Then add them all to video_name.mp4 +python main.py add "./audio_files" -i "./videos" -o "./output" +``` + ## How It Works +### Extraction + 1. **Identifies video files** in the target path 2. **Analyzes audio streams** using ffprobe to detect codec and bitrate information 3. **Extracts each audio track** using FFmpeg's codec copy mode (no re-encoding) 4. **Preserves quality** by maintaining original bitrate and codec 5. **Names files** appropriately based on source video and track number +### Addition + +1. **Identifies audio files** in the audio folder +2. **Matches audio to videos** by comparing base names (filename without extension) +3. **Adds audio as new track** using FFmpeg's codec copy mode (no re-encoding) +4. **Applies metadata** (track title if specified via `--title`) +5. **Handles multiple tracks** by adding all matching audio files as separate tracks +6. **Preserves video** and maintains original quality + ## Output +### Audio Extraction + Extracted audio files are saved with the following naming: - **Single audio track**: `video_name.aac` (or appropriate extension) - **Multiple audio tracks**: `video_name_audio_0.aac`, `video_name_audio_1.aac`, etc. 
def add_audio_tracks(self, target: str, input_folder: str, output: str, title: str = None) -> None:
    """
    Add audio tracks to video files.

    Every audio file found in ``target`` is paired with a video in
    ``input_folder`` via ``self.extractor.find_matching_video``. All audio
    files that resolve to the same video are muxed into it one after the
    other, and the finished file is written to ``output`` under the video's
    original file name. The source videos are never modified.

    Args:
        target: Path to folder containing audio files
        input_folder: Path to folder containing video files
        output: Output folder for processed video files
        title: Title/name for the added audio tracks (applied to all added tracks)

    Raises:
        FileNotFoundError: If the audio folder or the video folder does not exist
    """
    audio_folder = Path(target)
    video_folder = Path(input_folder)
    output_folder = Path(output)

    if not audio_folder.exists():
        raise FileNotFoundError(f"Audio folder not found: {target}")
    if not video_folder.exists():
        raise FileNotFoundError(f"Video folder not found: {input_folder}")

    # Create output directory if it doesn't exist
    output_folder.mkdir(parents=True, exist_ok=True)

    audio_files = self.extractor.find_audio_files(audio_folder)
    if not audio_files:
        print(f"No audio files found in: {audio_folder}")
        return

    print(f"Found {len(audio_files)} audio file(s)")

    # Group audio files by the video they belong to. Keying on the full
    # path keeps two videos that merely share a stem apart.
    videos_with_audio = {}
    for audio_file in audio_files:
        try:
            video_file = self.extractor.find_matching_video(audio_file, video_folder)
        except FileNotFoundError as e:
            print(f"⚠ Skipped: {e}")
            continue
        videos_with_audio.setdefault(video_file, []).append(audio_file)

    if not videos_with_audio:
        print("No matching video files found for audio files")
        return

    total = len(videos_with_audio)
    print(f"Found {total} video(s) to process\n")

    for idx, (video_file, audio_files_to_add) in enumerate(videos_with_audio.items(), 1):
        print(f"[{idx}/{total}] Processing: {video_file.name}")

        try:
            output_video = output_folder / video_file.name
            if output_video.resolve() == video_file.resolve():
                # Muxing onto the file that is being read would destroy it.
                raise ValueError(
                    f"Output folder must differ from the video folder: {output_folder}"
                )

            # Intermediate results live in a private scratch directory inside
            # the output folder: it is removed even when a step fails, cannot
            # collide with other runs, and sits on the same filesystem as the
            # destination so the final move is a cheap rename.
            with tempfile.TemporaryDirectory(dir=output_folder, prefix=".adding_") as scratch:
                current_input = video_file
                for step, audio_file in enumerate(audio_files_to_add):
                    step_output = Path(scratch) / f"step_{step}{video_file.suffix}"

                    print(f"  ↳ Adding audio: {audio_file.name}")
                    self.extractor.add_audio_to_video(
                        current_input,
                        audio_file,
                        step_output,
                        track_title=title
                    )

                    # The previous intermediate is no longer needed; never
                    # delete the user's source video.
                    if current_input != video_file:
                        current_input.unlink()
                    current_input = step_output

                # Publish the fully muxed file under the original name.
                current_input.replace(output_video)

        except Exception as e:
            # Best effort: report the failure and continue with the next video.
            print(f"  ✗ Error: {e}")
+ + Args: + source: Source file path + destination: Destination file path + """ + shutil.copy2(source, destination) + # Verify the copy completed + if not destination.exists(): + raise RuntimeError(f"File copy failed: {destination} was not created") + + source_size = source.stat().st_size + dest_size = destination.stat().st_size + if source_size != dest_size: + raise RuntimeError( + f"File copy incomplete: source {source_size} bytes != destination {dest_size} bytes" + ) + def get_stream_info(self, video_file: Path) -> Dict[str, Any]: """ Get stream information from video file using ffprobe. @@ -105,8 +126,7 @@ class AudioExtractor: # Extract each audio stream file_stem = video_file.stem - for stream in audio_streams: - stream_index = stream.get("index") + for audio_index, stream in enumerate(audio_streams): codec_name = stream.get("codec_name", "aac") # Determine output file extension based on codec @@ -114,13 +134,13 @@ class AudioExtractor: # Handle multiple audio tracks if len(audio_streams) > 1: - output_filename = f"{file_stem}_audio_{stream_index}.{output_ext}" + output_filename = f"{file_stem}_audio_{audio_index}.{output_ext}" else: output_filename = f"{file_stem}.{output_ext}" output_path = output_folder / output_filename - self._extract_stream(video_file, output_path, stream_index) + self._extract_stream(video_file, output_path, audio_index) def _get_audio_extension(self, codec_name: str) -> str: """ @@ -158,7 +178,7 @@ class AudioExtractor: Args: video_file: Path to input video file output_path: Path to output audio file - stream_index: Index of the audio stream to extract + stream_index: Index of the audio stream to extract (0, 1, 2... 
# Audio file extensions for finding audio files
# (class-level attribute of AudioExtractor; read via ``self.AUDIO_EXTENSIONS``).
AUDIO_EXTENSIONS = {
    ".aac", ".mp3", ".flac", ".opus", ".ogg", ".ac3", ".ec3",
    ".dts", ".thd", ".m4a", ".wav", ".wma", ".ape", ".alac"
}


def find_audio_files(self, folder: Path) -> List[Path]:
    """
    Find all audio files in a folder.

    The suffix is compared case-insensitively, so ``.mp3``, ``.MP3`` and
    ``.Mp3`` are all recognised. Only regular files directly inside
    ``folder`` are returned (no recursion, no directories).

    Args:
        folder: Path to folder to search

    Returns:
        Sorted list of audio file paths
    """
    return sorted(
        entry for entry in folder.iterdir()
        if entry.is_file() and entry.suffix.lower() in self.AUDIO_EXTENSIONS
    )


def find_matching_video(self, audio_file: Path, video_folder: Path) -> Path:
    """
    Find a video file matching the audio file's base name.

    The exact base name is tried first (``movie.aac`` -> ``movie.mp4``).
    If that fails and the base name ends in ``_<digits>``, the numbered
    suffix is dropped and the lookup is repeated
    (``movie_01.aac`` -> ``movie.mp4``), so several numbered audio files
    can target the same video.

    Args:
        audio_file: Path to audio file
        video_folder: Path to folder containing video files

    Returns:
        Path to matching video file

    Raises:
        FileNotFoundError: If no matching video file is found
    """
    audio_stem = audio_file.stem
    candidate_stems = [audio_stem]

    # "movie_01" -> also try "movie"; the exact name always wins.
    base, separator, number = audio_stem.rpartition("_")
    if separator and base and number.isdecimal():
        candidate_stems.append(base)

    # Sorted so the result does not depend on set iteration order when
    # several containers share the same base name.
    extensions = sorted(self.VIDEO_EXTENSIONS)
    for stem in candidate_stems:
        for video_ext in extensions:
            for ext in (video_ext, video_ext.upper()):
                video_path = video_folder / f"{stem}{ext}"
                if video_path.exists():
                    return video_path

    raise FileNotFoundError(
        f"No matching video found for audio file: {audio_file.name}"
    )


def add_audio_to_video(self, video_file: Path, audio_file: Path, output_file: Path,
                       track_title: str = None) -> None:
    """
    Add an audio track to a video file.

    Runs ffmpeg in stream-copy mode: every stream of ``video_file`` is kept
    and the audio stream(s) of ``audio_file`` are appended after them.
    ``output_file`` is overwritten if it already exists.

    Args:
        video_file: Path to input video file
        audio_file: Path to audio file to add
        output_file: Path to output video file
        track_title: Title/name for the audio track (optional)

    Raises:
        FileNotFoundError: If the video or audio input does not exist
        RuntimeError: If ffmpeg fails or any other error occurs while muxing
    """
    if not video_file.exists():
        raise FileNotFoundError(f"Video file not found: {video_file}")
    if not audio_file.exists():
        raise FileNotFoundError(f"Audio file not found: {audio_file}")

    try:
        # Build ffmpeg command to add audio track
        cmd = [
            "ffmpeg", "-i", str(video_file),
            "-i", str(audio_file),
            "-c:v", "copy",   # Copy video codec
            "-c:a", "copy",   # Copy audio codec
            "-map", "0",      # Include all streams from video
            "-map", "1:a",    # Add audio from audio file
            "-y"              # Overwrite output
        ]

        if track_title:
            # The new track lands after the existing audio streams, so its
            # audio-relative index equals the number of audio streams the
            # input video already has. Probing is only needed for the title.
            try:
                stream_info = self.get_stream_info(video_file)
                new_audio_index = sum(
                    1 for stream in stream_info.get("streams", [])
                    if stream.get("codec_type") == "audio"
                )
            except Exception:
                # If we can't get stream info, assume it's the first audio (index 0).
                # Deliberately not a bare except: Ctrl-C must still propagate.
                new_audio_index = 0

            # Only apply to the newly added audio stream
            cmd.extend([
                f"-metadata:s:a:{new_audio_index}", f"title={track_title}"
            ])

        cmd.append(str(output_file))

        subprocess.run(cmd, capture_output=True, check=True)
        print(f"    ✓ Added audio track from: {audio_file.name}")

    except subprocess.CalledProcessError as e:
        raise RuntimeError(
            f"Failed to add audio track: {e.stderr.decode() if e.stderr else 'Unknown error'}"
        ) from e
    except Exception as e:
        raise RuntimeError(f"Error during audio addition: {e}") from e