From 4a2c40da0e94b1df7fbe5a348a1d304a5dd1675c Mon Sep 17 00:00:00 2001 From: TylerCG <117808427+TylerCG@users.noreply.github.com> Date: Wed, 22 Apr 2026 21:01:43 -0400 Subject: [PATCH] 1st --- .gitignore | 134 ++++++++++++++++++++++++++ README.md | 140 +++++++++++++++++++++++++++ audio_extractor/__init__.py | 4 + audio_extractor/cli.py | 62 ++++++++++++ audio_extractor/extractor.py | 180 +++++++++++++++++++++++++++++++++++ main.py | 117 +++++++++++++++++++++++ requirements.txt | 8 ++ 7 files changed, 645 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 audio_extractor/__init__.py create mode 100644 audio_extractor/cli.py create mode 100644 audio_extractor/extractor.py create mode 100644 main.py create mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0db3bf1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,134 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +Pipfile.lock + +# PEP 582 +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# 
SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# Output directories +audio_output/ +video_output/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..2823cbc --- /dev/null +++ b/README.md @@ -0,0 +1,140 @@ +# Audio Extractor + +A Python tool for extracting and managing audio tracks from video files using FFmpeg. + +## Features + +- **Extract Audio**: Extract all audio tracks from video files as individual files +- **Preserve Quality**: Maintains original bitrate and codec without re-encoding +- **Batch Processing**: Process multiple video files from a folder +- **Multi-track Support**: Automatically handles videos with multiple audio tracks +- **Flexible Output**: Specify custom output folder + +## Future Features + +- **Add Tracks**: Add individual audio files as new tracks to video files +- **Track Titles**: Assign custom titles/names to audio tracks +- **Batch Operations**: Apply operations to multiple files with matching base names + +## Installation + +### Prerequisites + +- Python 3.7+ +- FFmpeg installed and accessible in your PATH +- FFprobe (usually included with FFmpeg) + +### Install FFmpeg + +**macOS** (using Homebrew): +```bash +brew install ffmpeg +``` + +**Ubuntu/Debian**: +```bash +sudo apt-get install ffmpeg +``` + +**Windows** (using Chocolatey): +```bash +choco install ffmpeg +``` + +Or download from: https://ffmpeg.org/download.html + +## Usage + +### Extract Audio from a Single Video + +```bash +python main.py extract "path/to/video.mp4" -o ./audio_output +``` + +### Extract Audio from All Videos in a Folder + +```bash +python main.py extract "./videos_folder" -o ./audio_output +``` + +### Legacy Command Format + 
+The tool also supports the original command format: + +```bash +python main.py --extract "target" -o output_folder +``` + +## Examples + +**Extract from single file:** +```bash +python main.py extract "movie.mp4" -o ./extracted_audio +``` + +**Extract from entire folder:** +```bash +python main.py extract "./my_videos" -o "./audio_tracks" +``` + +**Extract with default output folder (./audio_output):** +```bash +python main.py extract "video.mkv" +``` + +## How It Works + +1. **Identifies video files** in the target path +2. **Analyzes audio streams** using ffprobe to detect codec and bitrate information +3. **Extracts each audio track** using FFmpeg's codec copy mode (no re-encoding) +4. **Preserves quality** by maintaining original bitrate and codec +5. **Names files** appropriately based on source video and track number + +## Output + +Extracted audio files are saved with the following naming: + +- **Single audio track**: `video_name.aac` (or appropriate extension) +- **Multiple audio tracks**: `video_name_audio_0.aac`, `video_name_audio_1.aac`, etc. + +## Troubleshooting + +**"ffmpeg is not installed or not found in PATH"** +- Ensure FFmpeg is installed and the `ffmpeg` command is accessible from your terminal +- Test with: `ffmpeg -version` + +**"No audio streams found"** +- The video file may not contain any audio tracks +- Try analyzing the file with: `ffprobe "video.mp4"` + +**Extraction fails** +- Check that the video file is not corrupted +- Try opening it with a media player first +- Check disk space in the output folder + +## Development + +### Project Structure + +``` +audio-extractor/ +├── main.py # Entry point and CLI argument parsing +├── audio_extractor/ +│ ├── __init__.py +│ ├── cli.py # CLI interface +│ └── extractor.py # Core extraction logic +├── requirements.txt +└── README.md +``` + +### Adding Features + +To add new features: + +1. Add command logic to `audio_extractor/extractor.py` +2. Add CLI interface to `audio_extractor/cli.py` +3. 
class AudioExtractorCLI:
    """Thin command-line front end that delegates the real work to ``AudioExtractor``."""

    def __init__(self):
        self.extractor = AudioExtractor()

    def extract_audio(self, target: str, output: str) -> None:
        """
        Extract audio from one video file, or from every video file in a folder.

        The output folder is created (including parents) once the target is
        known to exist. In folder mode a failure on one file is reported on
        stdout and processing continues with the next file; in single-file
        mode the error propagates to the caller.

        Args:
            target: Path to video file or folder containing video files
            output: Output folder path for extracted audio files

        Raises:
            FileNotFoundError: If ``target`` does not exist.
            ValueError: If ``target`` exists but is neither a file nor a folder.
        """
        source = Path(target)
        destination = Path(output)

        if not source.exists():
            raise FileNotFoundError(f"Target not found: {target}")

        destination.mkdir(parents=True, exist_ok=True)

        # Single file: hand it straight to the extractor and let errors surface.
        if source.is_file():
            print(f"Extracting audio from: {source}")
            self.extractor.extract_audio_from_file(source, destination)
            return

        if not source.is_dir():
            raise ValueError(f"Invalid target: {target}")

        # Folder: process every video the extractor discovers, best-effort.
        candidates = self.extractor.find_video_files(source)
        if not candidates:
            print(f"No video files found in: {source}")
            return

        print(f"Found {len(candidates)} video file(s)")
        position = 0
        for candidate in candidates:
            position += 1
            print(f"[{position}/{len(candidates)}] Extracting audio from: {candidate.name}")
            try:
                self.extractor.extract_audio_from_file(candidate, destination)
            except Exception as error:
                # One bad file must not abort the rest of the batch.
                print(f" Error processing {candidate.name}: {error}")

    def add_audio_tracks(self, target: str, input_folder: str, output: str, title: str = None) -> None:
        """
        Placeholder for the planned "add audio tracks to videos" feature.

        Currently only prints a notice; none of the arguments are used.

        Args:
            target: Path to folder containing audio files
            input_folder: Path to folder containing video files
            output: Output folder for processed video files
            title: Title/name for the added audio tracks
        """
        # TODO: Implement add_audio_tracks functionality
        print("Feature not yet implemented")
def extract_audio_from_file(self, video_file: Path, output_folder: Path) -> None:
    """
    Extract every audio track of a video file into its own output file.

    Stream information comes from ``self.get_stream_info``; each audio
    stream is then copied out via ``self._extract_stream``. Output files are
    named ``<stem>.<ext>`` when the video has a single audio track and
    ``<stem>_audio_<n>.<ext>`` otherwise, where ``n`` counts audio tracks
    only, starting at 0.

    Args:
        video_file: Path to video file
        output_folder: Path to output folder

    Raises:
        FileNotFoundError: If ``video_file`` does not exist.
        RuntimeError: If the stream information cannot be obtained. Errors
            raised by ``self._extract_stream`` propagate unchanged, so the
            remaining tracks of this file are not attempted.
    """
    if not video_file.exists():
        raise FileNotFoundError(f"Video file not found: {video_file}")

    # Get stream information
    try:
        stream_info = self.get_stream_info(video_file)
    except RuntimeError as e:
        # Chain the cause so the underlying ffprobe failure stays visible.
        raise RuntimeError(f"Could not analyze {video_file.name}: {e}") from e

    # Find audio streams
    audio_streams = [
        stream for stream in stream_info.get("streams", [])
        if stream.get("codec_type") == "audio"
    ]

    if not audio_streams:
        print(f" No audio streams found in {video_file.name}")
        return

    file_stem = video_file.stem
    multiple_tracks = len(audio_streams) > 1

    # ffprobe's "index" field numbers a stream among ALL streams of the
    # container (video, audio, subtitles, ...), whereas _extract_stream
    # selects with "-map 0:a:N", where N counts audio streams only.
    # Passing the ffprobe index therefore picks the wrong track (or none)
    # as soon as a non-audio stream precedes the audio, so the position
    # within the audio streams is used instead.
    for audio_index, stream in enumerate(audio_streams):
        codec_name = stream.get("codec_name", "aac")

        # Determine output file extension based on codec
        output_ext = self._get_audio_extension(codec_name)

        # Only disambiguate file names when there is more than one track
        if multiple_tracks:
            output_filename = f"{file_stem}_audio_{audio_index}.{output_ext}"
        else:
            output_filename = f"{file_stem}.{output_ext}"

        self._extract_stream(video_file, output_folder / output_filename, audio_index)
def _build_parser() -> argparse.ArgumentParser:
    """Build the parser: ``extract``/``add`` subcommands plus the legacy ``--extract`` flag."""
    parser = argparse.ArgumentParser(
        description="Extract and manage audio tracks from video files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Extract audio from a single video file
  python main.py --extract "video.mp4" -o ./audio_output

  # Extract audio from all videos in a folder
  python main.py --extract "./videos" -o ./audio_output

  # Add audio tracks to video (future feature)
  python main.py add "./audio_files" -i "./video_files" -o ./output
        """
    )

    # Create subcommands for better organization
    subparsers = parser.add_subparsers(dest="command", help="Command to execute")

    # Extract command
    extract_parser = subparsers.add_parser("extract", help="Extract audio from video files")
    extract_parser.add_argument(
        "target",
        type=str,
        help="Path to video file or folder containing video files"
    )
    extract_parser.add_argument(
        "-o", "--output",
        type=str,
        default="./audio_output",
        help="Output folder for extracted audio files (default: ./audio_output)"
    )

    # Add command (future feature)
    add_parser = subparsers.add_parser("add", help="Add audio tracks to video files")
    add_parser.add_argument(
        "target",
        type=str,
        help="Path to folder containing audio files to add"
    )
    add_parser.add_argument(
        "-i", "--input",
        type=str,
        required=True,
        help="Path to folder containing video files"
    )
    add_parser.add_argument(
        "-o", "--output",
        type=str,
        default="./video_output",
        help="Output folder for processed video files (default: ./video_output)"
    )
    add_parser.add_argument(
        "--title",
        type=str,
        default=None,
        help="Title/name for the added audio tracks"
    )

    # Also support old-style --extract flag for backwards compatibility
    parser.add_argument(
        "--extract",
        type=str,
        default=None,
        metavar="TARGET",
        help="(Legacy) Extract audio from video file or folder"
    )
    parser.add_argument(
        "-o", "--output",
        type=str,
        dest="output",
        default="./audio_output",
        help="Output folder path"
    )
    return parser


def main():
    """
    Parse the command line and run the requested command.

    Exits with status 1 after printing the help text when no command was
    given, and with status 1 after printing ``Error: ...`` to stderr when the
    command (or the construction of the CLI object) raises.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # Handle legacy --extract flag by rewriting it into the "extract" command
    if args.extract:
        args.command = "extract"
        args.target = args.extract

    # Validate the command before creating the CLI object, so that showing
    # the help text never depends on that object being constructible.
    if args.command not in ("extract", "add"):
        parser.print_help()
        sys.exit(1)

    try:
        # Constructed inside the try block so that a failure while setting up
        # the CLI is reported as a clean "Error: ..." line, not a traceback.
        cli = AudioExtractorCLI()
        if args.command == "extract":
            cli.extract_audio(args.target, args.output)
        else:
            cli.add_audio_tracks(args.target, args.input, args.output, args.title)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
be installed separately on your system + +# For development/testing (optional): +# pytest>=7.0 +# black>=22.0 +# pylint>=2.0