1st

2026-04-22 21:01:43 -04:00 · 2026-04-22 21:01:43 -04:00 · 4a2c40da0e
commit 4a2c40da0e
7 changed files with 645 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,134 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+Pipfile.lock
+
+# PEP 582
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# Output directories
+audio_output/
+video_output/
--- a/README.md
+++ b/README.md
@ -0,0 +1,140 @@
+# Audio Extractor
+
+A Python tool for extracting and managing audio tracks from video files using FFmpeg.
+
+## Features
+
+- **Extract Audio**: Extract all audio channels from video files as individual files
+- **Preserve Quality**: Maintains original bitrate and codec without re-encoding
+- **Batch Processing**: Process multiple video files from a folder
+- **Multi-track Support**: Automatically handles videos with multiple audio tracks
+- **Flexible Output**: Specify custom output folder
+
+## Future Features
+
+- **Add Tracks**: Add individual audio files as new tracks to video files
+- **Track Titles**: Assign custom titles/names to audio tracks
+- **Batch Operations**: Apply operations to multiple files with matching base names
+
+## Installation
+
+### Prerequisites
+
+- Python 3.7+ (the code relies on `subprocess.run(..., capture_output=True)`, added in 3.7)
+- FFmpeg installed and accessible in your PATH
+- FFprobe (usually included with FFmpeg)
+
+### Install FFmpeg
+
+**macOS** (using Homebrew):
+```bash
+brew install ffmpeg
+```
+
+**Ubuntu/Debian**:
+```bash
+sudo apt-get install ffmpeg
+```
+
+**Windows** (using Chocolatey):
+```bash
+choco install ffmpeg
+```
+
+Or download from: https://ffmpeg.org/download.html
+
+## Usage
+
+### Extract Audio from a Single Video
+
+```bash
+python main.py extract "path/to/video.mp4" -o ./audio_output
+```
+
+### Extract Audio from All Videos in a Folder
+
+```bash
+python main.py extract "./videos_folder" -o ./audio_output
+```
+
+### Legacy Command Format
+
+The tool also supports the original command format:
+
+```bash
+python main.py --extract "target" -o output_folder
+```
+
+## Examples
+
+**Extract from single file:**
+```bash
+python main.py extract "movie.mp4" -o ./extracted_audio
+```
+
+**Extract from entire folder:**
+```bash
+python main.py extract "./my_videos" -o "./audio_tracks"
+```
+
+**Extract with default output folder (./audio_output):**
+```bash
+python main.py extract "video.mkv"
+```
+
+## How It Works
+
+1. **Identifies video files** in the target path
+2. **Analyzes audio streams** using ffprobe to detect codec and bitrate information
+3. **Extracts each audio track** using FFmpeg's codec copy mode (no re-encoding)
+4. **Preserves quality** by maintaining original bitrate and codec
+5. **Names files** appropriately based on source video and track number
+
+## Output
+
+Extracted audio files are saved with the following naming:
+
+- **Single audio track**: `video_name.aac` (or appropriate extension)
+- **Multiple audio tracks**: `video_name_audio_0.aac`, `video_name_audio_1.aac`, etc.
+
+## Troubleshooting
+
+**"ffmpeg is not installed or not found in PATH"**
+- Ensure FFmpeg is installed and the `ffmpeg` command is accessible from your terminal
+- Test with: `ffmpeg -version`
+
+**"No audio streams found"**
+- The video file may not contain any audio tracks
+- Try analyzing the file with: `ffprobe "video.mp4"`
+
+**Extraction fails**
+- Check that the video file is not corrupted
+- Try opening it with a media player first
+- Check disk space in the output folder
+
+## Development
+
+### Project Structure
+
+```
+audio-extractor/
+├── main.py              # Entry point and CLI argument parsing
+├── audio_extractor/
+│   ├── __init__.py
+│   ├── cli.py          # CLI interface
+│   └── extractor.py    # Core extraction logic
+├── requirements.txt
+└── README.md
+```
+
+### Adding Features
+
+To add new features:
+
+1. Add command logic to `audio_extractor/extractor.py`
+2. Add CLI interface to `audio_extractor/cli.py`
+3. Add new command to the argument parser in `main.py`
+
+## License
+
+MIT
--- a/audio_extractor/__init__.py
+++ b/audio_extractor/__init__.py
@ -0,0 +1,4 @@
+"""Audio Extractor - FFmpeg-based audio extraction and management tool"""
+
+__version__ = "0.1.0"  # package version string
+__author__ = "Audio Extractor Contributors"  # attribution string
--- a/audio_extractor/cli.py
+++ b/audio_extractor/cli.py
@ -0,0 +1,62 @@
+"""CLI interface for audio extraction operations"""
+
+from pathlib import Path
+from audio_extractor.extractor import AudioExtractor
+
+
+class AudioExtractorCLI:
+    """Command-line interface for audio extraction"""
+
+    def __init__(self):
+        self.extractor = AudioExtractor()
+
+    def extract_audio(self, target: str, output: str) -> None:
+        """
+        Extract audio from video file(s).
+
+        Args:
+            target: Path to video file or folder containing video files
+            output: Output folder path for extracted audio files
+        """
+        target_path = Path(target)
+        output_path = Path(output)
+
+        if not target_path.exists():
+            raise FileNotFoundError(f"Target not found: {target}")
+
+        # Create output directory if it doesn't exist
+        output_path.mkdir(parents=True, exist_ok=True)
+
+        if target_path.is_file():
+            # Single file
+            print(f"Extracting audio from: {target_path}")
+            self.extractor.extract_audio_from_file(target_path, output_path)
+        elif target_path.is_dir():
+            # Directory - process all video files
+            video_files = self.extractor.find_video_files(target_path)
+            if not video_files:
+                print(f"No video files found in: {target_path}")
+                return
+
+            print(f"Found {len(video_files)} video file(s)")
+            for i, video_file in enumerate(video_files, 1):
+                print(f"[{i}/{len(video_files)}] Extracting audio from: {video_file.name}")
+                try:
+                    self.extractor.extract_audio_from_file(video_file, output_path)
+                except Exception as e:
+                    print(f"  Error processing {video_file.name}: {e}")
+        else:
+            raise ValueError(f"Invalid target: {target}")
+
+    def add_audio_tracks(self, target: str, input_folder: str, output: str, title: str = None) -> None:
+        """
+        Add audio tracks to video files (future feature).
+
+        Args:
+            target: Path to folder containing audio files
+            input_folder: Path to folder containing video files
+            output: Output folder for processed video files
+            title: Title/name for the added audio tracks
+        """
+        print("Feature not yet implemented")
+        # TODO: Implement add_audio_tracks functionality
--- a/audio_extractor/extractor.py
+++ b/audio_extractor/extractor.py
@ -0,0 +1,180 @@
+"""Core audio extraction logic using ffmpeg"""
+
+import subprocess
+import json
+from pathlib import Path
+from typing import List, Dict, Any
+
+
+class AudioExtractor:
+    """Handles audio extraction from video files using ffmpeg"""
+
+    # Common video file extensions
+    VIDEO_EXTENSIONS = {
+        ".mp4", ".mkv", ".mov", ".avi", ".flv", ".wmv", ".webm",
+        ".m4v", ".mpg", ".mpeg", ".3gp", ".ts", ".m2ts", ".mts"
+    }
+
+    def __init__(self):
+        self._verify_ffmpeg_installed()
+
+    def _verify_ffmpeg_installed(self) -> None:
+        """Verify that ffmpeg is installed and accessible"""
+        try:
+            subprocess.run(
+                ["ffmpeg", "-version"],
+                capture_output=True,
+                check=True
+            )
+        except (subprocess.CalledProcessError, FileNotFoundError):
+            raise RuntimeError(
+                "ffmpeg is not installed or not found in PATH. "
+                "Please install ffmpeg to use this tool."
+            )
+
+    def find_video_files(self, folder: Path) -> List[Path]:
+        """
+        Find all video files in a folder.
+
+        Args:
+            folder: Path to folder to search
+
+        Returns:
+            List of video file paths
+        """
+        video_files = []
+        for ext in self.VIDEO_EXTENSIONS:
+            video_files.extend(folder.glob(f"*{ext}"))
+            video_files.extend(folder.glob(f"*{ext.upper()}"))
+        return sorted(set(video_files))  # Remove duplicates and sort
+
+    def get_stream_info(self, video_file: Path) -> Dict[str, Any]:
+        """
+        Get stream information from video file using ffprobe.
+
+        Args:
+            video_file: Path to video file
+
+        Returns:
+            Dictionary containing stream information
+        """
+        try:
+            result = subprocess.run(
+                [
+                    "ffprobe", "-v", "error",
+                    "-show_entries", "stream=index,codec_type,codec_name",
+                    "-of", "json",
+                    str(video_file)
+                ],
+                capture_output=True,
+                text=True,
+                check=True
+            )
+            return json.loads(result.stdout)
+        except subprocess.CalledProcessError as e:
+            raise RuntimeError(f"Failed to get stream info: {e.stderr}")
+        except json.JSONDecodeError:
+            raise RuntimeError("Failed to parse ffprobe output")
+
+    def extract_audio_from_file(self, video_file: Path, output_folder: Path) -> None:
+        """
+        Extract all audio tracks from a video file.
+
+        Args:
+            video_file: Path to video file
+            output_folder: Path to output folder
+        """
+        if not video_file.exists():
+            raise FileNotFoundError(f"Video file not found: {video_file}")
+
+        # Get stream information
+        try:
+            stream_info = self.get_stream_info(video_file)
+        except RuntimeError as e:
+            raise RuntimeError(f"Could not analyze {video_file.name}: {e}")
+
+        # Find audio streams
+        audio_streams = [
+            stream for stream in stream_info.get("streams", [])
+            if stream.get("codec_type") == "audio"
+        ]
+
+        if not audio_streams:
+            print(f"  No audio streams found in {video_file.name}")
+            return
+
+        # Extract each audio stream
+        file_stem = video_file.stem
+        for stream in audio_streams:
+            stream_index = stream.get("index")
+            codec_name = stream.get("codec_name", "aac")
+
+            # Determine output file extension based on codec
+            output_ext = self._get_audio_extension(codec_name)
+
+            # Handle multiple audio tracks
+            if len(audio_streams) > 1:
+                output_filename = f"{file_stem}_audio_{stream_index}.{output_ext}"
+            else:
+                output_filename = f"{file_stem}.{output_ext}"
+
+            output_path = output_folder / output_filename
+
+            self._extract_stream(video_file, output_path, stream_index)
+
+    def _get_audio_extension(self, codec_name: str) -> str:
+        """
+        Get file extension based on audio codec.
+
+        Args:
+            codec_name: FFmpeg codec name
+
+        Returns:
+            File extension (without dot)
+        """
+        extension_map = {
+            "aac": "aac",
+            "mp3": "mp3",
+            "libmp3lame": "mp3",
+            "flac": "flac",
+            "opus": "opus",
+            "vorbis": "ogg",
+            "libvorbis": "ogg",
+            "ac3": "ac3",
+            "eac3": "ec3",
+            "dts": "dts",
+            "truehd": "thd",
+            "alac": "m4a",
+            "pcm_s16le": "wav",
+            "pcm_s24le": "wav",
+            "pcm_s32le": "wav",
+        }
+        return extension_map.get(codec_name, "aac")
+
+    def _extract_stream(self, video_file: Path, output_path: Path, stream_index: int) -> None:
+        """
+        Extract a single audio stream using ffmpeg.
+
+        Args:
+            video_file: Path to input video file
+            output_path: Path to output audio file
+            stream_index: Index of the audio stream to extract
+        """
+        try:
+            # Use ffmpeg to copy the audio codec without re-encoding
+            # This preserves the original bitrate and codec
+            cmd = [
+                "ffmpeg", "-i", str(video_file),
+                "-map", f"0:a:{stream_index}",
+                "-c", "copy",  # Copy codec without re-encoding
+                "-y",  # Overwrite output file
+                str(output_path)
+            ]
+
+            subprocess.run(cmd, capture_output=True, check=True)
+            print(f"  ✓ Extracted: {output_path.name}")
+
+        except subprocess.CalledProcessError as e:
+            raise RuntimeError(
+                f"Failed to extract audio stream {stream_index}: {e.stderr.decode() if e.stderr else 'Unknown error'}"
+            )
--- a/main.py
+++ b/main.py
@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+"""
+Audio Extractor - Extract and manage audio tracks from video files using ffmpeg
+"""
+
+import argparse
+import sys
+from pathlib import Path
+
+from audio_extractor.cli import AudioExtractorCLI
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Extract and manage audio tracks from video files",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Extract audio from a single video file
+  python main.py --extract "video.mp4" -o ./audio_output
+  
+  # Extract audio from all videos in a folder
+  python main.py --extract "./videos" -o ./audio_output
+  
+  # Add audio tracks to video (future feature)
+  python main.py --add "./audio_files" -i "./video_files" -o ./output
+        """
+    )
+
+    # Create subcommands for better organization
+    subparsers = parser.add_subparsers(dest="command", help="Command to execute")
+
+    # Extract command
+    extract_parser = subparsers.add_parser("extract", help="Extract audio from video files")
+    extract_parser.add_argument(
+        "target",
+        type=str,
+        help="Path to video file or folder containing video files"
+    )
+    extract_parser.add_argument(
+        "-o", "--output",
+        type=str,
+        default="./audio_output",
+        help="Output folder for extracted audio files (default: ./audio_output)"
+    )
+
+    # Add command (future feature)
+    add_parser = subparsers.add_parser("add", help="Add audio tracks to video files")
+    add_parser.add_argument(
+        "target",
+        type=str,
+        help="Path to folder containing audio files to add"
+    )
+    add_parser.add_argument(
+        "-i", "--input",
+        type=str,
+        required=True,
+        help="Path to folder containing video files"
+    )
+    add_parser.add_argument(
+        "-o", "--output",
+        type=str,
+        default="./video_output",
+        help="Output folder for processed video files (default: ./video_output)"
+    )
+    add_parser.add_argument(
+        "--title",
+        type=str,
+        default=None,
+        help="Title/name for the added audio tracks"
+    )
+
+    # Also support old-style --extract flag for backwards compatibility
+    parser.add_argument(
+        "--extract",
+        type=str,
+        default=None,
+        metavar="TARGET",
+        help="(Legacy) Extract audio from video file or folder"
+    )
+    parser.add_argument(
+        "-o", "--output",
+        type=str,
+        dest="output",
+        default="./audio_output",
+        help="Output folder path"
+    )
+
+    args = parser.parse_args()
+
+    # Initialize CLI
+    cli = AudioExtractorCLI()
+
+    # Handle legacy --extract flag
+    if args.extract:
+        args.command = "extract"
+        args.target = args.extract
+
+    if not args.command:
+        parser.print_help()
+        sys.exit(1)
+
+    try:
+        if args.command == "extract":
+            cli.extract_audio(args.target, args.output)
+        elif args.command == "add":
+            cli.add_audio_tracks(args.target, args.input, args.output, args.title)
+        else:
+            parser.print_help()
+            sys.exit(1)
+    except Exception as e:
+        print(f"Error: {e}", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == "__main__":  # run the CLI only when executed as a script
+    main()
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,8 @@
+# Audio Extractor Requirements
+# No external Python dependencies required - uses ffmpeg system command
+# ffmpeg must be installed separately on your system
+
+# For development/testing (optional):
+# pytest>=7.0
+# black>=22.0
+# pylint>=2.0