2026-01-08 13:26:15 -05:00

315 lines
12 KiB
Python

"""API Routes for media downloading."""
import logging
import json
import os
import re
from typing import Optional, Dict, Any
from fastapi import APIRouter, BackgroundTasks, Form
from fastapi.responses import JSONResponse
import config
import download
from core.cache import series_cache
logger = logging.getLogger("syllabus")
router = APIRouter(prefix="/api", tags=["API"])
@router.get("/dropout/update", description="Update the series list from dropout.tv")
async def dropout_update_route(background_tasks: BackgroundTasks, force: bool = False) -> JSONResponse:
    """Queue series list update as background task."""
    try:
        # Hand the (potentially slow) scrape off to a background task.
        background_tasks.add_task(download.dropout.series, force)
        body = {"status": "success", "message": "Series update queued in background"}
        return JSONResponse(status_code=202, content=body)
    except Exception as e:
        logger.error(f"Error queuing series update: {e}")
        return JSONResponse(status_code=500, content={"status": "error", "message": str(e)})
@router.get("/posters/update", description="Force update all show posters")
async def posters_update_route(background_tasks: BackgroundTasks) -> JSONResponse:
    """Queue poster update as background task."""
    try:
        # Poster refresh piggybacks on the series updater with force=True.
        background_tasks.add_task(download.dropout.series, True)
        body = {"status": "success", "message": "Poster update queued in background"}
        return JSONResponse(status_code=202, content=body)
    except Exception as e:
        logger.error(f"Error queuing poster update: {e}")
        return JSONResponse(status_code=500, content={"status": "error", "message": str(e)})
@router.post("/dropout/upload-html", description="Upload Dropout series HTML for manual scraping")
async def dropout_upload_html(html: str = Form(...)) -> JSONResponse:
    """
    Upload HTML from Dropout series page to extract shows manually.
    Useful when the automatic scraper misses shows that require scrolling.

    Parses the uploaded page for show tiles, downloads each show's poster,
    merges the results into the on-disk JSON catalog, and reloads the
    in-memory series cache.

    Returns 400 for empty/unparseable HTML, 200 with merge counts on
    success, 500 on unexpected errors.
    """
    try:
        from bs4 import BeautifulSoup
        # Hoisted: this import previously ran once per show inside the loop.
        from download.base import grab
        logger.info(f"HTML upload received: {len(html)} characters")
        if not html or len(html) < 100:
            return JSONResponse(
                status_code=400,
                content={"status": "error", "message": "HTML content is too short or empty"}
            )
        soup = BeautifulSoup(html, 'html.parser')
        elements = soup.find_all('a', class_='browse-item-link')
        logger.info(f"Found {len(elements)} show elements in HTML")
        if not elements:
            return JSONResponse(
                status_code=400,
                content={"status": "error", "message": "No shows found in HTML. Make sure you copied the full page HTML from https://watch.dropout.tv/series"}
            )
        # Collect href / poster src / alt-text for every show tile.
        shows = []
        for element in elements:
            show_data = {'href': element.get('href', '')}
            img = element.find('img')
            if img:
                show_data['src'] = img.get('src', '')
                show_data['alt'] = img.get('alt', '')
            shows.append(show_data)
        logger.info(f"Processing {len(shows)} shows for poster download")
        # Load existing shows to merge (missing/corrupt file means start fresh).
        try:
            with open(config.DROPOUT_JSON, 'r', encoding='utf-8') as f:
                existing_data = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            existing_data = []
        json_data = list(existing_data) if existing_data else []
        new_count = 0
        updated_count = 0
        poster_failures = []
        for show in shows:
            show_title = show.get('alt', 'No title')
            info_data = {
                'SHOW': show_title,
                'URL': show.get('href', 'No link'),
                'LINK': re.sub(r".*dropout.tv/", "", show.get('href', '')),
            }
            # Handle poster URL - prepend base URL if relative
            poster_url = show.get('src', '')
            if poster_url and not poster_url.startswith('http'):
                poster_url = config.DROPOUT_POSTER_BASE_URL.rstrip('/') + '/' + poster_url.lstrip('./')
            logger.debug(f"Processing poster for {show_title}: {poster_url}")
            poster_path = grab.poster(poster_url, show.get('alt', ''), force_download=False)
            if poster_path:
                info_data['POSTER'] = poster_path
                logger.debug(f"Successfully grabbed poster: {poster_path}")
            else:
                logger.warning(f"Failed to grab poster for {show_title} from {poster_url}")
                poster_failures.append(show_title)
            # Update in place when the title already exists in the catalog;
            # otherwise append. BUGFIX: previously a title present in the
            # old data but not located in json_data was silently dropped
            # (neither updated nor added).
            idx = next((i for i, s in enumerate(json_data) if s.get('SHOW') == show_title), -1)
            if idx >= 0:
                json_data[idx] = info_data
                updated_count += 1
            else:
                json_data.append(info_data)
                new_count += 1
        # Persist the merged catalog and refresh the in-memory cache.
        logger.info(f"Saving {len(json_data)} total shows to JSON")
        os.makedirs(config.DATA_DIR, exist_ok=True)
        with open(config.DROPOUT_JSON, 'w', encoding='utf-8') as f:
            json.dump(json_data, f, indent=4, separators=(',', ': '))
        series_cache.load_from_file(str(config.DROPOUT_JSON))
        logger.info(f"HTML upload complete: {new_count} new, {updated_count} updated")
        return JSONResponse(
            status_code=200,
            content={
                "status": "success",
                "message": f"Added {new_count} new shows, updated {updated_count} existing shows",
                "total_shows": len(json_data),
                "new": new_count,
                "updated": updated_count,
                "poster_failures": len(poster_failures),
                "failed_shows": poster_failures[:10] if poster_failures else []
            }
        )
    except Exception as e:
        logger.error(f"Error uploading HTML: {e}", exc_info=True)
        return JSONResponse(
            status_code=500,
            content={"status": "error", "message": str(e)}
        )
@router.get("/dropout/series", description="Get the list of available shows")
async def dropout_series_route() -> JSONResponse:
    """Get the cached series data."""
    try:
        data = series_cache.get()
        if data is None:
            # Cache miss: hydrate from disk and retry once.
            series_cache.load_from_file(str(config.DROPOUT_JSON))
            data = series_cache.get()
        if data is not None:
            return JSONResponse(content=data)
        return JSONResponse(content={"error": "Series data not available"}, status_code=503)
    except Exception as e:
        logger.error(f"Error fetching series: {e}")
        return JSONResponse(content={"error": "Failed to fetch series"}, status_code=500)
@router.post("/dropout/custom", description="Download content from a custom URL")
async def dropout_custom_route(
    background_tasks: BackgroundTasks,
    url: str = Form(...),
    directory: str = Form(...),
    prefix: Optional[str] = Form(None)
) -> JSONResponse:
    """Download from a custom URL with optional prefix.

    Creates the target directory if needed, then queues the download
    as a background task.
    """
    try:
        # NOTE(security): `directory` comes straight from the client and is
        # used as a filesystem path — consider validating/sandboxing it.
        # Uses the module-level `os` import (redundant local import removed).
        os.makedirs(directory, exist_ok=True)
        background_tasks.add_task(download.dropout.custom, url, directory, prefix)
        return JSONResponse(status_code=202, content={"status": "success", "message": "Download started"})
    except Exception as e:
        logger.error(f"Error starting custom download: {e}")
        return JSONResponse(status_code=500, content={"status": "error", "message": str(e)})
@router.post("/dropout/download", description="Download an entire season from episode 1")
async def dropout_download_route(
    background_tasks: BackgroundTasks,
    show: str = Form(...),
    season: Optional[int] = Form(None),
    latest: bool = Form(True),
    archive: bool = Form(False),
    specials: bool = Form(False),
    episode_start: Optional[int] = Form(None)
) -> JSONResponse:
    """Download a season of a show."""
    try:
        # Resolve the newest season when the caller asked for "latest".
        if season is None and latest:
            show_data = await get_show_data(show, True)
            if not show_data:
                return JSONResponse(
                    status_code=404,
                    content={"status": "error", "message": "Show not found"}
                )
            season = get_latest_season(show_data)
            if season is None:
                return JSONResponse(
                    status_code=400,
                    content={"status": "error", "message": "No valid seasons found"}
                )
        # Caller neither supplied a season nor asked for the latest one.
        if season is None:
            return JSONResponse(
                status_code=400,
                content={"status": "error", "message": "Season is required unless 'latest' is used."}
            )
        action = 'Adding to archive' if archive else 'Starting download'
        extra = ' specials' if specials else ''
        task_msg = f"{action} for show '{show}', season {season}{extra}."
        logger.info(f"message={task_msg}")
        # Defer the long-running work to a background task.
        if archive:
            background_tasks.add_task(download.dropout.archive, show, season)
        else:
            background_tasks.add_task(download.dropout.show, show, season, specials, episode_start)
        return JSONResponse(status_code=202, content={"status": "success", "message": task_msg})
    except Exception as e:
        logger.exception(f"Unhandled exception during /dropout/download: {e}")
        return JSONResponse(
            status_code=500,
            content={"status": "error", "message": "An unexpected error occurred."}
        )
@router.post("/ydl", description="Download a YouTube video")
async def youtube_download_route(
    background_tasks: BackgroundTasks,
    url: str = Form(...),
    location: str = Form(...)
) -> JSONResponse:
    """Download a YouTube video to the specified location."""
    try:
        # Queue the download so the request returns immediately.
        background_tasks.add_task(download.youtube.ydl, url, location)
        ok = {"status": "success", "message": "Download started"}
        return JSONResponse(status_code=202, content=ok)
    except Exception as e:
        logger.error(f"Error starting YouTube download: {e}")
        return JSONResponse(status_code=500, content={"status": "error", "message": str(e)})
# Helper functions
async def get_show_data(show: str, force: bool = False) -> Optional[Dict[str, Any]]:
    """Get data for a specific show by name or link."""
    try:
        cached = series_cache.get()
        if cached is None:
            # Try once to hydrate the cache from disk.
            series_cache.load_from_file(str(config.DROPOUT_JSON))
            cached = series_cache.get()
            if cached is None:
                return None
        for item in cached:
            # Match either the display title or the URL slug.
            if show not in (item.get("SHOW"), item.get("LINK")):
                continue
            if force or "SEASONS" not in item:
                try:
                    # Mutates the cached item so later lookups reuse the result.
                    item['SEASONS'] = download.grab.season(item['URL'])
                except Exception as e:
                    logger.error(f"Failed to fetch seasons for {show}: {e}")
                    item['SEASONS'] = []
            return item
        return None
    except Exception as e:
        logger.error(f"Error getting show data: {e}")
        return None
def get_latest_season(item: Dict[str, Any]) -> Optional[int]:
    """Extract the latest season number from show data."""
    try:
        seasons = item.get("SEASONS")
        # Only non-empty lists are considered; anything else means "unknown".
        if isinstance(seasons, list) and seasons:
            # Keep purely-numeric entries (e.g. skip "Specials" style labels).
            numeric = (int(s) for s in seasons if str(s).isdigit())
            latest = max(numeric, default=None)
            if latest is not None:
                return latest
    except ValueError as e:
        logger.error(f"Error parsing season numbers: {e}")
    except Exception as e:
        logger.error(f"Error getting latest season: {e}")
    return None