syllabus/app/download.py
2025-04-24 14:04:37 -04:00

161 lines
6.7 KiB
Python

from fastapi.responses import JSONResponse
import os, yt_dlp, json, requests
from bs4 import BeautifulSoup
class grab():
    """Scraping helpers used by the downloaders.

    The functions are called on the class itself (``grab.season(url)``), so
    they are declared as static methods.
    """

    @staticmethod
    def season(url):
        """Return the season identifiers listed on the show page at *url*.

        The page is fetched and its ``<select class="js-switch-season">``
        element is read. Each ``<option>`` value has the ``<url>/season:``
        prefix removed so only the season identifier is left.

        Returns:
            A list of strings, one per option carrying a ``value`` attribute.
            The list is empty when the page has no season selector.
        """
        # A timeout keeps a stalled connection from blocking the caller forever.
        page_html = requests.get(url, timeout=30)
        soup = BeautifulSoup(page_html.text, 'html.parser')
        select_element = soup.find('select', class_='js-switch-season')
        if select_element is None:
            # No season switcher on this page: report "no seasons" rather
            # than failing with AttributeError on ``None.find_all``.
            return []
        prefix = url + '/season:'
        return [
            option['value'].replace(prefix, '')
            for option in select_element.find_all('option')
            if option.has_attr('value')
        ]

    @staticmethod
    def thumbnail(ydl, url, location):
        """Save the thumbnail of the video at *url* as ``<video id>.jpg``.

        Args:
            ydl: object providing ``extract_info(url, download=False)`` that
                returns a mapping with ``'id'`` and, optionally,
                ``'thumbnail'`` (presumably a ``yt_dlp.YoutubeDL`` instance).
            url: address of the video whose metadata is looked up.
            location: directory the image file is written into.

        Fetching and writing are best-effort: any failure is printed instead
        of raised. Errors from ``extract_info`` itself still propagate.
        """
        video_info = ydl.extract_info(url, download=False)
        thumbnail_url = video_info.get('thumbnail')
        if not thumbnail_url:
            print("Downloaded MP4 but no thumbnail found.")
            return
        try:
            # Fetch first and only then open the destination, so a failed
            # request cannot leave an empty or bogus .jpg behind.
            response = requests.get(thumbnail_url, timeout=30)
            response.raise_for_status()
            thumbnail_filename = os.path.join(location, f"{video_info['id']}.jpg")
            with open(thumbnail_filename, 'wb') as thumbnail_file:
                thumbnail_file.write(response.content)
            print("Downloaded MP4 and downloaded thumbnail successfully!")
        except Exception as e:
            # Deliberately broad: the thumbnail is an optional extra and must
            # never abort the surrounding download.
            print(f"Error downloading thumbnail: {str(e)}")
class dropout():
    """Downloader and catalogue builder for dropout.tv shows.

    The functions are called on the class itself (``dropout.show(...)``), so
    they are declared as static methods.
    """

    @staticmethod
    def _download_options(show, season, episode, directory):
        """Build the yt-dlp option dictionary used by ``show``.

        When *episode* is ``None`` or empty, the episode number in the file
        name comes from yt-dlp's ``%(playlist_index)02d`` field. Otherwise the
        given episode is used verbatim in the file name and also becomes
        ``playliststart`` (``0`` when it is not an integer).

        Raises:
            ValueError: if *season* cannot be converted to an integer.
        """
        if episode is None or episode == '':
            episode_label = '%(playlist_index)02d'
            playlist_start = None
        else:
            episode_label = episode
            try:
                playlist_start = int(episode)
            except ValueError:
                # Non-numeric episode: fall back to the original default.
                playlist_start = 0

        dl_ops = {
            'format': 'bestvideo+bestaudio/best',
            # NOTE(review): 'audio_quality' does not appear among yt-dlp's
            # documented Python options - confirm it has any effect.
            'audio_quality': '256K',
            'paths': {
                'temp': '/temp',
                'home': directory
            },
            'cookiefile': '/data/dropout.cookies.txt',
            # Titles meant to be skipped (case-insensitive patterns).
            # NOTE(review): yt-dlp documents this option as 'rejecttitle'
            # taking a single regex - confirm this key/list form is honoured.
            'reject_title': [
                r'(?i).*behind.?the.?scenes.*',  # "behind the scenes"
                r'(?i).*trailer.*',              # "trailer"
                r'(?i).*recap.*',                # "recap"
                r'(?i).*last.looks.*'            # "last looks"
            ],
            'outtmpl': f"{show} - S{int(season):02}E{episode_label} - %(title)s.%(ext)s",
            'noplaylist': True,
            'writesubtitles': True,    # also fetch subtitles
            'subtitleslangs': ['en']   # English subtitles only
        }
        if playlist_start is not None:
            # Set after the dictionary exists; the previous code assigned to
            # ``dl_ops`` before creating it, which raised UnboundLocalError
            # for every request that named an episode.
            # NOTE(review): 'playliststart' only sets where downloading
            # begins - confirm whether a matching 'playlistend' is wanted.
            dl_ops['playliststart'] = playlist_start
        return dl_ops

    @staticmethod
    def _show_name(url):
        """Turn a dropout.tv show URL into the title stored under ``SHOW``."""
        return (
            url.replace('https://www.dropout.tv/', '')
            .replace('-s-', "'s-")
            .replace('-', ' ')
            .title()
            .replace('Of', 'of')
            .replace("'S", "'s")
        )

    @staticmethod
    def show(show, season, episode):
        """Download a season of *show* into ``/tv/<show>/Season <season>/``.

        The show's base URL is looked up in ``/data/dropout.json`` (entries
        with ``SHOW`` and ``URL`` keys) and ``/season:<season>`` is appended
        before handing it to yt-dlp.

        Args:
            show: show title exactly as stored under ``SHOW`` in the JSON file.
            season: season number as a string.
            episode: episode number as a string, or ``None``/``''`` to let
                yt-dlp number the files by playlist position.

        Raises:
            ValueError: if *show* is not present in the JSON data, or if
                *season* is not numeric.
        """
        directory = '/tv/' + show + '/Season ' + season + '/'
        with open('/data/dropout.json', 'r') as json_file:
            url_mapping = json.load(json_file)
        url = next((item['URL'] for item in url_mapping if item['SHOW'] == show), None)
        if url is None:
            raise ValueError(f"Show '{show}' not found in the JSON data.")
        url = f'{url}/season:{season}'

        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(directory, exist_ok=True)

        dl_ops = dropout._download_options(show, season, episode, directory)
        with yt_dlp.YoutubeDL(dl_ops) as ydl:
            ydl.download([url])

    @staticmethod
    def series():
        """Rebuild ``/data/dropout.json`` from the dropout.tv series listing.

        Every ``browse-item-link`` element with an ``href`` on
        ``https://www.dropout.tv/series`` becomes one entry holding the
        derived title (``SHOW``), the link (``URL``) and the result of
        ``grab.season`` for that link (``SEASONS``). Entries are written
        sorted by ``SHOW``.
        """
        # A timeout keeps a stalled connection from blocking the caller forever.
        page_html = requests.get('https://www.dropout.tv/series', timeout=30)
        soup = BeautifulSoup(page_html.text, 'html.parser')
        elements = soup.find_all(class_='browse-item-link')
        urls = [element['href'] for element in elements if 'href' in element.attrs]

        json_data = [
            {
                'SHOW': dropout._show_name(url),
                'URL': url,
                'SEASONS': grab.season(url),
            }
            for url in urls
        ]
        json_data.sort(key=lambda entry: entry['SHOW'])

        with open('/data/dropout.json', 'w') as json_file:
            json.dump(json_data, json_file, indent=4)
class youtube():
    """yt-dlp download presets chosen by destination directory.

    The functions are called on the class itself (``youtube.ydl(...)``), so
    they are declared as static methods.
    """

    # Format selector used for every destination that keeps the video.
    _VIDEO_FORMAT = 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best'

    @staticmethod
    def _audio_options():
        """Return a fresh set of options for extracting tagged MP3 audio."""
        return {
            'format': 'bestaudio/best[ext=mp3]',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }, {
                'key': 'FFmpegMetadata',
                'add_metadata': True,
            }],
        }

    @staticmethod
    def ydl(url, location):
        """Download *url* into *location* with the preset for that directory.

        ``/podcasts`` and ``/asmr`` are stored as MP3 audio; every other
        location keeps the video, and ``/youtube`` additionally uses the
        cookie file ``/data/youtube.cookies.txt``. Files are named
        ``<uploader>/<title>.<ext>`` below *location*.
        """
        dl_ops = {
            'paths': {'temp': '/temp', 'home': location},
            'outtmpl': '%(uploader)s/%(title)s.%(ext)s',
        }
        if location in ('/podcasts', '/asmr'):
            dl_ops.update(youtube._audio_options())
        else:
            dl_ops['format'] = youtube._VIDEO_FORMAT
            if location == '/youtube':
                dl_ops['cookiefile'] = '/data/youtube.cookies.txt'
        with yt_dlp.YoutubeDL(dl_ops) as ydl:
            ydl.download([url])
        # NOTE: saving the thumbnail via grab.thumbnail(ydl, url, location)
        # is currently disabled.

    @staticmethod
    def downloadOptions(dl_ops):
        """Add format settings to *dl_ops* based on ``dl_ops['paths']['home']``.

        ``/podcasts`` and ``/asmr`` receive the MP3 extraction preset; any
        other home directory (including ``/nsfw``) receives the video format.
        Every branch updates and returns the same dictionary, so options
        supplied by the caller (paths, output template, ...) are preserved.
        Previously the fallback branch returned a new dictionary holding only
        ``format``, discarding them.

        Raises:
            KeyError: if *dl_ops* has no ``paths``/``home`` entry.
        """
        if dl_ops['paths']['home'] in ('/podcasts', '/asmr'):
            dl_ops.update(youtube._audio_options())
        else:
            dl_ops['format'] = youtube._VIDEO_FORMAT
        return dl_ops


# Keep the bare name available for callers that use ``downloadOptions(...)``
# directly instead of ``youtube.downloadOptions(...)``.
downloadOptions = youtube.downloadOptions