"""Scrapers and downloaders for dropout.tv and YouTube content.

Uses requests/BeautifulSoup for page scraping and yt-dlp for media
downloads.  The classes here are plain namespaces: every method is a
``@staticmethod`` and is invoked directly on the class, e.g.
``grab.poster(url)`` or ``dropout.show(...)``.
"""
import os, yt_dlp, json, requests, re
from bs4 import BeautifulSoup
from urllib.parse import urlsplit

# Timeout (seconds) applied to every HTTP request so a dead or slow
# server cannot hang the downloader indefinitely.
_HTTP_TIMEOUT = 30


class grab():
    """Scraping helpers: season lists, poster images, video thumbnails."""

    @staticmethod
    def season(url):
        """Return the list of season identifiers for a show page.

        Reads the ``<select class="js-switch-season">`` element and strips
        the ``<url>/season:`` prefix from each option value, leaving just
        the season numbers/identifiers.
        """
        page_html = requests.get(url, timeout=_HTTP_TIMEOUT)
        soup = BeautifulSoup(page_html.text, 'html.parser')
        select_element = soup.find('select', class_='js-switch-season')
        options = select_element.find_all('option')
        option_values = [option['value'] for option in options if option.has_attr('value')]
        return [item.replace(url + '/season:', '') for item in option_values]

    @staticmethod
    def poster(url, save_dir='/data/posters/', force_download=False):
        """Download a show's poster image and return its local file path.

        Looks for the primary ``<img>`` inside the first
        ``.product-feature`` section.  The filename is derived from the
        image's ``alt`` text (non-alphanumerics stripped, spaces turned
        into underscores).  Returns the path of the (possibly
        pre-existing) file, or ``None`` when no usable image is found.
        Pass ``force_download=True`` to re-fetch an existing file.
        """
        page_html = requests.get(url, timeout=_HTTP_TIMEOUT)
        soup = BeautifulSoup(page_html.text, 'html.parser')
        feature_section = soup.find(class_='product-feature')
        if not feature_section:
            return None
        # Prefer the image flagged as primary; fall back to any <img>.
        img_tag = (feature_section.find('img', attrs={'data-image-primary': True})
                   or feature_section.find('img'))
        if not (img_tag and img_tag.has_attr('src')):
            return None
        img_url = img_tag['src']
        # Use alt text for the filename when available, generic fallback otherwise.
        alt_value = img_tag.get('alt', 'image')
        ext = os.path.splitext(urlsplit(img_url).path)[-1] or '.jpeg'
        safe_name = re.sub(r'[^a-zA-Z0-9\s]', '', alt_value).replace(' ', '_')
        filepath = os.path.join(save_dir, f"{safe_name}{ext}")
        if not os.path.exists(filepath) or force_download:
            os.makedirs(save_dir, exist_ok=True)
            img_data = requests.get(img_url, timeout=_HTTP_TIMEOUT).content
            with open(filepath, 'wb') as handler:
                handler.write(img_data)
        return filepath

    @staticmethod
    def thumbnail(ydl, url, location):
        """Download a video's thumbnail into ``location`` as ``<id>.jpg``.

        ``ydl`` is an already-configured ``yt_dlp.YoutubeDL`` instance.
        Best-effort: failures are reported on stdout rather than raised.
        """
        # Extracting video information (no download).
        video_info = ydl.extract_info(url, download=False)
        thumbnail_url = video_info.get('thumbnail')
        # Download the thumbnail image, if the extractor reported one.
        if thumbnail_url:
            try:
                thumbnail_filename = os.path.join(location, f"{video_info['id']}.jpg")
                with open(thumbnail_filename, 'wb') as thumbnail_file:
                    thumbnail_file.write(requests.get(thumbnail_url, timeout=_HTTP_TIMEOUT).content)
                print("Downloaded MP4 and downloaded thumbnail successfully!")
            except Exception as e:
                print(f"Error downloading thumbnail: {str(e)}")
        else:
            print("Downloaded MP4 but no thumbnail found.")


class dropout():
    """Download dropout.tv shows (regular seasons and specials)."""

    # Regex (yt-dlp match-filter syntax) identifying "extras" titles:
    # behind-the-scenes, trailers, recaps, and last-looks entries.
    _EXTRAS_PATTERN = (
        r"'(?i).*behind.?the.?scenes.*"
        r"|.*trailer.*"
        r"|.*recap.*"
        r"|.*last.looks.*'"
    )

    @staticmethod
    def _download_playlist(show, season, episode_start, directory, filter_expr, season_tag):
        """Shared download pipeline behind show() and specials().

        Resolves the show's URL from /data/dropout.json, extracts the
        season playlist, keeps only entries passing ``filter_expr`` (a
        yt-dlp match-filter expression), and downloads each survivor with
        a sequential ``<season_tag>E<nn>`` number starting at
        ``episode_start``.  Raises ValueError if the show is unknown.
        """
        os.makedirs(directory, exist_ok=True)
        with open('/data/dropout.json', 'r') as json_file:
            url_mapping = json.load(json_file)
        url = next((item['URL'] for item in url_mapping if item['SHOW'] == show), None)
        if url is None:
            raise ValueError(f"Show '{show}' not found in the JSON data.")
        playlist_url = f'{url}/season:{season}'
        match_filter = yt_dlp.utils.match_filter_func(filter_expr)
        ydl_opts = {
            'quiet': True,
            'skip_download': True,
            'cookiefile': '/data/dropout.cookies.txt',
        }
        # Step 1: Extract playlist info.
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            playlist_info = ydl.extract_info(playlist_url, download=False)
        entries = playlist_info.get('entries', [])
        # match_filter returns None when the entry should be KEPT.
        filtered_entries = [entry for entry in entries if match_filter(entry) is None]
        # Step 2: Download filtered entries with corrected episode numbers.
        episode_start = int(episode_start) if episode_start else 1
        for i, entry in enumerate(filtered_entries, start=episode_start):
            episode_number = f"{i:02}"
            filename_template = f"{show} - {season_tag}E{episode_number} - %(title)s.%(ext)s"
            dl_opts = {
                'format': 'bestvideo+bestaudio/best',
                'audio_quality': '256K',
                'paths': {'temp': '/temp', 'home': directory},
                'cookiefile': '/data/dropout.cookies.txt',
                'writesubtitles': True,
                'subtitleslangs': ['en'],
                'outtmpl': filename_template,
            }
            with yt_dlp.YoutubeDL(dl_opts) as ydl:
                ydl.download([entry['webpage_url']])

    @staticmethod
    def show(show, season, episode_start):
        """Download one season of ``show`` into /tv/<show>/Season <n>/.

        Extras (trailers, recaps, behind-the-scenes, last looks) are
        excluded; remaining entries are renumbered from ``episode_start``.
        """
        dropout._download_playlist(
            show, season, episode_start,
            directory=f'/tv/{show}/Season {season}/',
            # "title !~= pattern": keep entries whose title does NOT match.
            filter_expr="title !~= " + dropout._EXTRAS_PATTERN,
            season_tag=f"S{int(season):02}",
        )

    @staticmethod
    def specials(show, season, episode_start):
        """Download a season's extras into /tv/<show>/Specials/ as S00Exx.

        Inverse of show(): keeps ONLY trailer/recap/behind-the-scenes/
        last-looks entries, numbered from ``episode_start``.
        """
        dropout._download_playlist(
            show, season, episode_start,
            directory=f'/tv/{show}/Specials/',
            # "title ~= pattern": keep entries whose title DOES match.
            filter_expr="title ~= " + dropout._EXTRAS_PATTERN,
            season_tag="S00",
        )

    @staticmethod
    def series():
        """Rebuild /data/dropout.json from the dropout.tv series index.

        Scrapes every ``.browse-item-link`` on the series page, derives a
        display name from each URL slug, fetches the poster and season
        list via ``grab``, and writes the records sorted by show name.
        """
        json_data = []
        page_html = requests.get('https://www.dropout.tv/series', timeout=_HTTP_TIMEOUT)
        soup = BeautifulSoup(page_html.text, 'html.parser')
        elements = soup.find_all(class_='browse-item-link')
        # Extract URLs from href attributes.
        urls = [element['href'] for element in elements if 'href' in element.attrs]
        for url in urls:
            # Turn the URL slug into a display name, preserving
            # possessives ("-s-" -> "'s") and lowercasing "Of".
            name = (url.replace('https://www.dropout.tv/', '')
                       .replace('-s-', "'s-")
                       .replace('-', ' ')
                       .title()
                       .replace('Of', 'of')
                       .replace("'S", "'s"))
            info_data = {
                'SHOW': name,
                'URL': url,
                'POSTER': grab.poster(url),
                'SEASONS': grab.season(url),
            }
            json_data.append(info_data)
        # Sort the records by show name for a stable, readable file.
        sorted_json_data = sorted(json_data, key=lambda x: x['SHOW'])
        with open('/data/dropout.json', 'w') as json_file:
            json.dump(sorted_json_data, json_file, indent=4)


class youtube():
    """yt-dlp wrapper with per-destination download profiles."""

    @staticmethod
    def ydl(url, location):
        """Download ``url`` into ``location`` with destination-based options.

        /podcasts and /asmr get mp3 audio extraction with metadata;
        /youtube gets mp4 video with a cookie file; every other
        destination gets plain mp4 video.
        """
        dl_ops = {
            'paths': {'temp': '/temp', 'home': location},
            'outtmpl': '%(uploader)s/%(title)s.%(ext)s',
        }
        home = dl_ops['paths']['home']
        if home in ('/podcasts', '/asmr'):
            # Audio-only destinations: extract mp3 and tag it.
            dl_ops['format'] = 'bestaudio/best[ext=mp3]'
            dl_ops['postprocessors'] = [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': '192',
            }, {
                'key': 'FFmpegMetadata',
                'add_metadata': True,
            }]
        elif home == '/youtube':
            dl_ops['format'] = 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best'
            dl_ops['cookiefile'] = '/data/youtube.cookies.txt'
        else:
            dl_ops['format'] = 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best'
        with yt_dlp.YoutubeDL(dl_ops) as ydl:
            ydl.download([url])
            # grab.thumbnail(ydl, url, location)