# syllabus/app/download.py

from fastapi.responses import JSONResponse
import os, yt_dlp, json, requests
from bs4 import BeautifulSoup

class grab():
    """Scraping helpers: season lists from show pages and video thumbnails."""

    # Seconds to wait on the remote server. requests applies no timeout by
    # default, so without this a stalled connection would block forever.
    _TIMEOUT = 30

    @staticmethod
    def season(url):
        """Return the season identifiers offered by the show page at ``url``.

        The page is fetched and its ``<select class="js-switch-season">``
        element is read; every ``<option>`` value has the
        ``<url>/season:`` prefix stripped.

        Args:
            url: Show page URL (without a trailing ``/season:N`` part).

        Returns:
            A list of strings, one per option that carries a ``value``
            attribute. Empty when the page has no season selector.
        """
        page_html = requests.get(url, timeout=grab._TIMEOUT)
        soup = BeautifulSoup(page_html.text, 'html.parser')
        select_element = soup.find('select', class_='js-switch-season')
        if select_element is None:
            # find() yields None when nothing matches; report "no seasons"
            # instead of failing with AttributeError on .find_all below.
            return []
        prefix = url + '/season:'
        return [
            option['value'].replace(prefix, '')
            for option in select_element.find_all('option')
            if option.has_attr('value')
        ]

    @staticmethod
    def thumbnail(ydl, url, location):
        """Save the thumbnail of the video at ``url`` as ``<id>.jpg``.

        Best-effort: any failure while fetching or writing the image is
        printed rather than raised.

        Args:
            ydl: Object exposing ``extract_info(url, download=False)`` that
                returns a mapping with ``id`` and optionally ``thumbnail``.
            url: Video URL handed to ``ydl.extract_info``.
            location: Directory in which the image file is written.
        """
        # Extracting video information
        video_info = ydl.extract_info(url, download=False)
        thumbnail_url = video_info.get('thumbnail')

        if not thumbnail_url:
            print("Downloaded MP4 but no thumbnail found.")
            return

        # Download the thumbnail image
        try:
            # Fetch and validate before opening the file so a failed request
            # does not leave an empty or HTML-filled .jpg behind.
            response = requests.get(thumbnail_url, timeout=grab._TIMEOUT)
            response.raise_for_status()
            thumbnail_filename = os.path.join(location, f"{video_info['id']}.jpg")
            with open(thumbnail_filename, 'wb') as thumbnail_file:
                thumbnail_file.write(response.content)
            print("Downloaded MP4 and downloaded thumbnail successfully!")
        except Exception as e:
            print(f"Error downloading thumbnail: {str(e)}")

class dropout():
    """Download Dropout shows with yt-dlp and maintain /data/dropout.json."""

    # Seconds to wait on the remote server; requests has no default timeout.
    _TIMEOUT = 30

    # Title fragments (regex) of playlist entries that should be skipped:
    # behind-the-scenes clips, trailers, recaps and "last looks".
    _REJECTED_TITLES = (
        r'behind.?the.?scenes',
        r'trailer',
        r'recap',
        r'last.looks',
    )

    @staticmethod
    def show(show, season, episode):
        """Download a season (or from a given episode onward) of ``show``.

        The show's base URL is looked up in ``/data/dropout.json`` and files
        are written to ``/tv/<show>/Season <season>/``.

        Args:
            show: Show name; must equal a ``SHOW`` entry in the JSON index.
            season: Season number (string or int convertible with ``int``).
            episode: Episode number, or ``None``/``''`` to download the
                whole season with yt-dlp's playlist index as episode number.

        Raises:
            ValueError: If ``show`` is not in the JSON index, or ``season``
                is not numeric.
        """
        directory = f'/tv/{show}/Season {season}/'
        with open('/data/dropout.json', 'r', encoding='utf-8') as json_file:
            url_mapping = json.load(json_file)
        url = next((item['URL'] for item in url_mapping if item['SHOW'] == show), None)
        if url is None:
            raise ValueError(f"Show '{show}' not found in the JSON data.")
        url = f'{url}/season:{season}'

        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(directory, exist_ok=True)

        # The options dict must exist before the episode handling below can
        # add 'playliststart' to it.
        dl_ops = {
            'format': 'bestvideo+bestaudio/best',
            # NOTE(review): 'audio_quality' is not among yt-dlp's documented
            # YoutubeDL parameters and is presumably ignored - confirm.
            'audio_quality': '256K',
            'paths': {
                'temp': '/temp',
                'home': directory,
            },
            'cookiefile': '/data/dropout.cookies.txt',
            # yt-dlp's option is 'rejecttitle' and takes ONE regex, so the
            # fragments are OR-ed into a single case-insensitive pattern.
            'rejecttitle': '(?i)' + '|'.join(dropout._REJECTED_TITLES),
            'noplaylist': True,
            # Additional options for downloading subtitles
            'writesubtitles': True,  # Download subtitles
            'subtitleslangs': ['en'],  # Language for subtitles ('en' = English)
        }

        if episode is None or episode == '':
            # Let yt-dlp number each file by its position in the playlist.
            episode = '%(playlist_index)02d'
        else:
            # NOTE(review): only the start is set, so yt-dlp continues to the
            # end of the playlist while every file is named with this same
            # episode number; set 'playlistend' too if a single episode is
            # what is intended.
            try:
                dl_ops['playliststart'] = int(episode)
            except (TypeError, ValueError):
                # Non-numeric episode: start at the first playlist item,
                # which is yt-dlp's documented default.
                dl_ops['playliststart'] = 1

        dl_ops['outtmpl'] = f'{show} - S{int(season):02}E{episode} - %(title)s.%(ext)s'

        with yt_dlp.YoutubeDL(dl_ops) as ydl:
            ydl.download([url])

    @staticmethod
    def series():
        """Rebuild ``/data/dropout.json`` from https://www.dropout.tv/series.

        Every ``browse-item-link`` element with an ``href`` becomes one
        record ``{'SHOW', 'URL', 'SEASONS'}``; records are sorted by ``SHOW``
        and written as indented JSON.
        """
        page_html = requests.get('https://www.dropout.tv/series', timeout=dropout._TIMEOUT)
        soup = BeautifulSoup(page_html.text, 'html.parser')
        elements = soup.find_all(class_='browse-item-link')
        # Extract URLs from href attributes
        urls = [element['href'] for element in elements if 'href' in element.attrs]

        json_data = []
        for url in urls:
            # Derive a display name from the URL slug: restore possessive
            # "'s", title-case the words, then lower-case "Of" and "'S".
            name = (
                url.replace('https://www.dropout.tv/', '')
                .replace('-s-', "'s-")
                .replace('-', ' ')
                .title()
                .replace('Of', 'of')
                .replace("'S", "'s")
            )
            json_data.append({
                'SHOW': name,
                'URL': url,
                'SEASONS': grab.season(url),
            })

        # Sort the records by the 'SHOW' key
        json_data.sort(key=lambda x: x['SHOW'])
        with open('/data/dropout.json', 'w', encoding='utf-8') as json_file:
            json.dump(json_data, json_file, indent=4)

class youtube():
    """yt-dlp download helpers that pick options from the destination folder."""

    # Destination folders whose downloads are converted to MP3 audio.
    _AUDIO_HOMES = ('/podcasts', '/asmr')
    _AUDIO_FORMAT = 'bestaudio/best[ext=mp3]'
    _VIDEO_FORMAT = 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best'

    @staticmethod
    def _apply_audio_options(dl_ops):
        """Set the MP3-extraction format and postprocessors on ``dl_ops``."""
        dl_ops['format'] = youtube._AUDIO_FORMAT
        # A fresh list per call so option dicts never share mutable state.
        dl_ops['postprocessors'] = [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }, {
            'key': 'FFmpegMetadata',
            'add_metadata': True,
        }]

    @staticmethod
    def ydl(url, location):
        """Download ``url`` into ``location`` using yt-dlp.

        ``/podcasts`` and ``/asmr`` are extracted to MP3; ``/youtube`` is
        downloaded as MP4 using ``/data/youtube.cookies.txt``; any other
        location is downloaded as MP4 without cookies.

        Args:
            url: URL passed to ``YoutubeDL.download``.
            location: Destination directory (yt-dlp ``paths['home']``).
        """
        dl_ops = {
            'paths': {'temp': '/temp', 'home': location},
            'outtmpl': '%(uploader)s/%(title)s.%(ext)s',
        }
        if location in youtube._AUDIO_HOMES:
            youtube._apply_audio_options(dl_ops)
        else:
            dl_ops['format'] = youtube._VIDEO_FORMAT
            if location == '/youtube':
                dl_ops['cookiefile'] = '/data/youtube.cookies.txt'

        with yt_dlp.YoutubeDL(dl_ops) as downloader:
            downloader.download([url])
        # Thumbnail fetching via grab.thumbnail(...) is currently disabled.

    @staticmethod
    def downloadOptions(dl_ops):
        """Fill in format options based on ``dl_ops['paths']['home']``.

        Args:
            dl_ops: Options dict containing ``paths['home']``. It is updated
                in place for ``/podcasts``, ``/asmr`` and ``/nsfw``.

        Returns:
            The updated ``dl_ops`` for the three known folders; for any other
            folder, a NEW dict holding only the MP4 ``format`` entry.
        """
        home = dl_ops['paths']['home']
        if home in youtube._AUDIO_HOMES:
            youtube._apply_audio_options(dl_ops)
            return dl_ops
        if home == "/nsfw":
            dl_ops['format'] = youtube._VIDEO_FORMAT
            return dl_ops
        # NOTE(review): this branch drops every key the caller passed in
        # (including 'paths'), unlike the branches above - kept as-is for
        # backward compatibility; confirm whether that is intended.
        return {
            'format': youtube._VIDEO_FORMAT,
        }