diff --git a/app/download.py b/app/download.py
index 1aa387a..95fb63d 100644
--- a/app/download.py
+++ b/app/download.py
@@ -1,6 +1,15 @@
 from fastapi.responses import JSONResponse
 import os, yt_dlp, json, requests
 from bs4 import BeautifulSoup
+import re
+
+# Define your patterns
+reject_patterns = [
+    re.compile(r'(?i).*behind.?the.?scenes.*'),
+    re.compile(r'(?i).*trailer.*'),
+    re.compile(r'(?i).*recap.*'),
+    re.compile(r'(?i).*last.looks.*'),
+]
 
 class grab():
     def season(url):
@@ -57,12 +66,13 @@ class dropout():
                 'home': directory
             },
             'cookiefile': '/data/dropout.cookies.txt',
-            'reject_title': [
-                r'(?i).*behind.?the.?scenes.*', # Reject titles with "behind the scenes" (case-insensitive)
-                r'(?i).*trailer.*', # Reject titles with "trailer" (case-insensitive)
-                r'(?i).*recap.*', # Reject titles with "recap" (case-insensitive)
-                r'(?i).*last.looks.*' # Reject titles with "last looks" (case-insensitive)
-            ],
+            'match_filter': yt_dlp.utils.match_filter_func("title !~= '(?i).*behind.?the.?scenes.*|.*trailer.*|.*recap.*|.*last.looks.*'"),
+            # 'reject_title': [
+            #     r'(?i).*behind.?the.?scenes.*', # Reject titles with "behind the scenes" (case-insensitive)
+            #     r'(?i).*trailer.*', # Reject titles with "trailer" (case-insensitive)
+            #     r'(?i).*recap.*', # Reject titles with "recap" (case-insensitive)
+            #     r'(?i).*last.looks.*' # Reject titles with "last looks" (case-insensitive)
+            # ],
             'outtmpl': show + ' - S'+f"{int(season):02}"+'E'+episode+' - %(title)s.%(ext)s',
             'noplaylist': True,
             # Additional options for downloading subtitles
@@ -71,6 +81,25 @@ class dropout():
         }
         with yt_dlp.YoutubeDL(dl_ops) as ydl:
             ydl.download([url])
+        # ydl_opts = {
+        #     'quiet': True,         # Turn off download progress messages
+        #     'skip_download': True, # Don't download anything
+        #     'cookiefile': '/data/dropout.cookies.txt',
+        # }
+
+        # with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        #     info = ydl.extract_info(url, download=False)
+
+        # print(f"Playlist Title: {info.get('title')}")
+        # print(f"Uploader: {info.get('uploader')}")
+        # print(f"Number of videos: {len(info.get('entries', []))}")
+        # print()
+
+        # for i, entry in enumerate(info['entries'], 1):
+        #     print(f"{i}. {entry.get('title')}")
+        #     print(f"    URL: {entry.get('webpage_url')}")
+        #     print(f"    Duration: {entry.get('duration')} seconds")
+        #     print()
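
Note on the replacement option: yt_dlp.utils.match_filter_func turns a --match-filters style expression into a callable that the match_filter option accepts, which is what the hunk above relies on. Below is a minimal standalone sketch of the same title filter, isolated from the dropout class; the 'URL' argument is a placeholder and skip_download is set only so the filter can be exercised without fetching anything.

    import yt_dlp

    # Same regex alternation as in the diff above; entries whose titles match
    # any of these patterns are skipped instead of downloaded.
    filter_fn = yt_dlp.utils.match_filter_func(
        "title !~= '(?i).*behind.?the.?scenes.*|.*trailer.*|.*recap.*|.*last.looks.*'"
    )

    opts = {
        'match_filter': filter_fn,
        'skip_download': True,  # only evaluate the filter, don't download anything
    }

    with yt_dlp.YoutubeDL(opts) as ydl:
        ydl.download(['URL'])  # 'URL' is a placeholder for a real video or season URL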