diff --git a/.vscode/launch.json b/.vscode/launch.json index b5c085c..ecacfa7 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -13,7 +13,7 @@ "jinja": true, "justMyCode": true, "env": { - "PYTHONPATH": "${workspaceFolder}/app" + "PYTHONPATH": "${workspaceFolder}/app", } } ] diff --git a/app/download.py b/app/download.py index 14f9d38..3e7bc1a 100644 --- a/app/download.py +++ b/app/download.py @@ -1,4 +1,6 @@ -import os, yt_dlp, json, requests, re +import os, yt_dlp, json, requests, re, time +from selenium import webdriver +from selenium.webdriver.common.keys import Keys from bs4 import BeautifulSoup from urllib.parse import urlsplit @@ -12,38 +14,23 @@ class grab(): seasons = [item.replace(url+'/season:', '') for item in option_values] return seasons - def poster(url, save_dir='/data/posters/', force_download=False): - page_html = requests.get(url) - soup = BeautifulSoup(page_html.text, 'html.parser') + def poster(url, name, save_dir='/data/posters/', force_download=False): + # Use alt for filename if available, fallback to a generic name + alt_value = name + path = urlsplit(url).path + ext = os.path.splitext(path)[-1] or '.jpeg' - # Find the first anywhere inside a .product-feature - feature_section = soup.find(class_='product-feature') - if not feature_section: - return None + safe_name = re.sub(r'[^a-zA-Z0-9\s]', '', alt_value).replace(' ', '_') + filename = f"{safe_name}{ext}" + filepath = os.path.join(save_dir, filename) - img_tag = feature_section.find('img', attrs={'data-image-primary': True}) or feature_section.find('img') - - if img_tag and img_tag.has_attr('src'): - img_url = img_tag['src'] - - # Use alt for filename if available, fallback to a generic name - alt_value = img_tag.get('alt', 'image') - path = urlsplit(img_url).path - ext = os.path.splitext(path)[-1] or '.jpeg' - - safe_name = re.sub(r'[^a-zA-Z0-9\s]', '', alt_value).replace(' ', '_') - filename = f"{safe_name}{ext}" - filepath = os.path.join(save_dir, filename) + if not os.path.exists(filepath) or force_download: + os.makedirs(save_dir, exist_ok=True) + img_data = requests.get(url).content + with open(filepath, 'wb') as handler: + handler.write(img_data) - if not os.path.exists(filepath) or force_download: - os.makedirs(save_dir, exist_ok=True) - img_data = requests.get(img_url).content - with open(filepath, 'wb') as handler: - handler.write(img_data) - - return filepath - else: - return None + return filepath def thumbnail(ydl,url,location): @@ -191,26 +178,43 @@ class dropout(): def series(): json_data=[] - page_html=requests.get('https://www.dropout.tv/series') - # If you want to parse the HTML - soup = BeautifulSoup(page_html.text, 'html.parser') - # Example: Find all elements with a specific class - elements = soup.find_all(class_='browse-item-link') - # Extract URLs from href attributes - urls = [element['href'] for element in elements if 'href' in element.attrs] + # driver = webdriver.Chrome(executable_path='/path/to/chromedriver') + # driver.get('https://www.dropout.tv/series') + # for _ in range(5): # Adjust the range as needed + # driver.find_element_by_tag_name('body').send_keys(Keys.END) + # time.sleep(2) # Wait for new content to load + # html = driver.page_source - for url in urls: - info_data={} - name=url.replace('https://www.dropout.tv/','').replace('-s-',"'s-").replace('-',' ').title().replace('Of','of').replace("'S","'s") - info_data['SHOW'] = name - info_data['URL'] = url - info_data['POSTER'] = grab.poster(url) - info_data['SEASONS'] = grab.season(url) + html=requests.get('https://www.dropout.tv/series').text + + # If you want to parse the HTML + soup = BeautifulSoup(html, 'html.parser') + elements = soup.find_all('a', class_='browse-item-link') + + shows = [] + for element in elements: + show_data = {} + show_data['href'] = element.get('href', '') + + img = element.find('img') + if img: + show_data['src'] = img.get('src', '') + show_data['alt'] = img.get('alt', '') + + shows.append(show_data) + + # Now 'shows' is a list of dicts, so this works: + for show in shows: + info_data = {} + info_data['SHOW'] = show.get('alt', 'No title') + info_data['URL'] = show.get('href', 'No link') + info_data['LINK'] = re.sub(r".*dropout.tv/", "", show.get('href', '')) + info_data['POSTER'] = grab.poster(show.get('src', ''), show.get('alt', '')) json_data.append(info_data) # Sort the json_data by the 'SHOW' key sorted_json_data = sorted(json_data, key=lambda x: x['SHOW']) - with open('/data/dropout.json', 'w') as json_file: + with open('./data/dropout.json', 'w') as json_file: json.dump(sorted_json_data, json_file, indent=4) class youtube(): diff --git a/app/main.py b/app/main.py index fd40909..3a8a4ef 100644 --- a/app/main.py +++ b/app/main.py @@ -7,32 +7,38 @@ from pathlib import Path from functools import partial import json, download, asyncio from typing import Optional +import os app = FastAPI() -# app.mount("/static", StaticFiles(directory="/app/app/static"), name="static") + app.mount("/data", StaticFiles(directory="/data"), name="data") -templates = Jinja2Templates(directory="templates") + +templates = Jinja2Templates(directory="app/templates") loop = asyncio.get_running_loop() +# JSON cache +cached_data = None # api @app.get("/dropout/update") async def dropoutUpdate(): + global cached_data try: download.dropout.series() + with open('/data/dropout.json') as f: + cached_data = json.load(f) return JSONResponse(status_code=200, content={"status": "success", "message": "Series grab complete."}) except Exception as e: return JSONResponse(status_code=500, content={"status": "error", "message": str(e)}) @app.get("/dropout/series") async def dropoutSeries(): - file_path = Path("/data/dropout.json") - if file_path.exists(): - with file_path.open("r", encoding="utf-8") as f: - data = json.load(f) - return JSONResponse(content=data) + global cached_data + if cached_data is None: + await dropoutUpdate() + return JSONResponse(content=cached_data) return JSONResponse(content={"error": "File not found"}, status_code=404) @app.post("/dropout/download", description="Download an entire season from episode 1. Ignores behind the scenes and trailers.") @@ -70,25 +76,29 @@ async def ydl(url: str, location: str): except Exception as e: return JSONResponse(status_code=500, content={"status": "error", "message": str(e)}) -# html -# @app.get("/", include_in_schema=False, response_class=HTMLResponse) -# async def index(request: Request): -# apps = [ -# {"name": "Notes", "url": "/notes"}, -# {"name": "Todo List", "url": "/todos"}, -# {"name": "Weather", "url": "/weather"}, -# # Add more apps here -# ] -# return templates.TemplateResponse("index.html", {"request": request, "apps": apps, "title": "Welcome to My App Hub"}) - - -# JSON cache -cached_data = None @app.get("/", response_class=HTMLResponse) async def index(request: Request): global cached_data - if cached_data is None: - with open('/data/dropout.json') as f: - cached_data = json.load(f) - return templates.TemplateResponse("index.html", {"request": request, "data": cached_data}) \ No newline at end of file + try: + if cached_data is None: + await dropoutUpdate() + return templates.TemplateResponse("index.html", {"request": request, "data": cached_data}) + except Exception as e: + return JSONResponse(status_code=500, content={"status": "error", "message": str(e)}) + +@app.get("/show/{show}", include_in_schema=False, response_class=HTMLResponse) +async def index(request: Request, show = str): + try: + global cached_data + if cached_data is None: + await dropoutUpdate() + try: + for item in cached_data: + if show == item['LINK']: + show_data = item + return templates.TemplateResponse("show.html", {"request": request, "show": show_data}) + except Exception as e: + return JSONResponse(status_code=500, content={"status": "error", "message": str(e)}) + except Exception as e: + return JSONResponse(status_code=500, content={"status": "error", "message": str(e)}) \ No newline at end of file diff --git a/app/templates/index.html b/app/templates/index.html index 976e45a..7ff1a95 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -1,40 +1,164 @@ - - - - Show Posters - - - - {% for item in data %} -
- {{ item['SHOW'] }} + + + + Show Posters + + + + {% for item in data %} +
+ + + {{ item['SHOW'] }} +
{{ item['SHOW'] }}
+
- {% endfor %} - + {% endfor %} + + + + diff --git a/app/templates/show.html b/app/templates/show.html new file mode 100644 index 0000000..ec93488 --- /dev/null +++ b/app/templates/show.html @@ -0,0 +1,46 @@ + + + + + {{ show['SHOW'] }} + + + +
+ {{ show['SHOW'] }} +
{{ show['SHOW'] }}
+
+ {{ show['DESCRIPTION'] }} +
+ +

← Back to all shows

+
+ + diff --git a/requirements.txt b/requirements.txt index e3558ec..8f334b3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ fastapi pathlib uvicorn jinja2 -python-multipart \ No newline at end of file +python-multipart +selenium \ No newline at end of file