using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using AudibleDotCom;
using Dinah.Core.Humanizer;
using OpenQA.Selenium;
using OpenQA.Selenium.Chrome;
using OpenQA.Selenium.Support.UI;
namespace AudibleDotComAutomation
{
/// <summary>
/// browser manipulation and web driver access.
/// creates and stores the web driver, and handles browser navigation, which can vary depending on whether the session is anonymous or authenticated.
///
/// this base class: is online. no auth. used for most pages. retains no chrome cookies
/// </summary>
public abstract class SeleniumRetriever : IPageRetriever
{
#region // chrome driver details
/*
HIDING CHROME CONSOLE WINDOW
hiding chrome console window has proven to cause more headaches than it solves. here's how to do it though:
// can also use CreateDefaultService() overloads to specify driver path and/or file name
var chromeDriverService = ChromeDriverService.CreateDefaultService();
chromeDriverService.HideCommandPromptWindow = true;
return new ChromeDriver(chromeDriverService, options);
HEADLESS CHROME
this WOULD be how to do headless, but amazon/audible are far too tricksy about their changes and anti-scraping measures,
which renders 'headless' mode useless
var options = new ChromeOptions();
options.AddArgument("--headless");
SPECIFYING DRIVER LOCATION
if selenium continues to have trouble finding the driver:
var driver = new ChromeDriver(@"C:\my\path\to\chromedriver\directory");
var chromeDriverService = ChromeDriverService.CreateDefaultService(@"C:\my\path\to\chromedriver\directory");
*/
#endregion
/// <summary>web driver used for all browser access. assigned once, in the constructor</summary>
protected IWebDriver Driver { get; }

/// <summary>awaited before each audible request (see the "small random wait" notes at its call sites)</summary>
private Humanizer humanizer { get; } = new Humanizer();

/// <summary>
/// starts a chrome web driver using the options supplied by <see cref="ctorCreateChromeOptions"/>.
/// note: that method is virtual, so a subclass override runs before the subclass's own constructor body
/// </summary>
protected SeleniumRetriever()
{
    var chromeOptions = ctorCreateChromeOptions();
    Driver = new ChromeDriver(chromeOptions);
}
/// <summary>
/// builds the chrome options handed to the driver created in the constructor.
/// this default is a plain <see cref="ChromeOptions"/>: no auth. retain no chrome cookies
/// </summary>
/// <returns>a new, unmodified <see cref="ChromeOptions"/></returns>
protected virtual ChromeOptions ctorCreateChromeOptions()
{
    return new ChromeOptions();
}
/// <summary>
/// clicks <paramref name="element"/>, waits for the loading spinner (if any) to go away, then pauses a few more seconds
/// </summary>
/// <param name="element">the link/button to click</param>
protected async Task AudibleLinkClickAsync(IWebElement element)
{
    // sometimes these clicks just take a while, so a few more seconds are added at the end
    var extraSettleTime = TimeSpan.FromMilliseconds(5000);

    // EACH CALL to audible should have a small random wait to reduce chances of scrape detection
    await humanizer.Wait();

    // the click itself runs on a thread-pool thread
    var click = Task.Run(() => Driver.Click(element));
    await click;

    await waitForSpinnerAsync();
    await Task.Delay(extraSettleTime);
}
/// <summary>locator for the loading overlay (element id "library-main-overlay")</summary>
private By spinnerLocator { get; } = By.Id("library-main-overlay");

/// <summary>
/// if the loading overlay w/spinner exists: wait (up to 60 seconds) for it to end
/// </summary>
/// <exception cref="WebDriverTimeoutException">the overlay is still visible after 60 seconds</exception>
private async Task waitForSpinnerAsync()
{
    // short pause before looking for the overlay
    await Task.Delay(100);

    // the selenium calls below block (the wait for up to 60 seconds). run them on the thread pool, like the
    // other driver calls in this class, instead of stalling whoever awaits this method
    await Task.Run(() =>
    {
        if (Driver.FindElements(spinnerLocator).Count == 0)
            return;

        new WebDriverWait(Driver, TimeSpan.FromSeconds(60))
            .Until(ExpectedConditions.InvisibilityOfElementLocated(spinnerLocator));
    });
}
// true until GetPageSourcesAsync has completed FirstRunAsync once
private bool isFirstRun = true;

/// <summary>
/// one-time warm-up: loads the audible home page, then waits for the page flicker to pass
/// </summary>
protected virtual async Task FirstRunAsync()
{
    const string homeUrl = "http://www.audible.com/";

    // the flicker usually happens after ~5 seconds and can happen irrespective of login state. 7 seconds allows for it
    var flickerAllowance = TimeSpan.FromSeconds(7);

    // load with no beginning wait
    var navigation = Task.Run(() => Driver.Navigate().GoToUrl(homeUrl));
    await navigation;

    await Task.Delay(flickerAllowance);
}
/// <summary>
/// navigates to the requested audible page and collects its source, plus the source of every following page
/// reached through the "next page" navigation
/// </summary>
/// <param name="audiblePage">which kind of audible page to load</param>
/// <param name="pageId">optional id passed along when building the page's url</param>
/// <returns>one <see cref="AudiblePageSource"/> per page visited, in visiting order</returns>
public async Task<IEnumerable<AudiblePageSource>> GetPageSourcesAsync(AudiblePageType audiblePage, string pageId = null)
{
    // the flag is cleared only after FirstRunAsync succeeds, so a failed first run is attempted again on the next call
    if (isFirstRun)
    {
        await FirstRunAsync();
        isFirstRun = false;
    }

    await initFirstPageAsync(audiblePage, pageId);
    return await processUrl(audiblePage, pageId);
}
/// <summary>
/// navigates the browser to the first page for <paramref name="audiblePage"/> and waits for the loading spinner (if any) to end
/// </summary>
/// <param name="audiblePage">which kind of audible page to load</param>
/// <param name="pageId">id passed along when building the page's url. may be null</param>
private async Task initFirstPageAsync(AudiblePageType audiblePage, string pageId)
{
    // EACH CALL to audible should have a small random wait to reduce chances of scrape detection
    await humanizer.Wait();

    var page = audiblePage.GetAudiblePageRobust();
    var startUrl = page.GetUrl(pageId);

    // navigation runs on a thread-pool thread
    var navigation = Task.Run(() => Driver.Navigate().GoToUrl(startUrl));
    await navigation;

    await waitForSpinnerAsync();
}
/// <summary>
/// captures the source of the page currently loaded in the browser, then keeps moving to the next page and
/// capturing its source until there are no more pages
/// </summary>
/// <param name="audiblePage">page type recorded on every captured <see cref="AudiblePageSource"/></param>
/// <param name="pageId">page id recorded on every captured <see cref="AudiblePageSource"/>. may be null</param>
/// <returns>the captured page sources, in visiting order. always contains at least the current page</returns>
private async Task<List<AudiblePageSource>> processUrl(AudiblePageType audiblePage, string pageId)
{
    var pageSources = new List<AudiblePageSource>();

    // do/while: the current page is always captured before checking for a next page.
    // hasMorePagesAsync performs the navigation itself when it returns true
    do
    {
        pageSources.Add(new AudiblePageSource(audiblePage, Driver.PageSource, pageId));
    }
    while (await hasMorePagesAsync());

    return pageSources;
}
#region has more pages
/// <summary>
/// if no more pages, return false. else, navigate to next page and return true
/// </summary>
/// <returns>true when the browser was moved to the next page; false when there is no next page</returns>
private async Task<bool> hasMorePagesAsync()
{
    // only the ~oct 2017 layout is checked. old_hasMorePages() is kept for reference but is not called
    var next = new_hasMorePages();
    if (next == null)
        return false;

    await AudibleLinkClickAsync(next);
    return true;
}
/// <summary>
/// older page layout: finds the "NEXT" link inside the first element with class "adbl-page-next".
/// within this file its only call site (in hasMorePagesAsync) is commented out
/// </summary>
/// <returns>the "NEXT" link, or null when the container is missing or does not hold exactly one such link</returns>
private IWebElement old_hasMorePages()
{
    var containers = Driver.FindElements(By.ClassName("adbl-page-next"));
    if (containers.Count == 0)
        return null;

    // only the first container is inspected
    var firstContainer = containers[0];
    var nextLinks = firstContainer.FindElements(By.LinkText("NEXT"));

    // anything other than exactly one link is treated as "no next page"
    return nextLinks.Count == 1
        ? nextLinks[0]
        : null;
}
// ~ oct 2017
/// <summary>
/// newer page layout: finds the button of the active "next page" (right chevron) navigation link
/// </summary>
/// <returns>the button to click for the next page, or null when there is no such navigation link</returns>
private IWebElement new_hasMorePages()
{
    // get all active/enabled navigation links
    var navLinks = Driver.FindElements(By.ClassName("library-load-page"));
    if (navLinks.Count == 0)
        return null;

    // keep only the links holding a right chevron.
    // note: there are also right chevrons which are not for wish list navigation which is why we first filter by library-load-page
    var chevronLinks = new List<IWebElement>();
    foreach (var link in navLinks)
    {
        var chevrons = link.FindElements(By.ClassName("bc-icon-chevron-right"));
        if (chevrons.Count > 0)
            chevronLinks.Add(link);
    }

    if (chevronLinks.Count == 0)
        return null;

    // Single(): more than one matching link is unexpected and throws
    var nextLink = chevronLinks.Single();
    return nextLink.FindElement(By.TagName("button"));
}
#endregion
#region IDisposable pattern
/// <summary>
/// quits the browser and releases the web driver. safe to call more than once
/// </summary>
public void Dispose()
{
    Dispose(true);
    GC.SuppressFinalize(this);
}

// set on the first Dispose(bool) call so later calls do nothing
private bool isDisposed;

/// <summary>
/// releases the web driver. only the first call does any work
/// </summary>
/// <param name="disposing">true when called from <see cref="Dispose()"/>; the driver is only quit in that case</param>
protected virtual void Dispose(bool disposing)
{
    if (isDisposed)
        return;

    // flag first: even if Quit() throws, a repeated Dispose must not try to quit the driver again
    isDisposed = true;

    if (disposing && Driver != null)
    {
        // Quit() does cleanup AND disposes
        Driver.Quit();
    }
}
#endregion
}
}