All scraping code removed

This commit is contained in:
Robert McRackan 2019-11-05 13:42:11 -05:00
parent c61bc27a7b
commit df90fc5361
106 changed files with 666 additions and 5319 deletions

View File

@ -4,7 +4,7 @@ using AudibleApi;
using DtoImporterService;
using InternalUtilities;
namespace ApplicationService
namespace ApplicationServices
{
public class LibraryIndexer
{

View File

@ -1,7 +1,7 @@
using System.Threading.Tasks;
using DataLayer;
namespace ApplicationService
namespace ApplicationServices
{
public static class SearchEngineActions
{
@ -16,11 +16,5 @@ namespace ApplicationService
var engine = new LibationSearchEngine.SearchEngine();
engine.UpdateTags(book.AudibleProductId, book.UserDefinedItem.Tags);
}
public static async Task ProductReIndexAsync(string productId)
{
var engine = new LibationSearchEngine.SearchEngine();
await engine.UpdateBookAsync(productId).ConfigureAwait(false);
}
}
}

View File

@ -0,0 +1,21 @@
using DataLayer;
namespace ApplicationServices
{
/// <summary>Persists tag edits made on a book and keeps the search index in step with them.</summary>
public static class TagUpdater
{
    /// <summary>
    /// Saves <paramref name="book"/> through a freshly created <c>LibationContext</c> and, when the
    /// save reports at least one change, forwards the book to <c>SearchEngineActions.UpdateBookTags</c>.
    /// </summary>
    /// <param name="book">The book entity to attach, save and (if changed) re-index.</param>
    /// <returns>The number returned by <c>SaveChanges()</c>.</returns>
    public static int IndexChangedTags(Book book)
    {
        using (var dbContext = LibationContext.Create())
        {
            // the entity was loaded elsewhere, so it has to be attached via Update() before saving
            dbContext.Update(book);
            var savedCount = dbContext.SaveChanges();

            // nothing was written: the search index is already current
            if (savedCount <= 0)
                return savedCount;

            // tags-specific step
            SearchEngineActions.UpdateBookTags(book);
            return savedCount;
        }
    }
}
}

View File

@ -1,11 +0,0 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netstandard2.1</TargetFramework>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\FileManager\FileManager.csproj" />
</ItemGroup>
</Project>

View File

@ -1,66 +0,0 @@
using System;
using System.Linq;
using Dinah.Core;
namespace AudibleDotCom
{
/// <summary>
/// The kinds of audible.com page this assembly models.
/// The numeric values are cast to <c>int</c> and used as the enumeration id of the matching <c>AudiblePage</c>.
/// </summary>
public enum AudiblePageType
{
    /// <summary>A single product's detail page.</summary>
    ProductDetails = 1,
    /// <summary>The user's library page.</summary>
    Library = 2
}
/// <summary>Extension helpers for <see cref="AudiblePageType"/>.</summary>
public static class AudiblePageExt
{
    /// <summary>Looks up the <see cref="AudiblePage"/> instance which corresponds to the enum value.</summary>
    /// <param name="audiblePage">The enum value to translate.</param>
    /// <returns>Whatever <see cref="AudiblePage.FromPageType"/> returns for <paramref name="audiblePage"/>.</returns>
    public static AudiblePage GetAudiblePageRobust(this AudiblePageType audiblePage)
    {
        return AudiblePage.FromPageType(audiblePage);
    }
}
/// <summary>
/// Class-based counterpart of <see cref="AudiblePageType"/>. Each concrete page carries its enum value,
/// an abbreviation (stored as the enumeration's display name) and a url template.
/// </summary>
public abstract partial class AudiblePage : Enumeration<AudiblePage>
{
    /// <summary>The enum value this page instance represents.</summary>
    public AudiblePageType AudiblePageType { get; }

    /// <summary>Short code for the page; this is the enumeration's <c>DisplayName</c>.</summary>
    public string Abbreviation
    {
        get { return DisplayName; }
    }

    /// <summary>For pages which need a param, the param is marked with {0}</summary>
    protected abstract string Url { get; }

    /// <param name="audiblePage">Enum value; its int value becomes the enumeration id.</param>
    /// <param name="abbreviation">Passed to the base class as the display name.</param>
    protected AudiblePage(AudiblePageType audiblePage, string abbreviation)
        : base((int)audiblePage, abbreviation)
    {
        AudiblePageType = audiblePage;
    }

    /// <summary>
    /// Maps a concrete page class to its enum value. Handy inside generic classes which only know the page type, eg:
    ///   public abstract class PageScraper&lt;T&gt; where T : AudiblePage { ... GetAudiblePageFromType(typeof(T)) ... }
    /// Exactly one registered page must have the given runtime type.
    /// </summary>
    public static AudiblePageType GetAudiblePageFromType(Type audiblePageRobustType)
    {
        var match = GetAll().Single(page => page.GetType() == audiblePageRobustType);
        return (AudiblePageType)match.Id;
    }

    /// <summary>Looks up the page instance whose enumeration id equals the int value of <paramref name="audiblePage"/>.</summary>
    public static AudiblePage FromPageType(AudiblePageType audiblePage)
    {
        return FromValue((int)audiblePage);
    }

    /// <summary>Formats <see cref="Url"/> with <paramref name="id"/> as argument {0}.</summary>
    public string GetUrl(string id)
    {
        return string.Format(Url, id);
    }
}
/// <summary>Library page portion of <c>AudiblePage</c>: exposes the singleton and defines its concrete class.</summary>
public abstract partial class AudiblePage : Enumeration<AudiblePage>
{
    /// <summary>The library page singleton, typed as the base class.</summary>
    public static AudiblePage Library { get; } = LibraryPage.Instance;

    /// <summary>The user's library page. Abbreviation "LIB"; the url takes no parameter.</summary>
    public class LibraryPage : AudiblePage
    {
        #region singleton stuff
        public static LibraryPage Instance { get; } = new LibraryPage();
        // NOTE(review): the explicit empty static ctor is presumably here so the type is not marked
        // beforefieldinit, ie: Instance is created exactly when the type is first used. do not remove without checking
        static LibraryPage() { }
        private LibraryPage() : base(AudiblePageType.Library, "LIB") { }
        #endregion

        // no {0} placeholder: GetUrl() returns this string regardless of the id passed in
        protected override string Url => "http://www.audible.com/lib";
    }
}
/// <summary>Product details portion of <c>AudiblePage</c>: exposes the singleton and defines its concrete class.</summary>
public abstract partial class AudiblePage : Enumeration<AudiblePage>
{
    /// <summary>The product details page singleton, typed as the base class.</summary>
    public static AudiblePage Product { get; } = ProductDetailPage.Instance;

    /// <summary>A single product's detail page. Abbreviation "PD"; the url takes the product id as {0}.</summary>
    public class ProductDetailPage : AudiblePage
    {
        #region singleton stuff
        public static ProductDetailPage Instance { get; } = new ProductDetailPage();
        // NOTE(review): the explicit empty static ctor is presumably here so the type is not marked
        // beforefieldinit, ie: Instance is created exactly when the type is first used. do not remove without checking
        static ProductDetailPage() { }
        private ProductDetailPage() : base(AudiblePageType.ProductDetails, "PD") { }
        #endregion

        // {0} is replaced with the id passed to GetUrl()
        protected override string Url => "http://www.audible.com/pd/{0}";
    }
}
}

View File

@ -1,43 +0,0 @@
using FileManager;
namespace AudibleDotCom
{
/// <summary>
/// The html of one audible page together with the page type and page id it belongs to.
/// Can be round-tripped through a string whose first line is an html comment header:
///   &lt;!-- |ABBREVIATION|pageId| --&gt;
/// </summary>
public class AudiblePageSource
{
    /// <summary>Which kind of page <see cref="Source"/> was taken from.</summary>
    public AudiblePageType AudiblePage { get; }
    /// <summary>The page html.</summary>
    public string Source { get; }
    /// <summary>Id of the page, if any. May be null.</summary>
    public string PageId { get; }

    public AudiblePageSource(AudiblePageType audiblePage, string source, string pageId)
    {
        PageId = pageId;
        Source = source;
        AudiblePage = audiblePage;
    }

    /// <summary>declawed allows local file to safely be reloaded in chrome
    /// NOTE ABOUT DECLAWED FILES
    /// making them safer also breaks functionality
    /// eg: previously hidden parts become visible. this changes how selenium can parse pages.
    /// hidden elements don't expose .Text property</summary>
    public AudiblePageSource Declawed()
    {
        var declawedSource = FileUtility.Declaw(Source);
        return new AudiblePageSource(AudiblePage, declawedSource, PageId);
    }

    /// <summary>Header line (page abbreviation + trimmed page id, null id written as empty) followed by CRLF and the raw source.</summary>
    public string Serialized()
    {
        var abbreviation = AudiblePage.GetAudiblePageRobust().Abbreviation;
        var trimmedId = (PageId ?? "").Trim();
        return $"<!-- |{abbreviation}|{trimmedId}| -->\r\n" + Source;
    }

    /// <summary>
    /// Inverse of <see cref="Serialized"/>. Everything up to the first '\n' is the header; it is split on '|'
    /// and fields 1 and 2 are read as abbreviation and page id. Everything after that '\n' is the source.
    /// </summary>
    /// <param name="serializedSource">Text in the format produced by <see cref="Serialized"/>.</param>
    public static AudiblePageSource Deserialize(string serializedSource)
    {
        var headerEnd = serializedSource.IndexOf('\n');
        var headerLine = serializedSource.Substring(0, headerEnd);
        var headerFields = headerLine.Split('|');

        var abbrev = headerFields[1];
        var pageId = headerFields[2];

        var body = serializedSource.Substring(headerEnd + 1);

        var pageType = AudibleDotCom.AudiblePage.FromDisplayName(abbrev).AudiblePageType;
        return new AudiblePageSource(pageType, body, pageId);
    }
}
}

View File

@ -1,23 +0,0 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netstandard2.1</TargetFramework>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Selenium.Support" Version="3.141.0" />
<PackageReference Include="Selenium.WebDriver" Version="3.141.0" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\AudibleDotCom\AudibleDotCom.csproj" />
<ProjectReference Include="..\CookieMonster\CookieMonster.csproj" />
</ItemGroup>
<ItemGroup>
<None Update="chromedriver.exe">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>
</Project>

View File

@ -1,184 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using AudibleDotCom;
using Dinah.Core.Humanizer;
using OpenQA.Selenium;
using OpenQA.Selenium.Chrome;
using OpenQA.Selenium.Support.UI;
namespace AudibleDotComAutomation
{
/// <summary>browser manipulation. web driver access
/// browser operators. create and store web driver, browser navigation which can vary depending on whether anon or auth'd
///
/// this base class: is online. no auth. used for most pages. retain no chrome cookies</summary>
public abstract class SeleniumRetriever : IPageRetriever
{
    #region // chrome driver details
    /*
    HIDING CHROME CONSOLE WINDOW
    hiding chrome console window has proven to cause more headaches than it solves. here's how to do it though:
    // can also use CreateDefaultService() overloads to specify driver path and/or file name
    var chromeDriverService = ChromeDriverService.CreateDefaultService();
    chromeDriverService.HideCommandPromptWindow = true;
    return new ChromeDriver(chromeDriverService, options);
    HEADLESS CHROME
    this WOULD be how to do headless. but amazon/audible are far too tricksy about their changes and anti-scraping measures
    which renders 'headless' mode useless
    var options = new ChromeOptions();
    options.AddArgument("--headless");
    SPECIFYING DRIVER LOCATION
    if continues to have trouble finding driver:
    var driver = new ChromeDriver(@"C:\my\path\to\chromedriver\directory");
    var chromeDriverService = ChromeDriverService.CreateDefaultService(@"C:\my\path\to\chromedriver\directory");
    */
    #endregion

    /// <summary>The chrome driver owned by this retriever. Created in the ctor, quit in <see cref="Dispose(bool)"/>.</summary>
    protected IWebDriver Driver { get; }

    Humanizer humanizer { get; } = new Humanizer();

    // id of the loading overlay which is shown while the library page loads
    By spinnerLocator { get; } = By.Id("library-main-overlay");

    private bool isFirstRun = true;
    private bool isDisposed;

    protected SeleniumRetriever()
    {
        // NOTE: this is a virtual call from a ctor. overrides run before the derived ctor body,
        // so they must not rely on any derived-class state
        Driver = new ChromeDriver(ctorCreateChromeOptions());
    }

    /// <summary>no auth. retain no chrome cookies</summary>
    protected virtual ChromeOptions ctorCreateChromeOptions() => new ChromeOptions();

    /// <summary>Click an element, then wait for the loading overlay (if any) to go away plus a fixed 5 seconds.</summary>
    protected async Task AudibleLinkClickAsync(IWebElement element)
    {
        // EACH CALL to audible should have a small random wait to reduce chances of scrape detection
        await humanizer.Wait();
        await Task.Run(() => Driver.Click(element));
        await waitForSpinnerAsync();
        // sometimes these clicks just take a while. add a few more seconds
        await Task.Delay(5000);
    }

    private async Task waitForSpinnerAsync()
    {
        // if loading overlay w/spinner exists: pause, wait for it to end
        await Task.Delay(100);

        // WebDriverWait.Until() blocks the calling thread (here: up to 60 seconds).
        // run it off-thread like every other driver call in this class so an awaiting caller is not frozen
        await Task.Run(() =>
        {
            if (Driver.FindElements(spinnerLocator).Count > 0)
                new WebDriverWait(Driver, TimeSpan.FromSeconds(60))
                    .Until(ExpectedConditions.InvisibilityOfElementLocated(spinnerLocator));
        });
    }

    /// <summary>One-time warm up which runs before the first page is requested. Derived classes extend this to log in.</summary>
    protected virtual async Task FirstRunAsync()
    {
        // load with no beginning wait. then wait 7 seconds to allow for page flicker. it usually happens after ~5 seconds. can happen irrespective of login state
        await Task.Run(() => Driver.Navigate().GoToUrl("http://www.audible.com/"));
        await Task.Delay(7000);
    }

    /// <summary>Navigate to the requested page and collect the html of it and of every following page.</summary>
    /// <param name="audiblePage">Which page to load</param>
    /// <param name="pageId">Id to insert into the page url. Can be null for pages without a param</param>
    /// <returns>One entry per page visited, in visiting order</returns>
    public async Task<IEnumerable<AudiblePageSource>> GetPageSourcesAsync(AudiblePageType audiblePage, string pageId = null)
    {
        if (isFirstRun)
        {
            await FirstRunAsync();
            // only flagged once FirstRunAsync succeeded. if it throws, the next call tries again
            isFirstRun = false;
        }
        await initFirstPageAsync(audiblePage, pageId);
        return await processUrl(audiblePage, pageId);
    }

    private async Task initFirstPageAsync(AudiblePageType audiblePage, string pageId)
    {
        // EACH CALL to audible should have a small random wait to reduce chances of scrape detection
        await humanizer.Wait();
        var url = audiblePage.GetAudiblePageRobust().GetUrl(pageId);
        await Task.Run(() => Driver.Navigate().GoToUrl(url));
        await waitForSpinnerAsync();
    }

    private async Task<IEnumerable<AudiblePageSource>> processUrl(AudiblePageType audiblePage, string pageId)
    {
        var pageSources = new List<AudiblePageSource>();
        // do/while: the current page is always captured. hasMorePagesAsync() navigates as a side effect
        do
        {
            pageSources.Add(new AudiblePageSource(audiblePage, Driver.PageSource, pageId));
        }
        while (await hasMorePagesAsync());
        return pageSources;
    }

    #region has more pages
    /// <summary>if no more pages, return false. else, navigate to next page and return true</summary>
    private async Task<bool> hasMorePagesAsync()
    {
        var next = //old_hasMorePages() ??
            new_hasMorePages();
        if (next == null)
            return false;
        await AudibleLinkClickAsync(next);
        return true;
    }

    // previous page layout. no longer called; kept for reference (see commented-out call in hasMorePagesAsync)
    private IWebElement old_hasMorePages()
    {
        var parentElements = Driver.FindElements(By.ClassName("adbl-page-next"));
        if (parentElements.Count == 0)
            return null;
        var childElements = parentElements[0].FindElements(By.LinkText("NEXT"));
        if (childElements.Count != 1)
            return null;
        return childElements[0];
    }

    // ~ oct 2017
    private IWebElement new_hasMorePages()
    {
        // get all active/enabled navigation links
        var pageNavLinks = Driver.FindElements(By.ClassName("library-load-page"));
        if (pageNavLinks.Count == 0)
            return null;
        // get only the right chevron if active.
        // note: there are also right chevrons which are not for wish list navigation which is why we first filter by library-load-page
        var nextLink = pageNavLinks
            .Where(p => p.FindElements(By.ClassName("bc-icon-chevron-right")).Count > 0)
            .ToList(); // cut-off delayed execution
        if (nextLink.Count == 0)
            return null;
        // Single(): more than one match means the page layout is not what we expect. throws
        return nextLink.Single().FindElement(By.TagName("button"));
    }
    #endregion

    #region IDisposable pattern
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    protected virtual void Dispose(bool disposing)
    {
        // safe to call more than once
        if (isDisposed)
            return;

        if (disposing && Driver != null)
        {
            // Quit() does cleanup AND disposes
            Driver.Quit();
        }

        isDisposed = true;
    }
    #endregion
}
}

View File

@ -1,26 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using OpenQA.Selenium;
namespace AudibleDotComAutomation
{
/// <summary>for user collections: lib, WL.
/// adds the "is the user logged in" check shared by the retrievers which need auth</summary>
public abstract class AuthSeleniumRetriever : SeleniumRetriever
{
    // needed?
    protected AuthSeleniumRetriever() : base() { }

    /// <summary>True when the current page contains the link found by <see cref="GetListenerPageLink"/>.</summary>
    protected bool IsLoggedIn
    {
        get { return GetListenerPageLink() != null; }
    }

    /// <summary>First anchor on the current page whose href contains '/review-by-author'. null if there is none.</summary>
    protected IWebElement GetListenerPageLink()
    {
        var matches = Driver.FindElements(By.XPath("//a[contains(@href, '/review-by-author')]"));
        return matches.Count > 0 ? matches[0] : null;
    }
}
}

View File

@ -1,130 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Threading.Tasks;
using AudibleDotCom;
using CookieMonster;
using Dinah.Core;
using Dinah.Core.Humanizer;
namespace AudibleDotComAutomation
{
/// <summary>
/// Retrieves library pages with plain http requests instead of a browser.
/// Authentication comes from the audible/amazon cookies found by CookieMonster.
/// Only <see cref="AudiblePageType.Library"/> is supported.
/// </summary>
public class BrowserlessRetriever : IPageRetriever
{
    Humanizer humanizer { get; } = new Humanizer();

    /// <summary>Download every page of the requested page type.</summary>
    /// <param name="audiblePage">Must be <see cref="AudiblePageType.Library"/></param>
    /// <param name="pageId">Not used by this retriever</param>
    /// <exception cref="NotImplementedException">Any page type other than Library</exception>
    public async Task<IEnumerable<AudiblePageSource>> GetPageSourcesAsync(AudiblePageType audiblePage, string pageId = null)
    {
        switch (audiblePage)
        {
            case AudiblePageType.Library: return await getLibraryPageSourcesAsync();
            default: throw new NotImplementedException();
        }
    }

    // requests page 1, 2, 3... until a page no longer shows an enabled "next" button
    private async Task<IEnumerable<AudiblePageSource>> getLibraryPageSourcesAsync()
    {
        var collection = new List<AudiblePageSource>();
        var cookies = await getAudibleCookiesAsync();
        var currPageNum = 1;
        bool hasMorePages;
        do
        {
            // EACH CALL to audible should have a small random wait to reduce chances of scrape detection
            await humanizer.Wait();
            var html = await getLibraryPageAsync(cookies, currPageNum);
            var pageSource = new AudiblePageSource(AudiblePageType.Library, html, null);
            collection.Add(pageSource);
            hasMorePages = getHasMorePages(pageSource.Source);
            currPageNum++;
        } while (hasMorePages);
        return collection;
    }

    // live cookies whose domain mentions audible.com, adbl, or amazon.com. case-insensitive
    private static async Task<CookieContainer> getAudibleCookiesAsync()
    {
        var liveCookies = await CookiesHelper.GetLiveCookieValuesAsync();
        var audibleCookies = liveCookies.Where(c
            => c.Domain.ContainsInsensitive("audible.com")
            || c.Domain.ContainsInsensitive("adbl")
            || c.Domain.ContainsInsensitive("amazon.com"))
            .ToList();
        var cookies = new CookieContainer();
        foreach (var c in audibleCookies)
            cookies.Add(new Cookie(c.Name, c.Value, "/", c.Domain));
        return cookies;
    }

    private static bool getHasMorePages(string html)
    {
        var doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(html);
        // final page, invalid page:
        // <span class="bc-button
        // bc-button-secondary
        // nextButton
        // bc-button-disabled">
        // only page: ???
        // has more pages:
        // <span class="bc-button
        // bc-button-secondary
        // refinementFormButton
        // nextButton">
        var next_active_link = doc
            .DocumentNode
            .Descendants()
            .FirstOrDefault(n =>
                n.HasClass("nextButton") &&
                !n.HasClass("bc-button-disabled"));
        return next_active_link != null;
    }

    private static async Task<string> getLibraryPageAsync(CookieContainer cookies, int pageNum)
    {
        #region // POST example (from 2017 ajax)
        // var destination = "https://www.audible.com/lib-ajax";
        // var webRequest = (HttpWebRequest)WebRequest.Create(destination);
        // webRequest.Method = "POST";
        // webRequest.Accept = "*/*";
        // webRequest.AllowAutoRedirect = false;
        // webRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.0.3705)";
        // webRequest.ContentType = "application/x-www-form-urlencoded; charset=UTF-8";
        // webRequest.Credentials = null;
        //
        // webRequest.CookieContainer = new CookieContainer();
        // webRequest.CookieContainer.Add(cookies.GetCookies(new Uri(destination)));
        //
        // var postData = $"progType=all&timeFilter=all&itemsPerPage={itemsPerPage}&searchTerm=&searchType=&sortColumn=&sortType=down&page={pageNum}&mode=normal&subId=&subTitle=";
        // var data = Encoding.UTF8.GetBytes(postData);
        // webRequest.ContentLength = data.Length;
        // using var dataStream = webRequest.GetRequestStream();
        // dataStream.Write(data, 0, data.Length);
        #endregion
        var destination = "https://" + $"www.audible.com/lib?purchaseDateFilter=all&programFilter=all&sortBy=PURCHASE_DATE.dsc&page={pageNum}";
        var webRequest = (HttpWebRequest)WebRequest.Create(destination);
        webRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.0.3705)";
        // send only the cookies which apply to this url
        webRequest.CookieContainer = new CookieContainer();
        webRequest.CookieContainer.Add(cookies.GetCookies(new Uri(destination)));

        // response, stream and reader all hold the connection: dispose them once the body is read.
        // this runs once per library page, so leaking them adds up
        using var webResponse = await webRequest.GetResponseAsync();
        using var responseStream = webResponse.GetResponseStream();
        using var reader = new StreamReader(responseStream);
        return await reader.ReadToEndAsync();
    }

    // nothing to release: no browser, and requests are disposed as they complete
    public void Dispose() { }
}
}

View File

@ -1,75 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using OpenQA.Selenium;
using OpenQA.Selenium.Support.UI;
namespace AudibleDotComAutomation
{
/// <summary>online. authenticates by typing the supplied username and password into the sign-in form.
/// retain no chrome cookies</summary>
public class ManualLoginSeleniumRetriever : AuthSeleniumRetriever
{
    private string _username;
    private string _password;

    /// <param name="username">Typed into the element with id 'ap_email'</param>
    /// <param name="password">Typed into the element with id 'ap_password'</param>
    public ManualLoginSeleniumRetriever(string username, string password) : base()
    {
        _password = password;
        _username = username;
    }

    /// <summary>Base warm up, then: open sign-in page, fill in credentials, submit, confirm that we're logged in.</summary>
    /// <exception cref="Exception">"not logged in" when the logged-in link is missing after submitting</exception>
    protected override async Task FirstRunAsync()
    {
        await base.FirstRunAsync();

        // can't extract this into AuthSeleniumRetriever ctor. can't use username/pw until prev ctors are complete

        // click login link
        var loginLink = getLoginLink();
        await AudibleLinkClickAsync(loginLink);

        // wait until login page loads
        var emailLocator = By.Id("ap_email");
        var loginPageWait = new WebDriverWait(Driver, TimeSpan.FromSeconds(60));
        loginPageWait.Until(ExpectedConditions.ElementIsVisible(emailLocator));

        // insert credentials
        var emailBox = Driver.FindElement(By.Id("ap_email"));
        emailBox.SendKeys(_username);
        var passwordBox = Driver.FindElement(By.Id("ap_password"));
        passwordBox.SendKeys(_password);

        // submit. the button has been seen with either of 2 ids
        var primarySubmit = Driver.FindElements(By.Id("signInSubmit"));
        var submitElement = primarySubmit.Count > 0
            ? primarySubmit[0]
            : Driver.FindElement(By.Id("signInSubmit-input"));
        await AudibleLinkClickAsync(submitElement);

        // wait until audible page loads
        var loggedInWait = new WebDriverWait(Driver, TimeSpan.FromSeconds(60));
        loggedInWait.Until(d => GetListenerPageLink());

        if (!IsLoggedIn)
            throw new Exception("not logged in");
    }

    /// <summary>First anchor whose href contains '/signin'.</summary>
    /// <exception cref="NotFoundException">No such anchor on the current page</exception>
    private IWebElement getLoginLink()
    {
        var candidates = Driver.FindElements(By.XPath("//a[contains(@href, '/signin')]"));
        if (candidates.Count > 0)
            return candidates[0];

        //
        // ADD ADDITIONAL ACCEPTABLE PATTERNS HERE
        //
        //{
        // var loginLinkElements2 = Driver.FindElements(By.XPath("//a[contains(@href, '/signin')]"));
        // if (loginLinkElements2.Any())
        // return loginLinkElements2[0];
        //}

        throw new NotFoundException("Cannot locate login link");
    }
}
}

View File

@ -1,38 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using OpenQA.Selenium.Chrome;
namespace AudibleDotComAutomation
{
/// <summary>online. load auth, cookies etc from user data</summary>
public class UserDataSeleniumRetriever : AuthSeleniumRetriever
{
    public UserDataSeleniumRetriever() : base()
    {
        // the login check does NOT belong here. the base ctor only creates the driver; nothing has been
        // navigated to yet, so the logged-in link can't be found and the check would fail.
        // throwing from the ctor would also orphan the chrome driver: the caller never gets an instance to Dispose()
    }

    /// <summary>Base warm up (loads audible.com), then confirm that the loaded chrome profile is logged in.</summary>
    /// <exception cref="Exception">"not logged in" when the logged-in link is missing from the audible page</exception>
    protected override async Task FirstRunAsync()
    {
        await base.FirstRunAsync();

        if (!IsLoggedIn)
            throw new Exception("not logged in");
    }

    /// <summary>Use current user data/chrome cookies. DO NOT use if chrome is already open</summary>
    protected override ChromeOptions ctorCreateChromeOptions()
    {
        var options = base.ctorCreateChromeOptions();
        // load user data incl cookies. default on windows:
        // %LOCALAPPDATA%\Google\Chrome\User Data
        // C:\Users\username\AppData\Local\Google\Chrome\User Data
        var chromeDefaultWindowsUserDataDir = System.IO.Path.Combine(
            Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData),
            "Google",
            "Chrome",
            "User Data");
        options.AddArguments($"user-data-dir={chromeDefaultWindowsUserDataDir}");
        return options;
    }
}
}

View File

@ -1,12 +0,0 @@
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using AudibleDotCom;
namespace AudibleDotComAutomation
{
/// <summary>Something which can fetch the html of audible pages. Disposable because implementations may own a browser.</summary>
public interface IPageRetriever : IDisposable
{
    /// <summary>Fetch the source of the requested page. The result has one entry per page retrieved.</summary>
    /// <param name="audiblePage">Which kind of page to fetch</param>
    /// <param name="pageId">Id of the page, for page types which need one. Defaults to null</param>
    Task<IEnumerable<AudiblePageSource>> GetPageSourcesAsync(AudiblePageType audiblePage, string pageId = null);
}
}

View File

@ -1,115 +0,0 @@
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using OpenQA.Selenium;
using OpenQA.Selenium.Support.UI;
namespace AudibleDotComAutomation.Examples
{
/// <summary>
/// Reference snippets only: how to wait for elements and how XPath expressions resolve with selenium.
/// Nothing in here is meant to be run.
/// </summary>
public class SeleniumExamples
{
    public IWebDriver Driver { get; set; }

    // first anchor whose href contains '/review-by-author'. null if there is none
    IWebElement GetListenerPageLink()
    {
        var listenerPageElement = Driver.FindElements(By.XPath("//a[contains(@href, '/review-by-author')]"));
        if (listenerPageElement.Count > 0)
            return listenerPageElement[0];
        return null;
    }

    void wait_examples()
    {
        // built-in condition
        new WebDriverWait(Driver, TimeSpan.FromSeconds(60))
            .Until(ExpectedConditions.ElementIsVisible(By.Id("mast-member-acct-name")));
        // custom condition: waiting continues for as long as the lambda returns null
        new WebDriverWait(Driver, TimeSpan.FromSeconds(60))
            .Until(d => GetListenerPageLink());
        // https://stackoverflow.com/questions/21339339/how-to-add-custom-expectedconditions-for-selenium
        new WebDriverWait(Driver, TimeSpan.FromSeconds(60))
            .Until((d) =>
            {
                // could be refactored into OR, AND per the java selenium library
                // check 1
                var e1 = Driver.FindElements(By.Id("mast-member-acct-name"));
                if (e1.Count > 0)
                    return e1[0];
                // check 2
                var e2 = Driver.FindElements(By.Id("header-account-info-0"));
                if (e2.Count > 0)
                    return e2[0];
                return null;
            });
    }

    void XPath_examples()
    {
        // <tr>
        // <td>1</td>
        // <td>2</td>
        // </tr>
        // <tr>
        // <td>3</td>
        // <td>4</td>
        // </tr>
        ReadOnlyCollection<IWebElement> all_tr = Driver.FindElements(By.XPath("/tr"));
        IWebElement first_tr = Driver.FindElement(By.XPath("/tr"));
        IWebElement second_tr = Driver.FindElement(By.XPath("/tr[2]"));
        // beginning with a single / starts from root
        IWebElement ERROR_not_at_root = Driver.FindElement(By.XPath("/td"));
        // 2 slashes searches all, NOT just descendants
        IWebElement td1 = Driver.FindElement(By.XPath("//td"));
        // 2 slashes still searches all, NOT just descendants
        IWebElement still_td1 = first_tr.FindElement(By.XPath("//td"));
        // dot operator starts from current node specified by first_tr
        // dot + double slash: descendant at any depth
        // NOTE(review): despite the variable names, with first_tr as the context node both of the next
        // 2 lookups should yield a cell of the FIRST row of the markup above (1), not 3 -- confirm
        IWebElement td3 = first_tr.FindElement(By.XPath(
            ".//td"));
        // dot + single slash: immediate child only
        IWebElement td3_also = first_tr.FindElement(By.XPath(
            "./td"));
        // <input type="hidden" name="asin" value="ABCD1234">
        IWebElement find_anywhere_in_doc = first_tr.FindElement(By.XPath(
            "//input[@name='asin']"));
        IWebElement find_in_subsection = first_tr.FindElement(By.XPath(
            ".//input[@name='asin']"));
        // search entire page. useful for:
        // - RulesLocator to find something that only appears once on the page
        // - non-list pages. eg: product details
        var onePerPageRules = new RuleFamily
        {
            RowsLocator = By.XPath("/*"), // search entire page
            Rules = new RuleSet {
                (row, productItem) => productItem.CustomerId = row.FindElement(By.XPath("//input[@name='cust_id']")).GetValue(),
                (row, productItem) => productItem.UserName = row.FindElement(By.XPath("//input[@name='user_name']")).GetValue()
            }
        };
        // - applying conditionals to entire page
        var ruleFamily = new RuleFamily
        {
            RowsLocator = By.XPath("//*[starts-with(@id,'adbl-library-content-row-')]"),
            // Rules = getRuleSet()
        };
    }

    #region Rules classes stubs
    // minimal stand-ins so that the rule examples above compile. the enumerators are never meant to be called
    public class RuleFamily { public By RowsLocator; public IRuleClass Rules; }
    public interface IRuleClass { }
    public class RuleSet : IRuleClass, IEnumerable<IRuleClass>
    {
        // the Add() overloads are what allow the collection initializer syntax used above
        public void Add(IRuleClass ruleClass) { }
        public void Add(RuleAction action) { }
        public IEnumerator<IRuleClass> GetEnumerator() => throw new NotImplementedException();
        System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator() => throw new NotImplementedException();
    }
    public delegate void RuleAction(IWebElement row, ProductItem productItem);
    public class ProductItem { public string CustomerId; public string UserName; }
    #endregion
}
}

View File

@ -1,47 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using OpenQA.Selenium;
using OpenQA.Selenium.Interactions;
namespace AudibleDotComAutomation
{
/// <summary>Attribute-based readers for <see cref="IWebElement"/>.</summary>
public static class IWebElementExt
{
    /// <summary>
    /// Reads the element's 'textContent' attribute. Unlike webElement.Text, which only works on visible
    /// elements, this also returns the text of hidden elements.
    /// http://yizeng.me/2014/04/08/get-text-from-hidden-elements-using-selenium-webdriver/#c-sharp
    /// </summary>
    public static string GetText(this IWebElement webElement)
    {
        return webElement.GetAttribute("textContent");
    }

    /// <summary>Reads the element's 'value' attribute.</summary>
    public static string GetValue(this IWebElement webElement)
    {
        return webElement.GetAttribute("value");
    }
}
/// <summary>Click/scroll helpers for <see cref="IWebDriver"/>.</summary>
public static class IWebDriverExt
{
    /// <summary>Use this instead of element.Click() to ensure that the element is clicked even if it's not currently scrolled into view</summary>
    public static void Click(this IWebDriver driver, IWebElement element)
    {
        // from: https://stackoverflow.com/questions/12035023/selenium-webdriver-cant-click-on-a-link-outside-the-page
        //// this works but isn't really the same
        //element.SendKeys(Keys.Enter);
        //// didn't work for me
        //new Actions(driver)
        // .MoveToElement(element)
        // .Click()
        // .Build()
        // .Perform();
        driver.ScrollIntoView(element);
        element.Click();
    }

    /// <summary>Scrolls the window to the element's location by running window.scroll(x, y) in the page.</summary>
    /// <param name="driver">Must also implement <see cref="IJavaScriptExecutor"/>; otherwise the cast throws</param>
    /// <param name="element">Element whose Location supplies the scroll target</param>
    public static void ScrollIntoView(this IWebDriver driver, IWebElement element)
    {
        var executor = (IJavaScriptExecutor)driver;

        // read the location once: one lookup instead of two, and x/y are guaranteed to come from the same reading
        var location = element.Location;

        // the numbers end up inside javascript source, so they must be formatted culture-invariant.
        // some cultures render the negative sign as a non-ascii character, which would be a script syntax error
        var script = System.FormattableString.Invariant($"window.scroll({location.X}, {location.Y})");

        executor.ExecuteScript(script);
    }
}
}

View File

@ -1,16 +0,0 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netstandard2.1</TargetFramework>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="System.Data.SQLite.Core" Version="1.0.112" />
<PackageReference Include="System.Security.Cryptography.ProtectedData" Version="4.6.0" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\FileManager\FileManager.csproj" />
</ItemGroup>
</Project>

View File

@ -1,66 +0,0 @@
using System;
using System.Collections.Generic;
using System.Data.SQLite;
using System.IO;
using System.Text;
using System.Threading.Tasks;
using FileManager;
namespace CookieMonster
{
/// <summary>Reads cookies from the sqlite cookie store of chrome's Default profile.</summary>
internal class Chrome : IBrowser
{
    /// <summary>
    /// All cookies in the Default profile. Empty when the cookie file does not exist.
    /// Cookies whose timestamps can not be converted are skipped.
    /// </summary>
    public async Task<IEnumerable<CookieValue>> GetAllCookiesAsync()
    {
        var col = new List<CookieValue>();
        var strPath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.LocalApplicationData), @"Google\Chrome\User Data\Default\Cookies");
        if (!FileUtility.FileExists(strPath))
            return col;
        //
        // IF WE GET AN ERROR HERE
        // then add a reference to sqlite core in the project which is ultimately calling this.
        // a project which directly references CookieMonster doesn't need to also ref sqlite.
        // however, for any further number of abstractions, the project needs to directly ref sqlite.
        // eg: this will not work unless the winforms proj adds sqlite to ref.s:
        // LibationWinForm > AudibleDotComAutomation > CookieMonster
        //
        using var conn = new SQLiteConnection("Data Source=" + strPath + ";pooling=false");
        using var cmd = conn.CreateCommand();
        cmd.CommandText = "SELECT host_key, name, value, encrypted_value, last_access_utc, expires_utc FROM cookies;";
        conn.Open();
        using var reader = await cmd.ExecuteReaderAsync().ConfigureAwait(false);
        while (reader.Read())
        {
            var host_key = reader.GetString(0);
            var name = reader.GetString(1);
            var value = reader.GetString(2);
            var last_access_utc = reader.GetInt64(4);
            var expires_utc = reader.GetInt64(5);
            // https://stackoverflow.com/a/25874366
            // no plain text value: the value is in the encrypted column, protected for the current windows user
            if (string.IsNullOrWhiteSpace(value))
            {
                var encrypted_value = (byte[])reader[3];
                var decodedData = System.Security.Cryptography.ProtectedData.Unprotect(encrypted_value, null, System.Security.Cryptography.DataProtectionScope.CurrentUser);
                value = Encoding.ASCII.GetString(decodedData);
            }
            try
            {
                // if something goes wrong in this step (eg: a cookie has an invalid filetime), then just skip this cookie
                col.Add(new CookieValue { Browser = "chrome", Domain = host_key, Name = name, Value = value, LastAccess = chromeTimeToDateTimeUtc(last_access_utc), Expires = chromeTimeToDateTimeUtc(expires_utc) });
            }
            catch { } // deliberate best-effort. see comment above
        }
        return col;
    }

    // Chrome uses 1601-01-01 00:00:00 UTC as the epoch, same as "FILETIME" in Win32.
    // the stored value is multiplied by 10 to get FILETIME's 100ns ticks, ie: chrome counts in microseconds.
    //
    // must be FromFileTimeUtc. FromFileTime returns LOCAL time; re-labelling that as utc with SpecifyKind
    // does not convert it, so every value would be off by the machine's utc offset
    private static DateTime chromeTimeToDateTimeUtc(long time) => DateTime.FromFileTimeUtc(time * 10);
}
}

View File

@ -1,61 +0,0 @@
using System;
using System.Collections.Generic;
using System.Data.SQLite;
using System.IO;
using System.Threading.Tasks;
using FileManager;
namespace CookieMonster
{
/// <summary>Reads cookies from the cookies.sqlite of the firefox "*.default" profile.</summary>
internal class FireFox : IBrowser
{
    private static readonly DateTime unixEpochUtc = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);

    /// <summary>
    /// All cookies of the default profile. Empty when the profiles folder is missing, when there isn't
    /// exactly one "*.default" profile, or when that profile has no cookies.sqlite.
    /// Cookies whose timestamps can not be represented as a DateTime are skipped.
    /// </summary>
    public async Task<IEnumerable<CookieValue>> GetAllCookiesAsync()
    {
        var col = new List<CookieValue>();

        string strPath = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), @"Mozilla\Firefox\Profiles");
        // Profiles is a directory: it needs a directory check, not a file check
        if (!Directory.Exists(strPath))
            return col;

        var dirs = new DirectoryInfo(strPath).GetDirectories("*.default");
        if (dirs.Length != 1)
            return col;

        strPath = Path.Combine(strPath, dirs[0].Name, "cookies.sqlite");
        if (!FileUtility.FileExists(strPath))
            return col;

        // First copy the cookie jar so that we can read the cookies from unlocked copy while FireFox is running
        var strTemp = strPath + ".temp";
        File.Copy(strPath, strTemp, true);

        try
        {
            // Now open the temporary cookie jar and extract Value from the cookie if we find it.
            // explicit using blocks: connection, command and reader must all be closed BEFORE the temp file is deleted below
            using (var conn = new SQLiteConnection("Data Source=" + strTemp + ";pooling=false"))
            using (var cmd = conn.CreateCommand())
            {
                cmd.CommandText = "SELECT host, name, value, lastAccessed, expiry FROM moz_cookies; ";
                conn.Open();
                using (var reader = await cmd.ExecuteReaderAsync().ConfigureAwait(false))
                {
                    while (reader.Read())
                    {
                        var host_key = reader.GetString(0);
                        var name = reader.GetString(1);
                        var value = reader.GetString(2);
                        // 64 bit reads: microseconds since 1970 do not fit in an int, and neither do expiry dates past 2038
                        var lastAccessed = reader.GetInt64(3);
                        var expiry = reader.GetInt64(4);

                        try
                        {
                            col.Add(new CookieValue { Browser = "firefox", Domain = host_key, Name = name, Value = value, LastAccess = lastAccessedToDateTime(lastAccessed), Expires = expiryToDateTime(expiry) });
                        }
                        catch (ArgumentOutOfRangeException)
                        {
                            // timestamp is outside of DateTime's range. skip this cookie, same as the chrome reader does
                        }
                    }
                }
            }
        }
        finally
        {
            // always clean up the copy, also when reading failed
            if (FileUtility.FileExists(strTemp))
                File.Delete(strTemp);
        }

        return col;
    }

    // time is in microseconds since unix epoch. 1 microsecond == 10 ticks
    private static DateTime lastAccessedToDateTime(long time) => unixEpochUtc.AddTicks(time * 10);

    // time is in normal seconds since unix epoch
    private static DateTime expiryToDateTime(long time) => unixEpochUtc.AddSeconds(time);
}
}

View File

@ -1,12 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
namespace CookieMonster
{
/// <summary>One browser's cookie store. Each implementation in this assembly reads one specific browser.</summary>
internal interface IBrowser
{
    /// <summary>Reads the cookies stored by this browser and returns them as <c>CookieValue</c> items.</summary>
    Task<IEnumerable<CookieValue>> GetAllCookiesAsync();
}
}

View File

@ -1,87 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace CookieMonster
{
internal class InternetExplorer : IBrowser
{
public async Task<IEnumerable<CookieValue>> GetAllCookiesAsync()
{
// real locations of Windows Cookies folders
//
// Windows 7:
// C:\Users\username\AppData\Roaming\Microsoft\Windows\Cookies
// C:\Users\username\AppData\Roaming\Microsoft\Windows\Cookies\Low
//
// Windows 8, Windows 8.1, Windows 10:
// C:\Users\username\AppData\Local\Microsoft\Windows\INetCookies
// C:\Users\username\AppData\Local\Microsoft\Windows\INetCookies\Low
var strPath = Environment.GetFolderPath(Environment.SpecialFolder.Cookies);
var col = (await getIECookiesAsync(strPath).ConfigureAwait(false)).ToList();
col = col.Concat(await getIECookiesAsync(Path.Combine(strPath, "Low"))).ToList();
return col;
}
private static async Task<IEnumerable<CookieValue>> getIECookiesAsync(string strPath)
{
var cookies = new List<CookieValue>();
var files = await Task.Run(() => Directory.EnumerateFiles(strPath, "*.txt"));
foreach (string path in files)
{
var cookiesInFile = new List<CookieValue>();
var cookieLines = File.ReadAllLines(path);
CookieValue currCookieVal = null;
for (var i = 0; i < cookieLines.Length; i++)
{
var line = cookieLines[i];
// IE cookie format
// 0 Cookie name
// 1 Cookie value
// 2 Host / path for the web server setting the cookie
// 3 Flags
// 4 Expiration time (low int)
// 5 Expiration time (high int)
// 6 Creation time (low int)
// 7 Creation time (high int)
// 8 Record delimiter == "*"
var pos = i % 9;
long expLoTemp = 0;
long creatLoTemp = 0;
if (pos == 0)
{
currCookieVal = new CookieValue { Browser = "ie", Name = line };
cookiesInFile.Add(currCookieVal);
}
else if (pos == 1)
currCookieVal.Value = line;
else if (pos == 2)
currCookieVal.Domain = line;
else if (pos == 4)
expLoTemp = Int64.Parse(line);
else if (pos == 5)
currCookieVal.Expires = LoHiToDateTime(expLoTemp, Int64.Parse(line));
else if (pos == 6)
creatLoTemp = Int64.Parse(line);
else if (pos == 7)
currCookieVal.LastAccess = LoHiToDateTime(creatLoTemp, Int64.Parse(line));
}
cookies.AddRange(cookiesInFile);
}
return cookies;
}
/// <summary>
/// Joins the low and high halves of a file-time value and converts the result to a UTC <see cref="DateTime"/>.
/// </summary>
/// <param name="lo">Low part of the file time.</param>
/// <param name="hi">High part of the file time; shifted left by 32 bits before being added.</param>
private static DateTime LoHiToDateTime(long lo, long hi)
{
    var fileTime = (hi << 32) + lo;
    return DateTime.FromFileTimeUtc(fileTime);
}
}
}

View File

@ -1,32 +0,0 @@
using System;
namespace CookieMonster
{
/// <summary>A single cookie as read from a browser's cookie store.</summary>
public class CookieValue
{
    public string Browser { get; set; }
    public string Name { get; set; }
    public string Value { get; set; }
    public string Domain { get; set; }
    public DateTime LastAccess { get; set; }
    public DateTime Expires { get; set; }

    /// <summary>
    /// Sanity check on the converted timestamps: true when <see cref="LastAccess"/> lies
    /// within the last 5 years and is not in the future.
    /// </summary>
    public bool IsValid
    {
        get
        {
            // datetimes are stored oddly in each cookie type; this catches grossly wrong conversions.
            // some early conversion attempts produced years like 42, 1955, 4033
            var oldestPlausible = DateTime.UtcNow.AddYears(-5);

            // expiry is deliberately not checked: some sites set absurd years, eg: 9999
            return LastAccess >= oldestPlausible && LastAccess <= DateTime.UtcNow;
        }
    }

    /// <summary>True when <see cref="Expires"/> is earlier than the current UTC time.</summary>
    public bool HasExpired
    {
        get { return Expires < DateTime.UtcNow; }
    }
}
}

View File

@ -1,57 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Dinah.Core.Collections.Generic;
namespace CookieMonster
{
/// <summary>Gathers cookies from every <see cref="IBrowser"/> implementation that can be found.</summary>
public static class CookiesHelper
{
    /// <summary>
    /// Creates one instance of each concrete type assignable to <see cref="IBrowser"/>
    /// among the assemblies loaded in the current app domain.
    /// </summary>
    internal static IEnumerable<IBrowser> GetBrowsers()
    {
        var browsers = new List<IBrowser>();

        foreach (var assembly in AppDomain.CurrentDomain.GetAssemblies())
        {
            foreach (var type in assembly.GetTypes())
            {
                var isConcreteBrowser = typeof(IBrowser).IsAssignableFrom(type) && !type.IsAbstract && !type.IsInterface;
                if (isConcreteBrowser)
                    browsers.Add(Activator.CreateInstance(type) as IBrowser);
            }
        }

        return browsers;
    }

    /// <summary>all. including expired</summary>
    public static async Task<IEnumerable<CookieValue>> GetAllCookieValuesAsync()
    {
        // A foreach { await } loop would query the browsers one after another.
        // Here each call is only started (no await inside the lambda), which yields a Task per browser;
        // Task.WhenAll then awaits all of them together.
        var pendingPerBrowser = GetBrowsers().Select(b => b.GetAllCookiesAsync());
        var cookiesPerBrowser = await Task.WhenAll(pendingPerBrowser).ConfigureAwait(false);

        var allCookies = cookiesPerBrowser.SelectMany(a => a).ToList();

        var everyCookieIsValid = allCookies.All(c => c.IsValid);
        if (!everyCookieIsValid)
            throw new Exception("some date time was converted way too far");

        allCookies.ForEach(c => c.Domain = c.Domain.TrimEnd('/'));

        // for each domain+name, only keep the 1 with the most recent access
        var newestFirst = allCookies.OrderByDescending(c => c.LastAccess);
        return newestFirst
            .DistinctBy(c => new { c.Domain, c.Name })
            .ToList();
    }

    /// <summary>not expired</summary>
    public static async Task<IEnumerable<CookieValue>> GetLiveCookieValuesAsync()
    {
        var everything = await GetAllCookieValuesAsync().ConfigureAwait(false);
        return everything
            .Where(c => !c.HasExpired)
            .ToList();
    }
}
}

View File

@ -0,0 +1,335 @@
// <auto-generated />
using System;
using DataLayer;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Metadata;
using Microsoft.EntityFrameworkCore.Migrations;
using Microsoft.EntityFrameworkCore.Storage.ValueConversion;
namespace DataLayer.Migrations
{
// Designer half of the "NoScraping" migration (id 20191105183104) for LibationContext.
// This file is marked auto-generated; BuildTargetModel declares the entity model associated with this migration.
// NOTE(review): presumably regenerated by the EF Core tooling -- avoid hand edits to the code below.
[DbContext(typeof(LibationContext))]
[Migration("20191105183104_NoScraping")]
partial class NoScraping
{
protected override void BuildTargetModel(ModelBuilder modelBuilder)
{
#pragma warning disable 612, 618
modelBuilder
.HasAnnotation("ProductVersion", "3.0.0")
.HasAnnotation("Relational:MaxIdentifierLength", 128)
.HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);
// ---- entity shapes: columns, keys, indexes, table names ----
// Book -> table "Books". No HasBookDetails property is declared here.
modelBuilder.Entity("DataLayer.Book", b =>
{
b.Property<int>("BookId")
.ValueGeneratedOnAdd()
.HasColumnType("int")
.HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);
b.Property<string>("AudibleProductId")
.HasColumnType("nvarchar(450)");
b.Property<int>("CategoryId")
.HasColumnType("int");
b.Property<DateTime?>("DatePublished")
.HasColumnType("datetime2");
b.Property<string>("Description")
.HasColumnType("nvarchar(max)");
b.Property<bool>("IsAbridged")
.HasColumnType("bit");
b.Property<int>("LengthInMinutes")
.HasColumnType("int");
b.Property<string>("PictureId")
.HasColumnType("nvarchar(max)");
b.Property<string>("Title")
.HasColumnType("nvarchar(max)");
b.HasKey("BookId");
b.HasIndex("AudibleProductId");
b.HasIndex("CategoryId");
b.ToTable("Books");
});
// BookContributor -> table "BookContributor"; composite key (BookId, ContributorId, Role).
modelBuilder.Entity("DataLayer.BookContributor", b =>
{
b.Property<int>("BookId")
.HasColumnType("int");
b.Property<int>("ContributorId")
.HasColumnType("int");
b.Property<int>("Role")
.HasColumnType("int");
b.Property<byte>("Order")
.HasColumnType("tinyint");
b.HasKey("BookId", "ContributorId", "Role");
b.HasIndex("BookId");
b.HasIndex("ContributorId");
b.ToTable("BookContributor");
});
// Category -> table "Categories"; seeded with one row (CategoryId -1, empty id and name).
modelBuilder.Entity("DataLayer.Category", b =>
{
b.Property<int>("CategoryId")
.ValueGeneratedOnAdd()
.HasColumnType("int")
.HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);
b.Property<string>("AudibleCategoryId")
.HasColumnType("nvarchar(450)");
b.Property<string>("Name")
.HasColumnType("nvarchar(max)");
b.Property<int?>("ParentCategoryCategoryId")
.HasColumnType("int");
b.HasKey("CategoryId");
b.HasIndex("AudibleCategoryId");
b.HasIndex("ParentCategoryCategoryId");
b.ToTable("Categories");
b.HasData(
new
{
CategoryId = -1,
AudibleCategoryId = "",
Name = ""
});
});
// Contributor -> table "Contributors"; indexed by Name.
modelBuilder.Entity("DataLayer.Contributor", b =>
{
b.Property<int>("ContributorId")
.ValueGeneratedOnAdd()
.HasColumnType("int")
.HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);
b.Property<string>("AudibleAuthorId")
.HasColumnType("nvarchar(max)");
b.Property<string>("Name")
.HasColumnType("nvarchar(450)");
b.HasKey("ContributorId");
b.HasIndex("Name");
b.ToTable("Contributors");
});
// LibraryBook -> table "Library"; keyed by BookId. No DownloadBookLink property is declared here.
modelBuilder.Entity("DataLayer.LibraryBook", b =>
{
b.Property<int>("BookId")
.HasColumnType("int");
b.Property<DateTime>("DateAdded")
.HasColumnType("datetime2");
b.HasKey("BookId");
b.ToTable("Library");
});
// Series -> table "Series"; indexed by AudibleSeriesId.
modelBuilder.Entity("DataLayer.Series", b =>
{
b.Property<int>("SeriesId")
.ValueGeneratedOnAdd()
.HasColumnType("int")
.HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);
b.Property<string>("AudibleSeriesId")
.HasColumnType("nvarchar(450)");
b.Property<string>("Name")
.HasColumnType("nvarchar(max)");
b.HasKey("SeriesId");
b.HasIndex("AudibleSeriesId");
b.ToTable("Series");
});
// SeriesBook -> table "SeriesBook"; composite key (SeriesId, BookId); Index is a nullable float.
modelBuilder.Entity("DataLayer.SeriesBook", b =>
{
b.Property<int>("SeriesId")
.HasColumnType("int");
b.Property<int>("BookId")
.HasColumnType("int");
b.Property<float?>("Index")
.HasColumnType("real");
b.HasKey("SeriesId", "BookId");
b.HasIndex("BookId");
b.HasIndex("SeriesId");
b.ToTable("SeriesBook");
});
// ---- relationships and owned types ----
modelBuilder.Entity("DataLayer.Book", b =>
{
b.HasOne("DataLayer.Category", "Category")
.WithMany()
.HasForeignKey("CategoryId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
// owned Rating is mapped to the "Books" table
b.OwnsOne("DataLayer.Rating", "Rating", b1 =>
{
b1.Property<int>("BookId")
.ValueGeneratedOnAdd()
.HasColumnType("int")
.HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);
b1.Property<float>("OverallRating")
.HasColumnType("real");
b1.Property<float>("PerformanceRating")
.HasColumnType("real");
b1.Property<float>("StoryRating")
.HasColumnType("real");
b1.HasKey("BookId");
b1.ToTable("Books");
b1.WithOwner()
.HasForeignKey("BookId");
});
// owned Supplements collection is mapped to its own "Supplement" table
b.OwnsMany("DataLayer.Supplement", "Supplements", b1 =>
{
b1.Property<int>("SupplementId")
.ValueGeneratedOnAdd()
.HasColumnType("int")
.HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);
b1.Property<int>("BookId")
.HasColumnType("int");
b1.Property<string>("Url")
.HasColumnType("nvarchar(max)");
b1.HasKey("SupplementId");
b1.HasIndex("BookId");
b1.ToTable("Supplement");
b1.WithOwner("Book")
.HasForeignKey("BookId");
});
// owned UserDefinedItem is mapped to its own "UserDefinedItem" table, which also holds its nested Rating
b.OwnsOne("DataLayer.UserDefinedItem", "UserDefinedItem", b1 =>
{
b1.Property<int>("BookId")
.HasColumnType("int");
b1.Property<string>("Tags")
.HasColumnType("nvarchar(max)");
b1.HasKey("BookId");
b1.ToTable("UserDefinedItem");
b1.WithOwner("Book")
.HasForeignKey("BookId");
b1.OwnsOne("DataLayer.Rating", "Rating", b2 =>
{
b2.Property<int>("UserDefinedItemBookId")
.HasColumnType("int");
b2.Property<float>("OverallRating")
.HasColumnType("real");
b2.Property<float>("PerformanceRating")
.HasColumnType("real");
b2.Property<float>("StoryRating")
.HasColumnType("real");
b2.HasKey("UserDefinedItemBookId");
b2.ToTable("UserDefinedItem");
b2.WithOwner()
.HasForeignKey("UserDefinedItemBookId");
});
});
});
// BookContributor links Book.ContributorsLink and Contributor.BooksLink; both sides cascade on delete
modelBuilder.Entity("DataLayer.BookContributor", b =>
{
b.HasOne("DataLayer.Book", "Book")
.WithMany("ContributorsLink")
.HasForeignKey("BookId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.HasOne("DataLayer.Contributor", "Contributor")
.WithMany("BooksLink")
.HasForeignKey("ContributorId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
});
// self-reference: a Category may point at a parent Category (FK column is nullable)
modelBuilder.Entity("DataLayer.Category", b =>
{
b.HasOne("DataLayer.Category", "ParentCategory")
.WithMany()
.HasForeignKey("ParentCategoryCategoryId");
});
// one-to-one: each LibraryBook row belongs to exactly one Book
modelBuilder.Entity("DataLayer.LibraryBook", b =>
{
b.HasOne("DataLayer.Book", "Book")
.WithOne()
.HasForeignKey("DataLayer.LibraryBook", "BookId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
});
// SeriesBook links Book.SeriesLink and Series.BooksLink; both sides cascade on delete
modelBuilder.Entity("DataLayer.SeriesBook", b =>
{
b.HasOne("DataLayer.Book", "Book")
.WithMany("SeriesLink")
.HasForeignKey("BookId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
b.HasOne("DataLayer.Series", "Series")
.WithMany("BooksLink")
.HasForeignKey("SeriesId")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
});
#pragma warning restore 612, 618
}
}
}

View File

@ -0,0 +1,82 @@
using Microsoft.EntityFrameworkCore.Migrations;
namespace DataLayer.Migrations
{
/// <summary>
/// Schema migration that removes the scraping-era columns: Library.DownloadBookLink and Books.HasBookDetails.
/// It also re-creates the Supplement and UserDefinedItem foreign keys to Books with cascade delete.
/// </summary>
public partial class NoScraping : Migration
{
/// <summary>Applies the migration: drops the two columns and switches both foreign keys to Cascade.</summary>
protected override void Up(MigrationBuilder migrationBuilder)
{
// both foreign keys are dropped first and re-added at the end with a different delete action
migrationBuilder.DropForeignKey(
name: "FK_Supplement_Books_BookId",
table: "Supplement");
migrationBuilder.DropForeignKey(
name: "FK_UserDefinedItem_Books_BookId",
table: "UserDefinedItem");
// data in these two columns is discarded; Down() can restore the columns but not their contents
migrationBuilder.DropColumn(
name: "DownloadBookLink",
table: "Library");
migrationBuilder.DropColumn(
name: "HasBookDetails",
table: "Books");
// re-add with Cascade (Down() re-adds them with Restrict)
migrationBuilder.AddForeignKey(
name: "FK_Supplement_Books_BookId",
table: "Supplement",
column: "BookId",
principalTable: "Books",
principalColumn: "BookId",
onDelete: ReferentialAction.Cascade);
migrationBuilder.AddForeignKey(
name: "FK_UserDefinedItem_Books_BookId",
table: "UserDefinedItem",
column: "BookId",
principalTable: "Books",
principalColumn: "BookId",
onDelete: ReferentialAction.Cascade);
}
/// <summary>Reverts the migration: re-adds the two columns and switches both foreign keys back to Restrict.</summary>
protected override void Down(MigrationBuilder migrationBuilder)
{
migrationBuilder.DropForeignKey(
name: "FK_Supplement_Books_BookId",
table: "Supplement");
migrationBuilder.DropForeignKey(
name: "FK_UserDefinedItem_Books_BookId",
table: "UserDefinedItem");
// restored as a nullable column
migrationBuilder.AddColumn<string>(
name: "DownloadBookLink",
table: "Library",
type: "nvarchar(max)",
nullable: true);
// restored as non-nullable with a default of false
migrationBuilder.AddColumn<bool>(
name: "HasBookDetails",
table: "Books",
type: "bit",
nullable: false,
defaultValue: false);
migrationBuilder.AddForeignKey(
name: "FK_Supplement_Books_BookId",
table: "Supplement",
column: "BookId",
principalTable: "Books",
principalColumn: "BookId",
onDelete: ReferentialAction.Restrict);
migrationBuilder.AddForeignKey(
name: "FK_UserDefinedItem_Books_BookId",
table: "UserDefinedItem",
column: "BookId",
principalTable: "Books",
principalColumn: "BookId",
onDelete: ReferentialAction.Restrict);
}
}
}

View File

@ -38,9 +38,6 @@ namespace DataLayer.Migrations
b.Property<string>("Description")
.HasColumnType("nvarchar(max)");
b.Property<bool>("HasBookDetails")
.HasColumnType("bit");
b.Property<bool>("IsAbridged")
.HasColumnType("bit");
@ -146,9 +143,6 @@ namespace DataLayer.Migrations
b.Property<DateTime>("DateAdded")
.HasColumnType("datetime2");
b.Property<string>("DownloadBookLink")
.HasColumnType("nvarchar(max)");
b.HasKey("BookId");
b.ToTable("Library");

View File

@ -1,19 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.EntityFrameworkCore;
namespace DataLayer
{
/// <summary>Database clean-up command for contributors that no book refers to.</summary>
public static class RemoveOrphansCommand
{
    /// <summary>
    /// Deletes every row of Contributors that has no matching row in BookContributor.
    /// </summary>
    /// <param name="context">Context whose database connection runs the statement.</param>
    /// <returns>The value returned by <c>ExecuteSqlRaw</c> for the delete statement.</returns>
    public static int RemoveOrphans(this LibationContext context)
    {
        const string deleteOrphanedContributors = @"
delete c
from Contributors c
left join BookContributor bc on c.ContributorId = bc.ContributorId
left join Books b on bc.BookId = b.BookId
where bc.ContributorId is null
";
        return context.Database.ExecuteSqlRaw(deleteOrphanedContributors);
    }
}
}

View File

@ -30,7 +30,6 @@ namespace DataLayer
public string PictureId { get; set; }
// book details
public bool HasBookDetails { get; private set; }
public bool IsAbridged { get; private set; }
public DateTime? DatePublished { get; private set; }
@ -231,8 +230,6 @@ namespace DataLayer
// don't overwrite with default values
IsAbridged |= isAbridged;
DatePublished = datePublished ?? DatePublished;
HasBookDetails = true;
}
public void UpdateCategory(Category category, DbContext context = null)

View File

@ -10,18 +10,12 @@ namespace DataLayer
public DateTime DateAdded { get; private set; }
/// <summary>For downloading AAX file</summary>
public string DownloadBookLink { get; private set; }
private LibraryBook() { }
public LibraryBook(Book book, DateTime dateAdded
, string downloadBookLink = null
)
public LibraryBook(Book book, DateTime dateAdded)
{
ArgumentValidator.EnsureNotNull(book, nameof(book));
Book = book;
DateAdded = dateAdded;
DownloadBookLink = downloadBookLink;
}
}
}

View File

@ -8,14 +8,6 @@ namespace DataLayer
{
public static class BookQueries
{
public static int BooksWithoutDetailsCount()
{
using var context = LibationContext.Create();
return context
.Books
.Count(b => !b.HasBookDetails);
}
public static Book GetBook_Flat_NoTracking(string productId)
{
using var context = LibationContext.Create();

View File

@ -1,6 +1,5 @@
HOW TO CREATE: EF CORE PROJECT
==============================
easiest with .NET Core but there's also a work-around for .NET Standard
example is for sqlite but the same works with MsSql
@ -26,7 +25,6 @@ set project "Set as StartUp Project"
Tools >> Nuget Package Manager >> Package Manager Console
default project: Examples\SQLite_NETCore2_0
note: in EFCore, Enable-Migrations is no longer used. start with add-migration
PM> add-migration InitialCreate
PM> Update-Database

View File

@ -1,55 +0,0 @@
proposed extensible schema to generalize beyond audible
problems
0) reeks of premature optimization
- i'm currently only doing audible audiobooks. this adds several layers of abstraction for the sake of possible expansion
- there's a good chance that supporting another platform may not conform to this schema, in which case i'd have done this for nothing. genres are one likely pain point
- libation is currently single-user. hopefully the below would suffice for adding users, but if i'm wrong it might be all pain and no gain
1) very thorough == very complex
2) there are some books which would still be difficult to taxonomize
- joy of cooking. has become more of a brand
- the bible. has different versions that aren't just editions
- dictionary. authored by a publisher
3) "books" vs "editions" is a confusing problem waiting to happen
[AIPK=auto increm PK]
(libation) users [AIPK id, name, join date]
audible users [AIPK id, AUDIBLE-PK username]
libation audible users [PK user id, PK audible user id -- cluster PK across all FKs]
- potential danger in multi-user environment. wouldn't want one libation user getting access to a different libation user's audible info
contributors [AIPK id, name]. prev people. incl publishers
audible authors [PK/FK contributor id, AUDIBLE-PK author id]
roles [AIPK id, name]. seeded: author, narrator, publisher. could expand (eg: translator, editor) without each needing a new table
books [AIPK id, title, desc]
book contributors [FK book id, FK contributor id, FK role id, order -- cluster PK across all FKs]
- likely only authors
editions [AIPK id, FK book id, title]. could expand to include year, is first edition, is abridged
- reasons for optional different title: "Ender's Game: Special 20th Anniversary Edition", "Harry Potter and the Sorcerer's Stone" vs "Harry Potter and the Philosopher's Stone" vs "Harry Potter y la piedra filosofal", "Midnight Riot" vs "Rivers of London"
edition contributors [FK edition id, FK contributor id, FK role id, order -- cluster PK across all FKs]
- likely everything except authors. eg narrators, publisher
audiobooks [PK/FK edition id, lengthInMinutes]
- could expand to other formats by adding other similar tables. eg: print with #pages and isbn, ebook with mb
audible origins [AIPK id, name]. seeded: library. detail. json. series
audible books [PK/FK edition id, AUDIBLE-PK product id, picture id, sku, 3 ratings, audible category id, audible origin id]
- could expand to other vendors by adding other similar tables
audible user ratings [PK/FK edition id, audible user id, 3 ratings]
audible supplements [AIPK id, FK edition id, download url]
- pdfs only. although book download info could be the same format, they're substantially different and subject to change
audible book downloads [PK/FK edition id, audible user id, bookdownloadlink]
pictures [AIPK id, FK edition id, filename (xyz.jpg -- not incl path)]
audible categories [AIPK id, AUDIBLE-PK category id, name, parent]. may only nest 1 deep
(libation) library [FK libation user id, FK edition id, date added -- cluster PK across all FKs]
(libation) user defined [FK libation user id, FK edition id, tagsRaw (, notes...) -- cluster PK across all FKs]
- there's no reason to restrict tags to library items, so don't combine/link this table with library
series [AIPK id, name]
audible series [FK series id, AUDIBLE-PK series id/asin, audible origin id]
- could also include a 'name' field for what audible calls this series
series books [FK series id, FK book id (NOT edition id), index -- cluster PK across all FKs]
- "index" not "order". display this number; don't just put in this sequence
- index is float instead of int to allow for in-between books. eg 2.5
- if only using "editions" (ie: getting rid of the "books" table), to show 2 editions as the same book in a series, give them the same index
(libation) user shelves [AIPK id, FK libation user id, name, desc]
- custom shelf. similar to library but very different in philosophy. likely different in evolving details
(libation) shelf books [AIPK id, FK user shelf id, date added, order]
- technically, it's no violation to list a book more than once so use AIPK

View File

@ -1,76 +0,0 @@
ignore for now:
authorProperties [PK/FK contributor id, AUDIBLE-PK author id]
notes in Contributor.cs for later refactoring
c# enum only, not their own tables:
roles [AIPK id, name]. seeded: author, narrator, publisher. could expand (eg: translator, editor) without each needing a new table
origins [AIPK id, name]. seeded: library. detail. json. series
-- begin SCHEMA ---------------------------------------------------------------------------------------------------------------------
any audible keys should be indexed
SCHEMA
======
contributors [AIPK id, name]. people and publishers
books [AIPK id, AUDIBLE-PK product id, title, desc, lengthInMinutes, picture id, 3 ratings, category id, origin id]
- product instances. each edition and version is discrete: unique and disconnected from different editions of the same book
- on book re-import
update:
update book origin and series origin with the new source type
overwrite simple fields
invoke complex contributor updates
details page gets
un/abridged
release date
language
publisher
series info incl name
categories
if new == series: ignore. do update series info. do not update book info
else if old == json: update (incl if new == json)
else if old == library && new == detail: update
else: ignore
book contributors [FK book id, FK contributor id, FK role id, order -- cluster PK across all FKs]
supplements [AIPK id, FK book id, download url]
categories [AIPK id, AUDIBLE-PK category id, name, parent]. may only nest 1 deep
user defined [PK/FK book id, 3 ratings, tagsRaw]
series [AIPK id, AUDIBLE-PK series id/asin, name, origin id]
series books [FK series id, FK book id, index -- cluster PK across all FKs]
- "index" not "order". display this number; don't just put in this sequence
- index is float instead of int to allow for in-between books. eg 2.5
- to show 2 editions as the same book in a series, give them the same index
- re-import using series page, there will need to be a re-eval of import logic
library [PK/FK book id, date added, bookdownloadlink]
-- end SCHEMA ---------------------------------------------------------------------------------------------------------------------
-- begin SIMPLIFIED DDD ---------------------------------------------------------------------------------------------------------------------
combine domain and persistence (C(r)UD). no repository pattern. encapsulated in domain objects; direct calls to EF Core
https://www.thereformedprogrammer.net/creating-domain-driven-design-entity-classes-with-entity-framework-core/
// pattern for x-to-many
public void AddReview(int numStars, DbContext context = null)
{
if (_reviews != null) _reviews.Add(new Review(numStars));
else if (context == null) throw new Exception("need context");
else if (context.Entry(this).IsKeySet) context.Add(new Review(numStars, BookId));
else throw new Exception("Could not add");
}
// pattern for optional one-to-one
MyPropClass MyProps { get; private set; }
public void AddMyProps(string s, int i, DbContext context = null)
{
// avoid a trip to the db
if (MyProps != null) { MyProps.Update(s, i); return; }
if (BookId == 0) { MyProps = new MyPropClass(s, i); return; }
if (context == null) throw new Exception("need context");
// per Jon P Smith, this single trip to db loads the property if there is one
// note: .Reference() is for single object references. for collections use .Collection()
context.Entry(this).Reference(s => s.MyProps).Load();
if (MyProps != null) MyProps.Update(s, i);
else MyProps = new MyPropClass(s, i);
}
repository reads are 'query object'-like extension methods
https://www.thereformedprogrammer.net/is-the-repository-pattern-useful-with-entity-framework-core/#1-query-objects-a-way-to-isolate-and-hide-database-read-code
-- end SIMPLIFIED DDD ---------------------------------------------------------------------------------------------------------------------

View File

@ -28,10 +28,7 @@ namespace DtoImporterService
{
var libraryBook = new LibraryBook(
context.Books.Local.Single(b => b.AudibleProductId == newItem.ProductId),
newItem.DateAdded
// needed for scraping
//,FileManager.FileUtility.RestoreDeclawed(newLibraryDTO.DownloadBookLink)
);
newItem.DateAdded);
context.Library.Add(libraryBook);
}

View File

@ -5,10 +5,11 @@
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\..\Dinah.Core\Dinah.Core\Dinah.Core.csproj" />
<ProjectReference Include="..\AaxDecrypter\AaxDecrypter.csproj" />
<ProjectReference Include="..\ApplicationService\ApplicationService.csproj" />
<ProjectReference Include="..\AudibleDotComAutomation\AudibleDotComAutomation.csproj" />
<ProjectReference Include="..\ApplicationServices\ApplicationServices.csproj" />
<ProjectReference Include="..\DataLayer\DataLayer.csproj" />
<ProjectReference Include="..\FileManager\FileManager.csproj" />
<ProjectReference Include="..\InternalUtilities\InternalUtilities.csproj" />
</ItemGroup>

View File

@ -4,7 +4,7 @@ using DataLayer;
using Dinah.Core.ErrorHandling;
using FileManager;
namespace ScrapingDomainServices
namespace FileLiberator
{
/// <summary>
/// Download DRM book and decrypt audiobook files.

View File

@ -9,7 +9,7 @@ using Dinah.Core;
using Dinah.Core.ErrorHandling;
using FileManager;
namespace ScrapingDomainServices
namespace FileLiberator
{
/// <summary>
/// Download DRM book and decrypt audiobook files.
@ -39,7 +39,7 @@ namespace ScrapingDomainServices
=> await validateAsync_ConfigureAwaitFalse(libraryBook.Book.AudibleProductId).ConfigureAwait(false);
private async Task<bool> validateAsync_ConfigureAwaitFalse(string productId)
=> await AudibleFileStorage.AAX.ExistsAsync(productId)
&& !(await AudibleFileStorage.Audio.ExistsAsync(productId));
&& !await AudibleFileStorage.Audio.ExistsAsync(productId);
// do NOT use ConfigureAwait(false) on ProcessUnregistered()
// often does a lot with forms in the UI context

View File

@ -5,7 +5,7 @@ using FileManager;
using DataLayer;
using Dinah.Core.ErrorHandling;
namespace ScrapingDomainServices
namespace FileLiberator
{
/// <summary>
/// Download DRM book and decrypt audiobook files.
@ -38,10 +38,8 @@ namespace ScrapingDomainServices
// in cases where title includes '&', just use everything before the '&' and ignore the rest
//// var adhTitle = product.Title.Split('&')[0]
// legacy/scraping method
//await performDownloadAsync(libraryBook, tempAaxFilename);
// new/api method
tempAaxFilename = await performApiDownloadAsync(libraryBook, tempAaxFilename);
// new/api method
tempAaxFilename = await performApiDownloadAsync(libraryBook, tempAaxFilename);
// move
var aaxFilename = FileUtility.GetValidFilename(
@ -54,33 +52,12 @@ tempAaxFilename = await performApiDownloadAsync(libraryBook, tempAaxFilename);
var statusHandler = new StatusHandler();
var isDownloaded = await AudibleFileStorage.AAX.ExistsAsync(libraryBook.Book.AudibleProductId);
if (isDownloaded)
DoStatusUpdate($"Downloaded: {aaxFilename}");
Invoke_StatusUpdate($"Downloaded: {aaxFilename}");
else
statusHandler.AddError("Downloaded AAX file cannot be found");
return statusHandler;
}
// GetWebClientAsync:
// wires up webClient events
// [DownloadProgressChanged, DownloadFileCompleted, DownloadDataCompleted, DownloadStringCompleted]
// to DownloadableBase events
// DownloadProgressChanged, DownloadCompleted
// fires DownloadBegin event
// method begins async file download
/// <summary>
/// Downloads the book's AAX file to <paramref name="tempAaxFilename"/> using the download link stored on the library book.
/// </summary>
/// <param name="libraryBook">Library entry whose DownloadBookLink is rewritten into the download url.</param>
/// <param name="tempAaxFilename">Destination file for the download.</param>
private async Task performDownloadAsync(LibraryBook libraryBook, string tempAaxFilename)
{
    const string urlSuffix = "&asin=&source=audible_adm&size=&browser_type=&assemble_url=http://cds.audible.com/download";

    // strip the pieces of the stored link that are not wanted, then append the fixed query suffix
    var trimmedLink = libraryBook.DownloadBookLink.Replace("/admhelper", "");
    trimmedLink = trimmedLink.Replace("&DownloadType=Now", "");
    var downloadUri = new Uri(trimmedLink + urlSuffix);

    using var client = await GetWebClientAsync(tempAaxFilename);

    // for book downloads only: pretend to be the audible download manager. from inAudible:
    client.Headers["User-Agent"] = "Audible ADM 6.6.0.15;Windows Vista Service Pack 1 Build 7601";

    await client.DownloadFileTaskAsync(downloadUri, tempAaxFilename);
}
private async Task<string> performApiDownloadAsync(LibraryBook libraryBook, string tempAaxFilename)
{
var api = await AudibleApi.EzApiCreator.GetApiAsync(AudibleApiStorage.IdentityTokensFile);

View File

@ -0,0 +1,105 @@
using System;
using System.IO;
using System.Linq;
using System.Net;
using System.Threading.Tasks;
using DataLayer;
using Dinah.Core.ErrorHandling;
using FileManager;
namespace FileLiberator
{
/// <summary>
/// Downloads the supplement (PDF) of a library book next to its audio file,
/// or into the PDF storage directory when no audio file is found.
/// </summary>
public class DownloadPdf : DownloadableBase
{
    static DownloadPdf()
    {
        // https://stackoverflow.com/a/15483698
        // SECURITY NOTE(review): this callback accepts every server certificate. The property is static on
        // ServicePointManager, so the bypass is not limited to the PDF download. Left in place because removing
        // it may break downloads that currently work -- confirm whether it is still needed and scope or remove it.
        ServicePointManager.ServerCertificateValidationCallback = delegate { return true; };
    }

    /// <summary>
    /// A book qualifies when it has at least one supplement and no PDF is stored for it yet.
    /// </summary>
    /// <returns>false when the library entry has no book, no supplements, or the PDF already exists.</returns>
    public override async Task<bool> ValidateAsync(LibraryBook libraryBook)
    {
        var product = libraryBook.Book;

        // ProcessItemAsync reports a missing book instead of crashing; validation must not throw on it either
        if (product == null)
            return false;

        if (!product.Supplements.Any())
            return false;

        return !await AudibleFileStorage.PDF.ExistsAsync(product.AudibleProductId);
    }

    /// <summary>
    /// Downloads the single supplement url of the book and verifies that the PDF can be found afterwards.
    /// </summary>
    /// <returns>A status handler carrying an error message for each problem found; empty on success.</returns>
    /// <exception cref="Exception">The book lists more than one supplement url.</exception>
    public override async Task<StatusHandler> ProcessItemAsync(LibraryBook libraryBook)
    {
        var product = libraryBook.Book;

        if (product == null)
            return new StatusHandler { "Book not found" };

        var urls = product.Supplements.Select(d => d.Url).ToList();
        if (urls.Count == 0)
            return new StatusHandler { "PDF download url not found" };

        // sanity check
        if (urls.Count > 1)
            throw new Exception("Multiple PDF downloads are not currently supported. typically indicates an error");

        var url = urls.Single();

        var destinationDir = await getDestinationDirectory(product.AudibleProductId);
        if (destinationDir == null)
            return new StatusHandler { "Destination directory not found for PDF download" };

        // the file keeps the name it has in the url
        var destinationFilename = Path.Combine(destinationDir, Path.GetFileName(url));

        using var webClient = GetWebClient(destinationFilename);
        await webClient.DownloadFileTaskAsync(url, destinationFilename);

        var statusHandler = new StatusHandler();
        var exists = await AudibleFileStorage.PDF.ExistsAsync(product.AudibleProductId);
        if (!exists)
            statusHandler.AddError("Downloaded PDF cannot be found");
        return statusHandler;
    }

    /// <summary>Chooses where the PDF is written for the given product.</summary>
    private async Task<string> getDestinationDirectory(string productId)
    {
        // if audio file exists, get its dir
        var audioFile = await AudibleFileStorage.Audio.GetAsync(productId);
        if (audioFile != null)
            return Path.GetDirectoryName(audioFile);

        // else return base Book dir
        return AudibleFileStorage.PDF.StorageDirectory;
    }

    // other user agents from my chrome. from: https://www.whoishostingthis.com/tools/user-agent/
    private static string[] userAgents { get; } = new[]
    {
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.96 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.87 Safari/537.36",
    };

    /// <summary>
    /// Builds a WebClient with browser-like request headers, forwards its progress and completion
    /// events to this instance's download events, and raises DownloadBegin.
    /// </summary>
    /// <param name="downloadMessage">Text passed to DownloadBegin and embedded in the completion message.</param>
    private WebClient GetWebClient(string downloadMessage)
    {
        var webClient = new WebClient();

        // a user agent is picked at random for each client
        var userAgentIndex = new Random().Next(0, userAgents.Length); // upper bound is exclusive
        webClient.Headers["User-Agent"] = userAgents[userAgentIndex];
        webClient.Headers["Referer"] = "https://google.com";
        webClient.Headers["Upgrade-Insecure-Requests"] = "1";
        webClient.Headers["DNT"] = "1";
        webClient.Headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8";
        webClient.Headers["Accept-Language"] = "en-US,en;q=0.9";

        webClient.DownloadProgressChanged += (s, e) => Invoke_DownloadProgressChanged(s, new Dinah.Core.Net.Http.DownloadProgress { BytesReceived = e.BytesReceived, ProgressPercentage = e.ProgressPercentage, TotalBytesToReceive = e.TotalBytesToReceive });
        webClient.DownloadFileCompleted += (s, e) => Invoke_DownloadCompleted(s, $"Completed: {downloadMessage}");
        webClient.DownloadDataCompleted += (s, e) => Invoke_DownloadCompleted(s, $"Completed: {downloadMessage}");
        webClient.DownloadStringCompleted += (s, e) => Invoke_DownloadCompleted(s, $"Completed: {downloadMessage}");

        Invoke_DownloadBegin(downloadMessage);

        return webClient;
    }
}
}

View File

@ -0,0 +1,45 @@
using System;
using System.Threading.Tasks;
using DataLayer;
using Dinah.Core.ErrorHandling;
namespace FileLiberator
{
/// <summary>
/// Base class for downloadable processors: declares the progress events, offers helpers for
/// raising them, and wraps <see cref="ProcessItemAsync"/> with Begin / Completed notifications.
/// </summary>
public abstract class DownloadableBase : IDownloadable
{
    public event EventHandler<string> Begin;
    public event EventHandler<string> Completed;

    public event EventHandler<string> StatusUpdate;

    public event EventHandler<string> DownloadBegin;
    public event EventHandler<Dinah.Core.Net.Http.DownloadProgress> DownloadProgressChanged;
    public event EventHandler<string> DownloadCompleted;

    /// <summary>Raises <see cref="StatusUpdate"/> with this instance as sender.</summary>
    protected void Invoke_StatusUpdate(string message)
    {
        StatusUpdate?.Invoke(this, message);
    }

    /// <summary>Raises <see cref="DownloadBegin"/> with this instance as sender.</summary>
    protected void Invoke_DownloadBegin(string downloadMessage)
    {
        DownloadBegin?.Invoke(this, downloadMessage);
    }

    /// <summary>Raises <see cref="DownloadProgressChanged"/>, passing the given sender through unchanged.</summary>
    protected void Invoke_DownloadProgressChanged(object sender, Dinah.Core.Net.Http.DownloadProgress progress)
    {
        DownloadProgressChanged?.Invoke(sender, progress);
    }

    /// <summary>Raises <see cref="DownloadCompleted"/>, passing the given sender through unchanged.</summary>
    protected void Invoke_DownloadCompleted(object sender, string str)
    {
        DownloadCompleted?.Invoke(sender, str);
    }

    public abstract Task<bool> ValidateAsync(LibraryBook libraryBook);

    public abstract Task<StatusHandler> ProcessItemAsync(LibraryBook libraryBook);

    /// <summary>
    /// Raises <see cref="Begin"/>, runs <see cref="ProcessItemAsync"/>, and raises <see cref="Completed"/>
    /// afterwards whether or not processing threw.
    /// </summary>
    // no ConfigureAwait(false) in here: per the original note, this often does a lot with forms in the UI context
    public async Task<StatusHandler> ProcessAsync(LibraryBook libraryBook)
    {
        var displayMessage = $"[{libraryBook.Book.AudibleProductId}] {libraryBook.Book.Title}";

        Begin?.Invoke(this, displayMessage);

        try
        {
            var status = await ProcessItemAsync(libraryBook);
            return status;
        }
        finally
        {
            Completed?.Invoke(this, displayMessage);
        }
    }
}
}

View File

@ -1,6 +1,6 @@
using System;
namespace ScrapingDomainServices
namespace FileLiberator
{
public interface IDecryptable : IProcessable
{

View File

@ -1,6 +1,6 @@
using System;
namespace ScrapingDomainServices
namespace FileLiberator
{
public interface IDownloadable : IProcessable
{

View File

@ -3,7 +3,7 @@ using System.Threading.Tasks;
using DataLayer;
using Dinah.Core.ErrorHandling;
namespace ScrapingDomainServices
namespace FileLiberator
{
public interface IProcessable
{

View File

@ -1,10 +1,9 @@
using System;
using System.Threading.Tasks;
using DataLayer;
using Dinah.Core.Collections.Generic;
using Dinah.Core.ErrorHandling;
namespace ScrapingDomainServices
namespace FileLiberator
{
public static class IProcessableExt
{
@ -30,9 +29,6 @@ namespace ScrapingDomainServices
return status;
}
// i'd love to turn this into Task<IEnumerable<LibraryBook>>
// since enumeration is a blocking operation, this won't be possible until
// 2019's C# 8 async streams, aka async enumerables, aka async iterators: https://blogs.msdn.microsoft.com/dotnet/2018/11/12/building-c-8-0/
public static async Task<LibraryBook> GetNextValidAsync(this IProcessable processable)
{
var libraryBooks = LibraryQueries.GetLibrary_Flat_NoTracking();
@ -43,12 +39,5 @@ namespace ScrapingDomainServices
return null;
}
/// <summary>
/// Validates <paramref name="libraryBook"/> and, only if validation succeeds, processes it.
/// </summary>
/// <returns>The processing status, or a status containing "Validation failed".</returns>
public static async Task<StatusHandler> ProcessValidateLibraryBookAsync(this IProcessable processable, LibraryBook libraryBook)
{
    var isValid = await processable.ValidateAsync(libraryBook);
    if (isValid)
        return await processable.ProcessAsync(libraryBook);

    return new StatusHandler { "Validation failed" };
}
}
}

View File

@ -4,6 +4,7 @@ namespace FileManager
{
/// <summary>Locations of files used by the Audible API integration.</summary>
public static class AudibleApiStorage
{
    /// <summary>Path of "IdentityTokens.json" inside the configured LibationFiles directory.</summary>
    public static string IdentityTokensFile
    {
        get
        {
            // not customizable. don't move to config
            return Path.Combine(Configuration.Instance.LibationFiles, "IdentityTokens.json");
        }
    }
}
}

View File

@ -27,13 +27,6 @@ namespace FileManager
return File.Exists(path);
}
/// <summary>
/// Splits <paramref name="proposedPath"/> into directory, file name without extension, and
/// extension, then forwards the three parts to the multi-argument overload.
/// </summary>
/// <param name="proposedPath">acceptable inputs:
///   example.txt
///   C:\Users\username\Desktop\example.txt</param>
/// <returns>Returns full name and path of unused filename. including (#)</returns>
public static string GetValidFilename(string proposedPath)
{
    var directory = Path.GetDirectoryName(proposedPath);
    var nameWithoutExtension = Path.GetFileNameWithoutExtension(proposedPath);
    var extension = Path.GetExtension(proposedPath);

    return GetValidFilename(directory, nameWithoutExtension, extension);
}
public static string GetValidFilename(string dirFullPath, string filename, string extension, params string[] metadataSuffixes)
{
if (string.IsNullOrWhiteSpace(dirFullPath))
@ -78,21 +71,6 @@ namespace FileManager
return property;
}
/// <summary>
/// Neutralizes markup and URLs in <paramref name="str"/> by inserting an 'x' into
/// "&lt;script", ".net", ".com", "&lt;link" and "http" (case-sensitive, in that order).
/// <see cref="RestoreDeclawed"/> applies the reverse replacements.
/// </summary>
/// <param name="str">Text to neutralize; may be null.</param>
/// <returns>The neutralized text, or null when <paramref name="str"/> is null.</returns>
public static string Declaw(string str)
    => str
        // null-conditional: a null input yields null instead of a NullReferenceException,
        // matching the null handling of RestoreDeclawed
        ?.Replace("<script", "<sxcript")
        .Replace(".net", ".nxet")
        .Replace(".com", ".cxom")
        .Replace("<link", "<lxink")
        .Replace("http", "hxttp");
/// <summary>
/// Reverses the replacements made by <see cref="Declaw"/>: removes the inserted 'x' from
/// "&lt;sxcript", ".nxet", ".cxom", "&lt;lxink" and "hxttp" (case-sensitive, in that order).
/// </summary>
/// <param name="str">Declawed text; may be null.</param>
/// <returns>The restored text, or null when <paramref name="str"/> is null.</returns>
public static string RestoreDeclawed(string str)
{
    if (str == null)
        return null;

    var restored = str.Replace("<sxcript", "<script");
    restored = restored.Replace(".nxet", ".net");
    restored = restored.Replace(".cxom", ".com");
    restored = restored.Replace("<lxink", "<link");
    restored = restored.Replace("hxttp", "http");
    return restored;
}
public static string TitleCompressed(string title)
=> new string(title
.Where(c => (char.IsLetterOrDigit(c)))

View File

@ -1,68 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace FileManager
{
    /// <summary>
    /// File-system layout for saved web pages: a "Pages" directory under LibationFiles holding
    /// per-batch library folders, plus a "Book Details" subdirectory for per-product files.
    /// </summary>
    public static class WebpageStorage
    {
        // not customizable. don't move to config
        // (both directories are created on first access if they do not already exist)
        private static string PagesDirectory { get; }
            = new DirectoryInfo(Configuration.Instance.LibationFiles).CreateSubdirectory("Pages").FullName;
        private static string BookDetailsDirectory { get; }
            = new DirectoryInfo(PagesDirectory).CreateSubdirectory("Book Details").FullName;

        /// <summary>Builds a batch name of the form "Library_yyyy-MM-dd_HH-mm-ss" from the current local time.</summary>
        public static string GetLibraryBatchName()
        {
            var timestamp = DateTime.Now.ToString("yyyy-MM-dd_HH-mm-ss");
            return "Library_" + timestamp;
        }

        /// <summary>
        /// Writes <paramref name="contents"/> into the batch's folder under the pages directory,
        /// creating the folder when needed.
        /// </summary>
        /// <returns>Full path of the file that was written.</returns>
        public static string SavePageToBatch(string contents, string batchName, string extension)
        {
            var batchDirectory = Path.Combine(PagesDirectory, batchName);
            Directory.CreateDirectory(batchDirectory);

            // tolerate extensions passed with or without dots around them
            var proposedName = batchName + '.' + extension.Trim('.');
            var proposedPath = Path.Combine(batchDirectory, proposedName);

            var targetPath = FileUtility.GetValidFilename(proposedPath);
            File.WriteAllText(targetPath, contents);
            return targetPath;
        }

        /// <summary>Lists the "*.json" files directly inside <paramref name="libDir"/>; empty list when it is null.</summary>
        public static List<FileInfo> GetJsonFiles(DirectoryInfo libDir)
        {
            if (libDir == null)
                return new List<FileInfo>();

            var jsonPaths = Directory.EnumerateFiles(libDir.FullName, "*.json");
            return jsonPaths
                .Select(path => new FileInfo(path))
                .ToList();
        }

        /// <summary>
        /// Returns the "Library_*" directory whose path sorts last, or null when there is none.
        /// </summary>
        public static DirectoryInfo GetMostRecentLibraryDir()
        {
            var newest = Directory
                .EnumerateDirectories(PagesDirectory, "Library_*")
                .OrderBy(path => path)
                .LastOrDefault();

            return string.IsNullOrWhiteSpace(newest)
                ? null
                : new DirectoryInfo(newest);
        }

        /// <summary>File info for "BookDetail-{productId}.htm" in the book-details directory.</summary>
        public static FileInfo GetBookDetailHtmFileInfo(string productId)
            => new FileInfo(Path.Combine(BookDetailsDirectory, $"BookDetail-{productId}.htm"));

        /// <summary>File info for "BookDetail-{productId}.json" in the book-details directory.</summary>
        public static FileInfo GetBookDetailJsonFileInfo(string productId)
            => new FileInfo(Path.Combine(BookDetailsDirectory, $"BookDetail-{productId}.json"));

        /// <summary>Writes <paramref name="contents"/> to the product's book-detail .htm file and returns its file info.</summary>
        public static FileInfo SaveBookDetailsToHtm(string productId, string contents)
        {
            var target = GetBookDetailHtmFileInfo(productId);
            File.WriteAllText(target.FullName, contents);
            return target;
        }
    }
}

View File

@ -6,7 +6,7 @@
<ItemGroup>
<ProjectReference Include="..\..\audible api\AudibleApi\AudibleApi\AudibleApi.csproj" />
<ProjectReference Include="..\Scraping\Scraping.csproj" />
<ProjectReference Include="..\FileManager\FileManager.csproj" />
</ItemGroup>
</Project>

View File

@ -33,7 +33,7 @@ namespace InternalUtilities
var items = await AudibleApiExtensions.GetAllLibraryItemsAsync(api);
// remove episode parents
items.RemoveAll(i => i.Episodes);
items.RemoveAll(i => i.IsEpisodes);
#region // episode handling. doesn't quite work
// // add individual/children episodes

View File

@ -1,51 +0,0 @@
using System.IO;
using AudibleDotCom;
using FileManager;
using Newtonsoft.Json;
namespace InternalUtilities
{
    /// <summary>Conversions between saved files and in-memory page sources / JSON values.</summary>
    public static partial class DataConverter
    {
        // also need: htm file => PageSource

        /// <summary>Reads a declawed htm file, restores its contents, and deserializes them.</summary>
        public static AudiblePageSource HtmFile_2_AudiblePageSource(string htmFilepath)
        {
            var declawed = File.ReadAllText(htmFilepath);
            var restored = FileUtility.RestoreDeclawed(declawed);
            return AudiblePageSource.Deserialize(restored);
        }

        /// <summary>Serializes <paramref name="value"/> as indented JSON and writes it to <paramref name="jsonFilepath"/>.</summary>
        /// <returns>File info for the written file.</returns>
        public static FileInfo Value_2_JsonFile(object value, string jsonFilepath)
        {
            File.WriteAllText(jsonFilepath, JsonConvert.SerializeObject(value, Formatting.Indented));
            return new FileInfo(jsonFilepath);
        }

        /// <summary>AudiblePageSource => declawed htm file, stored in the named batch</summary>
        /// <returns>path of htm file</returns>
        public static FileInfo AudiblePageSource_2_HtmFile_Batch(AudiblePageSource audiblePageSource, string batchName)
        {
            var declawedSource = audiblePageSource.Declawed().Serialized();
            var savedPath = WebpageStorage.SavePageToBatch(declawedSource, batchName, "htm");
            return new FileInfo(savedPath);
        }

        /// <summary>AudiblePageSource => declawed htm file. Only product-details pages are supported.</summary>
        /// <returns>path of htm file</returns>
        /// <exception cref="System.NotImplementedException">The page is not a product-details page.</exception>
        public static FileInfo AudiblePageSource_2_HtmFile_Product(AudiblePageSource audiblePageSource)
        {
            var isProductDetails = audiblePageSource.AudiblePage == AudiblePageType.ProductDetails;
            if (!isProductDetails)
                throw new System.NotImplementedException();

            var declawedSource = audiblePageSource.Declawed().Serialized();
            return WebpageStorage.SaveBookDetailsToHtm(audiblePageSource.PageId, declawedSource);
        }
    }
}

View File

@ -27,15 +27,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FileManager", "FileManager\
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DataLayer", "DataLayer\DataLayer.csproj", "{59A10DF3-63EC-43F1-A3BF-4000CFA118D2}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AudibleDotCom", "AudibleDotCom\AudibleDotCom.csproj", "{4ABB61D3-4959-4F09-883A-9EDC8CE473FB}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Scraping", "Scraping\Scraping.csproj", "{C2C89551-44FD-41E4-80D3-69AF8CE3F174}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AudibleDotComAutomation", "AudibleDotComAutomation\AudibleDotComAutomation.csproj", "{4CDE10DD-60EC-4CCA-99D1-75224A201C89}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CookieMonster", "CookieMonster\CookieMonster.csproj", "{7BD02E29-3430-4D06-88D2-5CECEE9ABD01}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ScrapingDomainServices", "ScrapingDomainServices\ScrapingDomainServices.csproj", "{393B5B27-D15C-4F77-9457-FA14BA8F3C73}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FileLiberator", "FileLiberator\FileLiberator.csproj", "{393B5B27-D15C-4F77-9457-FA14BA8F3C73}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "InternalUtilities", "InternalUtilities\InternalUtilities.csproj", "{06882742-27A6-4347-97D9-56162CEC9C11}"
EndProject
@ -107,22 +99,6 @@ Global
{59A10DF3-63EC-43F1-A3BF-4000CFA118D2}.Debug|Any CPU.Build.0 = Debug|Any CPU
{59A10DF3-63EC-43F1-A3BF-4000CFA118D2}.Release|Any CPU.ActiveCfg = Release|Any CPU
{59A10DF3-63EC-43F1-A3BF-4000CFA118D2}.Release|Any CPU.Build.0 = Release|Any CPU
{4ABB61D3-4959-4F09-883A-9EDC8CE473FB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{4ABB61D3-4959-4F09-883A-9EDC8CE473FB}.Debug|Any CPU.Build.0 = Debug|Any CPU
{4ABB61D3-4959-4F09-883A-9EDC8CE473FB}.Release|Any CPU.ActiveCfg = Release|Any CPU
{4ABB61D3-4959-4F09-883A-9EDC8CE473FB}.Release|Any CPU.Build.0 = Release|Any CPU
{C2C89551-44FD-41E4-80D3-69AF8CE3F174}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{C2C89551-44FD-41E4-80D3-69AF8CE3F174}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C2C89551-44FD-41E4-80D3-69AF8CE3F174}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C2C89551-44FD-41E4-80D3-69AF8CE3F174}.Release|Any CPU.Build.0 = Release|Any CPU
{4CDE10DD-60EC-4CCA-99D1-75224A201C89}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{4CDE10DD-60EC-4CCA-99D1-75224A201C89}.Debug|Any CPU.Build.0 = Debug|Any CPU
{4CDE10DD-60EC-4CCA-99D1-75224A201C89}.Release|Any CPU.ActiveCfg = Release|Any CPU
{4CDE10DD-60EC-4CCA-99D1-75224A201C89}.Release|Any CPU.Build.0 = Release|Any CPU
{7BD02E29-3430-4D06-88D2-5CECEE9ABD01}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{7BD02E29-3430-4D06-88D2-5CECEE9ABD01}.Debug|Any CPU.Build.0 = Debug|Any CPU
{7BD02E29-3430-4D06-88D2-5CECEE9ABD01}.Release|Any CPU.ActiveCfg = Release|Any CPU
{7BD02E29-3430-4D06-88D2-5CECEE9ABD01}.Release|Any CPU.Build.0 = Release|Any CPU
{393B5B27-D15C-4F77-9457-FA14BA8F3C73}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{393B5B27-D15C-4F77-9457-FA14BA8F3C73}.Debug|Any CPU.Build.0 = Debug|Any CPU
{393B5B27-D15C-4F77-9457-FA14BA8F3C73}.Release|Any CPU.ActiveCfg = Release|Any CPU
@ -227,10 +203,6 @@ Global
{8BD8E012-F44F-4EE2-A234-D66C14D5FE4B} = {7FBBB086-0807-4998-85BF-6D1A49C8AD05}
{1AE65B61-9C05-4C80-ABFF-48F16E22FDF1} = {7FBBB086-0807-4998-85BF-6D1A49C8AD05}
{59A10DF3-63EC-43F1-A3BF-4000CFA118D2} = {751093DD-5DBA-463E-ADBE-E05FAFB6983E}
{4ABB61D3-4959-4F09-883A-9EDC8CE473FB} = {7FBBB086-0807-4998-85BF-6D1A49C8AD05}
{C2C89551-44FD-41E4-80D3-69AF8CE3F174} = {7FBBB086-0807-4998-85BF-6D1A49C8AD05}
{4CDE10DD-60EC-4CCA-99D1-75224A201C89} = {7FBBB086-0807-4998-85BF-6D1A49C8AD05}
{7BD02E29-3430-4D06-88D2-5CECEE9ABD01} = {7FBBB086-0807-4998-85BF-6D1A49C8AD05}
{393B5B27-D15C-4F77-9457-FA14BA8F3C73} = {41CDCC73-9B81-49DD-9570-C54406E852AF}
{06882742-27A6-4347-97D9-56162CEC9C11} = {F0CBB7A7-D3FB-41FF-8F47-CF3F6A592249}
{2E1F5DB4-40CC-4804-A893-5DCE0193E598} = {41CDCC73-9B81-49DD-9570-C54406E852AF}

View File

@ -231,8 +231,8 @@ namespace LibationSearchEngine
return doc;
}
public async Task UpdateBookAsync(string productId) => await Task.Run(() => updateBook(productId));
private void updateBook(string productId)
/// <summary>Long running. Use await Task.Run(() => UpdateBook(productId))</summary>
public void UpdateBook(string productId)
{
var libraryBook = LibraryQueries.GetLibraryBook_Flat_NoTracking(productId);
var term = new Term(_ID_, productId);

View File

@ -8,13 +8,9 @@
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\..\audible api\AudibleApi\AudibleApi\AudibleApi.csproj" />
<ProjectReference Include="..\..\Dinah.Core\Dinah.Core.Drawing\Dinah.Core.Drawing.csproj" />
<ProjectReference Include="..\..\Dinah.Core\Dinah.Core.Windows.Forms\Dinah.Core.Windows.Forms.csproj" />
<ProjectReference Include="..\ApplicationService\ApplicationService.csproj" />
<ProjectReference Include="..\DtoImporterService\DtoImporterService.csproj" />
<ProjectReference Include="..\LibationSearchEngine\LibationSearchEngine.csproj" />
<ProjectReference Include="..\ScrapingDomainServices\ScrapingDomainServices.csproj" />
<ProjectReference Include="..\FileLiberator\FileLiberator.csproj" />
</ItemGroup>
<ItemGroup>

View File

@ -1,129 +0,0 @@
namespace LibationWinForm.BookLiberation
{
// Designer-generated half of NoLongerAvailableForm: a fixed-size dialog with an explanatory
// label, a read-only text box, and "Missing" / "Abort" buttons.
partial class NoLongerAvailableForm
{
/// <summary>
/// Required designer variable.
/// </summary>
private System.ComponentModel.IContainer components = null;
/// <summary>
/// Clean up any resources being used.
/// </summary>
/// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
protected override void Dispose(bool disposing)
{
if (disposing && (components != null))
{
components.Dispose();
}
base.Dispose(disposing);
}
#region Windows Form Designer generated code
/// <summary>
/// Required method for Designer support - do not modify
/// the contents of this method with the code editor.
/// </summary>
private void InitializeComponent()
{
this.label1 = new System.Windows.Forms.Label();
this.textBox1 = new System.Windows.Forms.TextBox();
this.missingBtn = new System.Windows.Forms.Button();
this.abortBtn = new System.Windows.Forms.Button();
this.label2 = new System.Windows.Forms.Label();
this.label3 = new System.Windows.Forms.Label();
this.SuspendLayout();
//
// label1
//
this.label1.AutoSize = true;
this.label1.Location = new System.Drawing.Point(12, 9);
this.label1.Name = "label1";
this.label1.Size = new System.Drawing.Size(174, 39);
this.label1.TabIndex = 0;
this.label1.Text = "Book details download failed.\r\n{0} may be no longer available.\r\nVerify the book i" +
"s still available here";
//
// textBox1
//
this.textBox1.Location = new System.Drawing.Point(15, 51);
this.textBox1.Name = "textBox1";
this.textBox1.ReadOnly = true;
this.textBox1.Size = new System.Drawing.Size(384, 20);
this.textBox1.TabIndex = 1;
//
// missingBtn
//
this.missingBtn.Location = new System.Drawing.Point(324, 77);
this.missingBtn.Name = "missingBtn";
this.missingBtn.Size = new System.Drawing.Size(75, 23);
this.missingBtn.TabIndex = 3;
this.missingBtn.Text = "Missing";
this.missingBtn.UseVisualStyleBackColor = true;
this.missingBtn.Click += new System.EventHandler(this.missingBtn_Click);
//
// abortBtn
//
this.abortBtn.Location = new System.Drawing.Point(324, 126);
this.abortBtn.Name = "abortBtn";
this.abortBtn.Size = new System.Drawing.Size(75, 23);
this.abortBtn.TabIndex = 5;
this.abortBtn.Text = "Abort";
this.abortBtn.UseVisualStyleBackColor = true;
this.abortBtn.Click += new System.EventHandler(this.abortBtn_Click);
//
// label2
//
this.label2.AutoSize = true;
this.label2.Location = new System.Drawing.Point(12, 74);
this.label2.Name = "label2";
this.label2.Size = new System.Drawing.Size(306, 26);
this.label2.TabIndex = 2;
this.label2.Text = "If the book is not available, click here to mark it as missing\r\nNo further book d" +
"etails download will be attempted for this book";
//
// label3
//
this.label3.AutoSize = true;
this.label3.Location = new System.Drawing.Point(12, 123);
this.label3.Name = "label3";
this.label3.Size = new System.Drawing.Size(204, 26);
this.label3.TabIndex = 4;
this.label3.Text = "If the book is actually available, click here\r\nto abort and try again later";
//
// NoLongerAvailableForm
//
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
this.ClientSize = new System.Drawing.Size(411, 161);
this.Controls.Add(this.label3);
this.Controls.Add(this.label2);
this.Controls.Add(this.abortBtn);
this.Controls.Add(this.missingBtn);
this.Controls.Add(this.textBox1);
this.Controls.Add(this.label1);
this.FormBorderStyle = System.Windows.Forms.FormBorderStyle.FixedDialog;
this.MaximizeBox = false;
this.MinimizeBox = false;
this.Name = "NoLongerAvailableForm";
this.ShowIcon = false;
this.ShowInTaskbar = false;
this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
this.Text = "No Longer Available";
this.ResumeLayout(false);
this.PerformLayout();
}
#endregion
// Headline message; its text contains a "{0}" composite-format placeholder.
private System.Windows.Forms.Label label1;
// Read-only text box placed directly under the headline message.
private System.Windows.Forms.TextBox textBox1;
// "Missing" button; Click is wired to missingBtn_Click.
private System.Windows.Forms.Button missingBtn;
// "Abort" button; Click is wired to abortBtn_Click.
private System.Windows.Forms.Button abortBtn;
// Explanation shown next to the "Missing" button.
private System.Windows.Forms.Label label2;
// Explanation shown next to the "Abort" button.
private System.Windows.Forms.Label label3;
}
}

View File

@ -1,28 +0,0 @@
using System;
using System.Windows.Forms;
using ScrapingDomainServices;
namespace LibationWinForm.BookLiberation
{
    /// <summary>
    /// Dialog asking the user whether a book should be marked as missing or the operation
    /// aborted. The choice is exposed through <see cref="EnumResult"/> once the form closes.
    /// </summary>
    public partial class NoLongerAvailableForm : Form
    {
        /// <summary>The option chosen by the user; set by whichever button was clicked.</summary>
        public ScrapeBookDetails.NoLongerAvailableEnum EnumResult { get; private set; }

        public NoLongerAvailableForm()
        {
            InitializeComponent();
        }

        /// <summary>Creates the dialog with the book title in the caption and message, and the url in the text box.</summary>
        public NoLongerAvailableForm(string title, string url) : this()
        {
            Text = Text + ": " + title;
            // the designer-assigned label text is a format string with a {0} placeholder
            label1.Text = string.Format(label1.Text, title);
            textBox1.Text = url;
        }

        private void missingBtn_Click(object sender, EventArgs e)
        {
            complete(ScrapeBookDetails.NoLongerAvailableEnum.MarkAsMissing);
        }

        private void abortBtn_Click(object sender, EventArgs e)
        {
            complete(ScrapeBookDetails.NoLongerAvailableEnum.Abort);
        }

        /// <summary>Records the user's choice and closes the dialog.</summary>
        private void complete(ScrapeBookDetails.NoLongerAvailableEnum nlaEnum)
        {
            EnumResult = nlaEnum;
            Close();
        }
    }
}

View File

@ -1,120 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 2.0
The primary goals of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.
Example:
... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">2.0</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
<value>[base64 mime encoded serialized .NET Framework object]</value>
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
<comment>This is a comment</comment>
</data>
There are any number of "resheader" rows that contain simple
name/value pairs.
Each data row contains a name, and value. The row also contains a
type or mimetype. Type corresponds to a .NET class that support
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.
The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:
Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="metadata">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" use="required" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
<xsd:complexType>
<xsd:attribute name="alias" type="xsd:string" />
<xsd:attribute name="name" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
</root>

View File

@ -3,7 +3,8 @@ using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using DataLayer;
using ScrapingDomainServices;
using Dinah.Core.ErrorHandling;
using FileLiberator;
namespace LibationWinForm.BookLiberation
{
@ -24,11 +25,18 @@ namespace LibationWinForm.BookLiberation
var backupBook = new BackupBook();
backupBook.Download.Completed += SetBackupCountsAsync;
backupBook.Decrypt.Completed += SetBackupCountsAsync;
await backupBook.ProcessValidateLibraryBookAsync(libraryBook);
}
await ProcessValidateLibraryBookAsync(backupBook, libraryBook);
}
// Download First Book (Download encrypted/DRM file)
async Task DownloadFirstBookAsync()
/// <summary>
/// Validates <paramref name="libraryBook"/> and, only if validation succeeds, processes it.
/// </summary>
/// <returns>The processing status, or a status containing "Validation failed".</returns>
static async Task<StatusHandler> ProcessValidateLibraryBookAsync(IProcessable processable, LibraryBook libraryBook)
{
    var isValid = await processable.ValidateAsync(libraryBook);
    if (isValid)
        return await processable.ProcessAsync(libraryBook);

    return new StatusHandler { "Validation failed" };
}
// Download First Book (Download encrypted/DRM file)
async Task DownloadFirstBookAsync()
{
var downloadBook = ProcessorAutomationController.GetWiredUpDownloadBook();
downloadBook.Completed += SetBackupCountsAsync;

View File

@ -1,6 +1,6 @@
using System;
using System.Threading.Tasks;
using ScrapingDomainServices;
using FileLiberator;
namespace LibationWinForm.BookLiberation
{
@ -39,21 +39,6 @@ namespace LibationWinForm.BookLiberation
downloadPdf.Begin += (_, __) => wireUpDownloadable(downloadPdf);
return downloadPdf;
}
/// <summary>
/// Creates a <see cref="ScrapeBookDetails"/> whose Begin event wires up the download UI and
/// whose "no longer available" decision is delegated to <see cref="noLongerAvailableUI"/>.
/// </summary>
public static ScrapeBookDetails GetWiredUpScrapeBookDetails()
{
    var scraper = new ScrapeBookDetails();

    // wired on Begin so each processed item gets its own wiring
    scraper.Begin += (sender, message) => wireUpDownloadable(scraper);
    scraper.NoLongerAvailableAction = noLongerAvailableUI;

    return scraper;
}
/// <summary>
/// Shows <see cref="NoLongerAvailableForm"/> modally and returns the option the user picked.
/// </summary>
/// <param name="title">Book title shown in the dialog caption and message.</param>
/// <param name="url">Url shown in the dialog's text box.</param>
/// <returns>The value of the form's EnumResult after the dialog closes.</returns>
static ScrapeBookDetails.NoLongerAvailableEnum noLongerAvailableUI(string title, string url)
{
    // a form shown with ShowDialog() is only hidden when closed, not disposed;
    // dispose it explicitly so its window handle and controls are released
    using var nla = new NoLongerAvailableForm(title, url);
    nla.ShowDialog();
    return nla.EnumResult;
}
// subscribed to Begin event because a new form should be created+processed+closed on each iteration
private static void wireUpDownloadable(IDownloadable downloadable)

View File

@ -7,7 +7,7 @@ using System.Windows.Forms;
namespace LibationWinForm
{
public interface IRunnableDialog : IValidatable
public interface IRunnableDialog
{
IButtonControl AcceptButton { get; set; }
Control.ControlCollection Controls { get; }

View File

@ -36,26 +36,6 @@ namespace LibationWinForm
public static async Task Run(this IRunnableDialog dialog)
{
// validate children
// OfType<T>() -- skips items which aren't of the required type
// Cast<T>() -- throws an exception
var errorStrings = dialog
// get children
.Controls
.GetControlListRecursive()
.OfType<IValidatable>()
// and self
.Append(dialog)
// validate. get errors
.Select(c => c.StringBasedValidate())
// ignore successes
.Where(e => e != null);
if (errorStrings.Any())
{
MessageBox.Show(errorStrings.Aggregate((a, b) => a + "\r\n" + b));
return;
}
// get top level controls only. If Enabled, disable and push on stack
var disabledStack = disable(dialog);

View File

@ -1,15 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace LibationWinForm
{
/// <summary>
/// Lightweight, string-based validation contract for dialogs and controls.
/// </summary>
public interface IValidatable
{
// WinForms has a built-in framework for this (ValidateChildren and ErrorProviders).
// It is not set up here; this simpler string-based check is used instead.
/// <summary>Validates the implementer's current state.</summary>
/// <returns>
/// An error message. Callers in this code base treat a null result as success.
/// </returns>
string StringBasedValidate();
}
}

View File

@ -1,78 +0,0 @@
namespace LibationWinForm
{
// Designer-generated half of ScanLibraryDialog: a WebsiteProcessorControl filling the
// dialog with a full-width "BEGIN SCAN" button underneath it.
partial class ScanLibraryDialog
{
/// <summary>
/// Required designer variable.
/// </summary>
private System.ComponentModel.IContainer components = null;
/// <summary>
/// Clean up any resources being used.
/// </summary>
/// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
protected override void Dispose(bool disposing)
{
if (disposing && (components != null))
{
components.Dispose();
}
base.Dispose(disposing);
}
#region Windows Form Designer generated code
/// <summary>
/// Required method for Designer support - do not modify
/// the contents of this method with the code editor.
/// </summary>
private void InitializeComponent()
{
this.websiteProcessorControl1 = new LibationWinForm.WebsiteProcessorControl();
this.BeginScanBtn = new System.Windows.Forms.Button();
this.SuspendLayout();
//
// websiteProcessorControl1
//
this.websiteProcessorControl1.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.websiteProcessorControl1.Location = new System.Drawing.Point(12, 12);
this.websiteProcessorControl1.Name = "websiteProcessorControl1";
this.websiteProcessorControl1.Size = new System.Drawing.Size(324, 137);
this.websiteProcessorControl1.TabIndex = 0;
//
// BeginScanBtn
//
this.BeginScanBtn.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.BeginScanBtn.Location = new System.Drawing.Point(12, 155);
this.BeginScanBtn.Name = "BeginScanBtn";
this.BeginScanBtn.Size = new System.Drawing.Size(324, 23);
this.BeginScanBtn.TabIndex = 1;
this.BeginScanBtn.Text = "BEGIN SCAN";
this.BeginScanBtn.UseVisualStyleBackColor = true;
//
// ScanLibraryDialog
//
this.AcceptButton = this.BeginScanBtn;
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
this.ClientSize = new System.Drawing.Size(348, 190);
this.Controls.Add(this.BeginScanBtn);
this.Controls.Add(this.websiteProcessorControl1);
this.Name = "ScanLibraryDialog";
this.ShowIcon = false;
this.ShowInTaskbar = false;
this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
this.Text = "Scan Library";
this.ResumeLayout(false);
}
#endregion
// Hosted control, anchored to all four sides of the dialog.
private WebsiteProcessorControl websiteProcessorControl1;
// The dialog's AcceptButton. No Click handler is attached in this designer file.
private System.Windows.Forms.Button BeginScanBtn;
}
}

View File

@ -1,41 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
using System.Windows.Forms;
using Dinah.Core;
using ScrapingDomainServices;
namespace LibationWinForm
{
    /// <summary>
    /// Dialog that downloads the library pages, indexes them, and reports how many books
    /// were processed and how many of them were new.
    /// </summary>
    public partial class ScanLibraryDialog : Form, IIndexLibraryDialog
    {
        public ScanLibraryDialog()
        {
            InitializeComponent();
        }

        /// <summary>This dialog has nothing to validate, so it always reports success (null).</summary>
        public string StringBasedValidate()
        {
            return null;
        }

        // one entry per completed step, in the order the steps finished
        List<string> successMessages { get; } = new List<string>();

        /// <summary>All success messages joined with CRLF line breaks.</summary>
        public string SuccessMessage
        {
            get { return string.Join("\r\n", successMessages); }
        }

        public int NewBooksAdded { get; private set; }
        public int TotalBooksProcessed { get; private set; }

        /// <summary>
        /// Downloads the library pages with the hosted control's page retriever, indexes the
        /// resulting json files, and records the totals and success messages.
        /// </summary>
        public async Task DoMainWorkAsync()
        {
            using (var pageRetriever = websiteProcessorControl1.GetPageRetriever())
            {
                var jsonFilepaths = await DownloadLibrary
                    .DownloadLibraryAsync(pageRetriever)
                    .ConfigureAwait(false);
                successMessages.Add($"Downloaded {"library page".PluralizeWithCount(jsonFilepaths.Count)}");

                var indexResult = await Indexer
                    .IndexLibraryAsync(jsonFilepaths)
                    .ConfigureAwait(false);
                (TotalBooksProcessed, NewBooksAdded) = indexResult;

                successMessages.Add($"Total processed: {TotalBooksProcessed}");
                successMessages.Add($"New: {NewBooksAdded}");
            }
        }
    }
}

View File

@ -1,120 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 2.0
The primary goals of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.
Example:
... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">2.0</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
<value>[base64 mime encoded serialized .NET Framework object]</value>
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
<comment>This is a comment</comment>
</data>
There are any number of "resheader" rows that contain simple
name/value pairs.
Each data row contains a name, and value. The row also contains a
type or mimetype. Type corresponds to a .NET class that supports
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.
The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:
Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="metadata">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" use="required" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
<xsd:complexType>
<xsd:attribute name="alias" type="xsd:string" />
<xsd:attribute name="name" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
</root>

View File

@ -1,161 +0,0 @@
namespace LibationWinForm
{
/// <summary>
/// Designer-generated half of <see cref="WebsiteProcessorControl"/>: builds the
/// "Authentication" group box with three radio buttons (browserless, manual login,
/// canonical Chrome) and the username/password text boxes.
/// </summary>
partial class WebsiteProcessorControl
{
/// <summary>
/// Required designer variable.
/// </summary>
private System.ComponentModel.IContainer components = null;
/// <summary>
/// Clean up any resources being used.
/// </summary>
/// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
protected override void Dispose(bool disposing)
{
// components is never assigned in this file, so it is only disposed if something else sets it
if (disposing && (components != null))
{
components.Dispose();
}
base.Dispose(disposing);
}
#region Component Designer generated code
/// <summary>
/// Required method for Designer support - do not modify
/// the contents of this method with the code editor.
/// </summary>
private void InitializeComponent()
{
this.AuthGb = new System.Windows.Forms.GroupBox();
this.AuthRb_Browserless = new System.Windows.Forms.RadioButton();
this.AuthRb_UseCanonicalChrome = new System.Windows.Forms.RadioButton();
this.label3 = new System.Windows.Forms.Label();
this.AuthRb_ManualLogin = new System.Windows.Forms.RadioButton();
this.label2 = new System.Windows.Forms.Label();
this.PasswordTb = new System.Windows.Forms.TextBox();
this.UsernameTb = new System.Windows.Forms.TextBox();
this.AuthGb.SuspendLayout();
this.SuspendLayout();
//
// AuthGb
//
// anchored to all four edges so the group box resizes with the control
this.AuthGb.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.AuthGb.Controls.Add(this.AuthRb_Browserless);
this.AuthGb.Controls.Add(this.AuthRb_UseCanonicalChrome);
this.AuthGb.Controls.Add(this.label3);
this.AuthGb.Controls.Add(this.AuthRb_ManualLogin);
this.AuthGb.Controls.Add(this.label2);
this.AuthGb.Controls.Add(this.PasswordTb);
this.AuthGb.Controls.Add(this.UsernameTb);
this.AuthGb.Location = new System.Drawing.Point(0, 0);
this.AuthGb.Name = "AuthGb";
this.AuthGb.Size = new System.Drawing.Size(324, 137);
this.AuthGb.TabIndex = 1;
this.AuthGb.TabStop = false;
this.AuthGb.Text = "Authentication";
//
// AuthRb_Browserless
//
this.AuthRb_Browserless.AutoSize = true;
// browserless is the option selected by default
this.AuthRb_Browserless.Checked = true;
this.AuthRb_Browserless.Location = new System.Drawing.Point(6, 19);
this.AuthRb_Browserless.Name = "AuthRb_Browserless";
this.AuthRb_Browserless.Size = new System.Drawing.Size(143, 17);
this.AuthRb_Browserless.TabIndex = 0;
this.AuthRb_Browserless.TabStop = true;
this.AuthRb_Browserless.Text = "Browserless with cookies";
this.AuthRb_Browserless.UseVisualStyleBackColor = true;
//
// AuthRb_UseCanonicalChrome
//
this.AuthRb_UseCanonicalChrome.AutoSize = true;
this.AuthRb_UseCanonicalChrome.Location = new System.Drawing.Point(6, 114);
this.AuthRb_UseCanonicalChrome.Name = "AuthRb_UseCanonicalChrome";
this.AuthRb_UseCanonicalChrome.Size = new System.Drawing.Size(216, 17);
this.AuthRb_UseCanonicalChrome.TabIndex = 6;
this.AuthRb_UseCanonicalChrome.Text = "Use Canonical Chrome. SEE WARNING";
this.AuthRb_UseCanonicalChrome.UseVisualStyleBackColor = true;
// the handler lives in the code-behind half of this partial class
this.AuthRb_UseCanonicalChrome.CheckedChanged += new System.EventHandler(this.AuthRb_UseCanonicalChrome_CheckedChanged);
//
// label3
//
this.label3.AutoSize = true;
this.label3.Location = new System.Drawing.Point(27, 91);
this.label3.Name = "label3";
this.label3.Size = new System.Drawing.Size(53, 13);
this.label3.TabIndex = 4;
this.label3.Text = "Password";
//
// AuthRb_ManualLogin
//
this.AuthRb_ManualLogin.AutoSize = true;
this.AuthRb_ManualLogin.Location = new System.Drawing.Point(6, 42);
this.AuthRb_ManualLogin.Name = "AuthRb_ManualLogin";
this.AuthRb_ManualLogin.Size = new System.Drawing.Size(89, 17);
this.AuthRb_ManualLogin.TabIndex = 1;
this.AuthRb_ManualLogin.Text = "Manual Login";
this.AuthRb_ManualLogin.UseVisualStyleBackColor = true;
//
// label2
//
this.label2.AutoSize = true;
this.label2.Location = new System.Drawing.Point(27, 65);
this.label2.Name = "label2";
this.label2.Size = new System.Drawing.Size(85, 13);
this.label2.TabIndex = 2;
this.label2.Text = "Username/Email";
//
// PasswordTb
//
this.PasswordTb.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.PasswordTb.Location = new System.Drawing.Point(118, 88);
this.PasswordTb.Name = "PasswordTb";
// typed characters are displayed as '*'
this.PasswordTb.PasswordChar = '*';
this.PasswordTb.Size = new System.Drawing.Size(200, 20);
this.PasswordTb.TabIndex = 5;
// typing in or clicking the box routes to UserIsEnteringLoginInfo; key presses go to UsernamePasswordTb_KeyPress
this.PasswordTb.TextChanged += new System.EventHandler(this.UserIsEnteringLoginInfo);
this.PasswordTb.KeyPress += new System.Windows.Forms.KeyPressEventHandler(this.UsernamePasswordTb_KeyPress);
this.PasswordTb.MouseUp += new System.Windows.Forms.MouseEventHandler(this.UserIsEnteringLoginInfo);
//
// UsernameTb
//
this.UsernameTb.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.UsernameTb.Location = new System.Drawing.Point(118, 62);
this.UsernameTb.Name = "UsernameTb";
this.UsernameTb.Size = new System.Drawing.Size(200, 20);
this.UsernameTb.TabIndex = 3;
// same three handlers as PasswordTb
this.UsernameTb.TextChanged += new System.EventHandler(this.UserIsEnteringLoginInfo);
this.UsernameTb.KeyPress += new System.Windows.Forms.KeyPressEventHandler(this.UsernamePasswordTb_KeyPress);
this.UsernameTb.MouseUp += new System.Windows.Forms.MouseEventHandler(this.UserIsEnteringLoginInfo);
//
// WebsiteProcessorControl
//
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
this.Controls.Add(this.AuthGb);
this.Name = "WebsiteProcessorControl";
this.Size = new System.Drawing.Size(324, 137);
this.AuthGb.ResumeLayout(false);
this.AuthGb.PerformLayout();
this.ResumeLayout(false);
}
#endregion
// child controls created in InitializeComponent
private System.Windows.Forms.GroupBox AuthGb;
private System.Windows.Forms.RadioButton AuthRb_UseCanonicalChrome;
private System.Windows.Forms.Label label3;
private System.Windows.Forms.RadioButton AuthRb_ManualLogin;
private System.Windows.Forms.Label label2;
private System.Windows.Forms.TextBox PasswordTb;
private System.Windows.Forms.TextBox UsernameTb;
private System.Windows.Forms.RadioButton AuthRb_Browserless;
}
}

View File

@ -1,47 +0,0 @@
using System;
using System.Windows.Forms;
using AudibleDotComAutomation;
namespace LibationWinForm
{
    /// <summary>
    /// User control that lets the user pick how Audible pages are retrieved
    /// (browserless, manual login with credentials, or canonical Chrome) and
    /// builds the matching <see cref="IPageRetriever"/>.
    /// </summary>
    public partial class WebsiteProcessorControl : UserControl, IValidatable
    {
        /// <summary>Raised when Return is pressed in the username or password box.</summary>
        public event EventHandler<KeyPressEventArgs> KeyPressSubmit;

        public WebsiteProcessorControl()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Creates the retriever for the selected authentication option.
        /// Canonical Chrome is checked first, then browserless; anything else
        /// falls back to a manual login with the entered credentials.
        /// </summary>
        public IPageRetriever GetPageRetriever()
        {
            if (AuthRb_UseCanonicalChrome.Checked)
                return new UserDataSeleniumRetriever();

            if (AuthRb_Browserless.Checked)
                return (IPageRetriever)new BrowserlessRetriever();

            return new ManualLoginSeleniumRetriever(UsernameTb.Text, PasswordTb.Text);
        }

        /// <summary>
        /// Returns an error message when manual login is selected but the
        /// username or password is blank; otherwise returns null.
        /// </summary>
        public string StringBasedValidate()
        {
            if (!AuthRb_ManualLogin.Checked)
                return null;

            var credentialsMissing =
                string.IsNullOrWhiteSpace(UsernameTb.Text)
                || string.IsNullOrWhiteSpace(PasswordTb.Text);

            return credentialsMissing ? "must fill in username and password" : null;
        }

        private void UsernamePasswordTb_KeyPress(object sender, KeyPressEventArgs e)
        {
            // only the Return key is treated as "submit"
            if (e.KeyChar != (char)Keys.Return)
                return;

            KeyPressSubmit?.Invoke(sender, e);

            // suppress default handling of the key press
            e.Handled = true;
        }

        // any interaction with the credential boxes selects the manual-login option
        private void UserIsEnteringLoginInfo(object sender, EventArgs e)
        {
            AuthRb_ManualLogin.Checked = true;
        }

        private void AuthRb_UseCanonicalChrome_CheckedChanged(object sender, EventArgs e)
        {
            // warn only when the option becomes checked, not when it is cleared
            if (!AuthRb_UseCanonicalChrome.Checked)
                return;

            MessageBox.Show(@"A canonical version of Chrome will be used including User Data, cookies. etc. Selenium chromedriver won't launch URL if another Chrome instance is open");
        }
    }
}

View File

@ -1,120 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 2.0
The primary goal of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.
Example:
... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">2.0</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
<value>[base64 mime encoded serialized .NET Framework object]</value>
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
<comment>This is a comment</comment>
</data>
There are any number of "resheader" rows that contain simple
name/value pairs.
Each data row contains a name, and value. The row also contains a
type or mimetype. Type corresponds to a .NET class that supports
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.
The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:
Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="metadata">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" use="required" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
<xsd:complexType>
<xsd:attribute name="alias" type="xsd:string" />
<xsd:attribute name="name" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
</root>

View File

@ -2,7 +2,7 @@
using System.Collections.Generic;
using System.Threading.Tasks;
using System.Windows.Forms;
using ApplicationService;
using ApplicationServices;
namespace LibationWinForm
{
@ -22,8 +22,6 @@ namespace LibationWinForm
this.Shown += (_, __) => AcceptButton.PerformClick();
}
public string StringBasedValidate() => null;
List<string> successMessages { get; } = new List<string>();
public string SuccessMessage => string.Join("\r\n", successMessages);
@ -33,8 +31,8 @@ namespace LibationWinForm
public async Task DoMainWorkAsync()
{
var callback = new Login.WinformResponder();
var refresher = new LibraryIndexer();
(TotalBooksProcessed, NewBooksAdded) = await refresher.IndexAsync(callback);
var indexer = new LibraryIndexer();
(TotalBooksProcessed, NewBooksAdded) = await indexer.IndexAsync(callback);
successMessages.Add($"Total processed: {TotalBooksProcessed}");
successMessages.Add($"New: {NewBooksAdded}");

View File

@ -34,10 +34,8 @@
this.filterBtn = new System.Windows.Forms.Button();
this.filterSearchTb = new System.Windows.Forms.TextBox();
this.menuStrip1 = new System.Windows.Forms.MenuStrip();
this.indexToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
this.importToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
this.scanLibraryToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
this.reimportMostRecentLibraryScanToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
this.beginImportingBookDetailsToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
this.liberateToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
this.beginBookBackupsToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
this.beginPdfBackupsToolStripMenuItem = new System.Windows.Forms.ToolStripMenuItem();
@ -100,7 +98,7 @@
// menuStrip1
//
this.menuStrip1.Items.AddRange(new System.Windows.Forms.ToolStripItem[] {
this.indexToolStripMenuItem,
this.importToolStripMenuItem,
this.liberateToolStripMenuItem,
this.quickFiltersToolStripMenuItem,
this.settingsToolStripMenuItem});
@ -110,38 +108,21 @@
this.menuStrip1.TabIndex = 0;
this.menuStrip1.Text = "menuStrip1";
//
// indexToolStripMenuItem
// importToolStripMenuItem
//
this.indexToolStripMenuItem.DropDownItems.AddRange(new System.Windows.Forms.ToolStripItem[] {
this.scanLibraryToolStripMenuItem,
this.reimportMostRecentLibraryScanToolStripMenuItem,
this.beginImportingBookDetailsToolStripMenuItem});
this.indexToolStripMenuItem.Name = "indexToolStripMenuItem";
this.indexToolStripMenuItem.Size = new System.Drawing.Size(47, 20);
this.indexToolStripMenuItem.Text = "&Index";
this.indexToolStripMenuItem.DropDownOpening += new System.EventHandler(this.indexToolStripMenuItem_DropDownOpening);
this.importToolStripMenuItem.DropDownItems.AddRange(new System.Windows.Forms.ToolStripItem[] {
this.scanLibraryToolStripMenuItem});
this.importToolStripMenuItem.Name = "importToolStripMenuItem";
this.importToolStripMenuItem.Size = new System.Drawing.Size(47, 20);
this.importToolStripMenuItem.Text = "&Import";
//
// scanLibraryToolStripMenuItem
//
this.scanLibraryToolStripMenuItem.Name = "scanLibraryToolStripMenuItem";
this.scanLibraryToolStripMenuItem.Size = new System.Drawing.Size(277, 22);
this.scanLibraryToolStripMenuItem.Text = "Scan &Library...";
this.scanLibraryToolStripMenuItem.Text = "Scan &Library";
this.scanLibraryToolStripMenuItem.Click += new System.EventHandler(this.scanLibraryToolStripMenuItem_Click);
//
// reimportMostRecentLibraryScanToolStripMenuItem
//
this.reimportMostRecentLibraryScanToolStripMenuItem.Name = "reimportMostRecentLibraryScanToolStripMenuItem";
this.reimportMostRecentLibraryScanToolStripMenuItem.Size = new System.Drawing.Size(277, 22);
this.reimportMostRecentLibraryScanToolStripMenuItem.Text = "Re-&import most recent library scan: {0}";
this.reimportMostRecentLibraryScanToolStripMenuItem.Click += new System.EventHandler(this.reimportMostRecentLibraryScanToolStripMenuItem_Click);
//
// beginImportingBookDetailsToolStripMenuItem
//
this.beginImportingBookDetailsToolStripMenuItem.Name = "beginImportingBookDetailsToolStripMenuItem";
this.beginImportingBookDetailsToolStripMenuItem.Size = new System.Drawing.Size(277, 22);
this.beginImportingBookDetailsToolStripMenuItem.Text = "Begin importing book details: {0}";
this.beginImportingBookDetailsToolStripMenuItem.Click += new System.EventHandler(this.beginImportingBookDetailsToolStripMenuItem_Click);
//
// liberateToolStripMenuItem
//
this.liberateToolStripMenuItem.DropDownItems.AddRange(new System.Windows.Forms.ToolStripItem[] {
@ -277,7 +258,7 @@
#endregion
private System.Windows.Forms.Panel gridPanel;
private System.Windows.Forms.MenuStrip menuStrip1;
private System.Windows.Forms.ToolStripMenuItem indexToolStripMenuItem;
private System.Windows.Forms.ToolStripMenuItem importToolStripMenuItem;
private System.Windows.Forms.StatusStrip statusStrip1;
private System.Windows.Forms.ToolStripStatusLabel springLbl;
private System.Windows.Forms.ToolStripStatusLabel visibleCountLbl;
@ -291,8 +272,6 @@
private System.Windows.Forms.Button filterHelpBtn;
private System.Windows.Forms.ToolStripMenuItem settingsToolStripMenuItem;
private System.Windows.Forms.ToolStripMenuItem scanLibraryToolStripMenuItem;
private System.Windows.Forms.ToolStripMenuItem reimportMostRecentLibraryScanToolStripMenuItem;
private System.Windows.Forms.ToolStripMenuItem beginImportingBookDetailsToolStripMenuItem;
private System.Windows.Forms.ToolStripMenuItem quickFiltersToolStripMenuItem;
private System.Windows.Forms.ToolStripMenuItem firstFilterIsDefaultToolStripMenuItem;
private System.Windows.Forms.Button addFilterBtn;

View File

@ -8,7 +8,6 @@ using Dinah.Core;
using Dinah.Core.Collections.Generic;
using Dinah.Core.Windows.Forms;
using FileManager;
using ScrapingDomainServices;
namespace LibationWinForm
{
@ -21,9 +20,6 @@ namespace LibationWinForm
private string pdfsCountsLbl_Format { get; }
private string visibleCountLbl_Format { get; }
private string reimportMostRecentLibraryScanToolStripMenuItem_format { get; }
private string beginImportingBookDetailsToolStripMenuItem_format { get; }
private string beginBookBackupsToolStripMenuItem_format { get; }
private string beginPdfBackupsToolStripMenuItem_format { get; }
@ -36,9 +32,6 @@ namespace LibationWinForm
pdfsCountsLbl_Format = pdfsCountsLbl.Text;
visibleCountLbl_Format = visibleCountLbl.Text;
reimportMostRecentLibraryScanToolStripMenuItem_format = reimportMostRecentLibraryScanToolStripMenuItem.Text;
beginImportingBookDetailsToolStripMenuItem_format = beginImportingBookDetailsToolStripMenuItem.Text;
beginBookBackupsToolStripMenuItem_format = beginBookBackupsToolStripMenuItem.Text;
beginPdfBackupsToolStripMenuItem_format = beginPdfBackupsToolStripMenuItem.Text;
}
@ -258,150 +251,22 @@ namespace LibationWinForm
doFilter();
}
}
#endregion
#region index menu
//
// IMPORTANT
//
// IRunnableDialog.Run() extension method contains work flow
//
#region // example code: chaining multiple dialogs
/// <summary>
/// Example-only stub of an <see cref="IRunnableDialog"/>. Every member except
/// <see cref="Files"/> throws <see cref="NotImplementedException"/>.
/// </summary>
public class MyDialog1 : IRunnableDialog
{
    public IEnumerable<string> Files;

    public IButtonControl AcceptButton
    {
        get { throw new NotImplementedException(); }
        set { throw new NotImplementedException(); }
    }

    public Control.ControlCollection Controls
    {
        get { throw new NotImplementedException(); }
    }

    public string SuccessMessage
    {
        get { throw new NotImplementedException(); }
    }

    public DialogResult DialogResult
    {
        get { throw new NotImplementedException(); }
        set { throw new NotImplementedException(); }
    }

    public void Close()
    {
        throw new NotImplementedException();
    }

    public Task DoMainWorkAsync()
    {
        throw new NotImplementedException();
    }

    public DialogResult ShowDialog()
    {
        throw new NotImplementedException();
    }

    public string StringBasedValidate()
    {
        throw new NotImplementedException();
    }
}
/// <summary>
/// Example-only stub of an <see cref="IIndexLibraryDialog"/> form.
/// <see cref="Begin"/> clicks an internal button; every other interface
/// member throws <see cref="NotImplementedException"/>.
/// </summary>
public class MyDialog2 : Form, IIndexLibraryDialog
{
    Button BeginFileImportBtn = new Button();

    // the file list is accepted but not used by this stub
    public MyDialog2(IEnumerable<string> files)
    {
    }

    public void Begin()
    {
        BeginFileImportBtn.PerformClick();
    }

    public int TotalBooksProcessed
    {
        get { throw new NotImplementedException(); }
    }

    public int NewBooksAdded
    {
        get { throw new NotImplementedException(); }
    }

    public string SuccessMessage
    {
        get { throw new NotImplementedException(); }
    }

    public Task DoMainWorkAsync()
    {
        throw new NotImplementedException();
    }

    public string StringBasedValidate()
    {
        throw new NotImplementedException();
    }
}
/// <summary>
/// Example of chaining two dialogs: run the first, and if it succeeded and
/// produced files, ask for confirmation before indexing them with the second.
/// </summary>
private async void downloadPagesToFile(object sender, EventArgs e)
{
    var filesDialog = new MyDialog1();

    // stop unless the first dialog completed OK...
    if (filesDialog.RunDialog() != DialogResult.OK)
        return;

    // ...and actually produced at least one file
    if (!filesDialog.Files.Any())
        return;

    var answer = MessageBox.Show("Index from these files?", "Index?", MessageBoxButtons.YesNo);
    if (answer != DialogResult.Yes)
        return;

    var importDialog = new MyDialog2(filesDialog.Files);
    // kick off the import as soon as the second dialog is shown
    importDialog.Shown += (_, __) => importDialog.Begin();
    await indexDialog(importDialog);
}
#endregion
/// <summary>
/// Refreshes the enabled state and captions of the "Re-import most recent
/// library scan" and "Begin importing book details" items each time the
/// index menu is about to open.
/// </summary>
private void indexToolStripMenuItem_DropDownOpening(object sender, EventArgs e)
{
    // Describes how long ago the scan directory was created.
    string describeAge(DateTime created)
    {
        var now = DateTime.Now;
        var elapsed = now - created;

        // less than 1 min
        if ((int)elapsed.TotalSeconds < 60)
            return $"{(int)elapsed.TotalSeconds} sec ago";

        // less than 1 hr
        if ((int)elapsed.TotalMinutes < 60)
            return $"{(int)elapsed.TotalMinutes} min ago";

        // today. eg: 4:25 PM
        if (now.Date == created.Date)
            return created.ToString("h:mm tt");

        // else date and time
        return created.ToString("MM/dd/yyyy h:mm tt");
    }

    // label: Re-import most recent library scan
    var libDir = WebpageStorage.GetMostRecentLibraryDir();
    var scanMenuItem = reimportMostRecentLibraryScanToolStripMenuItem;
    if (libDir == null)
    {
        scanMenuItem.Enabled = false;
        scanMenuItem.Text = string.Format(reimportMostRecentLibraryScanToolStripMenuItem_format, "No previous scans");
    }
    else
    {
        scanMenuItem.Enabled = true;
        scanMenuItem.Text = string.Format(reimportMostRecentLibraryScanToolStripMenuItem_format, describeAge(libDir.CreationTime));
    }

    // label: Begin importing book details
    var noDetails = BookQueries.BooksWithoutDetailsCount();
    var detailsMenuItem = beginImportingBookDetailsToolStripMenuItem;
    var detailsCaption = noDetails == 0
        ? "No books without details"
        : $"{noDetails} remaining";
    detailsMenuItem.Enabled = !(noDetails == 0);
    detailsMenuItem.Text = string.Format(beginImportingBookDetailsToolStripMenuItem_format, detailsCaption);
}
#endregion
#region index menu
private async void scanLibraryToolStripMenuItem_Click(object sender, EventArgs e)
{
// legacy/scraping method
//await indexDialog(new ScanLibraryDialog());
// new/api method
await indexDialog(new IndexLibraryDialog());
}
var dialog = new IndexLibraryDialog();
private async void reimportMostRecentLibraryScanToolStripMenuItem_Click(object sender, EventArgs e)
{
// DO NOT ConfigureAwait(false)
// this would result in index() => reloadGrid() => setGrid() => "gridPanel.Controls.Remove(currProductsGrid);"
// throwing 'Cross-thread operation not valid: Control 'ProductsGrid' accessed from a thread other than the thread it was created on.'
var (TotalBooksProcessed, NewBooksAdded) = await Indexer.IndexLibraryAsync(WebpageStorage.GetMostRecentLibraryDir());
if (dialog.RunDialog().In(DialogResult.Abort, DialogResult.Cancel, DialogResult.None))
return;
MessageBox.Show($"Total processed: {TotalBooksProcessed}\r\nNew: {NewBooksAdded}");
await indexComplete(TotalBooksProcessed, NewBooksAdded);
}
private async Task indexDialog(IIndexLibraryDialog dialog)
{
if (!dialog.RunDialog().In(DialogResult.Abort, DialogResult.Cancel, DialogResult.None))
await indexComplete(dialog.TotalBooksProcessed, dialog.NewBooksAdded);
}
private async Task indexComplete(int totalBooksProcessed, int newBooksAdded)
{
// update backup counts if we have new library items
if (newBooksAdded > 0)
if (dialog.NewBooksAdded > 0)
await setBackupCountsAsync();
// skip reload if:
// - no grid is loaded
// - none indexed
if (currProductsGrid == null || totalBooksProcessed == 0)
return;
reloadGrid();
}
private void updateGridRow(object _, string productId) => currProductsGrid?.UpdateRow(productId);
private async void beginImportingBookDetailsToolStripMenuItem_Click(object sender, EventArgs e)
{
var scrapeBookDetails = BookLiberation.ProcessorAutomationController.GetWiredUpScrapeBookDetails();
scrapeBookDetails.BookSuccessfullyImported += updateGridRow;
await BookLiberation.ProcessorAutomationController.RunAutomaticDownload(scrapeBookDetails);
if (dialog.TotalBooksProcessed > 0)
reloadGrid();
}
#endregion

View File

@ -208,7 +208,7 @@ namespace LibationWinForm
{
book.UserDefinedItem.Tags = newTags;
var qtyChanges = ScrapingDomainServices.Indexer.IndexChangedTags(book);
var qtyChanges = ApplicationServices.TagUpdater.IndexChangedTags(book);
return qtyChanges;
}

View File

@ -1,5 +1,7 @@
-- begin LEGACY CODE ---------------------------------------------------------------------------------------------------------------------
-- end LEGACY CODE ---------------------------------------------------------------------------------------------------------------------
-- begin VERSIONING ---------------------------------------------------------------------------------------------------------------------
https://github.com/rmcrackan/Libation/releases
v3.0 : This version is fully powered by the Audible API. Legacy scraping code is still present but is commented out. Future check-ins are not guaranteed to contain any scraping code.
-- end VERSIONING ---------------------------------------------------------------------------------------------------------------------
-- begin AUDIBLE DETAILS ---------------------------------------------------------------------------------------------------------------------
alternate book id (eg BK_RAND_006061) is called 'sku' , 'sku_lite' , 'prod_id' , 'product_id' in different parts of the site

View File

@ -1,11 +0,0 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netstandard2.1</TargetFramework>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\AudibleDotCom\AudibleDotCom.csproj" />
</ItemGroup>
</Project>

View File

@ -1,47 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using AudibleDotCom;
using Dinah.Core;
using DTOs;
using Scraping.BookDetail;
using Scraping.Library;
namespace Scraping
{
    /// <summary>
    /// Entry points that turn downloaded Audible page sources into DTOs by
    /// delegating to <see cref="LibraryScraper"/> and <see cref="BookDetailScraper"/>.
    /// </summary>
    public static class AudibleScraper
    {
        /// <summary>
        /// Scrapes every supplied library page and returns the combined results.
        /// </summary>
        /// <param name="pageSources">Library page sources; may be null or empty.</param>
        /// <returns>All scraped library entries; an empty list when nothing was supplied.</returns>
        /// <exception cref="ArgumentException">Any supplied page is not a Library page.</exception>
        public static List<LibraryDTO> ScrapeLibrarySources(params AudiblePageSource[] pageSources)
        {
            if (pageSources == null || !pageSources.Any())
                return new List<LibraryDTO>();

            // Check each page individually. The previous Distinct().Single() form threw
            // InvalidOperationException ("Sequence contains more than one element") for a
            // mix of page types instead of reporting the intended error below.
            if (pageSources.Any(ps => ps.AudiblePage != AudiblePageType.Library))
                throw new ArgumentException("only Library items allowed");

            return pageSources.SelectMany(s => scrapeLibraryPageSource(s)).ToList();
        }

        private static List<LibraryDTO> scrapeLibraryPageSource(AudiblePageSource pageSource)
            => new LibraryScraper(pageSource)
                .ScrapeCurrentPage()
                // ScrapeCurrentPage() is long running. do not taunt delayed execution
                .ToList();

        /// <summary>
        /// Scrapes a single product-details page.
        /// </summary>
        /// <param name="pageSource">Product-details page source; must not be null.</param>
        /// <returns>The scraped book details.</returns>
        /// <exception cref="Exception">The page is not a ProductDetails page.</exception>
        public static BookDetailDTO ScrapeBookDetailsSource(AudiblePageSource pageSource)
        {
            ArgumentValidator.EnsureNotNull(pageSource, nameof(pageSource));

            if (pageSource.AudiblePage != AudiblePageType.ProductDetails)
                throw new Exception("only Product Details items allowed");

            // Scraper exceptions propagate unchanged; the former catch block only
            // rethrew and left an unused exception variable behind.
            return new BookDetailScraper(pageSource).ScrapePage();
        }
    }
}

View File

@ -1,175 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using AudibleDotCom;
using Dinah.Core;
using DTOs;
using Newtonsoft.Json.Linq;
using Scraping.Selectors;
namespace Scraping.BookDetail
{
/// <summary>Helper extensions for reading values out of Json.NET tokens.</summary>
static class NewtonsoftExt
{
    /// <summary>
    /// Converts the token to a string, trims surrounding whitespace, and
    /// HTML-decodes the result.
    /// </summary>
    public static string GetDecodedTokenString(this JToken jToken)
    {
        var trimmed = ((string)jToken).Trim();
        return System.Net.WebUtility.HtmlDecode(trimmed);
    }
}
internal class BookDetailScraper
{
private AudiblePageSource source { get; }
private WebElement docRoot { get; }
/// <summary>
/// Keeps the page source and parses its HTML so the document root is ready
/// for scraping.
/// </summary>
public BookDetailScraper(AudiblePageSource pageSource)
{
    source = pageSource;

    var htmlDocument = new HtmlAgilityPack.HtmlDocument();
    htmlDocument.LoadHtml(pageSource.Source);

    docRoot = new WebElement(htmlDocument.DocumentNode);
}
static RuleFamilyBD ruleFamily { get; } = new RuleFamilyBD
{
RowsLocator = By.XPath("/*"),
Rules = new RuleSetBD
{
parseJson,
parseSeries
}
};
public BookDetailDTO ScrapePage()
{
//debug//var sw = System.Diagnostics.Stopwatch.StartNew();
var returnBookDetailDto = new BookDetailDTO { ProductId = source.PageId };
var wholePage = ruleFamily.GetRows(docRoot).Single();
ruleFamily.Rules.Run(wholePage, returnBookDetailDto);
//debug//sw.Stop(); var ms = sw.ElapsedMilliseconds;
return returnBookDetailDto;
}
static void parseJson(WebElement row, BookDetailDTO productItem)
{
// structured data is in the 2nd of the 3 json embedded sections <script type="application/ld+json">
var ldJson = row
.FindElements(By.XPath("//script[@type='application/ld+json']"))
[1]
// use InnerText NOT webElement.Text
// .Text decodes which will break json if it contains &quot;
// eg: "foo &quot; bar" => "foo " bar"
.Node.InnerText;
var jsonArray = JArray.Parse(ldJson);
var json0 = jsonArray[0] as JObject;
//// ways to enumerate properties
//foreach (var kvp in json0) Console.WriteLine(kvp.Key);
//foreach (var prop in json0.Properties()) Console.WriteLine(prop.Name);
var properties = json0.Properties().Select(p => p.Name).ToList();
// mandatory
productItem.Title = json0["name"].GetDecodedTokenString();
productItem.Description = json0["description"].GetDecodedTokenString();
productItem.Publisher = json0["publisher"].GetDecodedTokenString();
productItem.DatePublished = DateTime.Parse(json0["datePublished"].GetDecodedTokenString());
// optional
if (properties.Contains("abridged"))
productItem.IsAbridged = Convert.ToBoolean(json0["abridged"].GetDecodedTokenString());
// not all books have narrators
if (properties.Contains("readBy"))
foreach (var narrator in json0["readBy"])
productItem.Narrators.Add(narrator["name"].GetDecodedTokenString());
var json1 = jsonArray[1]["itemListElement"];
foreach (var element in json1)
{
var item = element["item"];
var id = item["@id"].GetDecodedTokenString();
if (!id.ContainsInsensitive("/cat/"))
continue;
var categoryId = id.Split('?')[0].Split('/').Last();
var categoryName = item["name"].GetDecodedTokenString();
productItem.Categories.Add((categoryId, categoryName));
}
}
static void parseSeries(WebElement row, BookDetailDTO productItem)
{
var element = row.FindElements(By.ClassName("seriesLabel")).SingleOrDefault();
if (element == null)
return;
var currEntry = new SeriesEntry();
var children = element.Node.ChildNodes;
// skip 0. It's just the label "Series:"
for (var i = 1; i < children.Count; i++)
{
var c = children[i];
// if contains html: // series name and id. begin new entry
// new book entry
if (c.HasChildNodes)
{
string seriesId = null;
var href = c.Attributes["href"].Value;
var h2 = href.Split('?')[1];
var h3 = h2.Split('&');
foreach (var h in h3)
{
var h4 = h.Split('=');
if (h4[0].EqualsInsensitive("asin"))
{
seriesId = h4[1];
break;
}
}
if (seriesId == null)
{
// try this format instead
if (href.StartsWithInsensitive("/series/"))
{
// href
// /series/The-Interdependency-Audiobooks/B06XKNK664?pf_rd_p=52918805-f7fc-40f4-a76b-cf1c79f7d10a&pf_rd_r=GV7000W2BM97V9Z35ZQD&ref=a_pd_The-Co_c1_series_1
var mainUrl = href.Split('?')[0];
// mainUrl
// /series/The-Interdependency-Audiobooks/B06XKNK664
var urlAsin = mainUrl.Split('/').Last();
// sanity check
if (urlAsin.StartsWithInsensitive("B") && urlAsin.Length == "B07CM5ZDJL".Length)
seriesId = urlAsin;
}
}
if (seriesId == null)
throw new Exception("series id not found");
currEntry = new SeriesEntry { SeriesId = seriesId, SeriesName = c.FirstChild.InnerText };
productItem.Series.Add(currEntry);
}
// else: is the index in prev series. not all books have an index
else
{
var indexString = c.InnerText
.ToLower()
.Replace("book", "")
.Replace(",", "")
.Trim();
if (float.TryParse(indexString, out float index))
currEntry.Index = index;
}
}
}
}
}

View File

@ -1,27 +0,0 @@
using System;
using Scraping.Rules;
using Scraping.Selectors;
using DTO = DTOs.BookDetailDTO;
namespace Scraping.BookDetail
{
    // Closed (non-generic) versions of the Scraping.Rules types, bound to the book-detail DTO.

    /// <summary>not the same as LocatedRuleSet. IRuleClass only acts upon 1 product item at a time. RuleFamily returns many product items</summary>
    internal class RuleFamilyBD : RuleFamily<DTO>
    {
    }

    /// <summary>A rule that acts on a book-detail DTO.</summary>
    internal interface IRuleClassBD : IRuleClass<DTO>
    {
    }

    /// <summary>A single action rule for a book-detail DTO.</summary>
    internal class BasicRuleBD : BasicRule<DTO>, IRuleClassBD
    {
        public BasicRuleBD()
        {
        }

        public BasicRuleBD(Action<WebElement, DTO> action)
            : base(action)
        {
        }
    }

    /// <summary>An ordered collection of book-detail rules.</summary>
    internal class RuleSetBD : RuleSet<DTO>, IRuleClassBD
    {
    }

    /// <summary>LocatedRuleSet loops through found items. When it's 0 or 1, LocatedRuleSet is an easy way to parse only if exists</summary>
    internal class LocatedRuleSetBD : LocatedRuleSet<DTO>, IRuleClassBD
    {
        public LocatedRuleSetBD(By elementsLocator)
            : base(elementsLocator)
        {
        }
    }
}

View File

@ -1,38 +0,0 @@
using System;
using System.Collections.Generic;
namespace DTOs
{
    /// <summary>A series reference: id, display name and an optional position within the series.</summary>
    public class SeriesEntry
    {
        public string SeriesId;
        public string SeriesName;

        /// <summary>Position in the series; null when none was provided</summary>
        public float? Index;
    }

    /// <summary>Values scraped from a single product-details page.</summary>
    public class BookDetailDTO
    {
        public string ProductId { get; set; }

        /// <summary>DEBUG only</summary>
        public string Title { get; set; }

        /// <summary>UNUSED: currently unused: desc from book-details is better desc in lib, but book-details also contains html tags</summary>
        public string Description { get; set; }

        public bool IsAbridged { get; set; }

        /// <summary>Narrator names. A list because order matters: don't use hashtable/dictionary</summary>
        public List<string> Narrators { get; } = new List<string>();

        public string Publisher { get; set; }

        public DateTime DatePublished { get; set; }

        /// <summary>Category id/name pairs. A list because order matters: don't use hashtable/dictionary</summary>
        public List<(string categoryId, string categoryName)> Categories { get; }
            = new List<(string categoryId, string categoryName)>();

        public List<SeriesEntry> Series { get; } = new List<SeriesEntry>();

        /// <summary>Formats as "[ProductId] Title".</summary>
        public override string ToString()
        {
            return string.Format("[{0}] {1}", ProductId, Title);
        }
    }
}

View File

@ -1,49 +0,0 @@
using System;
using System.Collections.Generic;
namespace DTOs
{
    /// <summary>Values scraped from one row of a library page.</summary>
    public class LibraryDTO
    {
        // NOTE: optional collections must always be initialized

        public string ProductId { get; set; }

        public string Title { get; set; }

        /// <summary>Whether this product is episodic. These will not have a book download link or personal library ratings</summary>
        public bool IsEpisodes { get; set; }

        /// <summary>Author name/id pairs. A list because order matters: do not use a hashtable/dictionary</summary>
        public List<(string authorName, string authorId)> Authors { get; set; }
            = new List<(string authorName, string authorId)>();

        public string[] Narrators { get; set; } = new string[0];

        public int LengthInMinutes { get; set; }

        public string Description { get; set; }

        public string PictureId { get; set; }

        /// <summary>Key: Id. Value: Name</summary>
        public Dictionary<string, string> Series { get; } = new Dictionary<string, string>();

        // aggregate community ratings for this product
        public float Product_OverallStars { get; set; }
        public float Product_PerformanceStars { get; set; }
        public float Product_StoryStars { get; set; }

        // my personal user ratings for this product (only products i own. ie: in library)
        public int MyUserRating_Overall { get; set; }
        public int MyUserRating_Performance { get; set; }
        public int MyUserRating_Story { get; set; }

        public List<string> SupplementUrls { get; set; } = new List<string>();

        public DateTime DateAdded { get; set; }

        public string DownloadBookLink { get; set; }

        /// <summary>Formats as "[ProductId] Title".</summary>
        public override string ToString()
        {
            return string.Format("[{0}] {1}", ProductId, Title);
        }
    }
}

View File

@ -1,272 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using AudibleDotCom;
using Dinah.Core;
using DTOs;
using FileManager;
using Scraping.Selectors;
namespace Scraping.Library
{
internal class LibraryScraper : LibraryScraperBase
{
// Row locator: matches every element whose id starts with 'adbl-library-content-row-'.
// ignores multi-part rows. ie: gets the first part only when multi-part
// superseded locator, kept for reference: "//tr[td[@class='adbl-lib-multipart-toggle']]"
static RuleFamilyLib ruleFamily { get; } = new RuleFamilyLib
{
    RowsLocator = By.XPath("//*[starts-with(@id,'adbl-library-content-row-')]"),
    Rules = getRuleSet()
};

// debugging aid: outer html of the most recent row whose title was parsed successfully
static string debug_lastGoodTitleOuterHtml;
/// <summary>
/// Builds the ordered rules applied to each library row.
/// Rule order matters: the IsEpisodes rule must run before the rules that read IsEpisodes (download link, personal ratings).
/// </summary>
/// <returns>A new rule set; every call builds a fresh instance.</returns>
protected static RuleSetLib getRuleSet()
    => new RuleSetLib
    {
        // product id
        (row, productItem) => productItem.ProductId = row.FindElements(By.XPath(".//input[@name='asin']")).First().Value.Trim(),

        // title: 1st td, 1st ul, 1st li, within h2
        (row, productItem) =>
        {
            // locals/static are kept so a debugger can compare a failing row with the last good one
            var debug_attemptTitleScriptOuterHtml = row.Node.OuterHtml;
            try
            {
                productItem.Title = row.FindElement(By.XPath("(./td[1]//ul)[1]/li[1]/h2")).Text.Trim();
                debug_lastGoodTitleOuterHtml = debug_attemptTitleScriptOuterHtml;
            }
            catch
            {
                // break here to inspect the row that failed
                var badTitle = debug_attemptTitleScriptOuterHtml;
                throw;
            }
        },

        // is episodes. these will not have a book download link or personal library ratings
        (row, productItem) => productItem.IsEpisodes = row.FindElements(By.XPath(".//a[starts-with(@href, '/a/library/subscription?')]")).Any(),

        // get picture id, download images
        (row, productItem) =>
        {
            productItem.PictureId = row
                .FindElements(By.ClassName("bc-image-inset-border"))
                .First()
                .GetAttribute("src")
                .Split('/').Last()
                .Split('.').First();
            PictureStorage.DownloadImages(productItem.PictureId);
        },

        // all text links
        new LocatedRuleSetLib(By.XPath(".//a[not(img)]"))
        {
            (link, productItem) =>
            {
                var href = link.GetAttribute("href");
                if (href == null)
                    return;

                // authors
                var authorName = link.Text.Trim();
                string authorId;
                // with no id. DO NOT REPLACE THIS STEP. needed for valid early exit in 'else'
                if (href.Contains("/search?searchAuthor="))
                    authorId = null;
                // with id
                else if (href.Contains("/author/"))
                    authorId = href
                        .Split('?')[0]
                        .Split('/').Last();
                else // not an author
                    return;

                productItem.Authors.Add((authorName, authorId));
            },
        },

        // series. id only; not name
        new LocatedRuleSetLib(By.XPath(".//a[text()='View Series']/@href"))
        {
            (link, productItem) => productItem.Series[link.GetAttribute("href").Replace("series?asin=", "")] = null
        },

        // pdf download link
        new LocatedRuleSetLib(By.ClassName("adbl-lib-action-pdf"))
        {
            (link, productItem) => productItem.SupplementUrls.Add(link.GetAttribute("href"))
        },

        bookDownloadLink,

        // date added to library
        (row, productItem) =>
        {
            var dateAdded = row
                .FindElements(By.ClassName("bc-text"))
                .Select(l => l.Text.Trim())
                .Where(str => Regex.IsMatch(str, @"^\d\d-\d\d-\d\d$"))
                .Select(dateText => DateTime.ParseExact(dateText, "MM-dd-yy", System.Globalization.CultureInfo.InvariantCulture))
                .ToList();
            if (dateAdded.Any())
                productItem.DateAdded = dateAdded.First();
        },

        // my library ratings
        (row, productItem) =>
        {
            if (productItem.IsEpisodes)
                return;

            productItem.MyUserRating_Overall = int.Parse(row
                .FindElement(By.ClassName("adbl-prod-rate-review-bar-overall")).GetAttribute("data-star-count"));
            productItem.MyUserRating_Performance = int.Parse(row
                .FindElement(By.ClassName("adbl-prod-rate-review-bar-performance")).GetAttribute("data-star-count"));
            productItem.MyUserRating_Story = int.Parse(row
                .FindElement(By.ClassName("adbl-prod-rate-review-bar-story")).GetAttribute("data-star-count"));
        },

        // 1st td (summary panel) (xpath uses 1-based indexes), top bullets
        // to get the first, use parentheses. it will parse w/o parens but will fall through to the 2nd unwanted ul
        new LocatedRuleSetLib(By.XPath("(./td[1]//ul)[1]/li"))
        {
            // narrators
            (li, productItem) =>
            {
                var text = li.Text.Trim();
                if (!text.StartsWith("Narrated by:"))
                    return;

                var narratorNames = text.Replace("Narrated by:", "").Trim();
                productItem.Narrators = sanitizeContributorNames(narratorNames.Split(',')).ToArray();
            },

            // length
            (li, productItem) =>
            {
                var text = li.Text.Trim();
                if (!text.StartsWith("Length:"))
                    return;
                if (!text.Contains(" hr") && !text.Contains(" min"))
                    return;

                var timeSplit = text
                    .Replace("Length:", "")
                    .Trim()
                    .Split(new string[] { " and " }, StringSplitOptions.RemoveEmptyEntries);

                // do the math for 1 item then add to productItem.
                // If we += directly to the productItem inside foreach(), then time will be doubled if this runs twice
                var tempLengthInMinutes = 0;
                foreach (var part in timeSplit)
                {
                    if (part.Contains("sec"))
                        continue;

                    var intPart = int.Parse(part.Replace("hr", "").Replace("min", "").Replace("s", "").Trim());
                    if (part.Contains("hr"))
                        intPart *= 60;
                    tempLengthInMinutes += intPart;
                }
                productItem.LengthInMinutes = tempLengthInMinutes;
            },
        },

        // 1st td (summary panel)
        // description
        new LocatedRuleSetLib(By.XPath("./td[1]//p"))
        {
            (p, productItem) =>
            {
                var text = p.Text.Trim();
                if (!string.IsNullOrWhiteSpace(text))
                    productItem.Description = sanitizeDescription(text);
            },
        },

        // 1st td (summary panel)
        // 2nd set of bullets has product ratings
        new LocatedRuleSetLib(By.XPath("(./td[1]//ul)[2]/li"))
        {
            (li, productItem) =>
            {
                // splitting on null assumes white space: https://docs.microsoft.com/en-us/dotnet/api/system.string.split
                var text = li.Text.Split(null as char[], StringSplitOptions.RemoveEmptyEntries);
                if (text.Length < 2)
                    return;

                // Only the label (text[0]) and the star value (text[1]) are used.
                // The vote count at text[6] was parsed but never stored, and reading it threw
                // IndexOutOfRangeException for items with fewer than 7 tokens, so it is no longer read.
                // Invariant culture: "4.5" must not be read as 45 under comma-decimal cultures.
                var rating = float.Parse(text[1], System.Globalization.CultureInfo.InvariantCulture);

                switch (text[0])
                {
                    case "Overall": productItem.Product_OverallStars = rating; return;
                    case "Performance": productItem.Product_PerformanceStars = rating; return;
                    case "Story": productItem.Product_StoryStars = rating; return;
                }
            },
        },
    };
/// <summary>Creates a scraper for one library page. All construction work is delegated to the base class constructor.</summary>
public LibraryScraper(AudiblePageSource pageSource)
    : base(pageSource)
{
}
/// <summary>Scrapes all rows of the current page with this class's rule family.</summary>
public override IEnumerable<LibraryDTO> ScrapeCurrentPage()
{
    // example for once per page rules:
    //   var onePerPageProductItemValues = scrapeRows(onePerPageRules).Single();
    //   foreach (var productItem in scrapeRows(old_family))
    //   {
    //       productItem.CustomerId = onePerPageProductItemValues.CustomerId;
    //       productItem.UserName = onePerPageProductItemValues.UserName;
    //       productItem.Last_DownloadCustId = onePerPageProductItemValues.Last_DownloadCustId;
    //       yield return productItem;
    //   }

    // example for multiple rule sets:
    //   var i = 0;
    //   foreach (var oldResult in scrapeRows(oldRuleFamily))
    //   {
    //       i++;
    //       yield return oldResult;
    //   }
    //   if (i > 0)
    //       yield break;
    //   foreach (var newResult in scrapeRows(newRuleFamily))
    //       yield return newResult;

    var scrapedRows = scrapeRows(ruleFamily);
    return scrapedRows;
}
// this is broken out into its own method as a proof of concept. it may also help with debugging
/// <summary>Sets <c>DownloadBookLink</c> from the row's download button. Episodic products are skipped.</summary>
/// <exception cref="Exception">
/// The row has no download element, or the download button links to the 'howtolisten' page.
/// </exception>
static void bookDownloadLink(WebElement row, LibraryDTO productItem)
{
    if (productItem.IsEpisodes)
        return;

    var downloadLink = row.FindElements(By.ClassName("adbl-lib-action-download")).FirstOrDefault();

    // fail with a descriptive error instead of a NullReferenceException on the next statement
    if (downloadLink == null)
        throw new Exception($"Library download link not found. ProductId: {productItem.ProductId}");

    // .Node switches to HtmlAgilityPack style. could also have used xpath .//a (or ./a since it happens to be the immediate descendant)
    productItem.DownloadBookLink = downloadLink.Node
        .Descendants("a").Single()
        .Attributes["href"].Value;

    // check for
    //   href="/howtolisten"
    if (productItem.DownloadBookLink.ContainsInsensitive("howtolisten"))
        throw new Exception("BAD DOWNLOAD LINKS"
            + "\r\n" + "PROBLEM: Library download button is a link to the 'howtolisten' page"
            + "\r\n" + "SOLUTION: Toggle this checkbox: Accounts Details > Update Settings > Software Verification > Check for Audible Download Manager");
}
#region static scrape page helpers
// Cached, compiled pattern that matches "introduction"/"introductions" suffixes, optionally
// preceded by a hyphen and/or wrapped in parentheses. Case-insensitive. The pattern uses
// IgnorePatternWhitespace, so the '#' lines inside the string are regex comments.
// readonly: the instance is assigned once here and shared by all callers.
private static readonly Regex removeIntroductionsRegex = new Regex(
#region regex: remove "introduction" variants
@"
# keep this. non-greedy
(.*?)
# non-capture. this is what to throw away
(?:
# this will capture
# (introduction)
# (introductions)
# - introduction
# - introductions
\s*\-?\s*\(?introductions?\)?
)?
", RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled
#endregion
);
// input: the pieces of a comma delimited list of names. possibly with " - introductions", "(introduction)", etc
// output: the same names with commas and "introduction" suffixes removed. evaluated lazily
// room for improvement. all kinds of other things are tagged onto names with hyphens and parens. eg: "(cover illustration)", " - essay"
private static IEnumerable<string> sanitizeContributorNames(IEnumerable<string> names)
{
    string clean(string rawName)
    {
        var withoutCommas = rawName.Replace(",", "").Trim();
        return removeIntroductionsRegex.Replace(withoutCommas, "");
    }

    return names.Select(rawName => clean(rawName));
}
// Normalizes typographic punctuation in a description to plain ASCII: curly quotes become
// ' or ", the ellipsis character becomes "...", then the result is trimmed.
// Replacements are applied in the order listed.
// NOTE(review): these literals are encoding-sensitive; this file must be saved and compiled
// with an encoding that preserves them. At least one entry ("â€" + '\u009d') looks like the
// mis-decoded (mojibake) form of a curly quote -- presumably on purpose, to clean up text that
// was already mis-decoded upstream; confirm.
private static string sanitizeDescription(string desc) => desc
.Replace("’", "'") // '
// NOTE(review): the search string on the next line appears empty in this view. string.Replace
// throws ArgumentException for an empty oldValue, so it presumably holds an invisible or
// mis-encoded character -- verify against the original bytes before editing this line.
.Replace("", "'") // '
.Replace("…", "...") // …
.Replace("“", "\"") // "
.Replace("â€" + '\u009d', "\"") // "
.Replace("“", "\"") // "
.Replace("”", "\"") // "
.Trim();
#endregion
}
}

View File

@ -1,106 +0,0 @@
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using AudibleDotCom;
using DTOs;
using Scraping.Selectors;
namespace Scraping.Library
{
/// <summary>
/// Base class for library page scrapers: parses the page html once, then runs a rule family against every located row.
/// </summary>
internal abstract class LibraryScraperBase
{
    private AudiblePageSource source { get; }
    private WebElement docRoot { get; }

    /// <summary>Loads the page html into an HtmlAgilityPack document and keeps its root node for scraping.</summary>
    protected LibraryScraperBase(AudiblePageSource pageSource)
    {
        source = pageSource;

        var doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(source.Source);
        docRoot = new WebElement(doc.DocumentNode);
    }

    /// <summary>Scrapes the page this instance was created for.</summary>
    public abstract IEnumerable<LibraryDTO> ScrapeCurrentPage();

    // Why rows are processed in parallel -- timing tests: selenium vs html agility pack
    //   iterate:          foreach over rows
    //   yield:            yield return each row as it is extracted
    //   Parallel_ForEach: Parallel.ForEach over rows
    //   WaitAll:          Task.WaitAll(tasks)
    //   AsParallel:       rows.AsParallel().Select(...)
    //
    // in milliseconds
    //   selenium. [1] slow [2] is/has a bottleneck which resists parallelization
    //     iterate:          394424 - 439711
    //     yield:            387854
    //     Parallel_ForEach: 345149 - 371547
    //     WaitAll:          363970
    //     AsParallel:       369904
    //   html agility pack
    //     iterate:          15024 - 19092    55-60% of this time is downloading images
    //     Parallel_ForEach: 4060 - 4271      <<<<<<<<<<<<<<<<<<<<<<<
    //     WaitAll:          3646 - 8702      . mostly ~6-8k
    //     AsParallel:       4318 - 8378

    /// <summary>Locates all rows for the rule family and extracts one DTO per row, in parallel.</summary>
    /// <param name="ruleFamily">Row locator plus the rules to run against each row.</param>
    /// <returns>One DTO per located row, in the same order as the located rows.</returns>
    protected IEnumerable<LibraryDTO> scrapeRows(RuleFamilyLib ruleFamily)
    {
        var rows = ruleFamily.GetRows(docRoot).ToList();

        // Each worker writes only to its own slot, so no lock is needed and the output keeps row
        // order. (Appending to a shared list from the workers produced completion order instead.)
        var results = new LibraryDTO[rows.Count];
        Parallel.For(0, rows.Count, i => results[i] = extractLibraryDTO(rows[i], ruleFamily));

        return results.ToList();
    }

    /// <summary>Runs every rule of the family against one row and returns the populated DTO.</summary>
    private static LibraryDTO extractLibraryDTO(WebElement row, RuleFamilyLib ruleFamily)
    {
        var productItem = new LibraryDTO();
        ruleFamily.Rules.Run(row, productItem);
        return productItem;
    }
}
}

View File

@ -1,27 +0,0 @@
using System;
using Scraping.Rules;
using Scraping.Selectors;
using DTO = DTOs.LibraryDTO;
namespace Scraping.Library
{
    // Closed (non-generic) versions of the Scraping.Rules types, bound to the library DTO.

    /// <summary>not the same as LocatedRuleSet. IRuleClass only acts upon 1 product item at a time. RuleFamily returns many product items</summary>
    internal class RuleFamilyLib : RuleFamily<DTO>
    {
    }

    /// <summary>A rule that acts on a library DTO.</summary>
    internal interface IRuleClassLib : IRuleClass<DTO>
    {
    }

    /// <summary>A single action rule for a library DTO.</summary>
    internal class BasicRuleLib : BasicRule<DTO>, IRuleClassLib
    {
        public BasicRuleLib()
        {
        }

        public BasicRuleLib(Action<WebElement, DTO> action)
            : base(action)
        {
        }
    }

    /// <summary>An ordered collection of library rules.</summary>
    internal class RuleSetLib : RuleSet<DTO>, IRuleClassLib
    {
    }

    /// <summary>LocatedRuleSet loops through found items. When it's 0 or 1, LocatedRuleSet is an easy way to parse only if exists</summary>
    internal class LocatedRuleSetLib : LocatedRuleSet<DTO>, IRuleClassLib
    {
        public LocatedRuleSetLib(By elementsLocator)
            : base(elementsLocator)
        {
        }
    }
}

View File

@ -1,15 +0,0 @@
using Scraping.Library;
namespace Scraping.Rules.Examples
{
/// <summary>Reference only: shows the different ways a rule can be added to a rule set.</summary>
class ScraperRulesExamples
{
    RuleSetLib rulesExamples { get; } = new RuleSetLib
    {
        // equivalent ways to declare a simple rule action:

        // 1. BasicRuleLib, action passed to the constructor
        new BasicRuleLib((element, dto) => { }),

        // 2. BasicRuleLib, action assigned through the object initializer
        new BasicRuleLib
        {
            Action = (element, dto) => { }
        },

        // 3. bare lambda
        (element, dto) => { }
    };
}
}

View File

@ -1,61 +0,0 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using Scraping.Selectors;
namespace Scraping.Rules
{
    /// <summary>not the same as LocatedRuleSet. IRuleClass only acts upon 1 product item at a time. RuleFamily returns many product items</summary>
    internal class RuleFamily<T>
    {
        /// <summary>Locator that finds the rows to scrape.</summary>
        public By RowsLocator;

        /// <summary>Rules to run against each row.</summary>
        public IRuleClass<T> Rules;

        /// <summary>Finds all rows under the given root and returns them as a new list.</summary>
        public IEnumerable<WebElement> GetRows(WebElement rootWebElement)
        {
            var located = rootWebElement.FindElements(RowsLocator);
            return located.ToList();
        }
    }

    /// <summary>Something that can be run against an element to populate a product item.</summary>
    internal interface IRuleClass<T>
    {
        void Run(WebElement element, T productItem);
    }

    /// <summary>A rule made of a single action.</summary>
    internal class BasicRule<T> : IRuleClass<T>
    {
        public Action<WebElement, T> Action;

        public BasicRule()
        {
        }

        public BasicRule(Action<WebElement, T> action)
        {
            Action = action;
        }

        // this is only place that rules actions are actually run
        // error handling, logging, et al. belong here
        public void Run(WebElement element, T productItem)
        {
            Action.Invoke(element, productItem);
        }
    }

    /// <summary>An ordered list of rules, run one after another against the same element.</summary>
    internal class RuleSet<T> : IRuleClass<T>, IEnumerable<IRuleClass<T>>
    {
        private List<IRuleClass<T>> rules { get; } = new List<IRuleClass<T>>();

        // the Add overloads are what make collection-initializer syntax work for rule sets
        public void Add(IRuleClass<T> ruleClass)
        {
            rules.Add(ruleClass);
        }

        public void Add(Action<WebElement, T> action)
        {
            var wrapped = new BasicRule<T>(action);
            rules.Add(wrapped);
        }

        public void AddRange(IEnumerable<IRuleClass<T>> rules)
        {
            this.rules.AddRange(rules);
        }

        public IEnumerator<IRuleClass<T>> GetEnumerator()
        {
            return rules.GetEnumerator();
        }

        IEnumerator IEnumerable.GetEnumerator()
        {
            return rules.GetEnumerator();
        }

        /// <summary>Runs every rule, in the order added.</summary>
        public virtual void Run(WebElement element, T productItem)
        {
            foreach (var currentRule in rules)
            {
                currentRule.Run(element, productItem);
            }
        }
    }

    /// <summary>LocatedRuleSet loops through found items. When it's 0 or 1, LocatedRuleSet is an easy way to parse only if exists</summary>
    internal class LocatedRuleSet<T> : RuleSet<T>
    {
        public By ElementsLocator;

        public LocatedRuleSet(By elementsLocator)
        {
            ElementsLocator = elementsLocator;
        }

        /// <summary>Runs the whole rule set once per element found under the parent.</summary>
        public override void Run(WebElement parentElement, T productItem)
        {
            var located = parentElement.FindElements(ElementsLocator);
            foreach (var child in located)
            {
                base.Run(child, productItem);
            }
        }
    }
}

View File

@ -1,25 +0,0 @@
using System;
using System.Collections.ObjectModel;
// adapted from OpenQA.Selenium
// https://github.com/SeleniumHQ/selenium/blob/master/dotnet/src/webdriver/By.cs
namespace Scraping.Selectors
{
/// <summary>Provides a mechanism by which to find elements within a document.</summary>
[Serializable]
internal partial class By
{
    // human-readable text returned by ToString()
    private string description { get; }

    /// <summary>Gets or sets the method used to find all elements matching specified criteria.</summary>
    public Func<WebElement, ReadOnlyCollection<WebElement>> FindElementsMethod { get; private set; }

    /// <summary>Creates a locator from its description and the function that performs the search.</summary>
    protected By(string description, Func<WebElement, ReadOnlyCollection<WebElement>> findElementsMethod)
    {
        FindElementsMethod = findElementsMethod;
        this.description = description;
    }

    /// <summary>Returns the description supplied at construction.</summary>
    public override string ToString()
    {
        return description;
    }
}
}

View File

@ -1,21 +0,0 @@
using System;
using System.Collections.ObjectModel;
namespace Scraping.Selectors
{
// example custom "By" locator. from: https://stackoverflow.com/questions/14263483
internal class CustomSelector : By
{
    public CustomSelector(string description, Func<WebElement, ReadOnlyCollection<WebElement>> findElementsMethod) : base(description, findElementsMethod) { }

    /// <summary>Gets a locator for img elements whose src attribute equals the given value.</summary>
    /// <param name="imageBySource">Exact src value to match.</param>
    /// <returns>A <see cref="By"/> that searches the entire document ("//img").</returns>
    /// <exception cref="ArgumentNullException"><paramref name="imageBySource"/> is null.</exception>
    public static By Image(string imageBySource)
    {
        if (imageBySource == null)
            throw new ArgumentNullException(nameof(imageBySource), "Cannot find elements when image string is null.");

        return new CustomSelector(
            nameof(CustomSelector) + ".Image: " + imageBySource,
            (context) => context.FindElements(XPath("//img[@src=" + toXPathLiteral(imageBySource) + "]"))
            );
    }

    // XPath 1.0 string literals have no escape character, so the quoting style is chosen from the
    // content: '...' when there is no single quote (same output as before), "..." when there is no
    // double quote, otherwise concat() of single-quoted pieces joined by "'".
    private static string toXPathLiteral(string value)
    {
        if (!value.Contains("'"))
            return "'" + value + "'";
        if (!value.Contains("\""))
            return "\"" + value + "\"";
        return "concat('" + value.Replace("'", "',\"'\",'") + "')";
    }
}
}

View File

@ -1,101 +0,0 @@
using System;
using System.Linq;
using Dinah.Core;
namespace Scraping.Selectors
{
// Factory methods: each returns a By whose search is implemented with HtmlAgilityPack.
internal partial class By
{
    // caller is filled in by the compiler with the name of the calling factory method
    private static string getDescription(string param, [System.Runtime.CompilerServices.CallerMemberName] string caller = null)
        => $"{nameof(By)}.{caller}: {param}";

    // XPath 1.0 string literals have no escape character, so the quoting style is chosen from the
    // content: '...' when there is no single quote (same output as before), "..." when there is no
    // double quote, otherwise concat() of single-quoted pieces joined by "'".
    private static string toXPathLiteral(string value)
    {
        if (!value.Contains("'"))
            return "'" + value + "'";
        if (!value.Contains("\""))
            return "\"" + value + "\"";
        return "concat('" + value.Replace("'", "',\"'\",'") + "')";
    }

    /// <summary>
    /// Gets a mechanism to find elements by their CSS class.
    /// </summary>
    /// <param name="classNameToFind">The CSS class to find.</param>
    /// <returns>A <see cref="By"/> object the driver can use to find the elements.</returns>
    /// <remarks>If an element has many classes then this will match against each of them.
    /// For example if the value is "one two onone", then the following values for the
    /// className parameter will match: "one" and "two".</remarks>
    /// <exception cref="ArgumentNullException"><paramref name="classNameToFind"/> is null.</exception>
    public static By ClassName(string classNameToFind)
    {
        if (classNameToFind == null)
            throw new ArgumentNullException(nameof(classNameToFind), "Cannot find elements when the class name is null.");

        return new By(
            getDescription(classNameToFind),
            (context) => context.Node.Descendants().Where(n => n.HasClass(classNameToFind)).ToReadOnlyCollection());
    }

    /// <summary>Gets a mechanism to find elements by their ID.</summary>
    /// <param name="idToFind">The ID to find.</param>
    /// <returns>A <see cref="By"/> object the driver can use to find the elements.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="idToFind"/> is null.</exception>
    public static By Id(string idToFind)
    {
        if (idToFind == null)
            throw new ArgumentNullException(nameof(idToFind), "Cannot find elements when the id is null.");

        return new By(
            getDescription(idToFind),
            (context) => context.Node.Descendants().Where(n => n.Id.EqualsInsensitive(idToFind)).ToReadOnlyCollection());
    }

    /// <summary>Gets a mechanism to find elements by their link text.</summary>
    /// <param name="linkTextToFind">The link text to find.</param>
    /// <returns>A <see cref="By"/> object the driver can use to find the elements.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="linkTextToFind"/> is null.</exception>
    public static By LinkText(string linkTextToFind)
    {
        if (linkTextToFind == null)
            throw new ArgumentNullException(nameof(linkTextToFind), "Cannot find elements when the link text is null.");

        return new By(
            getDescription(linkTextToFind),
            (context) => context.Node.Descendants("a").Where(n => n.InnerText.EqualsInsensitive(linkTextToFind)).ToReadOnlyCollection());
    }

    /// <summary>selenium Name == hap get element with attribute named "name". Gets a mechanism to find elements by their name.</summary>
    /// <param name="nameToFind">The name to find.</param>
    /// <returns>A <see cref="By"/> object the driver can use to find the elements.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="nameToFind"/> is null.</exception>
    public static By Name(string nameToFind)
    {
        if (nameToFind == null)
            // message previously said "tag name" (copied from TagName)
            throw new ArgumentNullException(nameof(nameToFind), "Cannot find elements when the name is null.");

        // quote the value safely; raw interpolation produced an invalid xpath for names containing '
        var xpath = ".//*[@name=" + toXPathLiteral(nameToFind) + "]";

        return new By(
            getDescription(nameToFind),
            //.Descendants().Single(n => n.HasAttributes && n.Attributes["name"] != null && n.Attributes["name"].Value.EqualsInsensitive("tdTitle"))
            (context) => context.Node.SelectNodes(xpath).ToReadOnlyCollection());
    }

    /// <summary>selenium TagName == hap Name. Gets a mechanism to find elements by their tag name.</summary>
    /// <param name="tagNameToFind">The tag name to find.</param>
    /// <returns>A <see cref="By"/> object the driver can use to find the elements.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tagNameToFind"/> is null.</exception>
    public static By TagName(string tagNameToFind)
    {
        if (tagNameToFind == null)
            throw new ArgumentNullException(nameof(tagNameToFind), "Cannot find elements when the tag name is null.");

        return new By(
            getDescription(tagNameToFind),
            (context) => context.Node.Descendants(tagNameToFind).ToReadOnlyCollection());
    }

    /// <summary>
    /// Gets a mechanism to find elements by an XPath query.
    /// When searching within a WebElement using xpath be aware that WebDriver follows standard conventions:
    /// a search prefixed with "//" will search the entire document, not just the children of this current node.
    /// Use ".//" to limit your search to the children of this WebElement.
    /// </summary>
    /// <param name="xpathToFind">The XPath query to use.</param>
    /// <returns>A <see cref="By"/> object the driver can use to find the elements.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="xpathToFind"/> is null.</exception>
    public static By XPath(string xpathToFind)
    {
        if (xpathToFind == null)
            throw new ArgumentNullException(nameof(xpathToFind), "Cannot find elements when the XPath expression is null.");

        return new By(
            getDescription(xpathToFind),
            (context) => context.Node.SelectNodes(xpathToFind).ToReadOnlyCollection());
    }
}
}

View File

@ -1,17 +0,0 @@
using System;
using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq;
using HtmlAgilityPack;
namespace Scraping.Selectors
{
/// <summary>Bridges HtmlAgilityPack node sequences to the scraper's <see cref="WebElement"/> wrapper.</summary>
internal static class HtmlAgilityPackExt
{
    /// <summary>Wraps each node in a <see cref="WebElement"/>. A null sequence yields an empty collection.</summary>
    public static ReadOnlyCollection<WebElement> ToReadOnlyCollection(this IEnumerable<HtmlNode> nodeCollection)
    {
        var wrapped = new List<WebElement>();

        if (nodeCollection != null)
        {
            foreach (var node in nodeCollection)
            {
                wrapped.Add(new WebElement(node));
            }
        }

        return wrapped.AsReadOnly();
    }
}
}

View File

@ -1,21 +0,0 @@
using System.Collections.ObjectModel;
using System.Linq;
using HtmlAgilityPack;
namespace Scraping.Selectors
{
/// <summary>Thin wrapper over an HtmlAgilityPack node that can be searched with <see cref="By"/> locators.</summary>
internal class WebElement
{
    /// <summary>The wrapped node.</summary>
    public HtmlNode Node { get; }

    public WebElement(HtmlNode htmlNode)
    {
        Node = htmlNode;
    }

    /// <summary>Finds all elements matching the criteria by invoking the locator's search function on this element.</summary>
    public ReadOnlyCollection<WebElement> FindElements(By by)
    {
        return by.FindElementsMethod(this);
    }

    /// <summary>Finds the single element matching the criteria.</summary>
    public WebElement FindElement(By by)
    {
        var matches = FindElements(by);
        return matches.Single();
    }

    /// <summary>Returns the attribute's value, or null when the node or the attribute is missing.</summary>
    public string GetAttribute(string attributeName)
    {
        if (Node == null)
            return null;

        var attribute = Node.Attributes[attributeName];
        return attribute == null ? null : attribute.Value;
    }

    /// <summary>The node's inner text, html-decoded.</summary>
    public string Text
    {
        get { return System.Net.WebUtility.HtmlDecode(Node.InnerText); }
    }

    /// <summary>The "value" attribute.</summary>
    public string Value
    {
        get { return GetAttribute("value"); }
    }
}
}

View File

@ -1,39 +0,0 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using InternalUtilities;
using AudibleDotCom;
using AudibleDotComAutomation;
using FileManager;
using Scraping;
namespace ScrapingDomainServices
{
/// <summary>Retrieves the library pages and persists both the raw html and the scraped json.</summary>
public static class DownloadLibrary
{
    /// <summary>scrape all library pages. save htm files. save json files</summary>
    /// <param name="pageRetriever">Source of the library pages.</param>
    /// <returns>paths of json files</returns>
    public static async Task<List<FileInfo>> DownloadLibraryAsync(IPageRetriever pageRetriever)
    {
        var batchName = WebpageStorage.GetLibraryBatchName();

        // library webpages => AudiblePageSource objects
        var pages = await pageRetriever.GetPageSourcesAsync(AudiblePageType.Library);

        var savedJsonFiles = new List<FileInfo>();
        foreach (var page in pages)
        {
            // good habit to persist htm before attempting to parse it. this way, if there's a parse error, we can test errors on a local copy
            var htmFile = DataConverter.AudiblePageSource_2_HtmFile_Batch(page, batchName);

            var scraped = AudibleScraper.ScrapeLibrarySources(page);

            // the json is saved under the htm file's path with a "json" extension
            var jsonPath = Path.ChangeExtension(htmFile.FullName, "json");
            savedJsonFiles.Add(DataConverter.Value_2_JsonFile(scraped, jsonPath));
        }

        return savedJsonFiles;
    }
}
}

View File

@ -1,67 +0,0 @@
using System;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using DataLayer;
using Dinah.Core.ErrorHandling;
using FileManager;
namespace ScrapingDomainServices
{
/// <summary>Downloads a book's supplementary PDF.</summary>
public class DownloadPdf : DownloadableBase
{
    /// <summary>True when the book has at least one supplement and its PDF is not already in storage.</summary>
    public override async Task<bool> ValidateAsync(LibraryBook libraryBook)
    {
        var book = libraryBook.Book;

        if (book.Supplements.Any())
        {
            var alreadyStored = await AudibleFileStorage.PDF.ExistsAsync(book.AudibleProductId);
            return !alreadyStored;
        }

        return false;
    }

    /// <summary>Downloads the book's single supplement and reports whether it can be found afterwards.</summary>
    /// <returns>A status handler; it contains an entry when the download was not possible or could not be verified.</returns>
    /// <exception cref="Exception">The book has more than one supplement url.</exception>
    public override async Task<StatusHandler> ProcessItemAsync(LibraryBook libraryBook)
    {
        var book = libraryBook.Book;
        if (book == null)
            return new StatusHandler { "Book not found" };

        var supplementUrls = book.Supplements.Select(s => s.Url).ToList();
        if (supplementUrls.Count == 0)
            return new StatusHandler { "PDF download url not found" };

        // sanity check
        if (supplementUrls.Count > 1)
            throw new Exception("Multiple PDF downloads are not currently supported. typically indicates an error");

        // exactly one url remains at this point
        var pdfUrl = supplementUrls[0];

        var targetDir = await getDestinationDirectory(book.AudibleProductId);
        if (targetDir == null)
            return new StatusHandler { "Destination directory not found for PDF download" };

        var targetPath = Path.Combine(targetDir, Path.GetFileName(pdfUrl));

        using var webClient = await GetWebClientAsync(targetPath);
        await webClient.DownloadFileTaskAsync(pdfUrl, targetPath);

        // verify through storage rather than trusting the download call
        var result = new StatusHandler();
        if (!await AudibleFileStorage.PDF.ExistsAsync(book.AudibleProductId))
            result.AddError("Downloaded PDF cannot be found");

        return result;
    }

    // the PDF is saved next to the book's audio file when one exists; otherwise in the PDF storage directory
    private async Task<string> getDestinationDirectory(string productId)
    {
        var audioFile = await AudibleFileStorage.Audio.GetAsync(productId);

        return audioFile != null
            ? Path.GetDirectoryName(audioFile)
            : AudibleFileStorage.PDF.StorageDirectory;
    }
}
}

View File

@ -1,98 +0,0 @@
using System;
using System.Net;
using System.Threading.Tasks;
using DataLayer;
using Dinah.Core.ErrorHandling;
using Dinah.Core.Humanizer;
namespace ScrapingDomainServices
{
/// <summary>
/// Base class for per-book operations. Wraps <see cref="ProcessItemAsync"/> with Begin/Completed events
/// and hands out <see cref="WebClient"/> instances whose progress and completion are forwarded to this class's events.
/// </summary>
public abstract class DownloadableBase : IDownloadable
{
    public event EventHandler<string> Begin;

    public event EventHandler<string> StatusUpdate;
    protected void DoStatusUpdate(string message)
    {
        StatusUpdate?.Invoke(this, message);
    }

    public event EventHandler<string> DownloadBegin;
    public event EventHandler<Dinah.Core.Net.Http.DownloadProgress> DownloadProgressChanged;
    public event EventHandler<string> DownloadCompleted;

    protected void Invoke_DownloadBegin(string downloadMessage)
    {
        DownloadBegin?.Invoke(this, downloadMessage);
    }

    protected void Invoke_DownloadProgressChanged(object sender, Dinah.Core.Net.Http.DownloadProgress progress)
    {
        DownloadProgressChanged?.Invoke(sender, progress);
    }

    protected void Invoke_DownloadCompleted(object sender, string str)
    {
        DownloadCompleted?.Invoke(sender, str);
    }

    public event EventHandler<string> Completed;

    static DownloadableBase()
    {
        // https://stackoverflow.com/a/15483698
        // NOTE(review): this callback accepts every server certificate, and ServicePointManager is static,
        // so certificate validation is switched off for more than this class. Confirm this is still wanted.
        ServicePointManager.ServerCertificateValidationCallback = (sender, certificate, chain, sslPolicyErrors) => true;
    }

    /// <summary>Decides whether <paramref name="libraryBook"/> should be processed.</summary>
    public abstract Task<bool> ValidateAsync(LibraryBook libraryBook);

    /// <summary>Does the actual work for one book. Called by <see cref="ProcessAsync"/>.</summary>
    public abstract Task<StatusHandler> ProcessItemAsync(LibraryBook libraryBook);

    /// <summary>
    /// Raises <see cref="Begin"/>, runs <see cref="ProcessItemAsync"/>, and raises <see cref="Completed"/>
    /// whether or not processing threw. Both events carry the same "[product id] title" message.
    /// </summary>
    // do NOT use ConfigureAwait(false) here:
    // processing often does a lot with forms in the UI context
    public async Task<StatusHandler> ProcessAsync(LibraryBook libraryBook)
    {
        var bookLabel = $"[{libraryBook.Book.AudibleProductId}] {libraryBook.Book.Title}";

        Begin?.Invoke(this, bookLabel);

        try
        {
            var status = await ProcessItemAsync(libraryBook);
            return status;
        }
        finally
        {
            Completed?.Invoke(this, bookLabel);
        }
    }

    // other user agents from my chrome. from: https://www.whoishostingthis.com/tools/user-agent/
    static string[] browserUserAgents { get; } = new[]
    {
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.96 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.87 Safari/537.36",
    };

    // we need a minimum delay between tries when hitting audible.com
    // in every case except decrypt (which is already long running), we hit audible.com
    static Humanizer requestDelay { get; } = new Humanizer { Minimum = 5, Maximum = 20 };

    static Random random { get; } = new Random();

    /// <summary>
    /// Waits on the shared delay, then builds a <see cref="WebClient"/> with browser-like headers and a randomly
    /// chosen user agent. The client's progress and completion events are forwarded to this instance's download
    /// events, and <see cref="DownloadBegin"/> is raised before the client is returned.
    /// </summary>
    /// <param name="downloadMessage">Text for the begin event; the completed events carry "Completed: " plus this text.</param>
    /// <returns>A new client. The caller owns it and is responsible for disposing it.</returns>
    protected async Task<WebClient> GetWebClientAsync(string downloadMessage)
    {
        await requestDelay.Wait();

        var client = new WebClient();

        // https://towardsdatascience.com/5-strategies-to-write-unblock-able-web-scrapers-in-python-5e40c147bdaf
        var pick = random.Next(0, browserUserAgents.Length); // upper bound is exclusive

        var headers = client.Headers;
        headers["User-Agent"] = browserUserAgents[pick];
        headers["Referer"] = "https://google.com";
        headers["Upgrade-Insecure-Requests"] = "1";
        headers["DNT"] = "1";
        headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8";
        headers["Accept-Language"] = "en-US,en;q=0.9";

        // a "Host" header is intentionally not set:
        // it breaks pdf download which uses: http://download.audible.com
        // weirdly, it works for book download even though it uses https://cds.audible.com
        //headers["Host"] = "www.audible.com";

        client.DownloadProgressChanged += (s, e) =>
        {
            var progress = new Dinah.Core.Net.Http.DownloadProgress
            {
                BytesReceived = e.BytesReceived,
                ProgressPercentage = e.ProgressPercentage,
                TotalBytesToReceive = e.TotalBytesToReceive
            };
            Invoke_DownloadProgressChanged(s, progress);
        };

        // file, data and string downloads all report completion the same way
        var completedMessage = $"Completed: {downloadMessage}";
        client.DownloadFileCompleted += (s, e) => Invoke_DownloadCompleted(s, completedMessage);
        client.DownloadDataCompleted += (s, e) => Invoke_DownloadCompleted(s, completedMessage);
        client.DownloadStringCompleted += (s, e) => Invoke_DownloadCompleted(s, completedMessage);

        Invoke_DownloadBegin(downloadMessage);

        return client;
    }
}
}

View File

@ -1,380 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using DataLayer;
using DTOs;
namespace ScrapingDomainServices
{
public class DtoImporter
{
// database context every upsert/load in this class goes through. set once in the constructor
LibationContext context { get; }

/// <summary>Creates an importer that works against <paramref name="context"/>.</summary>
public DtoImporter(LibationContext context)
{
    this.context = context;
}
#region LibraryDTO
/// <summary>LONG RUNNING. call with await Task.Run</summary>
/// <returns>the count reported by the library-book upsert: requested entries that were not yet in the library</returns>
public int ReplaceLibrary(List<LibraryDTO> requestedLibraryDTOs)
{
    // contributors and series go first: the book upsert looks them up in the context's local cache,
    // and the library-book upsert in turn looks up the books
    upsertContributors(requestedLibraryDTOs);
    upsertSeries(requestedLibraryDTOs);
    upsertBooks(requestedLibraryDTOs);

    // ReloadBookDetails(requestedLibraryDTOs) is deliberately NOT called here.
    // It cannot run until the caller has done context.SaveChanges():
    //   setup:
    //     library page shows narrators "bob smith" "kevin jones" "and others"
    //     book details shows narrators "bob smith" "kevin jones" "alice liddell"
    //   error:
    //     creates BookContributors with same keys, even though one is orphaned
    //     "The instance of entity type cannot be tracked because another instance with the same key value for {'Id'} is already being tracked"
    //     https://github.com/aspnet/EntityFrameworkCore/issues/12459
    //   solution:
    //     replace library (creates library version)
    //     save
    //     update book details (adds new book details version, removes library version)
    return upsertLibraryBooks(requestedLibraryDTOs);
}
// collects every author and narrator named by the library DTOs, loads the ones that already exist
// into the local cache, then adds whichever are still missing
private void upsertContributors(List<LibraryDTO> requestedLibraryDTOs)
{
    var authors = requestedLibraryDTOs.SelectMany(dto => dto.Authors).ToList();
    var narrators = requestedLibraryDTOs.SelectMany(dto => dto.Narrators).ToList();

    // author names followed by narrator names, duplicates removed
    var everyName = authors
        .Select(author => author.authorName)
        .Concat(narrators)
        .Distinct()
        .ToList();
    loadLocal_contributors(everyName);

    upsertAuthors(authors);
    upsertNarrators(narrators);
}
// flattens each DTO's series entries (key = series id, value = series name) and upserts them
private void upsertSeries(List<LibraryDTO> requestedLibraryDTOs)
{
    var seriesPairs =
        (from dto in requestedLibraryDTOs
         from entry in dto.Series
         select (seriesId: entry.Key, seriesName: entry.Value))
        .ToList();

    upsertSeries(seriesPairs);
}
// loads the requested books into the local cache, then upserts each DTO in list order
private void upsertBooks(List<LibraryDTO> requestedLibraryDTOs)
{
    var productIds = requestedLibraryDTOs.Select(dto => dto.ProductId).ToList();
    loadLocal_books(productIds);

    requestedLibraryDTOs.ForEach(upsertBook);
}
// creates the book if it is not in the local cache yet, then refreshes the fields a library import can change.
// expects contributors, series and books to have been loaded into the local cache beforehand
private void upsertBook(LibraryDTO libraryDTO)
{
    var book
        = context.Books.Local.SingleOrDefault(p => p.AudibleProductId == libraryDTO.ProductId)
        ?? addNewBook(libraryDTO);

    // set/update book-specific info which may have changed
    book.PictureId = libraryDTO.PictureId;
    book.UpdateProductRating(
        libraryDTO.Product_OverallStars,
        libraryDTO.Product_PerformanceStars,
        libraryDTO.Product_StoryStars);

    foreach (var supplementUrl in libraryDTO.SupplementUrls)
    {
        var restoredUrl = FileManager.FileUtility.RestoreDeclawed(supplementUrl);
        book.AddSupplementDownloadUrl(restoredUrl);
    }

    // important to update user-specific info. this will have changed if user has rated/reviewed the book since last library import
    book.UserDefinedItem.UpdateRating(
        libraryDTO.MyUserRating_Overall,
        libraryDTO.MyUserRating_Performance,
        libraryDTO.MyUserRating_Story);

    // update series even for existing books. these are occasionally updated
    var seriesIds = libraryDTO.Series.Select(kvp => kvp.Key).ToList();
    var matchingSeries = context.Series.Local.Where(c => seriesIds.Contains(c.AudibleSeriesId)).ToList();
    foreach (var series in matchingSeries)
    {
        book.UpsertSeries(series);
    }
}

// adds a new Book built from the DTO and returns the added entity.
// side effect: when the DTO lists no narrators, its Narrators are replaced with the author names
private Book addNewBook(LibraryDTO libraryDTO)
{
    // per-name lookups are required so order of names is retained. else, contributors may appear in the order they were inserted into the db
    var authors = libraryDTO
        .Authors
        .Select(t => context.Contributors.Local.Single(c => t.authorName == c.Name))
        .ToList();

    // if no narrators listed, author is the narrator
    if (!libraryDTO.Narrators.Any())
        libraryDTO.Narrators = authors.Select(a => a.Name).ToArray();

    // same per-name lookup as for authors, for the same reason
    var narrators = libraryDTO
        .Narrators
        .Select(n => context.Contributors.Local.Single(c => n == c.Name))
        .ToList();

    var newBook = new Book(
        new AudibleProductId(libraryDTO.ProductId),
        libraryDTO.Title,
        libraryDTO.Description,
        libraryDTO.LengthInMinutes,
        authors);

    var added = context.Books.Add(newBook).Entity;
    added.ReplaceNarrators(narrators);
    return added;
}
/// <summary>
/// Adds a library entry for every requested DTO whose product id is not in the library yet.
/// Each new entry is linked to the matching book from the local cache, so the books must already be loaded there.
/// </summary>
/// <returns>number of requested DTOs that were not yet in the library</returns>
private int upsertLibraryBooks(List<LibraryDTO> requestedLibraryDTOs)
{
    // hash set instead of list: the membership test below runs once per requested DTO,
    // and a list would be scanned end to end each time
    var currentLibraryProductIds = new HashSet<string>(
        context.Library.Select(l => l.Book.AudibleProductId).ToList());

    List<LibraryDTO> newLibraryDTOs = requestedLibraryDTOs
        .Where(dto => !currentLibraryProductIds.Contains(dto.ProductId))
        .ToList();

    foreach (var newLibraryDTO in newLibraryDTOs)
    {
        var libraryBook = new LibraryBook(
            context.Books.Local.Single(b => b.AudibleProductId == newLibraryDTO.ProductId),
            newLibraryDTO.DateAdded,
            FileManager.FileUtility.RestoreDeclawed(newLibraryDTO.DownloadBookLink));
        context.Library.Add(libraryBook);
    }

    return newLibraryDTOs.Count;
}
/// <summary>LONG RUNNING. call with await Task.Run</summary>
/// <remarks>
/// For each distinct product id, reads the book-detail json file if one exists and applies all
/// deserialized details in a single <see cref="UpdateBookDetails(List{BookDetailDTO})"/> call.
/// Does nothing when no json file exists.
/// </remarks>
public void ReloadBookDetails(List<LibraryDTO> requestedLibraryDTOs)
{
    var storedDetails = new List<BookDetailDTO>();

    foreach (var productId in requestedLibraryDTOs.Select(dto => dto.ProductId).Distinct())
    {
        var jsonFile = FileManager.WebpageStorage.GetBookDetailJsonFileInfo(productId);
        if (!jsonFile.Exists)
            continue;

        var json = System.IO.File.ReadAllText(jsonFile.FullName);
        storedDetails.Add(Newtonsoft.Json.JsonConvert.DeserializeObject<BookDetailDTO>(json));
    }

    if (storedDetails.Count > 0)
        UpdateBookDetails(storedDetails);
}
#endregion
#region BookDetailDTO
/// <summary>LONG RUNNING. call with await Task.Run</summary>
/// <remarks>Single-item convenience overload: wraps the DTO in a list and delegates to the list overload.</remarks>
public void UpdateBookDetails(BookDetailDTO bookDetailDTO)
{
    var single = new List<BookDetailDTO> { bookDetailDTO };
    UpdateBookDetails(single);
}
/// <summary>LONG RUNNING. call with await Task.Run</summary>
/// <remarks>Changes are made on the context only; nothing in this method saves them.</remarks>
public void UpdateBookDetails(List<BookDetailDTO> bookDetailDTOs)
{
    // reference data first: the per-book update looks up contributors, categories and series in the local cache
    upsertContributors(bookDetailDTOs);
    upsertCategories(bookDetailDTOs);
    upsertSeries(bookDetailDTOs);

    // then the books themselves
    updateBooks(bookDetailDTOs);
}
// collects the narrators and non-blank publishers named by the book-detail DTOs, loads the ones that
// already exist into the local cache, then adds whichever are still missing
private void upsertContributors(List<BookDetailDTO> bookDetailDTOs)
{
    var narrators = bookDetailDTOs.SelectMany(dto => dto.Narrators).ToList();

    var publishers = bookDetailDTOs
        .Select(dto => dto.Publisher)
        .Where(publisher => !string.IsNullOrWhiteSpace(publisher))
        .ToList();

    // narrator names followed by publisher names, duplicates removed
    var everyName = narrators.Concat(publishers).Distinct().ToList();
    loadLocal_contributors(everyName);

    upsertNarrators(narrators);
    upsertPublishers(publishers);
}
// loads every category referenced by the DTOs into the local cache, then upserts per DTO
private void upsertCategories(List<BookDetailDTO> bookDetailDTOs)
{
    var referencedIds =
        (from dto in bookDetailDTOs
         from category in dto.Categories
         select category.categoryId)
        .ToList();
    loadLocal_categories(referencedIds);

    bookDetailDTOs.ForEach(upsertCategories);
}
// upserts the DTO's categories. the first entry gets no parent; a second entry gets the first as its parent.
// no categories: nothing to do. more than two: throws
private void upsertCategories(BookDetailDTO bookDetailDTO)
{
    var categoryCount = bookDetailDTO.Categories.Count;

    if (categoryCount == 0)
        return;

    if (categoryCount > 2)
        throw new Exception("expecting either 1 or 2 categories");

    for (var position = 0; position < categoryCount; position++)
    {
        var (id, name) = bookDetailDTO.Categories[position];

        // only the second entry has a parent: the first entry, which the previous iteration found or added
        var parent = position == 1
            ? context.Categories.Local.Single(c => c.AudibleCategoryId == bookDetailDTO.Categories[0].categoryId)
            : null;

        var existing = context.Categories.Local.SingleOrDefault(c => c.AudibleCategoryId == id);
        var category = existing ?? context.Categories.Add(new Category(new AudibleCategoryId(id), name)).Entity;

        category.UpdateParentCategory(parent);
    }
}
// flattens each DTO's series entries into (id, name) pairs and upserts them
private void upsertSeries(List<BookDetailDTO> bookDetailDTOs)
{
    var seriesPairs =
        (from dto in bookDetailDTOs
         from entry in dto.Series
         select (seriesId: entry.SeriesId, seriesName: entry.SeriesName))
        .ToList();

    upsertSeries(seriesPairs);
}
// loads the referenced books into the local cache, then applies each DTO in list order
private void updateBooks(List<BookDetailDTO> bookDetailDTOs)
{
    var productIds = bookDetailDTOs.Select(dto => dto.ProductId).ToList();
    loadLocal_books(productIds);

    bookDetailDTOs.ForEach(updateBook);
}
// applies one book-detail DTO to its book: narrators, publisher, series, category, then the remaining details.
// expects the book, its contributors, series and categories to be in the local cache already
private void updateBook(BookDetailDTO bookDetailDTO)
{
    var book = context.Books.Local.Single(b => b.AudibleProductId == bookDetailDTO.ProductId);

    // per-name lookups are required so order of names is retained. else, contributors may appear in the order they were inserted into the db
    var narrators = bookDetailDTO
        .Narrators
        .Select(n => context.Contributors.Local.Single(c => n == c.Name))
        .ToList();
    // not all books have narrators. these will already be using author as narrator. don't undo this
    if (narrators.Count > 0)
        book.ReplaceNarrators(narrators);

    var publisherName = bookDetailDTO.Publisher;
    var hasPublisher = !string.IsNullOrWhiteSpace(publisherName);
    if (hasPublisher)
    {
        book.ReplacePublisher(context.Contributors.Local.Single(c => publisherName == c.Name));
    }

    // these will upsert over library-scraped series, but will not leave orphans
    foreach (var entry in bookDetailDTO.Series)
    {
        var series = context.Series.Local.Single(s => entry.SeriesId == s.AudibleSeriesId);
        book.UpsertSeries(series, entry.Index);
    }

    // categories are laid out for a breadcrumb. category is 1st, subcategory is 2nd.
    // the id of the last entry is worked out once, outside the per-category predicate
    var lastCategoryId = bookDetailDTO.Categories.LastOrDefault().categoryId;
    var category = context.Categories.Local.SingleOrDefault(c => c.AudibleCategoryId == lastCategoryId);
    if (category != null)
        book.UpdateCategory(category, context);

    book.UpdateBookDetails(bookDetailDTO.IsAbridged, bookDetailDTO.DatePublished);
}
#endregion
#region load db existing => .Local
// queries the db for the requested contributor names that are not in the local cache yet
private void loadLocal_contributors(List<string> contributorNames)
{
    // filtering .Local with Where(c => !names.Contains(c.Name)) would be very inefficient.
    // Except() is efficient: due to hashing, it's close to O(n)
    var alreadyLocal = context.Contributors.Local.Select(c => c.Name);
    var missingNames = contributorNames
        .Distinct()
        .Except(alreadyLocal)
        .ToList();

    if (missingNames.Count == 0)
        return;

    // load existing => local. the query is run for that effect only; its result list is discarded.
    // the property-specific predicate is what keeps this pattern from being extracted into one shared helper
    // (and it wouldn't work anyway for Books.GetBooks())
    context.Contributors.Where(c => missingNames.Contains(c.Name)).ToList();
}
// queries the db for the requested series ids that are not in the local cache yet
private void loadLocal_series(List<string> seriesIds)
{
    var alreadyLocal = context.Series.Local.Select(s => s.AudibleSeriesId);
    var missingIds = seriesIds
        .Distinct()
        .Except(alreadyLocal)
        .ToList();

    if (missingIds.Count == 0)
        return;

    // run for its loading effect only; the result list is discarded
    context.Series.Where(s => missingIds.Contains(s.AudibleSeriesId)).ToList();
}
// queries the db for the requested product ids that are not in the local cache yet
private void loadLocal_books(List<string> productIds)
{
    var alreadyLocal = context.Books.Local.Select(b => b.AudibleProductId);
    var missingIds = productIds
        .Distinct()
        .Except(alreadyLocal)
        .ToList();

    if (missingIds.Count == 0)
        return;

    // GetBooks() eager loads Series, category, et al.
    // run for its loading effect only; the result list is discarded
    context.Books.GetBooks(b => missingIds.Contains(b.AudibleProductId)).ToList();
}
// queries the db for the requested category ids that are not in the local cache yet
private void loadLocal_categories(List<string> categoryIds)
{
    var alreadyLocal = context.Categories.Local.Select(c => c.AudibleCategoryId);
    var missingIds = categoryIds
        .Distinct()
        .Except(alreadyLocal)
        .ToList();

    if (missingIds.Count == 0)
        return;

    // run for its loading effect only; the result list is discarded
    context.Categories.Where(c => missingIds.Contains(c.AudibleCategoryId)).ToList();
}
#endregion
// only use after loading contributors => local.
// finds or adds a contributor per distinct (name, id) pair, then updates its audible author id
private void upsertAuthors(List<(string authorName, string authorId)> requestedAuthors)
{
    foreach (var (name, id) in requestedAuthors.Distinct().ToList())
    {
        var existing = context.Contributors.Local.SingleOrDefault(c => c.Name == name);
        var author = existing ?? context.Contributors.Add(new Contributor(name)).Entity;

        author.UpdateAudibleAuthorId(id);
    }
}
// only use after loading contributors => local.
// adds a contributor for each distinct narrator name that has no local match
private void upsertNarrators(List<string> requestedNarratorNames)
{
    foreach (var name in requestedNarratorNames.Distinct().ToList())
    {
        var existing = context.Contributors.Local.SingleOrDefault(c => c.Name == name);
        if (existing == null)
        {
            context.Contributors.Add(new Contributor(name));
        }
    }
}
// only use after loading contributors => local.
// adds a contributor for each distinct publisher name that has no local match
private void upsertPublishers(List<string> requestedPublisherNames)
{
    foreach (var name in requestedPublisherNames.Distinct().ToList())
    {
        var existing = context.Contributors.Local.SingleOrDefault(c => c.Name == name);
        if (existing == null)
        {
            context.Contributors.Add(new Contributor(name));
        }
    }
}
// loads the requested series into the local cache, then finds or adds each one and updates its name
private void upsertSeries(List<(string seriesId, string seriesName)> requestedSeries)
{
    var uniquePairs = requestedSeries.Distinct().ToList();

    var uniqueIds = uniquePairs
        .Select(pair => pair.seriesId)
        .Distinct()
        .ToList();
    loadLocal_series(uniqueIds);

    foreach (var (id, name) in uniquePairs)
    {
        var existing = context.Series.Local.SingleOrDefault(c => c.AudibleSeriesId == id);
        var series = existing ?? context.Series.Add(new DataLayer.Series(new AudibleSeriesId(id))).Entity;

        series.UpdateName(name);
    }
}
}
}

View File

@ -1,158 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using ApplicationService;
using DataLayer;
using Dinah.Core;
using Dinah.Core.Collections.Generic;
using DTOs;
using FileManager;
using Newtonsoft.Json;
namespace ScrapingDomainServices
{
public static class Indexer
{
#region library
/// <summary>Indexes the json files that <c>WebpageStorage.GetJsonFiles</c> returns for <paramref name="libDir"/>.</summary>
/// <returns>the result of the file-list overload</returns>
public static async Task<(int total, int newEntries)> IndexLibraryAsync(DirectoryInfo libDir)
{
    var result = await IndexLibraryAsync(WebpageStorage.GetJsonFiles(libDir));
    return result;
}
/// <summary>Reads the library DTOs from every json file and indexes them in one pass.</summary>
/// <returns>
/// total: size of the DTO list after indexing; newEntries: what the DTO-list overload returned
/// </returns>
public static async Task<(int total, int newEntries)> IndexLibraryAsync(List<FileInfo> jsonFileInfos)
{
    var dtos = new List<LibraryDTO>();
    foreach (var jsonFile in jsonFileInfos)
        dtos.AddRange(json2libraryDtos(jsonFile));

    var added = await IndexLibraryAsync(dtos);

    // Count is read after indexing: filterAndValidate removes episodic items from this same list instance
    return (dtos.Count, added);
}
// matches text that opens with '[' then '{' (whitespace allowed), i.e. a json array of objects
private static readonly Regex jsonIsCollectionRegex = new Regex(@"^\s*\[\s*\{", RegexOptions.Compiled);

// reads one json file as library DTOs. the file may hold a single DTO or a collection of them
private static IEnumerable<LibraryDTO> json2libraryDtos(FileInfo jsonFileInfo)
{
    validateJsonFile(jsonFileInfo);

    var json = File.ReadAllText(jsonFileInfo.FullName);

    // single
    if (!jsonIsCollectionRegex.IsMatch(json))
    {
        var only = JsonConvert.DeserializeObject<LibraryDTO>(json);
        return new List<LibraryDTO> { only };
    }

    // collection
    return JsonConvert.DeserializeObject<List<LibraryDTO>>(json);
}
// new full index or library-file import: re-create search index
/// <summary>
/// Imports the DTOs into the database (library first, then stored book details, saving after each step)
/// and then rebuilds the search index.
/// </summary>
/// <returns>qty new entries. 0 when <paramref name="productItems"/> is null or empty</returns>
public static async Task<int> IndexLibraryAsync(List<LibraryDTO> productItems)
{
    if (productItems is null || productItems.Count == 0)
        return 0;

    var validated = filterAndValidate(productItems);

    using var context = LibationContext.Create();
    var importer = new DtoImporter(context);

    // benchmarks. re-importing a library with 500 books, all with book details json files
    //   dtoImporter.ReplaceLibrary   1.2 seconds
    //   SaveChanges()                3.4
    //   ReloadBookDetails()          1.3
    //   SaveChanges()                1.4

    // LONG RUNNING
    var added = await Task.Run(() => importer.ReplaceLibrary(validated));
    await context.SaveChangesAsync();

    // must be broken out into its own step after a save. see notes in dtoImporter.ReplaceLibrary()
    // LONG RUNNING
    await Task.Run(() => importer.ReloadBookDetails(validated));
    await context.SaveChangesAsync();

    await SearchEngineActions.FullReIndexAsync();

    return added;
}
// removes episodic items from the given list IN PLACE, rejects blank product ids,
// and returns a new list with one item per product id
private static List<LibraryDTO> filterAndValidate(List<LibraryDTO> collection)
{
    // for now, do not add episodic content
    //debug//var episodes = collection.Where(dto => dto.IsEpisodes).ToList();
    collection.RemoveAll(dto => dto.IsEpisodes);

    var hasBlankId = collection.Exists(item => string.IsNullOrWhiteSpace(item.ProductId));
    if (hasBlankId)
        throw new Exception("All product items must contain a Product Id");

    // duplicates are dropped silently. the earlier alternative was to throw, listing the duplicate ids:
    //   "Cannot insert multiples of the same ProductId. Duplicates:" followed by one "\r\n- id" line per duplicate
    var unique = collection.DistinctBy(item => item.ProductId).ToList();
    return unique;
}
#endregion
#region update book tags
/// <summary>
/// Saves <paramref name="book"/> through a fresh context and, if the save reported any changes,
/// pushes the book's tags to the search engine.
/// </summary>
/// <returns>the value returned by SaveChanges()</returns>
public static int IndexChangedTags(Book book)
{
    using (var context = LibationContext.Create())
    {
        // update disconnected entity
        context.Update(book);
        var saved = context.SaveChanges();

        // nothing changed: the search index is left alone
        if (saved <= 0)
            return saved;

        // this part is tags-specific
        SearchEngineActions.UpdateBookTags(book);
        return saved;
    }
}
#endregion
#region book details
/// <summary>Imports one book-details DTO, then re-indexes that product in the search engine.</summary>
public static async Task IndexBookDetailsAsync(BookDetailDTO bookDetailDTO)
{
    // only invoked by the import when the DTO is non-null, so reading ProductId here is safe
    Task reIndexProduct() => SearchEngineActions.ProductReIndexAsync(bookDetailDTO.ProductId);

    await indexBookDetailsAsync(bookDetailDTO, reIndexProduct);
}
/// <summary>
/// Imports one book-details DTO into the database, saves, removes orphans,
/// then awaits the optional follow-up action.
/// </summary>
/// <param name="bookDetailDTO">Details to import. When null the method returns without doing anything.</param>
/// <param name="postIndexActionAsync">Awaited after the database work. May be null, in which case it is skipped.</param>
/// <exception cref="Exception">The DTO has a blank product id (thrown by validate).</exception>
private static async Task indexBookDetailsAsync(BookDetailDTO bookDetailDTO, Func<Task> postIndexActionAsync)
{
    if (bookDetailDTO == null)
        return;

    validate(bookDetailDTO);

    using var context = LibationContext.Create();
    var dtoImporter = new DtoImporter(context);

    // LONG RUNNING
    await Task.Run(() => dtoImporter.UpdateBookDetails(bookDetailDTO));
    context.SaveChanges();

    // after saving, delete orphan contributors. the removed count is not needed here
    context.RemoveOrphans();

    // `await postIndexActionAsync?.Invoke()` evaluates to `await null` when no action is supplied,
    // which throws NullReferenceException. test for null before awaiting instead
    if (postIndexActionAsync != null)
        await postIndexActionAsync();
}
// rejects a DTO whose product id is null, empty or whitespace
private static void validate(BookDetailDTO bookDetailDTO)
{
    var hasProductId = !string.IsNullOrWhiteSpace(bookDetailDTO.ProductId);
    if (hasProductId)
        return;

    throw new Exception("Product must contain a Product Id");
}
#endregion
// rejects any file whose extension is not ".json", compared with EqualsInsensitive
private static void validateJsonFile(FileInfo jsonFileInfo)
{
    var isJson = jsonFileInfo.Extension.EqualsInsensitive(".json");
    if (isJson)
        return;

    throw new Exception("Unsupported file types");
}
}
}

View File

@ -1,115 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using AudibleDotCom;
using DataLayer;
using Dinah.Core.ErrorHandling;
using DTOs;
using InternalUtilities;
using Scraping;
namespace ScrapingDomainServices
{
/// <summary>
/// book detail page:
/// - audible webpage => AudiblePageSource
/// - AudiblePageSource => declaw => htm file
/// - AudiblePageSource => scrape => DTO
/// - DTO => json file
/// - DTO => db
/// - update lucene
/// </summary>
public class ScrapeBookDetails : DownloadableBase
{
    public enum NoLongerAvailableEnum { None, Abort, MarkAsMissing }

    /// <summary>Returns product id of book which was successfully imported and re-indexed</summary>
    public event EventHandler<string> BookSuccessfullyImported;

    /// <summary>Hook for handling book no-longer-available. String 1: book title. String 2: book url</summary>
    public Func<string, string, NoLongerAvailableEnum> NoLongerAvailableAction { get; set; }

    /// <summary>A book needs processing only while it has no book details yet.</summary>
    public override Task<bool> ValidateAsync(LibraryBook libraryBook)
    {
        var needsDetails = !libraryBook.Book.HasBookDetails;
        return Task.FromResult(needsDetails);
    }

    /// <summary>
    /// Gets the book details from the first source that exists (stored json, then stored htm, then a download),
    /// indexes them, and raises <see cref="BookSuccessfullyImported"/>.
    /// </summary>
    /// <returns>
    /// A fresh <see cref="StatusHandler"/> once the details are indexed, or a handler carrying a message
    /// when the no-longer-available hook answers Abort.
    /// </returns>
    public override async Task<StatusHandler> ProcessItemAsync(LibraryBook libraryBook)
    {
        var productId = libraryBook.Book.AudibleProductId;

        // product ids that are useful to substitute here when testing:
        //   "B0787DGS2T"  book with only 1 category, no sub category
        //   "B002V1OF70"  mult series, more narrators here than in library
        //   "B0032N8Q58"  abridged
        //   "B07GXW7KHG"  categories in product details block. no narrators
        //   "B002ZEEDAW"  categories above image
        //   "B075Y4SWJ8"  lots of narrators, no 'abridged'

        BookDetailDTO details;

        // if json file exists, then htm is irrelevant. important b/c in cases of no-longer-available items, json is generated but no htm
        var jsonFile = FileManager.WebpageStorage.GetBookDetailJsonFileInfo(productId);
        if (jsonFile.Exists)
        {
            var json = File.ReadAllText(jsonFile.FullName);
            details = Newtonsoft.Json.JsonConvert.DeserializeObject<BookDetailDTO>(json);
        }
        else
        {
            // no json. fall back to htm
            var htmFile = FileManager.WebpageStorage.GetBookDetailHtmFileInfo(productId);
            if (htmFile.Exists)
            {
                // htm exists, json doesn't. load existing htm
                var storedPage = DataConverter.HtmFile_2_AudiblePageSource(htmFile.FullName);
                details = AudibleScraper.ScrapeBookDetailsSource(storedPage);
            }
            else
            {
                // no htm. download and parse
                var url = AudiblePage.Product.GetUrl(productId);
                using var webClient = await GetWebClientAsync($"Getting Book Details for {libraryBook.Book.Title}");
                try
                {
                    var html = await webClient.DownloadStringTaskAsync(url);
                    var downloadedPage = new AudiblePageSource(AudiblePageType.ProductDetails, html, productId);

                    // good habit to persist htm before attempting to parse it. this way, if there's a parse error, we can test errors on a local copy
                    DataConverter.AudiblePageSource_2_HtmFile_Product(downloadedPage);

                    details = AudibleScraper.ScrapeBookDetailsSource(downloadedPage);
                }
                catch (System.Net.WebException webEx)
                {
                    // cannot continue if NoLongerAvailableAction is null,
                    // else we'll be right back here next loop (and infinitely) with no failure condition
                    var connectionClosed = webEx.Status == System.Net.WebExceptionStatus.ConnectionClosed;
                    if (!connectionClosed || NoLongerAvailableAction == null)
                        throw;

                    var decision = NoLongerAvailableAction.Invoke(
                        libraryBook.Book.Title,
                        AudiblePage.Product.GetUrl(libraryBook.Book.AudibleProductId));

                    switch (decision)
                    {
                        case NoLongerAvailableEnum.Abort:
                            return new StatusHandler { "Cannot scrape book details. Aborting." };

                        case NoLongerAvailableEnum.MarkAsMissing:
                            // placeholder details holding only the product id
                            details = new BookDetailDTO { ProductId = productId };
                            break;

                        default:
                            throw;
                    }
                }
            }

            // whichever way the details were obtained, store them as json
            DataConverter.Value_2_JsonFile(details, jsonFile.FullName);
        }

        await Indexer.IndexBookDetailsAsync(details);

        BookSuccessfullyImported?.Invoke(this, productId);

        return new StatusHandler();
    }
}
}

View File

@ -1,127 +0,0 @@
namespace WinFormsDesigner.BookLiberation
{
// Designer-generated half of NoLongerAvailableForm: control declarations, layout and disposal.
// The layout below is owned by the Windows Forms designer; hand edits belong in the other partial file.
partial class NoLongerAvailableForm
{
/// <summary>
/// Required designer variable.
/// </summary>
private System.ComponentModel.IContainer components = null;
/// <summary>
/// Clean up any resources being used.
/// </summary>
/// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
protected override void Dispose(bool disposing)
{
// components is never assigned in this file, so this guard is what keeps Dispose null-safe
if (disposing && (components != null))
{
components.Dispose();
}
base.Dispose(disposing);
}
#region Windows Form Designer generated code
/// <summary>
/// Required method for Designer support - do not modify
/// the contents of this method with the code editor.
/// </summary>
private void InitializeComponent()
{
this.label1 = new System.Windows.Forms.Label();
this.textBox1 = new System.Windows.Forms.TextBox();
this.missingBtn = new System.Windows.Forms.Button();
this.abortBtn = new System.Windows.Forms.Button();
this.label2 = new System.Windows.Forms.Label();
this.label3 = new System.Windows.Forms.Label();
this.SuspendLayout();
//
// label1
//
// NOTE(review): the text holds a "{0}" placeholder that nothing in this file fills in;
// presumably whoever shows the form formats in the book title - confirm
this.label1.AutoSize = true;
this.label1.Location = new System.Drawing.Point(12, 9);
this.label1.Name = "label1";
this.label1.Size = new System.Drawing.Size(174, 39);
this.label1.TabIndex = 0;
this.label1.Text = "Book details download failed.\r\n{0} may be no longer available.\r\nVerify the book i" +
"s still available here";
//
// textBox1
//
// read-only text box; no text is assigned here
this.textBox1.Location = new System.Drawing.Point(15, 51);
this.textBox1.Name = "textBox1";
this.textBox1.ReadOnly = true;
this.textBox1.Size = new System.Drawing.Size(384, 20);
this.textBox1.TabIndex = 1;
//
// missingBtn
//
this.missingBtn.Location = new System.Drawing.Point(324, 77);
this.missingBtn.Name = "missingBtn";
this.missingBtn.Size = new System.Drawing.Size(75, 23);
this.missingBtn.TabIndex = 3;
this.missingBtn.Text = "Missing";
this.missingBtn.UseVisualStyleBackColor = true;
//
// abortBtn
//
this.abortBtn.Location = new System.Drawing.Point(324, 126);
this.abortBtn.Name = "abortBtn";
this.abortBtn.Size = new System.Drawing.Size(75, 23);
this.abortBtn.TabIndex = 5;
this.abortBtn.Text = "Abort";
this.abortBtn.UseVisualStyleBackColor = true;
//
// label2
//
// explanation shown next to missingBtn
this.label2.AutoSize = true;
this.label2.Location = new System.Drawing.Point(12, 74);
this.label2.Name = "label2";
this.label2.Size = new System.Drawing.Size(306, 26);
this.label2.TabIndex = 2;
this.label2.Text = "If the book is not available, click here to mark it as missing\r\nNo further book d" +
"etails download will be attempted for this book";
//
// label3
//
// explanation shown next to abortBtn
this.label3.AutoSize = true;
this.label3.Location = new System.Drawing.Point(12, 123);
this.label3.Name = "label3";
this.label3.Size = new System.Drawing.Size(204, 26);
this.label3.TabIndex = 4;
this.label3.Text = "If the book is actually available, click here\r\nto abort and try again later";
//
// NoLongerAvailableForm
//
// fixed-size dialog: no maximize/minimize box, no icon, no taskbar entry, centered on its parent.
// no Click handlers are wired to the buttons in this file
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
this.ClientSize = new System.Drawing.Size(411, 161);
this.Controls.Add(this.label3);
this.Controls.Add(this.label2);
this.Controls.Add(this.abortBtn);
this.Controls.Add(this.missingBtn);
this.Controls.Add(this.textBox1);
this.Controls.Add(this.label1);
this.FormBorderStyle = System.Windows.Forms.FormBorderStyle.FixedDialog;
this.MaximizeBox = false;
this.MinimizeBox = false;
this.Name = "NoLongerAvailableForm";
this.ShowIcon = false;
this.ShowInTaskbar = false;
this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
this.Text = "No Longer Available";
this.ResumeLayout(false);
this.PerformLayout();
}
#endregion
private System.Windows.Forms.Label label1;
private System.Windows.Forms.TextBox textBox1;
private System.Windows.Forms.Button missingBtn;
private System.Windows.Forms.Button abortBtn;
private System.Windows.Forms.Label label2;
private System.Windows.Forms.Label label3;
}
}

View File

@ -1,13 +0,0 @@
using System;
using System.Windows.Forms;
namespace WinFormsDesigner.BookLiberation
{
/// <summary>Code-behind for the form. It only runs the designer-generated initialization.</summary>
public partial class NoLongerAvailableForm : Form
{
    public NoLongerAvailableForm() => InitializeComponent();
}
}

View File

@ -1,120 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 2.0
The primary goal of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.
Example:
... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">2.0</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
<value>[base64 mime encoded serialized .NET Framework object]</value>
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
<comment>This is a comment</comment>
</data>
There are any number of "resheader" rows that contain simple
name/value pairs.
Each data row contains a name and a value. The row also contains a
type or mimetype. Type corresponds to a .NET class that supports
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.
The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:
Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="metadata">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" use="required" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
<xsd:complexType>
<xsd:attribute name="alias" type="xsd:string" />
<xsd:attribute name="name" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
</root>

View File

@ -1,78 +0,0 @@
namespace WinFormsDesigner
{
// Designer-generated half of the ScanLibraryDialog partial class. It declares the
// form's two child controls (a WebsiteProcessorControl and a button) and builds
// the layout in InitializeComponent(). No event handlers are wired in this file.
partial class ScanLibraryDialog
{
/// <summary>
/// Required designer variable. Initialized to null and not assigned anywhere
/// in this file.
/// </summary>
private System.ComponentModel.IContainer components = null;
/// <summary>
/// Clean up any resources being used.
/// </summary>
/// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
protected override void Dispose(bool disposing)
{
// Only dispose the component container on an explicit Dispose (disposing == true),
// and only if one was ever created.
if (disposing && (components != null))
{
components.Dispose();
}
// Always forward to the base class, whatever the value of disposing.
base.Dispose(disposing);
}
#region Windows Form Designer generated code
/// <summary>
/// Required method for Designer support - do not modify
/// the contents of this method with the code editor.
/// Creates the child controls, sets their position, size and anchoring,
/// and then configures the dialog itself.
/// </summary>
private void InitializeComponent()
{
this.websiteProcessorControl1 = new WinFormsDesigner.WebsiteProcessorControl();
this.BeginScanBtn = new System.Windows.Forms.Button();
// Layout is suspended while properties are set and resumed at the end of the method.
this.SuspendLayout();
//
// websiteProcessorControl1
//
// Anchored to all four edges of the form.
this.websiteProcessorControl1.Anchor = ((System.Windows.Forms.AnchorStyles)((((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Bottom)
| System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.websiteProcessorControl1.Location = new System.Drawing.Point(12, 12);
this.websiteProcessorControl1.Name = "websiteProcessorControl1";
this.websiteProcessorControl1.Size = new System.Drawing.Size(324, 137);
this.websiteProcessorControl1.TabIndex = 0;
//
// BeginScanBtn
//
// Anchored to the bottom, left and right edges (not the top), placed below
// websiteProcessorControl1 with the same width (324).
this.BeginScanBtn.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Bottom | System.Windows.Forms.AnchorStyles.Left)
| System.Windows.Forms.AnchorStyles.Right)));
this.BeginScanBtn.Location = new System.Drawing.Point(12, 155);
this.BeginScanBtn.Name = "BeginScanBtn";
this.BeginScanBtn.Size = new System.Drawing.Size(324, 23);
this.BeginScanBtn.TabIndex = 1;
this.BeginScanBtn.Text = "BEGIN SCAN";
this.BeginScanBtn.UseVisualStyleBackColor = true;
//
// ScanLibraryDialog
//
// BeginScanBtn is registered as the form's accept button.
this.AcceptButton = this.BeginScanBtn;
this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
this.ClientSize = new System.Drawing.Size(348, 190);
this.Controls.Add(this.BeginScanBtn);
this.Controls.Add(this.websiteProcessorControl1);
this.Name = "ScanLibraryDialog";
// No icon, no taskbar entry, and centered on the parent window.
this.ShowIcon = false;
this.ShowInTaskbar = false;
this.StartPosition = System.Windows.Forms.FormStartPosition.CenterParent;
this.Text = "Scan Library";
this.ResumeLayout(false);
}
#endregion
// Child controls created in InitializeComponent().
private WebsiteProcessorControl websiteProcessorControl1;
private System.Windows.Forms.Button BeginScanBtn;
}
}

View File

@ -1,13 +0,0 @@
using System;
using System.Windows.Forms;
namespace WinFormsDesigner
{
    /// <summary>
    /// Code-behind half of the <c>ScanLibraryDialog</c> partial class.
    /// It contains no logic beyond the constructor; <c>InitializeComponent</c>
    /// is not defined here and must be supplied by another partial declaration
    /// of this class.
    /// </summary>
    public partial class ScanLibraryDialog : Form
    {
        /// <summary>
        /// Creates the dialog and calls <c>InitializeComponent</c>.
        /// </summary>
        public ScanLibraryDialog() => InitializeComponent();
    }
}

View File

@ -1,120 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 2.0
The primary goal of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.
Example:
... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">2.0</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
<value>[base64 mime encoded serialized .NET Framework object]</value>
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
<comment>This is a comment</comment>
</data>
There are any number of "resheader" rows that contain simple
name/value pairs.
Each data row contains a name and a value. The row also contains a
type or mimetype. Type corresponds to a .NET class that supports
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.
The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:
Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:import namespace="http://www.w3.org/XML/1998/namespace" />
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="metadata">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" use="required" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
<xsd:complexType>
<xsd:attribute name="alias" type="xsd:string" />
<xsd:attribute name="name" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
<xsd:attribute ref="xml:space" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
</root>

Some files were not shown because too many files have changed in this diff Show More