Library import got a complete overhaul. On a library of 1,200 titles: initial scan is 80-85% faster. Subsequent imports are 60-70% faster.

This commit is contained in:
Robert McRackan 2022-04-29 16:35:49 -04:00
parent 7c144b8277
commit 878a5dd36c
20 changed files with 227 additions and 196 deletions

View File

@ -6,7 +6,7 @@
<ItemGroup> <ItemGroup>
<PackageReference Include="AAXClean" Version="0.2.8" /> <PackageReference Include="AAXClean" Version="0.2.8" />
<PackageReference Include="AAXClean.Codecs" Version="0.1.6" /> <PackageReference Include="AAXClean.Codecs" Version="0.1.7" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>

View File

@ -3,7 +3,7 @@
<PropertyGroup> <PropertyGroup>
<TargetFramework>net6.0-windows</TargetFramework> <TargetFramework>net6.0-windows</TargetFramework>
<Version>6.8.4.1</Version> <Version>7.0.0.1</Version>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
@ -11,7 +11,7 @@
<PrivateAssets>all</PrivateAssets> <PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets> <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference> </PackageReference>
<PackageReference Include="Octokit" Version="0.50.0" /> <PackageReference Include="Octokit" Version="0.51.0" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>

View File

@ -89,6 +89,9 @@ namespace ApplicationServices
var totalCount = importItems.Count; var totalCount = importItems.Count;
Log.Logger.Information($"GetAllLibraryItems: Total count {totalCount}"); Log.Logger.Information($"GetAllLibraryItems: Total count {totalCount}");
if (totalCount == 0)
return default;
Log.Logger.Information("Begin long-running import"); Log.Logger.Information("Begin long-running import");
logTime($"pre {nameof(importIntoDbAsync)}"); logTime($"pre {nameof(importIntoDbAsync)}");
var newCount = await importIntoDbAsync(importItems); var newCount = await importIntoDbAsync(importItems);

View File

@ -5,7 +5,7 @@
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="AudibleApi" Version="2.7.2.1" /> <PackageReference Include="AudibleApi" Version="2.7.3.1" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>

View File

@ -9,8 +9,8 @@ namespace DataLayer.Configurations
{ {
entity.HasKey(bc => new { bc.BookId, bc.ContributorId, bc.Role }); entity.HasKey(bc => new { bc.BookId, bc.ContributorId, bc.Role });
entity.HasIndex(b => b.BookId); entity.HasIndex(bc => bc.BookId);
entity.HasIndex(b => b.ContributorId); entity.HasIndex(bc => bc.ContributorId);
entity entity
.HasOne(bc => bc.Book) .HasOne(bc => bc.Book)

View File

@ -21,12 +21,12 @@ namespace DataLayer.Configurations
// - update LibraryBook import code // - update LibraryBook import code
// - would likely challenge assumptions throughout Libation which have been true up until now // - would likely challenge assumptions throughout Libation which have been true up until now
entity.HasKey(b => b.BookId); entity.HasKey(lb => lb.BookId);
entity entity
.HasOne(le => le.Book) .HasOne(lb => lb.Book)
.WithOne() .WithOne()
.HasForeignKey<LibraryBook>(le => le.BookId); .HasForeignKey<LibraryBook>(lb => lb.BookId);
} }
} }
} }

View File

@ -7,10 +7,10 @@ namespace DataLayer.Configurations
{ {
public void Configure(EntityTypeBuilder<SeriesBook> entity) public void Configure(EntityTypeBuilder<SeriesBook> entity)
{ {
entity.HasKey(bc => new { bc.SeriesId, bc.BookId }); entity.HasKey(sb => new { sb.SeriesId, sb.BookId });
entity.HasIndex(b => b.SeriesId); entity.HasIndex(sb => sb.SeriesId);
entity.HasIndex(b => b.BookId); entity.HasIndex(sb => sb.BookId);
entity entity
.HasOne(sb => sb.Series) .HasOne(sb => sb.Series)

View File

@ -7,8 +7,8 @@ namespace DataLayer.Configurations
{ {
public void Configure(EntityTypeBuilder<Series> entity) public void Configure(EntityTypeBuilder<Series> entity)
{ {
entity.HasKey(b => b.SeriesId); entity.HasKey(s => s.SeriesId);
entity.HasIndex(b => b.AudibleSeriesId); entity.HasIndex(s => s.AudibleSeriesId);
entity entity
.Metadata .Metadata

View File

@ -31,16 +31,12 @@ namespace DataLayer
public string AudibleContributorId { get; private set; } public string AudibleContributorId { get; private set; }
private Contributor() { } private Contributor() { }
public Contributor(string name) public Contributor(string name, string audibleContributorId = null)
{ {
ArgumentValidator.EnsureNotNullOrWhiteSpace(name, nameof(name)); Name = ArgumentValidator.EnsureNotNullOrWhiteSpace(name, nameof(name));
_booksLink = new HashSet<BookContributor>(); _booksLink = new HashSet<BookContributor>();
Name = name;
}
public Contributor(string name, string audibleContributorId) : this(name)
{
// don't overwrite with null or whitespace but not an error // don't overwrite with null or whitespace but not an error
if (!string.IsNullOrWhiteSpace(audibleContributorId)) if (!string.IsNullOrWhiteSpace(audibleContributorId))
AudibleContributorId = audibleContributorId; AudibleContributorId = audibleContributorId;

View File

@ -19,7 +19,8 @@ namespace DataLayer
public static Book GetBook(this IQueryable<Book> books, string productId) public static Book GetBook(this IQueryable<Book> books, string productId)
=> books => books
.GetBooks() .GetBooks()
.SingleOrDefault(b => b.AudibleProductId == productId); // 'Single' is more accurate but 'First' is faster and less error prone
.FirstOrDefault(b => b.AudibleProductId == productId);
/// <summary>This is still IQueryable. YOU MUST CALL ToList() YOURSELF</summary> /// <summary>This is still IQueryable. YOU MUST CALL ToList() YOURSELF</summary>
public static IQueryable<Book> GetBooks(this IQueryable<Book> books, Expression<Func<Book, bool>> predicate) public static IQueryable<Book> GetBooks(this IQueryable<Book> books, Expression<Func<Book, bool>> predicate)

View File

@ -4,62 +4,53 @@ using System.Linq;
using AudibleApi.Common; using AudibleApi.Common;
using AudibleUtilities; using AudibleUtilities;
using DataLayer; using DataLayer;
using Dinah.Core.Collections.Generic;
namespace DtoImporterService namespace DtoImporterService
{ {
public class BookImporter : ItemsImporterBase public class BookImporter : ItemsImporterBase
{ {
public BookImporter(LibationContext context) : base(context) { } protected override IValidator Validator => new BookValidator();
public override IEnumerable<Exception> Validate(IEnumerable<ImportItem> importItems) => new BookValidator().Validate(importItems.Select(i => i.DtoItem)); public Dictionary<string, Book> Cache { get; private set; } = new();
private ContributorImporter contributorImporter { get; }
private SeriesImporter seriesImporter { get; }
private CategoryImporter categoryImporter { get; }
// Builds the prerequisite importers once, all sharing this importer's DbContext.
// They are kept as members (rather than created inside DoImport) so the lookup
// caches they populate during Import remain readable while books are created/updated.
public BookImporter(LibationContext context) : base(context)
{
contributorImporter = new ContributorImporter(DbContext);
seriesImporter = new SeriesImporter(DbContext);
categoryImporter = new CategoryImporter(DbContext);
}
protected override int DoImport(IEnumerable<ImportItem> importItems) protected override int DoImport(IEnumerable<ImportItem> importItems)
{ {
// pre-req.s // pre-req.s
new ContributorImporter(DbContext).Import(importItems); contributorImporter.Import(importItems);
new SeriesImporter(DbContext).Import(importItems); seriesImporter.Import(importItems);
new CategoryImporter(DbContext).Import(importItems); categoryImporter.Import(importItems);
// get distinct // load db existing => hash table
var productIds = importItems.Select(i => i.DtoItem.ProductId).Distinct().ToList(); loadLocal_books(importItems);
// load db existing => .Local
loadLocal_books(productIds);
// upsert // upsert
var qtyNew = upsertBooks(importItems); var qtyNew = upsertBooks(importItems);
return qtyNew; return qtyNew;
} }
private void loadLocal_books(List<string> productIds) private void loadLocal_books(IEnumerable<ImportItem> importItems)
{ {
// if this context has already loaded books, don't need to reload them. vestige from when context was long-lived. in practice, we now typically use a fresh context. this is quick though so no harm in leaving it. // get distinct
var localProductIds = DbContext.Books.Local.Select(b => b.AudibleProductId).ToList(); var productIds = importItems
var remainingProductIds = productIds .Select(i => i.DtoItem.ProductId)
.Except(localProductIds) .Distinct()
.ToList(); .ToList();
#region // explanation of DbContext.Books.GetBooks(b => remainingProductIds.Contains(b.AudibleProductId)).ToList(); Cache = DbContext.Books
/* .GetBooks(b => productIds.Contains(b.AudibleProductId))
articles suggest loading to Local with .ToDictionarySafe(b => b.AudibleProductId);
context.Books.Load();
we want Books and associated fields
context.Books.GetBooks(b => remainingProductIds.Contains(b.AudibleProductId)).ToList();
this is emulating Load() but with also getting associated fields
from: Microsoft.EntityFrameworkCore.EntityFrameworkQueryableExtensions
// Summary:
// Enumerates the query. When using Entity Framework, this causes the results of
// the query to be loaded into the associated context. This is equivalent to calling
// ToList and then throwing away the list (without the overhead of actually creating
// the list).
public static void Load<TSource>([NotNullAttribute] this IQueryable<TSource> source);
*/
#endregion
// GetBooks() eager loads Series, category, et al
if (remainingProductIds.Any())
DbContext.Books.GetBooks(b => remainingProductIds.Contains(b.AudibleProductId)).ToList();
} }
private int upsertBooks(IEnumerable<ImportItem> importItems) private int upsertBooks(IEnumerable<ImportItem> importItems)
@ -68,8 +59,7 @@ namespace DtoImporterService
foreach (var item in importItems) foreach (var item in importItems)
{ {
var book = DbContext.Books.Local.FirstOrDefault(p => p.AudibleProductId == item.DtoItem.ProductId); if (!Cache.TryGetValue(item.DtoItem.ProductId, out var book))
if (book is null)
{ {
book = createNewBook(item); book = createNewBook(item);
qtyNew++; qtyNew++;
@ -94,8 +84,7 @@ namespace DtoImporterService
// nested logic is required so order of names is retained. else, contributors may appear in the order they were inserted into the db // nested logic is required so order of names is retained. else, contributors may appear in the order they were inserted into the db
var authors = item var authors = item
.Authors .Authors
// This should properly be Single() not FirstOrDefault(), but FirstOrDefault is defensive .Select(a => contributorImporter.Cache[a.Name])
.Select(a => DbContext.Contributors.Local.FirstOrDefault(c => a.Name == c.Name))
.ToList(); .ToList();
var narrators var narrators
@ -105,8 +94,7 @@ namespace DtoImporterService
// nested logic is required so order of names is retained. else, contributors may appear in the order they were inserted into the db // nested logic is required so order of names is retained. else, contributors may appear in the order they were inserted into the db
: item : item
.Narrators .Narrators
// This should properly be Single() not FirstOrDefault(), but FirstOrDefault is defensive .Select(n => contributorImporter.Cache[n.Name])
.Select(n => DbContext.Contributors.Local.FirstOrDefault(c => n.Name == c.Name))
.ToList(); .ToList();
// categories are laid out for a breadcrumb. category is 1st, subcategory is 2nd // categories are laid out for a breadcrumb. category is 1st, subcategory is 2nd
@ -120,8 +108,7 @@ namespace DtoImporterService
// 2+ // 2+
: item.Categories[1].CategoryId; : item.Categories[1].CategoryId;
// This should properly be SingleOrDefault() not FirstOrDefault(), but FirstOrDefault is defensive var category = categoryImporter.Cache[lastCategory];
var category = DbContext.Categories.Local.FirstOrDefault(c => c.AudibleCategoryId == lastCategory);
Book book; Book book;
try try
@ -137,6 +124,7 @@ namespace DtoImporterService
category, category,
importItem.LocaleName) importItem.LocaleName)
).Entity; ).Entity;
Cache.Add(book.AudibleProductId, book);
} }
catch (Exception ex) catch (Exception ex)
{ {
@ -157,8 +145,7 @@ namespace DtoImporterService
var publisherName = item.Publisher; var publisherName = item.Publisher;
if (!string.IsNullOrWhiteSpace(publisherName)) if (!string.IsNullOrWhiteSpace(publisherName))
{ {
// This should properly be Single() not FirstOrDefault(), but FirstOrDefault is defensive var publisher = contributorImporter.Cache[publisherName];
var publisher = DbContext.Contributors.Local.FirstOrDefault(c => publisherName == c.Name);
book.ReplacePublisher(publisher); book.ReplacePublisher(publisher);
} }
@ -189,7 +176,7 @@ namespace DtoImporterService
{ {
foreach (var seriesEntry in item.Series) foreach (var seriesEntry in item.Series)
{ {
var series = DbContext.Series.Local.FirstOrDefault(s => seriesEntry.SeriesId == s.AudibleSeriesId); var series = seriesImporter.Cache[seriesEntry.SeriesId];
book.UpsertSeries(series, seriesEntry.Sequence); book.UpsertSeries(series, seriesEntry.Sequence);
} }
} }

View File

@ -4,14 +4,17 @@ using System.Linq;
using AudibleApi.Common; using AudibleApi.Common;
using AudibleUtilities; using AudibleUtilities;
using DataLayer; using DataLayer;
using Dinah.Core.Collections.Generic;
namespace DtoImporterService namespace DtoImporterService
{ {
public class CategoryImporter : ItemsImporterBase public class CategoryImporter : ItemsImporterBase
{ {
public CategoryImporter(LibationContext context) : base(context) { } protected override IValidator Validator => new CategoryValidator();
public override IEnumerable<Exception> Validate(IEnumerable<ImportItem> importItems) => new CategoryValidator().Validate(importItems.Select(i => i.DtoItem)); public Dictionary<string, Category> Cache { get; private set; } = new();
public CategoryImporter(LibationContext context) : base(context) { }
protected override int DoImport(IEnumerable<ImportItem> importItems) protected override int DoImport(IEnumerable<ImportItem> importItems)
{ {
@ -19,7 +22,9 @@ namespace DtoImporterService
var categoryIds = importItems var categoryIds = importItems
.Select(i => i.DtoItem) .Select(i => i.DtoItem)
.GetCategoriesDistinct() .GetCategoriesDistinct()
.Select(c => c.CategoryId).ToList(); .Select(c => c.CategoryId)
.Distinct()
.ToList();
// load db existing => .Local // load db existing => .Local
loadLocal_categories(categoryIds); loadLocal_categories(categoryIds);
@ -38,15 +43,10 @@ namespace DtoImporterService
// must include default/empty/missing // must include default/empty/missing
categoryIds.Add(Category.GetEmpty().AudibleCategoryId); categoryIds.Add(Category.GetEmpty().AudibleCategoryId);
var localIds = DbContext.Categories.Local.Select(c => c.AudibleCategoryId).ToList();
var remainingCategoryIds = categoryIds
.Distinct()
.Except(localIds)
.ToList();
// load existing => local // load existing => local
if (remainingCategoryIds.Any()) Cache = DbContext.Categories
DbContext.Categories.Where(c => remainingCategoryIds.Contains(c.AudibleCategoryId)).ToList(); .Where(c => categoryIds.Contains(c.AudibleCategoryId))
.ToDictionarySafe(c => c.AudibleCategoryId);
} }
// only use after loading contributors => local // only use after loading contributors => local
@ -67,22 +67,11 @@ namespace DtoImporterService
Category parentCategory = null; Category parentCategory = null;
if (i == 1) if (i == 1)
// should be "Single()" but user is getting a strange error Cache.TryGetValue(pair[0].CategoryId, out parentCategory);
parentCategory = DbContext.Categories.Local.FirstOrDefault(c => c.AudibleCategoryId == pair[0].CategoryId);
// should be "SingleOrDefault()" but user is getting a strange error if (!Cache.TryGetValue(id, out var category))
var category = DbContext.Categories.Local.FirstOrDefault(c => c.AudibleCategoryId == id);
if (category is null)
{ {
try category = addCategory(id, name);
{
category = DbContext.Categories.Add(new Category(new AudibleCategoryId(id), name)).Entity;
}
catch (Exception ex)
{
Serilog.Log.Logger.Error(ex, "Error adding category. {@DebugInfo}", new { id, name });
throw;
}
qtyNew++; qtyNew++;
} }
@ -92,5 +81,24 @@ namespace DtoImporterService
return qtyNew; return qtyNew;
} }
/// <summary>
/// Creates a new <see cref="Category"/>, adds it to the DbContext, and registers the
/// resulting entity in <see cref="Cache"/> under its AudibleCategoryId.
/// </summary>
/// <param name="id">Audible category id of the category to create.</param>
/// <param name="name">Display name of the category.</param>
/// <returns>The entity returned by the DbContext for the newly added category.</returns>
/// <remarks>Any failure is logged together with the offending id/name and then rethrown unchanged.</remarks>
private Category addCategory(string id, string name)
{
    try
    {
        var added = DbContext.Categories
            .Add(new Category(new AudibleCategoryId(id), name))
            .Entity;

        // keep the lookup cache in step with the context so later lookups in this import find the new category
        Cache.Add(added.AudibleCategoryId, added);

        return added;
    }
    catch (Exception ex)
    {
        Serilog.Log.Logger.Error(ex, "Error adding category. {@DebugInfo}", new { id, name });
        throw;
    }
}
} }
} }

View File

@ -4,14 +4,17 @@ using System.Linq;
using AudibleApi.Common; using AudibleApi.Common;
using AudibleUtilities; using AudibleUtilities;
using DataLayer; using DataLayer;
using Dinah.Core.Collections.Generic;
namespace DtoImporterService namespace DtoImporterService
{ {
public class ContributorImporter : ItemsImporterBase public class ContributorImporter : ItemsImporterBase
{ {
public ContributorImporter(LibationContext context) : base(context) { } protected override IValidator Validator => new ContributorValidator();
public override IEnumerable<Exception> Validate(IEnumerable<ImportItem> importItems) => new ContributorValidator().Validate(importItems.Select(i => i.DtoItem)); public Dictionary<string, Contributor> Cache { get; private set; } = new();
public ContributorImporter(LibationContext context) : base(context) { }
protected override int DoImport(IEnumerable<ImportItem> importItems) protected override int DoImport(IEnumerable<ImportItem> importItems)
{ {
@ -50,78 +53,61 @@ namespace DtoImporterService
// must include default/empty/missing // must include default/empty/missing
contributorNames.Add(Contributor.GetEmpty().Name); contributorNames.Add(Contributor.GetEmpty().Name);
//// BAD: very inefficient
// var x = context.Contributors.Local.Where(c => !contribNames.Contains(c.Name));
// GOOD: Except() is efficient. Due to hashing, it's close to O(n)
var localNames = DbContext.Contributors.Local.Select(c => c.Name).ToList();
var remainingContribNames = contributorNames
.Distinct()
.Except(localNames)
.ToList();
// load existing => local // load existing => local
if (remainingContribNames.Any()) Cache = DbContext.Contributors
DbContext.Contributors.Where(c => remainingContribNames.Contains(c.Name)).ToList(); .Where(c => contributorNames.Contains(c.Name))
.ToDictionarySafe(c => c.Name);
} }
// only use after loading contributors => local
private int upsertPeople(List<Person> people) private int upsertPeople(List<Person> people)
{ {
var localNames = DbContext.Contributors.Local.Select(c => c.Name).ToList(); var hash = people
var newPeople = people // new people only
.Select(p => p.Name) .Where(p => !Cache.ContainsKey(p.Name))
.Distinct() // remove duplicates by Name. first in wins
.Except(localNames) .ToDictionarySafe(p => p.Name);
.ToList();
var groupby = people.GroupBy( foreach (var kvp in hash)
p => p.Name,
p => p,
(key, g) => new { Name = key, People = g.ToList() }
);
foreach (var name in newPeople)
{ {
// This should properly be Single() not FirstOrDefault(), but FirstOrDefault is defensive var person = kvp.Value;
var p = groupby.FirstOrDefault(g => g.Name == name).People.First(); addContributor(person.Name, person.Asin);
try
{
DbContext.Contributors.Add(new Contributor(p.Name, p.Asin));
}
catch (Exception ex)
{
Serilog.Log.Logger.Error(ex, "Error adding person. {@DebugInfo}", new { p?.Name, p?.Asin });
throw;
}
} }
return newPeople.Count; return hash.Count;
} }
// only use after loading contributors => local // only use after loading contributors => local
private int upsertPublishers(List<string> publishers) private int upsertPublishers(List<string> publishers)
{ {
var localNames = DbContext.Contributors.Local.Select(c => c.Name).ToList(); var hash = publishers
var newPublishers = publishers // new publishers only
.Distinct() .Where(p => !Cache.ContainsKey(p))
.Except(localNames) // remove duplicates
.ToList(); .ToHashSet();
foreach (var pub in newPublishers) foreach (var pub in hash)
{ addContributor(pub);
try
{
DbContext.Contributors.Add(new Contributor(pub));
}
catch (Exception ex)
{
Serilog.Log.Logger.Error(ex, "Error adding publisher. {@DebugInfo}", new { pub });
throw;
}
}
return newPublishers.Count; return hash.Count;
} }
}
/// <summary>
/// Creates a new <see cref="Contributor"/>, adds it to the DbContext, and registers the
/// resulting entity in <see cref="Cache"/> under its name.
/// </summary>
/// <param name="name">Contributor name; also used as the cache key.</param>
/// <param name="id">
/// Optional Audible contributor id. People are added with their Asin; publishers are added without an id.
/// </param>
/// <returns>The entity returned by the DbContext for the newly added contributor.</returns>
/// <remarks>Any failure is logged together with the offending name/id and then rethrown unchanged.</remarks>
private Contributor addContributor(string name, string id = null)
{
    try
    {
        // Forward the id to the entity. The Contributor constructor ignores a null/whitespace id,
        // so callers that have no id (publishers) are unaffected.
        var newContrib = new Contributor(name, id);
        var entityEntry = DbContext.Contributors.Add(newContrib);
        var entity = entityEntry.Entity;

        // keep the lookup cache in step with the context so later lookups in this import find the new contributor
        Cache.Add(entity.Name, entity);

        return entity;
    }
    catch (Exception ex)
    {
        Serilog.Log.Logger.Error(ex, "Error adding contributor. {@DebugInfo}", new { name, id });
        throw;
    }
}
}
} }

View File

@ -53,5 +53,9 @@ namespace DtoImporterService
public abstract class ItemsImporterBase : ImporterBase<IEnumerable<ImportItem>> public abstract class ItemsImporterBase : ImporterBase<IEnumerable<ImportItem>>
{ {
protected ItemsImporterBase(LibationContext context) : base(context) { } protected ItemsImporterBase(LibationContext context) : base(context) { }
protected abstract IValidator Validator { get; }
public sealed override IEnumerable<Exception> Validate(IEnumerable<ImportItem> importItems)
=> Validator.Validate(importItems.Select(i => i.DtoItem));
} }
} }

View File

@ -3,18 +3,24 @@ using System.Collections.Generic;
using System.Linq; using System.Linq;
using AudibleUtilities; using AudibleUtilities;
using DataLayer; using DataLayer;
using Dinah.Core.Collections.Generic;
namespace DtoImporterService namespace DtoImporterService
{ {
public class LibraryBookImporter : ItemsImporterBase public class LibraryBookImporter : ItemsImporterBase
{ {
public LibraryBookImporter(LibationContext context) : base(context) { } protected override IValidator Validator => new LibraryValidator();
public override IEnumerable<Exception> Validate(IEnumerable<ImportItem> importItems) => new LibraryValidator().Validate(importItems.Select(i => i.DtoItem)); private BookImporter bookImporter { get; }
// Builds the prerequisite BookImporter once, sharing this importer's DbContext.
// It is kept as a member so its Cache of books can be read when LibraryBook rows are created.
public LibraryBookImporter(LibationContext context) : base(context)
{
bookImporter = new BookImporter(DbContext);
}
protected override int DoImport(IEnumerable<ImportItem> importItems) protected override int DoImport(IEnumerable<ImportItem> importItems)
{ {
new BookImporter(DbContext).Import(importItems); bookImporter.Import(importItems);
var qtyNew = upsertLibraryBooks(importItems); var qtyNew = upsertLibraryBooks(importItems);
return qtyNew; return qtyNew;
@ -36,25 +42,18 @@ namespace DtoImporterService
var currentLibraryProductIds = DbContext.LibraryBooks.Select(l => l.Book.AudibleProductId).ToList(); var currentLibraryProductIds = DbContext.LibraryBooks.Select(l => l.Book.AudibleProductId).ToList();
var newItems = importItems var newItems = importItems
.Where(dto => !currentLibraryProductIds .Where(dto => !currentLibraryProductIds.Contains(dto.DtoItem.ProductId))
.Contains(dto.DtoItem.ProductId))
.ToList(); .ToList();
// if 2 accounts try to import the same book in the same transaction: error since we're only tracking and pulling by asin. // if 2 accounts try to import the same book in the same transaction: error since we're only tracking and pulling by asin.
// just use the first // just use the first
var groupby = newItems.GroupBy( var hash = newItems.ToDictionarySafe(dto => dto.DtoItem.ProductId);
i => i.DtoItem.ProductId, foreach (var kvp in hash)
i => i, {
(key, g) => new { ProductId = key, ImportItems = g.ToList() } var newItem = kvp.Value;
)
.ToList();
foreach (var gb in groupby)
{
var newItem = gb.ImportItems.First();
var libraryBook = new LibraryBook( var libraryBook = new LibraryBook(
// This should properly be Single() not FirstOrDefault(), but FirstOrDefault is defensive bookImporter.Cache[newItem.DtoItem.ProductId],
DbContext.Books.Local.FirstOrDefault(b => b.AudibleProductId == newItem.DtoItem.ProductId),
newItem.DtoItem.DateAdded, newItem.DtoItem.DateAdded,
newItem.AccountId); newItem.AccountId);
try try
@ -67,7 +66,7 @@ namespace DtoImporterService
} }
} }
var qtyNew = groupby.Count; var qtyNew = hash.Count;
return qtyNew; return qtyNew;
} }
} }

View File

@ -4,14 +4,17 @@ using System.Linq;
using AudibleApi.Common; using AudibleApi.Common;
using AudibleUtilities; using AudibleUtilities;
using DataLayer; using DataLayer;
using Dinah.Core.Collections.Generic;
namespace DtoImporterService namespace DtoImporterService
{ {
public class SeriesImporter : ItemsImporterBase public class SeriesImporter : ItemsImporterBase
{ {
public SeriesImporter(LibationContext context) : base(context) { } protected override IValidator Validator => new SeriesValidator();
public override IEnumerable<Exception> Validate(IEnumerable<ImportItem> importItems) => new SeriesValidator().Validate(importItems.Select(i => i.DtoItem)); public Dictionary<string, DataLayer.Series> Cache { get; private set; } = new();
public SeriesImporter(LibationContext context) : base(context) { }
protected override int DoImport(IEnumerable<ImportItem> importItems) protected override int DoImport(IEnumerable<ImportItem> importItems)
{ {
@ -31,15 +34,12 @@ namespace DtoImporterService
private void loadLocal_series(List<AudibleApi.Common.Series> series) private void loadLocal_series(List<AudibleApi.Common.Series> series)
{ {
var seriesIds = series.Select(s => s.SeriesId).ToList(); var seriesIds = series.Select(s => s.SeriesId).Distinct().ToList();
var localIds = DbContext.Series.Local.Select(s => s.AudibleSeriesId).ToList();
var remainingSeriesIds = seriesIds
.Distinct()
.Except(localIds)
.ToList();
if (remainingSeriesIds.Any()) if (seriesIds.Any())
DbContext.Series.Where(s => remainingSeriesIds.Contains(s.AudibleSeriesId)).ToList(); Cache = DbContext.Series
.Where(s => seriesIds.Contains(s.AudibleSeriesId))
.ToDictionarySafe(s => s.AudibleSeriesId);
} }
private int upsertSeries(List<AudibleApi.Common.Series> requestedSeries) private int upsertSeries(List<AudibleApi.Common.Series> requestedSeries)
@ -48,18 +48,10 @@ namespace DtoImporterService
foreach (var s in requestedSeries) foreach (var s in requestedSeries)
{ {
var series = DbContext.Series.Local.FirstOrDefault(c => c.AudibleSeriesId == s.SeriesId); // AudibleApi.Common.Series.SeriesId == DataLayer.AudibleSeriesId
if (series is null) if (!Cache.TryGetValue(s.SeriesId, out var series))
{ {
try series = addSeries(s.SeriesId);
{
series = DbContext.Series.Add(new DataLayer.Series(new AudibleSeriesId(s.SeriesId))).Entity;
}
catch (Exception ex)
{
Serilog.Log.Logger.Error(ex, "Error adding series. {@DebugInfo}", new { s?.SeriesId });
throw;
}
qtyNew++; qtyNew++;
} }
series.UpdateName(s.SeriesName); series.UpdateName(s.SeriesName);
@ -67,5 +59,24 @@ namespace DtoImporterService
return qtyNew; return qtyNew;
} }
/// <summary>
/// Creates a new <see cref="DataLayer.Series"/> for the given Audible series id, adds it to the
/// DbContext, and registers the resulting entity in <see cref="Cache"/> under its AudibleSeriesId.
/// </summary>
/// <param name="seriesId">Audible series id of the series to create.</param>
/// <returns>The entity returned by the DbContext for the newly added series.</returns>
/// <remarks>Any failure is logged together with the offending series id and then rethrown unchanged.</remarks>
private DataLayer.Series addSeries(string seriesId)
{
    try
    {
        var added = DbContext.Series
            .Add(new DataLayer.Series(new AudibleSeriesId(seriesId)))
            .Entity;

        // keep the lookup cache in step with the context so later lookups in this import find the new series
        Cache.Add(added.AudibleSeriesId, added);

        return added;
    }
    catch (Exception ex)
    {
        Serilog.Log.Logger.Error(ex, "Error adding series. {@DebugInfo}", new { seriesId });
        throw;
    }
}
} }
} }

View File

@ -0,0 +1,37 @@
* Local (e.g. DbContext.Books.Local): indexes/hashes the primary key (PK) and nothing else. Local.Find(PK) is fast. All other searches (e.g. FirstOrDefault) have awful performance. It deceptively *feels* like we get this partially for free since added/modified entries live here.
* live db: for all importers, the fields used for lookup are indexed
Using BookImporter as an example: since AudibleProductId is indexed, hitting the live db is much faster than searching Local. Fastest of all is loading everything into an in-memory hash table (Dictionary)
Note: GetBook/GetBooks eager loads Series, category, et al
for 1,200 iterations
* load to LocalView
DbContext.Books.Local.FirstOrDefault(p => p.AudibleProductId == item.DtoItem.ProductId)
27,125 ms
* read from live db
DbContext.Books.GetBook(item.DtoItem.ProductId)
12,224 ms
* load to hash table: Dictionary<string, Book>
dictionary[item.DtoItem.ProductId];
1 ms (yes: ONE)
With hashtable, somehow memory usage was not significantly affected
-----------------------------------
why were we using Local to begin with?
articles suggest loading to Local with
context.Books.Load();
this loads this table but not associated fields
we want Books and associated fields
context.Books.GetBooks(b => remainingProductIds.Contains(b.AudibleProductId)).ToList();
this is emulating Load() but with also getting associated fields
from: Microsoft.EntityFrameworkCore.EntityFrameworkQueryableExtensions
// Summary:
// Enumerates the query. When using Entity Framework, this causes the results of
// the query to be loaded into the associated context. This is equivalent to calling
// ToList and then throwing away the list (without the overhead of actually creating
// the list).
public static void Load<TSource>([NotNullAttribute] this IQueryable<TSource> source);

View File

@ -5,7 +5,7 @@
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="Dinah.Core" Version="4.0.4.1" /> <PackageReference Include="Dinah.Core" Version="4.0.6.1" />
<PackageReference Include="Polly" Version="7.2.3" /> <PackageReference Include="Polly" Version="7.2.3" />
</ItemGroup> </ItemGroup>

View File

@ -28,7 +28,7 @@
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="Dinah.Core.WindowsDesktop" Version="4.0.4.1" /> <PackageReference Include="Dinah.Core.WindowsDesktop" Version="4.0.6.1" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>

View File

@ -228,7 +228,6 @@ namespace LibationWinForms
var libhackFiles = Directory.EnumerateDirectories(config.Books, "*.libhack", SearchOption.AllDirectories); var libhackFiles = Directory.EnumerateDirectories(config.Books, "*.libhack", SearchOption.AllDirectories);
using var context = ApplicationServices.DbContexts.GetContext(); using var context = ApplicationServices.DbContexts.GetContext();
context.Books.Load();
var jArr = JArray.Parse(File.ReadAllText(filePaths)); var jArr = JArray.Parse(File.ReadAllText(filePaths));
@ -248,7 +247,7 @@ namespace LibationWinForms
if (fileType == FileType.Unknown || fileType == FileType.AAXC) if (fileType == FileType.Unknown || fileType == FileType.AAXC)
continue; continue;
var book = context.Books.Local.FirstOrDefault(b => b.AudibleProductId == asin); var book = context.Books.FirstOrDefault(b => b.AudibleProductId == asin);
if (book is null) if (book is null)
continue; continue;