* search engine: refactoring and improved logging

* bug fix: after book is liberated, filter should immediately honor new "is liberated" status
This commit is contained in:
Robert McRackan 2021-04-01 12:44:16 -04:00
parent 7b966f6962
commit abd00ff1df
5 changed files with 146 additions and 77 deletions

View File

@ -1,4 +1,5 @@
using System.IO; using System;
using System.IO;
using DataLayer; using DataLayer;
using LibationSearchEngine; using LibationSearchEngine;
@ -12,31 +13,43 @@ namespace ApplicationServices
engine.CreateNewIndex(); engine.CreateNewIndex();
} }
public static SearchResultSet Search(string searchString) public static SearchResultSet Search(string searchString) => performSearchEngineFunc_safe(e =>
e.Search(searchString)
);
/// <summary>
/// Hands the book's Audible product id and its user-defined tags to <c>SearchEngine.UpdateTags</c>.
/// </summary>
/// <param name="book">Book whose <c>AudibleProductId</c> and <c>UserDefinedItem.Tags</c> are read.</param>
public static void UpdateBookTags(Book book)
{
    // go through the "_safe" wrapper instead of calling the engine directly
    performSearchEngineAction_safe(
        engine => engine.UpdateTags(book.AudibleProductId, book.UserDefinedItem.Tags));
}
/// <summary>
/// Asks the search engine to refresh the "is liberated" state for the given book,
/// identified by its Audible product id.
/// </summary>
/// <param name="book">Book whose <c>AudibleProductId</c> is passed to <c>SearchEngine.UpdateIsLiberated</c>.</param>
public static void UpdateIsLiberated(Book book)
{
    // go through the "_safe" wrapper instead of calling the engine directly
    performSearchEngineAction_safe(
        engine => engine.UpdateIsLiberated(book.AudibleProductId));
}
private static void performSearchEngineAction_safe(Action<SearchEngine> action)
{ {
var engine = new SearchEngine(DbContexts.GetContext()); var engine = new SearchEngine(DbContexts.GetContext());
try try
{ {
return engine.Search(searchString); action(engine);
} }
catch (FileNotFoundException) catch (FileNotFoundException)
{ {
FullReIndex(); FullReIndex();
return engine.Search(searchString); action(engine);
} }
} }
public static void UpdateBookTags(Book book) private static T performSearchEngineFunc_safe<T>(Func<SearchEngine, T> action)
{ {
var engine = new SearchEngine(DbContexts.GetContext()); var engine = new SearchEngine(DbContexts.GetContext());
try try
{ {
engine.UpdateTags(book.AudibleProductId, book.UserDefinedItem.Tags); return action(engine);
} }
catch (FileNotFoundException) catch (FileNotFoundException)
{ {
FullReIndex(); FullReIndex();
engine.UpdateTags(book.AudibleProductId, book.UserDefinedItem.Tags); return action(engine);
} }
} }
} }

View File

@ -13,7 +13,7 @@
<!-- <PublishSingleFile>true</PublishSingleFile> --> <!-- <PublishSingleFile>true</PublishSingleFile> -->
<RuntimeIdentifier>win-x64</RuntimeIdentifier> <RuntimeIdentifier>win-x64</RuntimeIdentifier>
<Version>4.2.0.1</Version> <Version>4.2.1.1</Version>
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>

View File

@ -26,8 +26,15 @@ namespace LibationSearchEngine
public const string _ID_ = "_ID_"; public const string _ID_ = "_ID_";
public const string TAGS = "tags"; public const string TAGS = "tags";
// special field for each book which includes all major parts of the book's metadata. enables non-targeted searching
public const string ALL = "all"; public const string ALL = "all";
// the workaround which allows displaying all books when query is empty
public const string ALL_QUERY = "*:*";
public SearchEngine(LibationContext context) => this.context = context;
#region index rules
private static ReadOnlyDictionary<string, Func<LibraryBook, string>> idIndexRules { get; } private static ReadOnlyDictionary<string, Func<LibraryBook, string>> idIndexRules { get; }
= new ReadOnlyDictionary<string, Func<LibraryBook, string>>( = new ReadOnlyDictionary<string, Func<LibraryBook, string>>(
new Dictionary<string, Func<LibraryBook, string>> new Dictionary<string, Func<LibraryBook, string>>
@ -38,6 +45,7 @@ namespace LibationSearchEngine
["ASIN"] = lb => lb.Book.AudibleProductId ["ASIN"] = lb => lb.Book.AudibleProductId
} }
); );
private static ReadOnlyDictionary<string, Func<LibraryBook, string>> stringIndexRules { get; } private static ReadOnlyDictionary<string, Func<LibraryBook, string>> stringIndexRules { get; }
= new ReadOnlyDictionary<string, Func<LibraryBook, string>>( = new ReadOnlyDictionary<string, Func<LibraryBook, string>>(
new Dictionary<string, Func<LibraryBook, string>> new Dictionary<string, Func<LibraryBook, string>>
@ -98,6 +106,7 @@ namespace LibationSearchEngine
["MyRating"] = lb => lb.Book.UserDefinedItem.Rating.OverallRating.ToLuceneString() ["MyRating"] = lb => lb.Book.UserDefinedItem.Rating.OverallRating.ToLuceneString()
} }
); );
private static ReadOnlyDictionary<string, Func<LibraryBook, bool>> boolIndexRules { get; } private static ReadOnlyDictionary<string, Func<LibraryBook, bool>> boolIndexRules { get; }
= new ReadOnlyDictionary<string, Func<LibraryBook, bool>>( = new ReadOnlyDictionary<string, Func<LibraryBook, bool>>(
new Dictionary<string, Func<LibraryBook, bool>> new Dictionary<string, Func<LibraryBook, bool>>
@ -114,16 +123,17 @@ namespace LibationSearchEngine
["IsRated"] = lb => lb.Book.UserDefinedItem.Rating.OverallRating > 0f, ["IsRated"] = lb => lb.Book.UserDefinedItem.Rating.OverallRating > 0f,
["Rated"] = lb => lb.Book.UserDefinedItem.Rating.OverallRating > 0f, ["Rated"] = lb => lb.Book.UserDefinedItem.Rating.OverallRating > 0f,
["IsAuthorNarrated"] = lb => isAuthorNarrated(lb), ["IsAuthorNarrated"] = isAuthorNarrated,
["AuthorNarrated"] = lb => isAuthorNarrated(lb), ["AuthorNarrated"] = isAuthorNarrated,
[nameof(Book.IsAbridged)] = lb => lb.Book.IsAbridged, [nameof(Book.IsAbridged)] = lb => lb.Book.IsAbridged,
["Abridged"] = lb => lb.Book.IsAbridged, ["Abridged"] = lb => lb.Book.IsAbridged,
// this will only be evaluated at time of re-index. ie: state of files moved later will be out of sync until next re-index // this will only be evaluated at time of re-index. ie: state of files moved later will be out of sync until next re-index
["IsLiberated"] = lb => AudibleFileStorage.Audio.Exists(lb.Book.AudibleProductId), ["IsLiberated"] = lb => isLiberated(lb.Book.AudibleProductId),
["Liberated"] = lb => AudibleFileStorage.Audio.Exists(lb.Book.AudibleProductId), ["Liberated"] = lb => isLiberated(lb.Book.AudibleProductId),
}); }
);
private static bool isAuthorNarrated(LibraryBook lb) private static bool isAuthorNarrated(LibraryBook lb)
{ {
@ -132,6 +142,8 @@ namespace LibationSearchEngine
return authors.Intersect(narrators).Any(); return authors.Intersect(narrators).Any();
} }
/// <summary>Reports whether <c>AudibleFileStorage.Audio</c> says an entry exists for the given id.</summary>
/// <param name="id">Audible product id to look up.</param>
/// <returns>The result of <c>AudibleFileStorage.Audio.Exists(id)</c>.</returns>
private static bool isLiberated(string id)
{
    return AudibleFileStorage.Audio.Exists(id);
}
// use these common fields in the "all" default search field // use these common fields in the "all" default search field
private static IEnumerable<Func<LibraryBook, string>> allFieldIndexRules { get; } private static IEnumerable<Func<LibraryBook, string>> allFieldIndexRules { get; }
= new List<Func<LibraryBook, string>> = new List<Func<LibraryBook, string>>
@ -141,7 +153,9 @@ namespace LibationSearchEngine
stringIndexRules[nameof(Book.AuthorNames)], stringIndexRules[nameof(Book.AuthorNames)],
stringIndexRules[nameof(Book.NarratorNames)] stringIndexRules[nameof(Book.NarratorNames)]
}; };
#endregion
#region get search fields. used for display in help
public static IEnumerable<string> GetSearchIdFields() public static IEnumerable<string> GetSearchIdFields()
{ {
foreach (var key in idIndexRules.Keys) foreach (var key in idIndexRules.Keys)
@ -177,11 +191,13 @@ namespace LibationSearchEngine
foreach (var key in numberIndexRules.Keys) foreach (var key in numberIndexRules.Keys)
yield return key; yield return key;
} }
#endregion
private Directory getIndex() => FSDirectory.Open(SearchEngineDirectory); #region create and update index
/// <summary>
public SearchEngine(LibationContext context) => this.context = context; /// create new. ie: full re-index
/// </summary>
/// <param name="overwrite"></param>
public void CreateNewIndex(bool overwrite = true) public void CreateNewIndex(bool overwrite = true)
{ {
// 300 products // 300 products
@ -215,6 +231,22 @@ namespace LibationSearchEngine
log(); log();
} }
/// <summary>
/// Rebuilds the index document for a single book and swaps it in for the entry whose
/// <see cref="_ID_"/> term equals <paramref name="productId"/>.
/// Long running. Use await Task.Run(() => UpdateBook(productId))
/// </summary>
/// <param name="productId">Audible product id of the book to re-index.</param>
public void UpdateBook(string productId)
{
    var libraryBook = context.GetLibraryBook_Flat_NoTracking(productId);
    var term = new Term(_ID_, productId);

    var document = createBookIndexDocument(libraryBook);

    // open the existing index; do not start a fresh one
    var createNewIndex = false;

    using var index = getIndex();
    using var analyzer = new StandardAnalyzer(Version);
    using var ixWriter = new IndexWriter(index, analyzer, createNewIndex, IndexWriter.MaxFieldLength.UNLIMITED);

    // one delete-then-add call (atomic to index readers) instead of separate DeleteDocuments + AddDocument.
    // same call the single-field update path in this class uses.
    ixWriter.UpdateDocument(term, document, analyzer);
}
private static Document createBookIndexDocument(LibraryBook libraryBook) private static Document createBookIndexDocument(LibraryBook libraryBook)
{ {
var doc = new Document(); var doc = new Document();
@ -252,23 +284,38 @@ namespace LibationSearchEngine
return doc; return doc;
} }
/// <summary>Long running. Use await Task.Run(() => UpdateBook(productId))</summary> // update single document entry
public void UpdateBook(string productId) // all fields, including 'tags' are case-specific
/// <summary>
/// Replaces the <see cref="TAGS"/> field of the indexed document for the given product
/// by delegating to <c>updateAnalyzedField</c>.
/// </summary>
/// <param name="productId">Audible product id identifying the document.</param>
/// <param name="tags">New tags value to store in the field.</param>
public void UpdateTags(string productId, string tags)
{
    updateAnalyzedField(productId, TAGS, tags);
}
// all fields are case-specific
private static void updateAnalyzedField(string productId, string fieldName, string newValue)
=> updateDocument(
productId,
d =>
{ {
var libraryBook = context.GetLibraryBook_Flat_NoTracking(productId); // fields are key value pairs. MULTIPLE FIELDS CAN POTENTIALLY HAVE THE SAME KEY.
var term = new Term(_ID_, productId); // ie: must remove old before adding new else will create unwanted duplicates.
d.RemoveField(fieldName);
d.AddAnalyzed(fieldName, newValue);
});
var document = createBookIndexDocument(libraryBook); // update single document entry
var createNewIndex = false; public void UpdateIsLiberated(string productId)
=> updateDocument(
productId,
d =>
{
// fields are key value pairs. MULTIPLE FIELDS CAN POTENTIALLY HAVE THE SAME KEY.
// ie: must remove old before adding new else will create unwanted duplicates.
var v = isLiberated(productId);
d.RemoveField("IsLiberated");
d.AddBool("IsLiberated", v);
d.RemoveField("Liberated");
d.AddBool("Liberated", v);
});
using var index = getIndex(); private static void updateDocument(string productId, Action<Document> action)
using var analyzer = new StandardAnalyzer(Version);
using var ixWriter = new IndexWriter(index, analyzer, createNewIndex, IndexWriter.MaxFieldLength.UNLIMITED);
ixWriter.DeleteDocuments(term);
ixWriter.AddDocument(document);
}
public void UpdateTags(string productId, string tags)
{ {
var productTerm = new Term(_ID_, productId); var productTerm = new Term(_ID_, productId);
@ -284,11 +331,9 @@ namespace LibationSearchEngine
var document = searcher.Doc(scoreDoc.Doc); var document = searcher.Doc(scoreDoc.Doc);
// update document entry with new tags // perform update
// fields are key value pairs and MULTIPLE FIELDS CAN HAVE THE SAME KEY. must remove old before adding new action(document);
// REMEMBER: all fields, including 'tags' are case-specific
document.RemoveField(TAGS);
document.AddAnalyzed(TAGS, tags);
// update index // update index
var createNewIndex = false; var createNewIndex = false;
@ -296,11 +341,15 @@ namespace LibationSearchEngine
using var ixWriter = new IndexWriter(index, analyzer, createNewIndex, IndexWriter.MaxFieldLength.UNLIMITED); using var ixWriter = new IndexWriter(index, analyzer, createNewIndex, IndexWriter.MaxFieldLength.UNLIMITED);
ixWriter.UpdateDocument(productTerm, document, analyzer); ixWriter.UpdateDocument(productTerm, document, analyzer);
} }
#endregion
#region search
public SearchResultSet Search(string searchString) public SearchResultSet Search(string searchString)
{ {
Serilog.Log.Logger.Debug("original search string: {@DebugInfo}", new { searchString });
if (string.IsNullOrWhiteSpace(searchString)) if (string.IsNullOrWhiteSpace(searchString))
searchString = "*:*"; searchString = ALL_QUERY;
#region apply formatting #region apply formatting
searchString = parseTag(searchString); searchString = parseTag(searchString);
@ -315,13 +364,18 @@ namespace LibationSearchEngine
searchString = lowerFieldNames(searchString); searchString = lowerFieldNames(searchString);
#endregion #endregion
Serilog.Log.Logger.Debug("formatted search string: {@DebugInfo}", new { searchString });
var results = generalSearch(searchString); var results = generalSearch(searchString);
Serilog.Log.Logger.Debug("Hit(s): {@DebugInfo}", new { count = results.Docs.Count() });
displayResults(results); displayResults(results);
return results; return results;
} }
#region format query string
private static string parseTag(string tagSearchString) private static string parseTag(string tagSearchString)
{ {
var allMatches = LuceneRegex var allMatches = LuceneRegex
@ -380,13 +434,10 @@ namespace LibationSearchEngine
return searchString; return searchString;
} }
#endregion
public int MaxSearchResultsToReturn { get; set; } = 999;
private SearchResultSet generalSearch(string searchString) private SearchResultSet generalSearch(string searchString)
{ {
Console.WriteLine($"searchString: {searchString}");
var defaultField = ALL; var defaultField = ALL;
using var index = getIndex(); using var index = getIndex();
@ -407,14 +458,14 @@ namespace LibationSearchEngine
boolQuery.Add(new MatchAllDocsQuery(), Occur.MUST); boolQuery.Add(new MatchAllDocsQuery(), Occur.MUST);
} }
Console.WriteLine($" query: {query}");
var docs = searcher var docs = searcher
.Search(query, MaxSearchResultsToReturn) .Search(query, searcher.MaxDoc + 1)
.ScoreDocs .ScoreDocs
.Select(ds => new ScoreDocExplicit(searcher.Doc(ds.Doc), ds.Score)) .Select(ds => new ScoreDocExplicit(searcher.Doc(ds.Doc), ds.Score))
.ToList(); .ToList();
return new SearchResultSet(query.ToString(), docs); var queryString = query.ToString();
Serilog.Log.Logger.Debug("query: {@DebugInfo}", new { queryString });
return new SearchResultSet(queryString, docs);
} }
private IEnumerable<Occur> getOccurs_recurs(BooleanQuery query) private IEnumerable<Occur> getOccurs_recurs(BooleanQuery query)
@ -434,7 +485,6 @@ namespace LibationSearchEngine
private void displayResults(SearchResultSet docs) private void displayResults(SearchResultSet docs)
{ {
Console.WriteLine($"Hit(s): {docs.Docs.Count()}");
//for (int i = 0; i < docs.Docs.Count(); i++) //for (int i = 0; i < docs.Docs.Count(); i++)
//{ //{
// var sde = docs.Docs.First(); // var sde = docs.Docs.First();
@ -442,13 +492,14 @@ namespace LibationSearchEngine
// Document doc = sde.Doc; // Document doc = sde.Doc;
// float score = sde.Score; // float score = sde.Score;
// Console.WriteLine($"{(i + 1)}) score={score}. Fields:"); // Serilog.Log.Logger.Debug($"{(i + 1)}) score={score}. Fields:");
// var allFields = doc.GetFields(); // var allFields = doc.GetFields();
// foreach (var f in allFields) // foreach (var f in allFields)
// Console.WriteLine($" [{f.Name}]={f.StringValue}"); // Serilog.Log.Logger.Debug($" [{f.Name}]={f.StringValue}");
//} //}
//Console.WriteLine();
} }
#endregion
/// <summary>Opens the Lucene directory located at <c>SearchEngineDirectory</c>.</summary>
/// <returns>The <c>Directory</c> returned by <c>FSDirectory.Open</c>; callers in this class wrap it in <c>using</c>.</returns>
private static Directory getIndex()
{
    return FSDirectory.Open(SearchEngineDirectory);
}
} }
} }

View File

@ -74,6 +74,16 @@ namespace LibationWinForms.BookLiberation
backupBook.DecryptBook.Begin += (_, __) => wireUpEvents(backupBook.DecryptBook); backupBook.DecryptBook.Begin += (_, __) => wireUpEvents(backupBook.DecryptBook);
backupBook.DownloadPdf.Begin += (_, __) => wireUpEvents(backupBook.DownloadPdf); backupBook.DownloadPdf.Begin += (_, __) => wireUpEvents(backupBook.DownloadPdf);
// must occur before completedAction. A common use case is:
// - filter by -liberated
// - liberate only that book
// completedAction is to refresh grid
// - want to see that book disappear from grid
// also for this to work, updateIsLiberated can NOT be async
backupBook.DownloadBook.Completed += updateIsLiberated;
backupBook.DecryptBook.Completed += updateIsLiberated;
backupBook.DownloadPdf.Completed += updateIsLiberated;
if (completedAction != null) if (completedAction != null)
{ {
backupBook.DownloadBook.Completed += completedAction; backupBook.DownloadBook.Completed += completedAction;
@ -81,16 +91,10 @@ namespace LibationWinForms.BookLiberation
backupBook.DownloadPdf.Completed += completedAction; backupBook.DownloadPdf.Completed += completedAction;
} }
// enables search engine to index for things like "IsLiberated"
backupBook.DownloadBook.Completed += reindex;
backupBook.DecryptBook.Completed += reindex;
backupBook.DownloadPdf.Completed += reindex;
return backupBook; return backupBook;
} }
private static async void reindex(object sender, LibraryBook e) private static void updateIsLiberated(object sender, LibraryBook e) => ApplicationServices.SearchEngineCommands.UpdateIsLiberated(e.Book);
=> await Task.Run(() => ApplicationServices.SearchEngineCommands.FullReIndex());
private static (AutomatedBackupsForm, LogMe) attachToBackupsForm(BackupBook backupBook) private static (AutomatedBackupsForm, LogMe) attachToBackupsForm(BackupBook backupBook)
{ {

View File

@ -201,6 +201,9 @@ namespace LibationWinForms
// update cells incl Liberate button text // update cells incl Liberate button text
dataGridView.InvalidateRow(rowId); dataGridView.InvalidateRow(rowId);
// needed in case filtering by -IsLiberated and it gets changed to Liberated. want to immediately show the change
filter();
BackupCountsChanged?.Invoke(this, EventArgs.Empty); BackupCountsChanged?.Invoke(this, EventArgs.Empty);
} }
@ -396,8 +399,6 @@ namespace LibationWinForms
} }
currencyManager.ResumeBinding(); currencyManager.ResumeBinding();
VisibleCountChanged?.Invoke(this, dataGridView.AsEnumerable().Count(r => r.Visible)); VisibleCountChanged?.Invoke(this, dataGridView.AsEnumerable().Count(r => r.Visible));
var luceneSearchString_debug = searchResults.SearchString;
} }
#endregion #endregion