* search engine: refactoring and improved logging
* bug fix: after book is liberated, filter should immediately honor new "is liberated" status
This commit is contained in:
parent
7b966f6962
commit
abd00ff1df
@ -1,4 +1,5 @@
|
||||
using System.IO;
|
||||
using System;
|
||||
using System.IO;
|
||||
using DataLayer;
|
||||
using LibationSearchEngine;
|
||||
|
||||
@ -12,31 +13,43 @@ namespace ApplicationServices
|
||||
engine.CreateNewIndex();
|
||||
}
|
||||
|
||||
public static SearchResultSet Search(string searchString)
|
||||
public static SearchResultSet Search(string searchString) => performSearchEngineFunc_safe(e =>
|
||||
e.Search(searchString)
|
||||
);
|
||||
|
||||
public static void UpdateBookTags(Book book) => performSearchEngineAction_safe(e =>
|
||||
e.UpdateTags(book.AudibleProductId, book.UserDefinedItem.Tags)
|
||||
);
|
||||
|
||||
public static void UpdateIsLiberated(Book book) => performSearchEngineAction_safe(e =>
|
||||
e.UpdateIsLiberated(book.AudibleProductId)
|
||||
);
|
||||
|
||||
private static void performSearchEngineAction_safe(Action<SearchEngine> action)
|
||||
{
|
||||
var engine = new SearchEngine(DbContexts.GetContext());
|
||||
try
|
||||
{
|
||||
return engine.Search(searchString);
|
||||
action(engine);
|
||||
}
|
||||
catch (FileNotFoundException)
|
||||
{
|
||||
FullReIndex();
|
||||
return engine.Search(searchString);
|
||||
action(engine);
|
||||
}
|
||||
}
|
||||
|
||||
public static void UpdateBookTags(Book book)
|
||||
private static T performSearchEngineFunc_safe<T>(Func<SearchEngine, T> action)
|
||||
{
|
||||
var engine = new SearchEngine(DbContexts.GetContext());
|
||||
try
|
||||
{
|
||||
engine.UpdateTags(book.AudibleProductId, book.UserDefinedItem.Tags);
|
||||
return action(engine);
|
||||
}
|
||||
catch (FileNotFoundException)
|
||||
{
|
||||
FullReIndex();
|
||||
engine.UpdateTags(book.AudibleProductId, book.UserDefinedItem.Tags);
|
||||
return action(engine);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
<!-- <PublishSingleFile>true</PublishSingleFile> -->
|
||||
<RuntimeIdentifier>win-x64</RuntimeIdentifier>
|
||||
|
||||
<Version>4.2.0.1</Version>
|
||||
<Version>4.2.1.1</Version>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
@ -26,9 +26,16 @@ namespace LibationSearchEngine
|
||||
|
||||
public const string _ID_ = "_ID_";
|
||||
public const string TAGS = "tags";
|
||||
// special field for each book which includes all major parts of the book's metadata. enables non-targetting searching
|
||||
public const string ALL = "all";
|
||||
|
||||
private static ReadOnlyDictionary<string, Func<LibraryBook, string>> idIndexRules { get; }
|
||||
// the workaround which allows displaying all books when query is empty
|
||||
public const string ALL_QUERY = "*:*";
|
||||
|
||||
public SearchEngine(LibationContext context) => this.context = context;
|
||||
|
||||
#region index rules
|
||||
private static ReadOnlyDictionary<string, Func<LibraryBook, string>> idIndexRules { get; }
|
||||
= new ReadOnlyDictionary<string, Func<LibraryBook, string>>(
|
||||
new Dictionary<string, Func<LibraryBook, string>>
|
||||
{
|
||||
@ -38,6 +45,7 @@ namespace LibationSearchEngine
|
||||
["ASIN"] = lb => lb.Book.AudibleProductId
|
||||
}
|
||||
);
|
||||
|
||||
private static ReadOnlyDictionary<string, Func<LibraryBook, string>> stringIndexRules { get; }
|
||||
= new ReadOnlyDictionary<string, Func<LibraryBook, string>>(
|
||||
new Dictionary<string, Func<LibraryBook, string>>
|
||||
@ -98,6 +106,7 @@ namespace LibationSearchEngine
|
||||
["MyRating"] = lb => lb.Book.UserDefinedItem.Rating.OverallRating.ToLuceneString()
|
||||
}
|
||||
);
|
||||
|
||||
private static ReadOnlyDictionary<string, Func<LibraryBook, bool>> boolIndexRules { get; }
|
||||
= new ReadOnlyDictionary<string, Func<LibraryBook, bool>>(
|
||||
new Dictionary<string, Func<LibraryBook, bool>>
|
||||
@ -114,16 +123,17 @@ namespace LibationSearchEngine
|
||||
["IsRated"] = lb => lb.Book.UserDefinedItem.Rating.OverallRating > 0f,
|
||||
["Rated"] = lb => lb.Book.UserDefinedItem.Rating.OverallRating > 0f,
|
||||
|
||||
["IsAuthorNarrated"] = lb => isAuthorNarrated(lb),
|
||||
["AuthorNarrated"] = lb => isAuthorNarrated(lb),
|
||||
["IsAuthorNarrated"] = isAuthorNarrated,
|
||||
["AuthorNarrated"] = isAuthorNarrated,
|
||||
|
||||
[nameof(Book.IsAbridged)] = lb => lb.Book.IsAbridged,
|
||||
["Abridged"] = lb => lb.Book.IsAbridged,
|
||||
|
||||
// this will only be evaluated at time of re-index. ie: state of files moved later will be out of sync until next re-index
|
||||
["IsLiberated"] = lb => AudibleFileStorage.Audio.Exists(lb.Book.AudibleProductId),
|
||||
["Liberated"] = lb => AudibleFileStorage.Audio.Exists(lb.Book.AudibleProductId),
|
||||
});
|
||||
["IsLiberated"] = lb => isLiberated(lb.Book.AudibleProductId),
|
||||
["Liberated"] = lb => isLiberated(lb.Book.AudibleProductId),
|
||||
}
|
||||
);
|
||||
|
||||
private static bool isAuthorNarrated(LibraryBook lb)
|
||||
{
|
||||
@ -132,6 +142,8 @@ namespace LibationSearchEngine
|
||||
return authors.Intersect(narrators).Any();
|
||||
}
|
||||
|
||||
private static bool isLiberated(string id) => AudibleFileStorage.Audio.Exists(id);
|
||||
|
||||
// use these common fields in the "all" default search field
|
||||
private static IEnumerable<Func<LibraryBook, string>> allFieldIndexRules { get; }
|
||||
= new List<Func<LibraryBook, string>>
|
||||
@ -141,8 +153,10 @@ namespace LibationSearchEngine
|
||||
stringIndexRules[nameof(Book.AuthorNames)],
|
||||
stringIndexRules[nameof(Book.NarratorNames)]
|
||||
};
|
||||
#endregion
|
||||
|
||||
public static IEnumerable<string> GetSearchIdFields()
|
||||
#region get search fields. used for display in help
|
||||
public static IEnumerable<string> GetSearchIdFields()
|
||||
{
|
||||
foreach (var key in idIndexRules.Keys)
|
||||
yield return key;
|
||||
@ -177,11 +191,13 @@ namespace LibationSearchEngine
|
||||
foreach (var key in numberIndexRules.Keys)
|
||||
yield return key;
|
||||
}
|
||||
#endregion
|
||||
|
||||
private Directory getIndex() => FSDirectory.Open(SearchEngineDirectory);
|
||||
|
||||
public SearchEngine(LibationContext context) => this.context = context;
|
||||
|
||||
#region create and update index
|
||||
/// <summary>
|
||||
/// create new. ie: full re-index
|
||||
/// </summary>
|
||||
/// <param name="overwrite"></param>
|
||||
public void CreateNewIndex(bool overwrite = true)
|
||||
{
|
||||
// 300 products
|
||||
@ -215,6 +231,22 @@ namespace LibationSearchEngine
|
||||
log();
|
||||
}
|
||||
|
||||
/// <summary>Long running. Use await Task.Run(() => UpdateBook(productId))</summary>
|
||||
public void UpdateBook(string productId)
|
||||
{
|
||||
var libraryBook = context.GetLibraryBook_Flat_NoTracking(productId);
|
||||
var term = new Term(_ID_, productId);
|
||||
|
||||
var document = createBookIndexDocument(libraryBook);
|
||||
var createNewIndex = false;
|
||||
|
||||
using var index = getIndex();
|
||||
using var analyzer = new StandardAnalyzer(Version);
|
||||
using var ixWriter = new IndexWriter(index, analyzer, createNewIndex, IndexWriter.MaxFieldLength.UNLIMITED);
|
||||
ixWriter.DeleteDocuments(term);
|
||||
ixWriter.AddDocument(document);
|
||||
}
|
||||
|
||||
private static Document createBookIndexDocument(LibraryBook libraryBook)
|
||||
{
|
||||
var doc = new Document();
|
||||
@ -252,55 +284,72 @@ namespace LibationSearchEngine
|
||||
return doc;
|
||||
}
|
||||
|
||||
/// <summary>Long running. Use await Task.Run(() => UpdateBook(productId))</summary>
|
||||
public void UpdateBook(string productId)
|
||||
{
|
||||
var libraryBook = context.GetLibraryBook_Flat_NoTracking(productId);
|
||||
var term = new Term(_ID_, productId);
|
||||
// update single document entry
|
||||
// all fields, including 'tags' are case-specific
|
||||
public void UpdateTags(string productId, string tags) => updateAnalyzedField(productId, TAGS, tags);
|
||||
|
||||
var document = createBookIndexDocument(libraryBook);
|
||||
var createNewIndex = false;
|
||||
// all fields are case-specific
|
||||
private static void updateAnalyzedField(string productId, string fieldName, string newValue)
|
||||
=> updateDocument(
|
||||
productId,
|
||||
d =>
|
||||
{
|
||||
// fields are key value pairs. MULTIPLE FIELDS CAN POTENTIALLY HAVE THE SAME KEY.
|
||||
// ie: must remove old before adding new else will create unwanted duplicates.
|
||||
d.RemoveField(fieldName);
|
||||
d.AddAnalyzed(fieldName, newValue);
|
||||
});
|
||||
|
||||
using var index = getIndex();
|
||||
using var analyzer = new StandardAnalyzer(Version);
|
||||
using var ixWriter = new IndexWriter(index, analyzer, createNewIndex, IndexWriter.MaxFieldLength.UNLIMITED);
|
||||
ixWriter.DeleteDocuments(term);
|
||||
ixWriter.AddDocument(document);
|
||||
}
|
||||
// update single document entry
|
||||
public void UpdateIsLiberated(string productId)
|
||||
=> updateDocument(
|
||||
productId,
|
||||
d =>
|
||||
{
|
||||
// fields are key value pairs. MULTIPLE FIELDS CAN POTENTIALLY HAVE THE SAME KEY.
|
||||
// ie: must remove old before adding new else will create unwanted duplicates.
|
||||
var v = isLiberated(productId);
|
||||
d.RemoveField("IsLiberated");
|
||||
d.AddBool("IsLiberated", v);
|
||||
d.RemoveField("Liberated");
|
||||
d.AddBool("Liberated", v);
|
||||
});
|
||||
|
||||
public void UpdateTags(string productId, string tags)
|
||||
private static void updateDocument(string productId, Action<Document> action)
|
||||
{
|
||||
var productTerm = new Term(_ID_, productId);
|
||||
|
||||
using var index = getIndex();
|
||||
using var index = getIndex();
|
||||
|
||||
// get existing document
|
||||
using var searcher = new IndexSearcher(index);
|
||||
var query = new TermQuery(productTerm);
|
||||
var docs = searcher.Search(query, 1);
|
||||
var scoreDoc = docs.ScoreDocs.SingleOrDefault();
|
||||
if (scoreDoc == null)
|
||||
throw new Exception("document not found");
|
||||
var document = searcher.Doc(scoreDoc.Doc);
|
||||
// get existing document
|
||||
using var searcher = new IndexSearcher(index);
|
||||
var query = new TermQuery(productTerm);
|
||||
var docs = searcher.Search(query, 1);
|
||||
var scoreDoc = docs.ScoreDocs.SingleOrDefault();
|
||||
if (scoreDoc == null)
|
||||
throw new Exception("document not found");
|
||||
var document = searcher.Doc(scoreDoc.Doc);
|
||||
|
||||
|
||||
// update document entry with new tags
|
||||
// fields are key value pairs and MULTIPLE FIELDS CAN HAVE THE SAME KEY. must remove old before adding new
|
||||
// REMEMBER: all fields, including 'tags' are case-specific
|
||||
document.RemoveField(TAGS);
|
||||
document.AddAnalyzed(TAGS, tags);
|
||||
// perform update
|
||||
action(document);
|
||||
|
||||
// update index
|
||||
var createNewIndex = false;
|
||||
using var analyzer = new StandardAnalyzer(Version);
|
||||
using var ixWriter = new IndexWriter(index, analyzer, createNewIndex, IndexWriter.MaxFieldLength.UNLIMITED);
|
||||
ixWriter.UpdateDocument(productTerm, document, analyzer);
|
||||
}
|
||||
|
||||
// update index
|
||||
var createNewIndex = false;
|
||||
using var analyzer = new StandardAnalyzer(Version);
|
||||
using var ixWriter = new IndexWriter(index, analyzer, createNewIndex, IndexWriter.MaxFieldLength.UNLIMITED);
|
||||
ixWriter.UpdateDocument(productTerm, document, analyzer);
|
||||
}
|
||||
#endregion
|
||||
|
||||
#region search
|
||||
public SearchResultSet Search(string searchString)
|
||||
{
|
||||
Serilog.Log.Logger.Debug("original search string: {@DebugInfo}", new { searchString });
|
||||
|
||||
if (string.IsNullOrWhiteSpace(searchString))
|
||||
searchString = "*:*";
|
||||
searchString = ALL_QUERY;
|
||||
|
||||
#region apply formatting
|
||||
searchString = parseTag(searchString);
|
||||
@ -315,14 +364,19 @@ namespace LibationSearchEngine
|
||||
searchString = lowerFieldNames(searchString);
|
||||
#endregion
|
||||
|
||||
Serilog.Log.Logger.Debug("formatted search string: {@DebugInfo}", new { searchString });
|
||||
|
||||
var results = generalSearch(searchString);
|
||||
|
||||
Serilog.Log.Logger.Debug("Hit(s): {@DebugInfo}", new { count = results.Docs.Count() });
|
||||
|
||||
displayResults(results);
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
private static string parseTag(string tagSearchString)
|
||||
#region format query string
|
||||
private static string parseTag(string tagSearchString)
|
||||
{
|
||||
var allMatches = LuceneRegex
|
||||
.TagRegex
|
||||
@ -380,13 +434,10 @@ namespace LibationSearchEngine
|
||||
|
||||
return searchString;
|
||||
}
|
||||
|
||||
public int MaxSearchResultsToReturn { get; set; } = 999;
|
||||
#endregion
|
||||
|
||||
private SearchResultSet generalSearch(string searchString)
|
||||
{
|
||||
Console.WriteLine($"searchString: {searchString}");
|
||||
|
||||
var defaultField = ALL;
|
||||
|
||||
using var index = getIndex();
|
||||
@ -407,14 +458,14 @@ namespace LibationSearchEngine
|
||||
boolQuery.Add(new MatchAllDocsQuery(), Occur.MUST);
|
||||
}
|
||||
|
||||
Console.WriteLine($" query: {query}");
|
||||
|
||||
var docs = searcher
|
||||
.Search(query, MaxSearchResultsToReturn)
|
||||
.Search(query, searcher.MaxDoc + 1)
|
||||
.ScoreDocs
|
||||
.Select(ds => new ScoreDocExplicit(searcher.Doc(ds.Doc), ds.Score))
|
||||
.ToList();
|
||||
return new SearchResultSet(query.ToString(), docs);
|
||||
var queryString = query.ToString();
|
||||
Serilog.Log.Logger.Debug("query: {@DebugInfo}", new { queryString });
|
||||
return new SearchResultSet(queryString, docs);
|
||||
}
|
||||
|
||||
private IEnumerable<Occur> getOccurs_recurs(BooleanQuery query)
|
||||
@ -434,7 +485,6 @@ namespace LibationSearchEngine
|
||||
|
||||
private void displayResults(SearchResultSet docs)
|
||||
{
|
||||
Console.WriteLine($"Hit(s): {docs.Docs.Count()}");
|
||||
//for (int i = 0; i < docs.Docs.Count(); i++)
|
||||
//{
|
||||
// var sde = docs.Docs.First();
|
||||
@ -442,13 +492,14 @@ namespace LibationSearchEngine
|
||||
// Document doc = sde.Doc;
|
||||
// float score = sde.Score;
|
||||
|
||||
// Console.WriteLine($"{(i + 1)}) score={score}. Fields:");
|
||||
// Serilog.Log.Logger.Debug($"{(i + 1)}) score={score}. Fields:");
|
||||
// var allFields = doc.GetFields();
|
||||
// foreach (var f in allFields)
|
||||
// Console.WriteLine($" [{f.Name}]={f.StringValue}");
|
||||
// Serilog.Log.Logger.Debug($" [{f.Name}]={f.StringValue}");
|
||||
//}
|
||||
|
||||
//Console.WriteLine();
|
||||
}
|
||||
#endregion
|
||||
|
||||
private static Directory getIndex() => FSDirectory.Open(SearchEngineDirectory);
|
||||
}
|
||||
}
|
||||
|
||||
@ -74,6 +74,16 @@ namespace LibationWinForms.BookLiberation
|
||||
backupBook.DecryptBook.Begin += (_, __) => wireUpEvents(backupBook.DecryptBook);
|
||||
backupBook.DownloadPdf.Begin += (_, __) => wireUpEvents(backupBook.DownloadPdf);
|
||||
|
||||
// must occur before completedAction. A common use case is:
|
||||
// - filter by -liberated
|
||||
// - liberate only that book
|
||||
// completedAction is to refresh grid
|
||||
// - want to see that book disappear from grid
|
||||
// also for this to work, updateIsLiberated can NOT be async
|
||||
backupBook.DownloadBook.Completed += updateIsLiberated;
|
||||
backupBook.DecryptBook.Completed += updateIsLiberated;
|
||||
backupBook.DownloadPdf.Completed += updateIsLiberated;
|
||||
|
||||
if (completedAction != null)
|
||||
{
|
||||
backupBook.DownloadBook.Completed += completedAction;
|
||||
@ -81,16 +91,10 @@ namespace LibationWinForms.BookLiberation
|
||||
backupBook.DownloadPdf.Completed += completedAction;
|
||||
}
|
||||
|
||||
// enables search engine to index for things like "IsLiberated"
|
||||
backupBook.DownloadBook.Completed += reindex;
|
||||
backupBook.DecryptBook.Completed += reindex;
|
||||
backupBook.DownloadPdf.Completed += reindex;
|
||||
|
||||
return backupBook;
|
||||
}
|
||||
|
||||
private static async void reindex(object sender, LibraryBook e)
|
||||
=> await Task.Run(() => ApplicationServices.SearchEngineCommands.FullReIndex());
|
||||
private static void updateIsLiberated(object sender, LibraryBook e) => ApplicationServices.SearchEngineCommands.UpdateIsLiberated(e.Book);
|
||||
|
||||
private static (AutomatedBackupsForm, LogMe) attachToBackupsForm(BackupBook backupBook)
|
||||
{
|
||||
|
||||
@ -201,6 +201,9 @@ namespace LibationWinForms
|
||||
// update cells incl Liberate button text
|
||||
dataGridView.InvalidateRow(rowId);
|
||||
|
||||
// needed in case filtering by -IsLiberated and it gets changed to Liberated. want to immediately show the change
|
||||
filter();
|
||||
|
||||
BackupCountsChanged?.Invoke(this, EventArgs.Empty);
|
||||
}
|
||||
|
||||
@ -396,8 +399,6 @@ namespace LibationWinForms
|
||||
}
|
||||
currencyManager.ResumeBinding();
|
||||
VisibleCountChanged?.Invoke(this, dataGridView.AsEnumerable().Count(r => r.Visible));
|
||||
|
||||
var luceneSearchString_debug = searchResults.SearchString;
|
||||
}
|
||||
#endregion
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user