From b4aa22005115db94e9953866504a4619b886533b Mon Sep 17 00:00:00 2001 From: Mbucari Date: Mon, 12 Jun 2023 13:22:35 -0600 Subject: [PATCH] Refactor LibationSearchEngine --- .../Dialogs/SearchSyntaxDialog.axaml.cs | 10 +- Source/LibationSearchEngine/BookRule.cs | 21 ++ Source/LibationSearchEngine/IIndexRule.cs | 22 ++ .../IndexRuleCollection.cs | 28 ++ .../LibationSearchEngine/LibraryBookRule.cs | 21 ++ .../LibationSearchEngine/LuceneExtensions.cs | 63 +++- Source/LibationSearchEngine/QuerySanitizer.cs | 22 +- Source/LibationSearchEngine/SearchEngine.cs | 303 ++++-------------- .../Dialogs/SearchSyntaxDialog.cs | 11 +- 9 files changed, 235 insertions(+), 266 deletions(-) create mode 100644 Source/LibationSearchEngine/BookRule.cs create mode 100644 Source/LibationSearchEngine/IIndexRule.cs create mode 100644 Source/LibationSearchEngine/IndexRuleCollection.cs create mode 100644 Source/LibationSearchEngine/LibraryBookRule.cs diff --git a/Source/LibationAvalonia/Dialogs/SearchSyntaxDialog.axaml.cs b/Source/LibationAvalonia/Dialogs/SearchSyntaxDialog.axaml.cs index 07992c0b..07ca798f 100644 --- a/Source/LibationAvalonia/Dialogs/SearchSyntaxDialog.axaml.cs +++ b/Source/LibationAvalonia/Dialogs/SearchSyntaxDialog.axaml.cs @@ -1,3 +1,5 @@ +using LibationSearchEngine; + namespace LibationAvalonia.Dialogs { public partial class SearchSyntaxDialog : DialogWindow @@ -18,7 +20,7 @@ Search for wizard of oz: title:""wizard of oz"" -" + string.Join("\r\n", LibationSearchEngine.SearchEngine.GetSearchStringFields()); +" + string.Join("\r\n", SearchEngine.FieldIndexRules.StringFieldNames); NumberFields = @" Find books between 1-100 minutes long @@ -30,14 +32,14 @@ Find books published from 2020-1-1 to datepublished:[20200101 TO 20231231] -" + string.Join("\r\n", LibationSearchEngine.SearchEngine.GetSearchNumberFields()); +" + string.Join("\r\n", SearchEngine.FieldIndexRules.NumberFieldNames); BoolFields = @" Find books that you haven't rated: -IsRated -" + string.Join("\r\n", LibationSearchEngine.SearchEngine.GetSearchBoolFields()); +" + string.Join("\r\n", SearchEngine.FieldIndexRules.BoolFieldNames); IdFields = @" Alice's Adventures in @@ -49,7 +51,7 @@ All of these are synonyms for the ID field -" + string.Join("\r\n", LibationSearchEngine.SearchEngine.GetSearchIdFields()); +" + string.Join("\r\n", SearchEngine.FieldIndexRules.IdFieldNames); DataContext = this; diff --git a/Source/LibationSearchEngine/BookRule.cs b/Source/LibationSearchEngine/BookRule.cs new file mode 100644 index 00000000..e7c296e1 --- /dev/null +++ b/Source/LibationSearchEngine/BookRule.cs @@ -0,0 +1,21 @@ +using DataLayer; +using System; +using System.Collections.ObjectModel; + +namespace LibationSearchEngine; + +public class BookRule : IIndexRule +{ + public FieldType FieldType { get; } + public Func ValueGetter { get; } + public ReadOnlyCollection FieldNames { get; } + + public BookRule(FieldType fieldType, Func valueGetter, params string[] fieldNames) + { + ValueGetter = valueGetter; + FieldType = fieldType; + FieldNames = new ReadOnlyCollection(fieldNames); + } + + public string GetValue(LibraryBook libraryBook) => ValueGetter(libraryBook.Book); +} diff --git a/Source/LibationSearchEngine/IIndexRule.cs b/Source/LibationSearchEngine/IIndexRule.cs new file mode 100644 index 00000000..dcaa441f --- /dev/null +++ b/Source/LibationSearchEngine/IIndexRule.cs @@ -0,0 +1,22 @@ +using DataLayer; +using System.Collections.ObjectModel; + +namespace LibationSearchEngine; + +public enum FieldType +{ + Bool, + String, + Number, + ID, + Raw +} + +public interface IIndexRule +{ + /// This rule's value type. + FieldType FieldType { get; } + /// All aliases of this search index rule + ReadOnlyCollection FieldNames { get; } + string GetValue(LibraryBook libraryBook); +} diff --git a/Source/LibationSearchEngine/IndexRuleCollection.cs b/Source/LibationSearchEngine/IndexRuleCollection.cs new file mode 100644 index 00000000..2a20dad1 --- /dev/null +++ b/Source/LibationSearchEngine/IndexRuleCollection.cs @@ -0,0 +1,28 @@ +using DataLayer; +using System; +using System.Collections; +using System.Collections.Generic; +using System.Linq; + +namespace LibationSearchEngine; + +public class IndexRuleCollection : IEnumerable +{ + private readonly List rules = new(); + public IEnumerable IdFieldNames => rules.Where(x => x.FieldType is FieldType.ID).SelectMany(r => r.FieldNames); + public IEnumerable BoolFieldNames => rules.Where(x => x.FieldType is FieldType.Bool).SelectMany(r => r.FieldNames); + public IEnumerable StringFieldNames => rules.Where(x => x.FieldType is FieldType.String).SelectMany(r => r.FieldNames); + public IEnumerable NumberFieldNames => rules.Where(x => x.FieldType is FieldType.Number).SelectMany(r => r.FieldNames); + + public void Add(FieldType fieldType, Func getter, params string[] fieldNames) + => rules.Add(new LibraryBookRule(fieldType, getter, fieldNames)); + + public void Add(FieldType fieldType, Func getter, params string[] fieldNames) + => rules.Add(new BookRule(fieldType, getter, fieldNames)); + + public T GetRuleByFieldName(string fieldName) where T : IIndexRule + => (T)rules.SingleOrDefault(r => r.FieldNames.Any(n => n.Equals(fieldName, StringComparison.OrdinalIgnoreCase))); + + public IEnumerator GetEnumerator() => rules.GetEnumerator(); + IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); +} diff --git a/Source/LibationSearchEngine/LibraryBookRule.cs b/Source/LibationSearchEngine/LibraryBookRule.cs new file mode 100644 index 00000000..6c9036b7 --- /dev/null +++ b/Source/LibationSearchEngine/LibraryBookRule.cs @@ -0,0 +1,21 @@ +using DataLayer; +using System; +using System.Collections.ObjectModel; + +namespace LibationSearchEngine; + +public class LibraryBookRule : IIndexRule +{ + public FieldType FieldType { get; } + public Func ValueGetter { get; } + public ReadOnlyCollection FieldNames { get; } + + public LibraryBookRule(FieldType fieldType, Func valueGetter, params string[] fieldNames) + { + ValueGetter = valueGetter; + FieldType = fieldType; + FieldNames = new ReadOnlyCollection(fieldNames); + } + + public string GetValue(LibraryBook libraryBook) => ValueGetter(libraryBook); +} diff --git a/Source/LibationSearchEngine/LuceneExtensions.cs b/Source/LibationSearchEngine/LuceneExtensions.cs index 20c3eaa9..0e09fe8d 100644 --- a/Source/LibationSearchEngine/LuceneExtensions.cs +++ b/Source/LibationSearchEngine/LuceneExtensions.cs @@ -1,6 +1,6 @@ using System; using System.Collections.Generic; -using System.Linq; +using DataLayer; using Lucene.Net.Analysis; using Lucene.Net.Documents; using Lucene.Net.QueryParsers; @@ -10,21 +10,58 @@ namespace LibationSearchEngine { // field names are case specific and, due to StandardAnalyzer, content is case INspecific internal static class LuceneExtensions - { - internal static void AddRaw(this Document document, string name, string value) - => document.Add(new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED)); + { + internal static void AddAnalyzed(this Document document, string name, string value) + { + if (value is not null) + document.Add(new Field(name.ToLowerInvariant(), value, Field.Store.YES, Field.Index.ANALYZED)); + } - internal static void AddAnalyzed(this Document document, string name, string value) - { - if (value is not null) - document.Add(new Field(name.ToLowerInvariant(), value, Field.Store.YES, Field.Index.ANALYZED)); - } + internal static void RemoveRule(this Document document, IIndexRule rule) + { + // fields are key value pairs. MULTIPLE FIELDS CAN POTENTIALLY HAVE THE SAME KEY. + // ie: must remove old before adding new else will create unwanted duplicates. + foreach (var name in rule.FieldNames) + document.RemoveFields(name.ToLowerInvariant()); + } - internal static void AddNotAnalyzed(this Document document, string name, string value) - => document.Add(new Field(name.ToLowerInvariant(), value, Field.Store.YES, Field.Index.NOT_ANALYZED)); + internal static void AddIndexRule(this Document document, IIndexRule rule, LibraryBook libraryBook) + { + string value = rule.GetValue(libraryBook); - internal static void AddBool(this Document document, string name, bool value) - => document.Add(new Field(name.ToLowerInvariant(), value.ToString(), Field.Store.YES, Field.Index.ANALYZED_NO_NORMS)); + addIndexRule(document, rule, value); + } + + internal static void AddIndexRule(this Document document, BookRule rule, Book libraryBook) + { + addIndexRule(document, rule, rule.ValueGetter(libraryBook)); + } + + private static void addIndexRule(Document document, IIndexRule rule, string value) + { + if (value is null) return; + + foreach (var name in rule.FieldNames) + { + // fields are key value pairs and MULTIPLE FIELDS CAN HAVE THE SAME KEY. + // splitting authors and narrators and/or tags into multiple fields could be interesting research. + // it could allow for more advanced searches, or maybe it could break broad searches. + + // all searching should be lowercase + // external callers have the reasonable expectation that product id will be returned CASE SPECIFIC + var field = rule.FieldType switch + { + FieldType.Bool => new Field(name.ToLowerInvariant(), value, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS), + FieldType.String => new Field(name.ToLowerInvariant(), value, Field.Store.YES, Field.Index.ANALYZED), + FieldType.Number => new Field(name.ToLowerInvariant(), value, Field.Store.YES, Field.Index.NOT_ANALYZED), + FieldType.ID => new Field(name.ToLowerInvariant(), value, Field.Store.YES, Field.Index.NOT_ANALYZED), + FieldType.Raw => new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED), + _ => throw new KeyNotFoundException(), + }; + + document.Add(field); + } + } internal static Query GetQuery(this Analyzer analyzer, string defaultField, string searchString) => new QueryParser(SearchEngine.Version, defaultField.ToLowerInvariant(), analyzer).Parse(searchString); diff --git a/Source/LibationSearchEngine/QuerySanitizer.cs b/Source/LibationSearchEngine/QuerySanitizer.cs index bd4acef0..f6020528 100644 --- a/Source/LibationSearchEngine/QuerySanitizer.cs +++ b/Source/LibationSearchEngine/QuerySanitizer.cs @@ -8,22 +8,20 @@ namespace LibationSearchEngine internal static class QuerySanitizer { private static readonly HashSet idTerms - = SearchEngine.idIndexRules.Keys - .Select(s => s.ToLowerInvariant()) - .ToHashSet(); + = SearchEngine.FieldIndexRules.IdFieldNames + .Select(n => n.ToLowerInvariant()) + .ToHashSet(); private static readonly HashSet boolTerms - = SearchEngine.boolIndexRules.Keys - .Select(s => s.ToLowerInvariant()) - .ToHashSet(); + = SearchEngine.FieldIndexRules.BoolFieldNames + .Select(n => n.ToLowerInvariant()) + .ToHashSet(); private static readonly HashSet fieldTerms - = SearchEngine.stringIndexRules.Keys - .Union(SearchEngine.numberIndexRules.Keys) - .Select(s => s.ToLowerInvariant()) - .Union(idTerms) - .Union(boolTerms) - .ToHashSet(); + = SearchEngine.FieldIndexRules + .SelectMany(r => r.FieldNames) + .Select(n => n.ToLowerInvariant()) + .ToHashSet(); internal static string Sanitize(string searchString, StandardAnalyzer analyzer) { diff --git a/Source/LibationSearchEngine/SearchEngine.cs b/Source/LibationSearchEngine/SearchEngine.cs index 83d7d588..872a4b1a 100644 --- a/Source/LibationSearchEngine/SearchEngine.cs +++ b/Source/LibationSearchEngine/SearchEngine.cs @@ -1,13 +1,10 @@ using System; using System.Collections.Generic; -using System.Collections.ObjectModel; using System.Linq; -using System.Text.RegularExpressions; using DataLayer; using Dinah.Core; using LibationFileManager; using Lucene.Net.Analysis.Standard; -using Lucene.Net.Analysis.Tokenattributes; using Lucene.Net.Documents; using Lucene.Net.Index; using Lucene.Net.Search; @@ -25,164 +22,47 @@ namespace LibationSearchEngine public const string ALL = "all"; #region index rules - // common fields used in the "all" default search field - public const string ALL_AUDIBLE_PRODUCT_ID = nameof(Book.AudibleProductId); - public const string ALL_TITLE = nameof(Book.Title); - public const string ALL_AUTHOR_NAMES = "AuthorNames"; - public const string ALL_NARRATOR_NAMES = "NarratorNames"; - public const string ALL_SERIES_NAMES = "SeriesNames"; - internal static ReadOnlyDictionary> idIndexRules { get; } - = new ReadOnlyDictionary>( - new Dictionary> - { - [nameof(Book.AudibleProductId)] = lb => lb.Book.AudibleProductId.ToLowerInvariant(), - ["ProductId"] = lb => lb.Book.AudibleProductId.ToLowerInvariant(), - ["Id"] = lb => lb.Book.AudibleProductId.ToLowerInvariant(), - ["ASIN"] = lb => lb.Book.AudibleProductId.ToLowerInvariant() - } - ); - - internal static ReadOnlyDictionary> stringIndexRules { get; } - = new ReadOnlyDictionary>( - new Dictionary> - { - [nameof(Book.Title)] = lb => lb.Book.Title, - [ALL_AUTHOR_NAMES] = lb => lb.Book.AuthorNames(), - ["Author"] = lb => lb.Book.AuthorNames(), - ["Authors"] = lb => lb.Book.AuthorNames(), - [ALL_NARRATOR_NAMES] = lb => lb.Book.NarratorNames(), - ["Narrator"] = lb => lb.Book.NarratorNames(), - ["Narrators"] = lb => lb.Book.NarratorNames(), - [nameof(Book.Publisher)] = lb => lb.Book.Publisher, - - [ALL_SERIES_NAMES] = lb => lb.Book.SeriesNames(), - ["Series"] = lb => lb.Book.SeriesNames(), - ["SeriesId"] = lb => string.Join(", ", lb.Book.SeriesLink.Select(s => s.Series.AudibleSeriesId)), - - ["CategoriesNames"] = lb => lb.Book.CategoriesIds() is null ? null : string.Join(", ", lb.Book.CategoriesIds()), - [nameof(Book.Category)] = lb => lb.Book.CategoriesIds() is null ? null : string.Join(", ", lb.Book.CategoriesIds()), - ["Categories"] = lb => lb.Book.CategoriesIds() is null ? null : string.Join(", ", lb.Book.CategoriesIds()), - ["CategoriesId"] = lb => lb.Book.CategoriesIds() is null ? null : string.Join(", ", lb.Book.CategoriesIds()), - ["CategoryId"] = lb => lb.Book.CategoriesIds() is null ? null : string.Join(", ", lb.Book.CategoriesIds()), - - [TAGS.FirstCharToUpper()] = lb => lb.Book.UserDefinedItem.Tags, - - ["Locale"] = lb => lb.Book.Locale, - ["Region"] = lb => lb.Book.Locale, - ["Account"] = lb => lb.Account, - ["Email"] = lb => lb.Account - } - ); - - internal static ReadOnlyDictionary> numberIndexRules { get; } - = new ReadOnlyDictionary>( - new Dictionary> - { - // for now, all numbers are padded to 8 char.s - // This will allow a single method to auto-pad numbers. The method will match these as well as date: yyyymmdd - [nameof(Book.LengthInMinutes)] = lb => lb.Book.LengthInMinutes.ToLuceneString(), - ["Length"] = lb => lb.Book.LengthInMinutes.ToLuceneString(), - ["Minutes"] = lb => lb.Book.LengthInMinutes.ToLuceneString(), - ["Hours"] = lb => (lb.Book.LengthInMinutes / 60).ToLuceneString(), - - ["ProductRating"] = lb => lb.Book.Rating.OverallRating.ToLuceneString(), - ["Rating"] = lb => lb.Book.Rating.OverallRating.ToLuceneString(), - ["UserRating"] = lb => userOverallRating(lb.Book), - ["MyRating"] = lb => userOverallRating(lb.Book), - - [nameof(LibraryBook.DateAdded)] = lb => lb.DateAdded.ToLuceneString(), - [nameof(Book.DatePublished)] = lb => lb.Book.DatePublished?.ToLuceneString() ?? "", - - ["LastDownload"] = lb => lb.Book.UserDefinedItem.LastDownloaded.ToLuceneString(), - ["LastDownloaded"] = lb => lb.Book.UserDefinedItem.LastDownloaded.ToLuceneString() - } - ); - - internal static ReadOnlyDictionary> boolIndexRules { get; } - = new ReadOnlyDictionary>( - new Dictionary> - { - ["HasDownloads"] = lb => lb.Book.HasPdf(), - ["HasDownload"] = lb => lb.Book.HasPdf(), - ["Downloads"] = lb => lb.Book.HasPdf(), - ["Download"] = lb => lb.Book.HasPdf(), - ["HasPDFs"] = lb => lb.Book.HasPdf(), - ["HasPDF"] = lb => lb.Book.HasPdf(), - ["PDFs"] = lb => lb.Book.HasPdf(), - ["PDF"] = lb => lb.Book.HasPdf(), - - ["IsRated"] = lb => lb.Book.UserDefinedItem.Rating.OverallRating > 0f, - ["Rated"] = lb => lb.Book.UserDefinedItem.Rating.OverallRating > 0f, - - ["IsAuthorNarrated"] = isAuthorNarrated, - ["AuthorNarrated"] = isAuthorNarrated, - - [nameof(Book.IsAbridged)] = lb => lb.Book.IsAbridged, - ["Abridged"] = lb => lb.Book.IsAbridged, - - ["IsLiberated"] = lb => isLiberated(lb.Book), - ["Liberated"] = lb => isLiberated(lb.Book), - ["LiberatedError"] = lb => liberatedError(lb.Book), - - ["Podcast"] = lb => lb.Book.IsEpisodeChild(), - ["Podcasts"] = lb => lb.Book.IsEpisodeChild(), - ["IsPodcast"] = lb => lb.Book.IsEpisodeChild(), - ["Episode"] = lb => lb.Book.IsEpisodeChild(), - ["Episodes"] = lb => lb.Book.IsEpisodeChild(), - ["IsEpisode"] = lb => lb.Book.IsEpisodeChild(), - - ["Absent"] = lb => lb.AbsentFromLastScan, - ["AbsentFromLastScan"] = lb => lb.AbsentFromLastScan, - } - ); - - private static bool isAuthorNarrated(LibraryBook lb) + private static bool isAuthorNarrated(Book book) { - var authors = lb.Book.Authors.Select(a => a.Name).ToArray(); - var narrators = lb.Book.Narrators.Select(a => a.Name).ToArray(); + var authors = book.Authors.Select(a => a.Name).ToArray(); + var narrators = book.Narrators.Select(a => a.Name).ToArray(); return authors.Intersect(narrators).Any(); } - private static string userOverallRating(Book book) => book.UserDefinedItem.Rating.OverallRating.ToLuceneString(); - private static bool isLiberated(Book book) => book.UserDefinedItem.BookStatus == LiberatedStatus.Liberated; - private static bool liberatedError(Book book) => book.UserDefinedItem.BookStatus == LiberatedStatus.Error; - // use these common fields in the "all" default search field - private static IEnumerable> allFieldIndexRules { get; } - = new List> - { - idIndexRules[ALL_AUDIBLE_PRODUCT_ID], - stringIndexRules[ALL_TITLE], - stringIndexRules[ALL_AUTHOR_NAMES], - stringIndexRules[ALL_NARRATOR_NAMES], - stringIndexRules[ALL_SERIES_NAMES] - }; - #endregion - - #region get search fields. used for display in help - public static IEnumerable GetSearchIdFields() + // use these common fields in the "all" default search field + public static IndexRuleCollection FieldIndexRules { get; } = new IndexRuleCollection { - foreach (var key in idIndexRules.Keys) - yield return key; - } - - public static IEnumerable GetSearchStringFields() - { - foreach (var key in stringIndexRules.Keys) - yield return key; - } - - public static IEnumerable GetSearchBoolFields() - { - foreach (var key in boolIndexRules.Keys) - yield return key; - } - - public static IEnumerable GetSearchNumberFields() - { - foreach (var key in numberIndexRules.Keys) - yield return key; - } + { FieldType.ID, Book => Book.AudibleProductId.ToLowerInvariant(), nameof(Book.AudibleProductId), "ProductId", "Id", "ASIN" }, + { FieldType.Raw, Book => Book.AudibleProductId, _ID_ }, + { FieldType.String, Book => Book.Title, nameof(Book.Title), "ProductId", "Id", "ASIN" }, + { FieldType.String, Book => Book.AuthorNames(), "AuthorNames", "Author", "Authors" }, + { FieldType.String, Book => Book.NarratorNames(), "NarratorNames", "Narrator", "Narrators" }, + { FieldType.String, Book => Book.Publisher, nameof(Book.Publisher) }, + { FieldType.String, Book => Book.SeriesNames(), "SeriesNames", "Narrator", "Series" }, + { FieldType.String, Book => string.Join(", ", Book.SeriesLink.Select(s => s.Series.AudibleSeriesId)), "SeriesId" }, + { FieldType.String, Book => Book.CategoriesIds() is null ? null : string.Join(", ", Book.CategoriesIds()), nameof(Book.Category), "Categories", "CategoriesId", "CategoryId", "CategoriesNames" }, + { FieldType.String, Book => Book.UserDefinedItem.Tags, TAGS.FirstCharToUpper() }, + { FieldType.String, Book => Book.Locale, "Locale", "Region" }, + { FieldType.String, lb => lb.Account, "Account", "Email" }, + { FieldType.Bool, Book => Book.HasPdf().ToString(), "HasDownloads", "HasDownload", "Downloads" , "Download", "HasPDFs", "HasPDF" , "PDFs", "PDF" }, + { FieldType.Bool, Book => (Book.UserDefinedItem.Rating.OverallRating > 0f).ToString(), "IsRated", "Rated" }, + { FieldType.Bool, Book => isAuthorNarrated(Book).ToString(), "IsAuthorNarrated", "AuthorNarrated" }, + { FieldType.Bool, Book => Book.IsAbridged.ToString(), nameof(Book.IsAbridged), "Abridged" }, + { FieldType.Bool, Book => (Book.UserDefinedItem.BookStatus == LiberatedStatus.Liberated).ToString(), "IsLiberated", "Liberated" }, + { FieldType.Bool, Book => (Book.UserDefinedItem.BookStatus == LiberatedStatus.Error).ToString(), "LiberatedError" }, + { FieldType.Bool, Book => Book.IsEpisodeChild().ToString(), "Podcast", "Podcasts", "IsPodcast", "Episode", "Episodes", "IsEpisode" }, + { FieldType.Bool, lb => lb.AbsentFromLastScan.ToString(), "AbsentFromLastScan", "Absent" }, + // all numbers are padded to 8 char.s + // This will allow a single method to auto-pad numbers. The method will match these as well as date: yyyymmdd + { FieldType.Number, Book => Book.LengthInMinutes.ToLuceneString(), nameof(Book.LengthInMinutes), "Length", "Minutes" }, + { FieldType.Number, Book => (Book.LengthInMinutes / 60).ToLuceneString(), "Hours" }, + { FieldType.Number, Book => Book.Rating.OverallRating.ToLuceneString(), "ProductRating", "Rating" }, + { FieldType.Number, Book => Book.UserDefinedItem.Rating.OverallRating.ToLuceneString(), "UserRating", "MyRating" }, + { FieldType.Number, Book => Book.DatePublished?.ToLuceneString() ?? "", nameof(Book.DatePublished) }, + { FieldType.Number, Book => Book.UserDefinedItem.LastDownloaded.ToLuceneString(), nameof(UserDefinedItem.LastDownloaded), "LastDownload" }, + { FieldType.Number, lb => lb.DateAdded.ToLuceneString(), nameof(LibraryBook.DateAdded) } + }; #endregion #region create and update index @@ -224,35 +104,15 @@ namespace LibationSearchEngine { var doc = new Document(); - // refine with - // http://codeclimber.net.nz/archive/2009/09/10/how-subtext-lucenenet-index-is-structured/ - - // fields are key value pairs and MULTIPLE FIELDS CAN HAVE THE SAME KEY. - // splitting authors and narrators and/or tags into multiple fields could be interesting research. - // it could allow for more advanced searches, or maybe it could break broad searches. - - // all searching should be lowercase - // external callers have the reasonable expectation that product id will be returned CASE SPECIFIC - doc.AddRaw(_ID_, libraryBook.Book.AudibleProductId); - // concat all common fields for the default 'all' field var allConcat = - allFieldIndexRules - .Select(rule => rule(libraryBook)) + FieldIndexRules + .Select(rule => rule.GetValue(libraryBook)) .Aggregate((a, b) => $"{a} {b}"); doc.AddAnalyzed(ALL, allConcat); - foreach (var kvp in idIndexRules) - doc.AddNotAnalyzed(kvp.Key, kvp.Value(libraryBook)); - - foreach (var kvp in stringIndexRules) - doc.AddAnalyzed(kvp.Key, kvp.Value(libraryBook)); - - foreach (var kvp in boolIndexRules) - doc.AddBool(kvp.Key, kvp.Value(libraryBook)); - - foreach (var kvp in numberIndexRules) - doc.AddNotAnalyzed(kvp.Key, kvp.Value(libraryBook)); + foreach (var rule in FieldIndexRules) + doc.AddIndexRule(rule, libraryBook); return doc; } @@ -267,58 +127,39 @@ namespace LibationSearchEngine productId, d => { - // fields are key value pairs. MULTIPLE FIELDS CAN POTENTIALLY HAVE THE SAME KEY. - // ie: must remove old before adding new else will create unwanted duplicates. - d.RemoveField(fieldName.ToLower()); + d.RemoveField(fieldName.ToLower()); d.AddAnalyzed(fieldName, newValue); - }); + }); - // update single document entry + // update single document entry public void UpdateLiberatedStatus(Book book) => updateDocument( book.AudibleProductId, d => { - // - // TODO: better synonym handling. This is too easy to mess up - // + var lib = FieldIndexRules.GetRuleByFieldName("IsLiberated"); + var libError = FieldIndexRules.GetRuleByFieldName("LiberatedError"); + var lastDl = FieldIndexRules.GetRuleByFieldName(nameof(UserDefinedItem.LastDownloaded)); - // fields are key value pairs. MULTIPLE FIELDS CAN POTENTIALLY HAVE THE SAME KEY. - // ie: must remove old before adding new else will create unwanted duplicates. - var v1 = isLiberated(book); - d.RemoveField("isliberated"); - d.AddBool("IsLiberated", v1); - d.RemoveField("liberated"); - d.AddBool("Liberated", v1); + d.RemoveRule(lib); + d.RemoveRule(libError); + d.RemoveRule(lastDl); - var v2 = liberatedError(book); - d.RemoveField("liberatederror"); - d.AddBool("LiberatedError", v2); - - var v3 = book.UserDefinedItem.LastDownloaded?.ToLuceneString() ?? ""; - d.RemoveField("LastDownload"); - d.AddNotAnalyzed("LastDownload", v3); - d.RemoveField("LastDownloaded"); - d.AddNotAnalyzed("LastDownloaded", v3); + d.AddIndexRule(lib, book); + d.AddIndexRule(libError, book); + d.AddIndexRule(lastDl, book); }); public void UpdateUserRatings(Book book) =>updateDocument( book.AudibleProductId, d => - { - // - // TODO: better synonym handling. This is too easy to mess up - // + { + var rating = FieldIndexRules.GetRuleByFieldName("UserRating"); - // fields are key value pairs. MULTIPLE FIELDS CAN POTENTIALLY HAVE THE SAME KEY. - // ie: must remove old before adding new else will create unwanted duplicates. - var v1 = userOverallRating(book); - d.RemoveField("userrating"); - d.AddNotAnalyzed("UserRating", v1); - d.RemoveField("myrating"); - d.AddNotAnalyzed("MyRating", v1); - }); + d.RemoveRule(rating); + d.AddIndexRule(rating, book); + }); private static void updateDocument(string productId, Action action) { @@ -335,11 +176,9 @@ namespace LibationSearchEngine return; var document = searcher.Doc(scoreDoc.Doc); - // perform update action(document); - // update index var createNewIndex = false; using var analyzer = new StandardAnalyzer(Version); @@ -412,24 +251,24 @@ namespace LibationSearchEngine return returnList; } - private void displayResults(SearchResultSet docs) - { - //for (int i = 0; i < docs.Docs.Count(); i++) - //{ - // var sde = docs.Docs.First(); + private void displayResults(SearchResultSet docs) + { + //for (int i = 0; i < docs.Docs.Count(); i++) + //{ + // var sde = docs.Docs.First(); - // Document doc = sde.Doc; - // float score = sde.Score; + // Document doc = sde.Doc; + // float score = sde.Score; - // Serilog.Log.Logger.Debug($"{(i + 1)}) score={score}. Fields:"); - // var allFields = doc.GetFields(); - // foreach (var f in allFields) - // Serilog.Log.Logger.Debug($" [{f.Name}]={f.StringValue}"); - //} - } - #endregion + // Serilog.Log.Logger.Debug($"{(i + 1)}) score={score}. Fields:"); + // var allFields = doc.GetFields(); + // foreach (var f in allFields) + // Serilog.Log.Logger.Debug($" [{f.Name}]={f.StringValue}"); + //} + } + #endregion - private static Directory getIndex() => FSDirectory.Open(SearchEngineDirectory); + private static Directory getIndex() => FSDirectory.Open(SearchEngineDirectory); // not customizable. don't move to config private static string SearchEngineDirectory { get; } diff --git a/Source/LibationWinForms/Dialogs/SearchSyntaxDialog.cs b/Source/LibationWinForms/Dialogs/SearchSyntaxDialog.cs index d809f33f..f255f520 100644 --- a/Source/LibationWinForms/Dialogs/SearchSyntaxDialog.cs +++ b/Source/LibationWinForms/Dialogs/SearchSyntaxDialog.cs @@ -1,4 +1,5 @@ -using System; +using LibationSearchEngine; +using System; using System.Linq; using System.Windows.Forms; @@ -10,10 +11,10 @@ namespace LibationWinForms.Dialogs { InitializeComponent(); - label2.Text += "\r\n\r\n" + string.Join("\r\n", LibationSearchEngine.SearchEngine.GetSearchStringFields()); - label3.Text += "\r\n\r\n" + string.Join("\r\n", LibationSearchEngine.SearchEngine.GetSearchNumberFields()); - label4.Text += "\r\n\r\n" + string.Join("\r\n", LibationSearchEngine.SearchEngine.GetSearchBoolFields()); - label5.Text += "\r\n\r\n" + string.Join("\r\n", LibationSearchEngine.SearchEngine.GetSearchIdFields()); + label2.Text += "\r\n\r\n" + string.Join("\r\n", SearchEngine.FieldIndexRules.StringFieldNames); + label3.Text += "\r\n\r\n" + string.Join("\r\n", SearchEngine.FieldIndexRules.NumberFieldNames); + label4.Text += "\r\n\r\n" + string.Join("\r\n", SearchEngine.FieldIndexRules.BoolFieldNames); + label5.Text += "\r\n\r\n" + string.Join("\r\n", SearchEngine.FieldIndexRules.IdFieldNames); this.SetLibationIcon(); }