Merge branch 'master' of https://github.com/rmcrackan/Libation
This commit is contained in:
commit
a4dfdf80e4
@ -28,6 +28,15 @@ To make upgrades and reinstalls easier, Libation separates all of its responsibi
|
|||||||
|
|
||||||
* Allow Libation to fix up audiobook metadata. After decrypting a title, Libation attempts to fix details like chapters and cover art. Some power users and/or control freaks prefer to manage this themselves. By unchecking this setting, Libation will only decrypt the book and will leave metadata as-is, warts and all.
|
* Allow Libation to fix up audiobook metadata. After decrypting a title, Libation attempts to fix details like chapters and cover art. Some power users and/or control freaks prefer to manage this themselves. By unchecking this setting, Libation will only decrypt the book and will leave metadata as-is, warts and all.
|
||||||
|
|
||||||
|
In addition to the options that are enabled if you allow Libation to "fix up" the audiobook, it does the following:
|
||||||
|
|
||||||
|
* Adds the `TCOM` metadata tag for the narrators.
|
||||||
|
* Sets the `©gen` metadata tag for the genres.
|
||||||
|
* Unescapes the copyright symbol (replace `&#169;` with `©`)
|
||||||
|
* Replaces the recording copyright `(P)` string with `℗`
|
||||||
|
* Replaces the chapter markers embedded in the aax file with the chapter markers retrieved from Audible's API.
|
||||||
|
* Sets the embedded cover art image with the 500x500 px cover art retrieved from Audible
|
||||||
|
|
||||||
### Command Line Interface
|
### Command Line Interface
|
||||||
|
|
||||||
Libationcli.exe allows limited access to Libation's functionalities as a CLI.
|
Libationcli.exe allows limited access to Libation's functionalities as a CLI.
|
||||||
|
|||||||
@ -1,11 +1,10 @@
|
|||||||
using Avalonia.Controls;
|
using Avalonia.Controls;
|
||||||
using Avalonia.Input;
|
using Avalonia.Input;
|
||||||
using Avalonia.Styling;
|
|
||||||
using System;
|
using System;
|
||||||
|
|
||||||
namespace LibationAvalonia.Controls
|
namespace LibationAvalonia.Controls
|
||||||
{
|
{
|
||||||
public partial class WheelComboBox : ComboBox, IStyleable
|
public partial class WheelComboBox : ComboBox
|
||||||
{
|
{
|
||||||
protected override Type StyleKeyOverride => typeof(ComboBox);
|
protected override Type StyleKeyOverride => typeof(ComboBox);
|
||||||
|
|
||||||
|
|||||||
@ -1,81 +0,0 @@
|
|||||||
using Lucene.Net.Analysis.Tokenattributes;
|
|
||||||
using Lucene.Net.Analysis;
|
|
||||||
using System;
|
|
||||||
using System.Collections.Generic;
|
|
||||||
using System.Linq;
|
|
||||||
using System.Text;
|
|
||||||
using System.Threading.Tasks;
|
|
||||||
|
|
||||||
namespace LibationSearchEngine
|
|
||||||
{
|
|
||||||
/// <summary>
/// Analyzer for ID (ASIN) fields: every field value is emitted as exactly one token
/// by <see cref="AsinFilter"/>.
/// </summary>
internal class AsinAnalyzer : Analyzer
{
	/// <summary>Wraps <paramref name="reader"/> in a new <see cref="AsinFilter"/>.</summary>
	/// <param name="fieldName">Name of the field being analyzed; not used.</param>
	/// <param name="reader">Source of the field text.</param>
	public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
	{
		return new AsinFilter(reader);
	}

	/// <summary>
	/// Emits the entire input as a single token and removes
	/// trailing .00 from strings that parsed to numbers
	///
	/// Based on Lucene.Net.Analysis.KeywordTokenizer
	/// </summary>
	private class AsinFilter : Tokenizer
	{
		private const int DEFAULT_BUFFER_SIZE = 256;
		private const string NUMERIC_SUFFIX = ".00";

		// true once the single token has been produced; cleared again by Reset
		private bool done;
		private int finalOffset;
		private readonly ITermAttribute termAtt;
		private readonly IOffsetAttribute offsetAtt;

		public AsinFilter(System.IO.TextReader input) : base(input)
		{
			offsetAtt = AddAttribute<IOffsetAttribute>();
			termAtt = AddAttribute<ITermAttribute>();
			termAtt.ResizeTermBuffer(DEFAULT_BUFFER_SIZE);
		}

		/// <summary>
		/// Reads the whole input into the term buffer and publishes it as one token.
		/// </summary>
		/// <returns>true for the first call after construction or Reset; false afterwards.</returns>
		public override bool IncrementToken()
		{
			if (done)
				return false;

			ClearAttributes();
			done = true;
			int upto = 0;
			char[] buffer = termAtt.TermBuffer();

			// Read through the TextReader API of the base class field instead of
			// "input as CharReader": the cast yields null (and the next line throws)
			// whenever the current reader is not a CharReader.
			// NOTE(review): presumably that is the case after Reset(TextReader) - confirm
			// against the Lucene.Net Tokenizer implementation in use.
			while (true)
			{
				int length = input.Read(buffer, upto, buffer.Length - upto);
				if (length == 0)
					break;

				upto += length;
				// buffer is full: grow it so the next Read has room
				if (upto == buffer.Length)
					buffer = termAtt.ResizeTermBuffer(1 + buffer.Length);
			}

			// Ordinal comparison: the suffix is a fixed machine-formatted string,
			// so the current culture must not influence the match.
			var termStr = new string(buffer, 0, upto);
			if (termStr.EndsWith(NUMERIC_SUFFIX, StringComparison.Ordinal))
				upto -= NUMERIC_SUFFIX.Length;

			termAtt.SetTermLength(upto);
			finalOffset = CorrectOffset(upto);
			offsetAtt.SetOffset(CorrectOffset(0), finalOffset);
			return true;
		}

		/// <summary>Sets both offsets to the end offset of the token that was emitted.</summary>
		public override void End()
		{
			// set final offset
			offsetAtt.SetOffset(finalOffset, finalOffset);
		}

		/// <summary>Switches to a new reader and allows one more token to be produced.</summary>
		public override void Reset(System.IO.TextReader input)
		{
			base.Reset(input);
			this.done = false;
		}
	}
}
|
|
||||||
}
|
|
||||||
@ -1,103 +0,0 @@
|
|||||||
using System;
|
|
||||||
using System.Collections.Generic;
|
|
||||||
using System.Linq;
|
|
||||||
using System.Text.RegularExpressions;
|
|
||||||
|
|
||||||
namespace LibationSearchEngine
|
|
||||||
{
|
|
||||||
/// <summary>
/// Regular expressions used to recognise tags, field names, numbers and bool keywords
/// inside a search string.
/// </summary>
internal static partial class LuceneRegex
{
	#region pattern pieces
	// negative lookbehind: the match may not directly follow an escaping \
	const string NOT_ESCAPED = @"(?<!\\)";

	// A "word" is a run of characters that are neither whitespace nor a lucene reserved character
	//   + - && || ! ( ) { } [ ] ^ " ~ * ? : \
	// Steps: list the characters, backslash-escape and concatenate them,
	// then place the result (together with \s) inside a negated character class.
	private static char[] disallowedChars { get; } = new[] {
		'+', '-', '&', '|', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '\\' };
	private static string disallowedCharsEscaped { get; } = string.Concat(disallowedChars.Select(c => $@"\{c}"));
	private static string WORD_CAPTURE { get; } = $@"([^\s{disallowedCharsEscaped}]+)";

	// a colon, optionally preceded by spaces. captured so that only text which really is a field name gets replaced
	const string FIELD_END = @"(\s*:)";

	const string BEGIN_TAG = @"\[";
	const string END_TAG = @"\]";

	// spaces are tolerated right after '[' and right before ']' but not inside the tag name.
	// literal space character only: new lines, tabs, ... are NOT accepted
	const string OPTIONAL_SPACE_LITERAL = @"\u0020*";
	#endregion

	// [ word ] that is not escaped; the word is capture group 1
	private static string tagPattern { get; } = NOT_ESCAPED + BEGIN_TAG + OPTIONAL_SPACE_LITERAL + WORD_CAPTURE + OPTIONAL_SPACE_LITERAL + END_TAG;
	public static Regex TagRegex { get; } = new Regex(tagPattern, RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

	// word followed by a colon, not escaped
	private static string fieldPattern { get; } = NOT_ESCAPED + WORD_CAPTURE + FIELD_END;
	public static Regex FieldRegex { get; } = new Regex(fieldPattern, RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

	/// <summary>
	/// Matches numbers so they can be auto-padded to 8 characters. Covers ints and dates (yyyyMMdd).
	/// positive lookbehind: start of input, whitespace, { [ or :
	/// positive lookahead: end of input, whitespace, ] or }
	/// </summary>
	[GeneratedRegex(@"(?<=^|\s|\{|\[|:)(\d+\.?\d*)(?=$|\s|\]|\})", RegexOptions.Compiled)]
	public static partial Regex NumbersRegex();

	/// <summary>
	/// Template for matching a stand-alone bool keyword, which callers turn into keyword:True.
	/// A keyword that is bordered by a colon or sits inside brackets is not stand-alone.
	/// The negative lookbehind and lookahead prevent bugs where a bool keyword is also a user-defined tag:
	///   [israted]
	///   parseTag => tags:israted
	///   replaceBools => tags:israted:True
	/// or
	///   [israted]
	///   replaceBools => israted:True
	///   parseTag => [israted:True]
	/// They also stop :True being appended where a value is already present:
	///   israted:false => israted:false:True
	///
	/// Lookahead and lookbehind use parens but are zero-length assertions which do not capture,
	/// so the bool keyword is still $1: it is the first and only capture.
	/// </summary>
	private static string boolPattern_parameterized { get; }
		= @"
### IMPORTANT: 'ignore whitespace' is only partially honored in character sets
### - new lines are ok
### - ANY leading whitespace is treated like actual matching spaces :(

### can't begin with colon. incorrect syntax
### can't begin with open bracket: this signals the start of a tag
(?<! # begin negative lookbehind
[:\[] # char set: colon and open bracket, escaped
\s* # optional space
) # end negative lookbehind

\b # word boundary
({0}) # captured bool search keyword. this is the $1 reference used in regex.Replace
\b # word boundary

### can't end with colon. this signals that the bool's value already exists
### can't begin with close bracket: this signals the end of a tag
(?! # begin negative lookahead
\s* # optional space
[:\]] # char set: colon and close bracket, escaped
) # end negative lookahead
";

	// one compiled regex per keyword, built on first request
	private static Dictionary<string, Regex> boolRegexDic { get; } = new Dictionary<string, Regex>();

	/// <summary>
	/// Returns the case-insensitive regex that matches <paramref name="boolSearch"/> as a
	/// stand-alone bool keyword. The regex is built once per keyword and then reused.
	/// </summary>
	/// <param name="boolSearch">Keyword text inserted into the pattern template as-is.</param>
	public static Regex GetBoolRegex(string boolSearch)
	{
		if (!boolRegexDic.TryGetValue(boolSearch, out var cached))
		{
			var pattern = string.Format(boolPattern_parameterized, boolSearch);
			cached = new Regex(pattern, RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase | RegexOptions.Compiled);
			boolRegexDic.Add(boolSearch, cached);
		}

		return cached;
	}
}
|
|
||||||
}
|
|
||||||
153
Source/LibationSearchEngine/QuerySanitizer.cs
Normal file
153
Source/LibationSearchEngine/QuerySanitizer.cs
Normal file
@ -0,0 +1,153 @@
|
|||||||
|
using Lucene.Net.Analysis.Standard;
|
||||||
|
using Lucene.Net.Analysis.Tokenattributes;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
|
||||||
|
namespace LibationSearchEngine
|
||||||
|
{
|
||||||
|
/// <summary>
/// Rewrites a user-entered search string into the form handed to the Lucene query parser:
/// upper-cases the TO / AND / OR operators, turns [tag] blocks into tags:tag, appends :True to
/// bool fields that have no value, zero-pads numbers and lower-cases known field names and IDs.
/// Text between tokens is copied through unchanged.
/// </summary>
internal static class QuerySanitizer
{
	// lower-cased names of the ID fields; the token that follows one is an ID, never a number
	private static readonly HashSet<string> idTerms
		= SearchEngine.idIndexRules.Keys
		.Select(s => s.ToLowerInvariant())
		.ToHashSet();

	// lower-cased names of the bool fields; they receive ":True" when no bool value follows
	private static readonly HashSet<string> boolTerms
		= SearchEngine.boolIndexRules.Keys
		.Select(s => s.ToLowerInvariant())
		.ToHashSet();

	// lower-cased names of every known field: string, number, ID and bool
	private static readonly HashSet<string> fieldTerms
		= SearchEngine.stringIndexRules.Keys
		.Union(SearchEngine.numberIndexRules.Keys)
		.Select(s => s.ToLowerInvariant())
		.Union(idTerms)
		.Union(boolTerms)
		.ToHashSet();

	/// <summary>
	/// Produces the sanitized query for <paramref name="searchString"/>.
	/// </summary>
	/// <param name="searchString">Raw user input. Null, empty or whitespace yields <see cref="SearchEngine.ALL_QUERY"/>.</param>
	/// <param name="analyzer">Analyzer used to split the input into tokens.</param>
	/// <returns>The rewritten query string.</returns>
	internal static string Sanitize(string searchString, StandardAnalyzer analyzer)
	{
		if (string.IsNullOrWhiteSpace(searchString))
			return SearchEngine.ALL_QUERY;

		// range operator " TO " and bool operators " AND " and " OR " must be uppercase
		searchString
			= searchString
			.Replace(" to ", " TO ", System.StringComparison.OrdinalIgnoreCase)
			.Replace(" and ", " AND ", System.StringComparison.OrdinalIgnoreCase)
			.Replace(" or ", " OR ", System.StringComparison.OrdinalIgnoreCase);

		using var tokenStream = analyzer.TokenStream(SearchEngine.ALL, new System.IO.StringReader(searchString));

		// Fetch the attributes once instead of once per token.
		// NOTE(review): relies on the usual Lucene contract that a stream keeps the same
		// attribute instances and updates them in IncrementToken() - confirm for this version.
		var termAttribute = tokenStream.GetAttribute<ITermAttribute>();
		var offset = tokenStream.GetAttribute<IOffsetAttribute>();

		var partList = new List<string>();
		int previousEndOffset = 0;
		bool previousIsBool = false, previousIsTags = false, previousIsAsin = false;

		while (tokenStream.IncrementToken())
		{
			var term = termAttribute.Term;

			if (previousIsBool && !bool.TryParse(term, out _))
			{
				//The previous term was a boolean tag and this term is NOT a bool value
				//Add the default ":True" bool and continue parsing the current term
				partList.Add(":True");
				previousIsBool = false;
			}

			//Add all text between the current token and the previous token
			partList.Add(searchString.Substring(previousEndOffset, offset.StartOffset - previousEndOffset));

			if (previousIsBool)
			{
				//The previous term was a boolean tag and this term is a bool value
				addUnalteredToken(offset);
				previousIsBool = false;
			}
			else if (previousIsAsin)
			{
				//The previous term was an ASIN field ID, so this term is an ASIN
				partList.Add(term);
				previousIsAsin = false;
			}
			else if (previousIsTags)
			{
				//This term is a tag. Do this check before checking if term is a defined field
				//so that "tags:israted" does not parse as a bool
				addUnalteredToken(offset);
				previousIsTags = false;
			}
			else if (tryParseBlockTag(offset, partList, searchString, out var tagName))
			{
				//The term is a block tag. add it to the part list
				partList.Add($"{SearchEngine.TAGS}:{tagName}");
			}
			else if (tryParseNumber(term, out var num))
			{
				//Term is a number so pad it with zeros
				partList.Add(num.ToLuceneString());
			}
			else if (fieldTerms.Contains(term))
			{
				//Term is a defined search field, add it.
				//The StandardAnalyzer already converts all terms to lowercase
				partList.Add(term);
				previousIsBool = boolTerms.Contains(term);
				previousIsAsin = idTerms.Contains(term);
				previousIsTags = term == SearchEngine.TAGS;
			}
			else
			{
				//Term is any other user-defined constant value
				addUnalteredToken(offset);
			}

			//tryParseBlockTag may have moved EndOffset past the closing ']'
			previousEndOffset = offset.EndOffset;
		}

		//The last token was a bool field with no value
		if (previousIsBool)
			partList.Add(":True");

		//Add ending non-token text
		partList.Add(searchString.Substring(previousEndOffset, searchString.Length - previousEndOffset));

		return string.Concat(partList);

		//Add the full token exactly as typed (original casing), taken from the search string
		void addUnalteredToken(IOffsetAttribute offset) =>
			partList.Add(searchString.Substring(offset.StartOffset, offset.EndOffset - offset.StartOffset));
	}

	/// <summary>
	/// Parses <paramref name="term"/> as a finite number using the invariant culture.
	/// The current culture must not be used: where '.' is a group separator "1.5" would parse as 15.
	/// Non-finite results are rejected so that words such as "nan" or "infinity", which
	/// double.TryParse accepts, stay ordinary search terms.
	/// </summary>
	private static bool tryParseNumber(string term, out double number)
		=> double.TryParse(
			term,
			System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
			System.Globalization.CultureInfo.InvariantCulture,
			out number)
		&& double.IsFinite(number);

	/// <summary>
	/// Detects a token written as a block tag: [name], with optional whitespace inside the brackets.
	/// On success the '[' (and any whitespace after it) is removed from the last entry of
	/// <paramref name="partList"/> and the end offset of <paramref name="offset"/> is moved
	/// past the closing ']'. On failure nothing is modified.
	/// </summary>
	/// <param name="offset">Offsets of the current token within <paramref name="searchString"/>.</param>
	/// <param name="partList">Output parts so far; the last entry is the text preceding the token.</param>
	/// <param name="searchString">The string being sanitized.</param>
	/// <param name="tagName">The token text as typed. Only meaningful when true is returned.</param>
	/// <returns>true when the token is a block tag.</returns>
	private static bool tryParseBlockTag(IOffsetAttribute offset, List<string> partList, string searchString, out string tagName)
	{
		tagName = null;
		if (partList.Count == 0) return false;

		var previous = partList[^1].TrimEnd();

		//must directly follow '[' which cannot be preceded by an escaping \
		if (previous.Length == 0) return false;
		if (previous[^1] != '[' || (previous.Length > 1 && previous[^2] == '\\')) return false;

		//must be followed by ']', optionally after whitespace
		var next = searchString.Substring(offset.EndOffset);
		if (next.Length == 0 || !next.TrimStart().StartsWith(']')) return false;

		tagName = searchString.Substring(offset.StartOffset, offset.EndOffset - offset.StartOffset);

		//Only legal tag characters are letters, numbers and underscores
		//Per DataLayer.UserDefinedItem.IllegalCharacterRegex()
		foreach (var c in tagName)
		{
			if (!char.IsLetterOrDigit(c) && c != '_')
				return false;
		}

		//Remove the leading '['
		partList[^1] = previous[..^1];
		//Ignore the trailing ']'
		offset.SetOffset(offset.StartOffset, searchString.IndexOf(']', offset.EndOffset) + 1);
		return true;
	}
}
|
||||||
|
}
|
||||||
@ -6,8 +6,8 @@ using System.Text.RegularExpressions;
|
|||||||
using DataLayer;
|
using DataLayer;
|
||||||
using Dinah.Core;
|
using Dinah.Core;
|
||||||
using LibationFileManager;
|
using LibationFileManager;
|
||||||
using Lucene.Net.Analysis;
|
|
||||||
using Lucene.Net.Analysis.Standard;
|
using Lucene.Net.Analysis.Standard;
|
||||||
|
using Lucene.Net.Analysis.Tokenattributes;
|
||||||
using Lucene.Net.Documents;
|
using Lucene.Net.Documents;
|
||||||
using Lucene.Net.Index;
|
using Lucene.Net.Index;
|
||||||
using Lucene.Net.Search;
|
using Lucene.Net.Search;
|
||||||
@ -32,18 +32,18 @@ namespace LibationSearchEngine
|
|||||||
public const string ALL_NARRATOR_NAMES = "NarratorNames";
|
public const string ALL_NARRATOR_NAMES = "NarratorNames";
|
||||||
public const string ALL_SERIES_NAMES = "SeriesNames";
|
public const string ALL_SERIES_NAMES = "SeriesNames";
|
||||||
|
|
||||||
private static ReadOnlyDictionary<string, Func<LibraryBook, string>> idIndexRules { get; }
|
internal static ReadOnlyDictionary<string, Func<LibraryBook, string>> idIndexRules { get; }
|
||||||
= new ReadOnlyDictionary<string, Func<LibraryBook, string>>(
|
= new ReadOnlyDictionary<string, Func<LibraryBook, string>>(
|
||||||
new Dictionary<string, Func<LibraryBook, string>>
|
new Dictionary<string, Func<LibraryBook, string>>
|
||||||
{
|
{
|
||||||
[nameof(Book.AudibleProductId)] = lb => lb.Book.AudibleProductId,
|
[nameof(Book.AudibleProductId)] = lb => lb.Book.AudibleProductId.ToLowerInvariant(),
|
||||||
["ProductId"] = lb => lb.Book.AudibleProductId,
|
["ProductId"] = lb => lb.Book.AudibleProductId.ToLowerInvariant(),
|
||||||
["Id"] = lb => lb.Book.AudibleProductId,
|
["Id"] = lb => lb.Book.AudibleProductId.ToLowerInvariant(),
|
||||||
["ASIN"] = lb => lb.Book.AudibleProductId
|
["ASIN"] = lb => lb.Book.AudibleProductId.ToLowerInvariant()
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
private static ReadOnlyDictionary<string, Func<LibraryBook, string>> stringIndexRules { get; }
|
internal static ReadOnlyDictionary<string, Func<LibraryBook, string>> stringIndexRules { get; }
|
||||||
= new ReadOnlyDictionary<string, Func<LibraryBook, string>>(
|
= new ReadOnlyDictionary<string, Func<LibraryBook, string>>(
|
||||||
new Dictionary<string, Func<LibraryBook, string>>
|
new Dictionary<string, Func<LibraryBook, string>>
|
||||||
{
|
{
|
||||||
@ -75,7 +75,7 @@ namespace LibationSearchEngine
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
private static ReadOnlyDictionary<string, Func<LibraryBook, string>> numberIndexRules { get; }
|
internal static ReadOnlyDictionary<string, Func<LibraryBook, string>> numberIndexRules { get; }
|
||||||
= new ReadOnlyDictionary<string, Func<LibraryBook, string>>(
|
= new ReadOnlyDictionary<string, Func<LibraryBook, string>>(
|
||||||
new Dictionary<string, Func<LibraryBook, string>>
|
new Dictionary<string, Func<LibraryBook, string>>
|
||||||
{
|
{
|
||||||
@ -99,7 +99,7 @@ namespace LibationSearchEngine
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
private static ReadOnlyDictionary<string, Func<LibraryBook, bool>> boolIndexRules { get; }
|
internal static ReadOnlyDictionary<string, Func<LibraryBook, bool>> boolIndexRules { get; }
|
||||||
= new ReadOnlyDictionary<string, Func<LibraryBook, bool>>(
|
= new ReadOnlyDictionary<string, Func<LibraryBook, bool>>(
|
||||||
new Dictionary<string, Func<LibraryBook, bool>>
|
new Dictionary<string, Func<LibraryBook, bool>>
|
||||||
{
|
{
|
||||||
@ -354,111 +354,26 @@ namespace LibationSearchEngine
|
|||||||
#region search
|
#region search
|
||||||
public SearchResultSet Search(string searchString)
|
public SearchResultSet Search(string searchString)
|
||||||
{
|
{
|
||||||
|
using var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
|
||||||
|
|
||||||
Serilog.Log.Logger.Debug("original search string: {@DebugInfo}", new { searchString });
|
Serilog.Log.Logger.Debug("original search string: {@DebugInfo}", new { searchString });
|
||||||
searchString = FormatSearchQuery(searchString);
|
searchString = QuerySanitizer.Sanitize(searchString, analyzer);
|
||||||
Serilog.Log.Logger.Debug("formatted search string: {@DebugInfo}", new { searchString });
|
Serilog.Log.Logger.Debug("formatted search string: {@DebugInfo}", new { searchString });
|
||||||
|
|
||||||
var results = generalSearch(searchString);
|
var results = generalSearch(searchString, analyzer);
|
||||||
Serilog.Log.Logger.Debug("Hit(s): {@DebugInfo}", new { count = results.Docs.Count() });
|
Serilog.Log.Logger.Debug("Hit(s): {@DebugInfo}", new { count = results.Docs.Count() });
|
||||||
displayResults(results);
|
displayResults(results);
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
internal static string FormatSearchQuery(string searchString)
|
private SearchResultSet generalSearch(string searchString, StandardAnalyzer analyzer)
|
||||||
{
|
|
||||||
if (string.IsNullOrWhiteSpace(searchString))
|
|
||||||
return ALL_QUERY;
|
|
||||||
|
|
||||||
searchString = replaceBools(searchString);
|
|
||||||
|
|
||||||
searchString = parseTag(searchString);
|
|
||||||
|
|
||||||
// in ranges " TO " must be uppercase
|
|
||||||
searchString = searchString.Replace(" to ", " TO ");
|
|
||||||
|
|
||||||
searchString = padNumbers(searchString);
|
|
||||||
|
|
||||||
searchString = lowerFieldNames(searchString);
|
|
||||||
|
|
||||||
return searchString;
|
|
||||||
}
|
|
||||||
|
|
||||||
#region format query string
|
|
||||||
/// <summary>
/// Replaces every text matched by <see cref="LuceneRegex.TagRegex"/> with TAGS:name, where name
/// is the matched text with the surrounding brackets and whitespace trimmed off.
/// </summary>
private static string parseTag(string tagSearchString)
{
	// collect the matched texts first; the replacements below change the string
	var bracketedTags = LuceneRegex
		.TagRegex
		.Matches(tagSearchString)
		.Cast<Match>()
		.Select(m => m.Value)
		.ToList();

	foreach (var bracketed in bracketedTags)
	{
		var tagName = bracketed.Trim('[', ']').Trim();
		tagSearchString = tagSearchString.Replace(bracketed, TAGS + ":" + tagName);
	}

	return tagSearchString;
}
|
|
||||||
|
|
||||||
/// <summary>
/// Runs the bool regex of every key in boolIndexRules over the search string,
/// replacing each match with the matched keyword followed by ":True".
/// </summary>
private static string replaceBools(string searchString)
	=> boolIndexRules.Keys.Aggregate(
		searchString,
		(current, keyword) => LuceneRegex.GetBoolRegex(keyword).Replace(current, @"$1:True"));
|
|
||||||
|
|
||||||
/// <summary>
/// Replaces every number matched by <see cref="LuceneRegex.NumbersRegex"/> with its
/// ToLuceneString() form.
/// </summary>
/// <param name="searchString">Search string to rewrite.</param>
/// <returns>The search string with the matched numbers replaced.</returns>
private static string padNumbers(string searchString)
{
	// Last match first: presumably so that a replacement never shifts the index
	// of a match that still has to be processed.
	var matches = LuceneRegex
		.NumbersRegex()
		.Matches(searchString)
		.Cast<Match>()
		.OrderByDescending(m => m.Index);

	foreach (var m in matches)
	{
		// The pattern always uses '.' as decimal point, so parse with the invariant culture;
		// under the current culture "1.5" can be read as 15.
		// \d also matches non-ASCII digits, which cannot be parsed: leave those untouched
		// instead of throwing.
		if (!double.TryParse(
			m.Value,
			System.Globalization.NumberStyles.AllowDecimalPoint,
			System.Globalization.CultureInfo.InvariantCulture,
			out var number))
			continue;

		var replaceString = number.ToLuceneString();
		// replace only the first match found at or after this match's position
		searchString = LuceneRegex.NumbersRegex().Replace(searchString, replaceString, 1, m.Index);
	}

	return searchString;
}
|
|
||||||
|
|
||||||
/// <summary>
/// Lower-cases every text matched by <see cref="LuceneRegex.FieldRegex"/>
/// (a word followed by a colon), because fields are case specific.
/// </summary>
private static string lowerFieldNames(string searchString)
{
	// collect the matched texts first; the replacements below change the string
	var fieldTokens = LuceneRegex
		.FieldRegex
		.Matches(searchString)
		.Cast<Match>()
		.Select(m => m.Value)
		.ToList();

	return fieldTokens.Aggregate(
		searchString,
		(text, token) => text.Replace(token, token.ToLowerInvariant()));
}
|
|
||||||
#endregion
|
|
||||||
|
|
||||||
private SearchResultSet generalSearch(string searchString)
|
|
||||||
{
|
{
|
||||||
var defaultField = ALL;
|
var defaultField = ALL;
|
||||||
|
|
||||||
using var index = getIndex();
|
using var index = getIndex();
|
||||||
using var searcher = new IndexSearcher(index);
|
using var searcher = new IndexSearcher(index);
|
||||||
using var analyzer = new StandardAnalyzer(Version);
|
var query = analyzer.GetQuery(defaultField, searchString);
|
||||||
using var asinAnalyzer = new AsinAnalyzer();
|
|
||||||
|
|
||||||
var dic = idIndexRules.Keys.Select(k => new KeyValuePair<string, Analyzer>(k.ToLowerInvariant(), asinAnalyzer));
|
|
||||||
using var perFieldAnalyzer = new PerFieldAnalyzerWrapper(analyzer, dic);
|
|
||||||
|
|
||||||
var query = perFieldAnalyzer.GetQuery(defaultField, searchString);
|
|
||||||
|
|
||||||
// lucene doesn't allow only negations. eg this returns nothing:
|
// lucene doesn't allow only negations. eg this returns nothing:
|
||||||
// -tags:hidden
|
// -tags:hidden
|
||||||
|
|||||||
@ -10,6 +10,7 @@ using Dinah.Core;
|
|||||||
using FluentAssertions;
|
using FluentAssertions;
|
||||||
using FluentAssertions.Common;
|
using FluentAssertions.Common;
|
||||||
using LibationSearchEngine;
|
using LibationSearchEngine;
|
||||||
|
using Lucene.Net.Analysis.Standard;
|
||||||
using Microsoft.VisualStudio.TestPlatform.Common.Filtering;
|
using Microsoft.VisualStudio.TestPlatform.Common.Filtering;
|
||||||
using Microsoft.VisualStudio.TestTools.UnitTesting;
|
using Microsoft.VisualStudio.TestTools.UnitTesting;
|
||||||
using Moq;
|
using Moq;
|
||||||
@ -33,6 +34,7 @@ namespace SearchEngineTests
|
|||||||
[DataRow(" [foo]", " tags:foo")]
|
[DataRow(" [foo]", " tags:foo")]
|
||||||
[DataRow(" [ foo ]", " tags:foo")]
|
[DataRow(" [ foo ]", " tags:foo")]
|
||||||
[DataRow("[foo] ", "tags:foo ")]
|
[DataRow("[foo] ", "tags:foo ")]
|
||||||
|
[DataRow(" [foo] ", " tags:foo ")]
|
||||||
[DataRow("-[foo]", "-tags:foo")]
|
[DataRow("-[foo]", "-tags:foo")]
|
||||||
[DataRow(" -[foo]", " -tags:foo")]
|
[DataRow(" -[foo]", " -tags:foo")]
|
||||||
[DataRow("-[foo] ", "-tags:foo ")]
|
[DataRow("-[foo] ", "-tags:foo ")]
|
||||||
@ -51,15 +53,25 @@ namespace SearchEngineTests
|
|||||||
[DataRow("-israted ", "-israted:True ")]
|
[DataRow("-israted ", "-israted:True ")]
|
||||||
[DataRow(" -israted ", " -israted:True ")]
|
[DataRow(" -israted ", " -israted:True ")]
|
||||||
|
|
||||||
|
//ID Tags to lowercase and not parsed as numbers
|
||||||
|
[DataRow("id:0000000123", "id:0000000123")]
|
||||||
|
[DataRow("id:B000000123", "id:b000000123")]
|
||||||
|
[DataRow("ASIN:B000000123", "asin:b000000123")]
|
||||||
|
[DataRow("AudibleProductId:B000000123", "audibleproductid:b000000123")]
|
||||||
|
[DataRow("ProductId:B000000123", "productid:b000000123")]
|
||||||
|
|
||||||
// bool keyword. Append :True
|
// bool keyword. Append :True
|
||||||
[DataRow("israted", "israted:True")]
|
[DataRow("israted", "israted:True")]
|
||||||
|
|
||||||
// bool keyword with [:bool]. Do not add :True
|
// bool keyword with [:bool]. Do not add :True
|
||||||
[DataRow("israted:True", "israted:True")]
|
[DataRow("israted:True", "israted:True")]
|
||||||
[DataRow("isRated:false", "israted:false")]
|
[DataRow("isRated:false", "israted:false")]
|
||||||
|
[DataRow("liberated AND isRated:false", "liberated:True AND israted:false")]
|
||||||
|
|
||||||
// tag which happens to be a bool keyword >> parse as tag
|
// tag which happens to be a bool keyword >> parse as tag
|
||||||
[DataRow("[israted]", "tags:israted")]
|
[DataRow("[israted]", "tags:israted")]
|
||||||
|
[DataRow("[tags] [israted] [tags] [tags] [isliberated] [israted] ", "tags:tags tags:israted tags:tags tags:tags tags:isliberated tags:israted ")]
|
||||||
|
[DataRow("[tags][israted]", "tags:tagstags:israted")]
|
||||||
|
|
||||||
// numbers with "to". TO all caps, numbers [8.2] format
|
// numbers with "to". TO all caps, numbers [8.2] format
|
||||||
[DataRow("1 to 10", "00000001.00 TO 00000010.00")]
|
[DataRow("1 to 10", "00000001.00 TO 00000010.00")]
|
||||||
@ -72,6 +84,10 @@ namespace SearchEngineTests
|
|||||||
[DataRow("-isRATED", "-israted:True")]
|
[DataRow("-isRATED", "-israted:True")]
|
||||||
|
|
||||||
public void FormattingTest(string input, string output)
|
public void FormattingTest(string input, string output)
|
||||||
=> SearchEngine.FormatSearchQuery(input).Should().Be(output);
|
{
|
||||||
|
using var analyzer = new StandardAnalyzer(SearchEngine.Version);
|
||||||
|
|
||||||
|
QuerySanitizer.Sanitize(input, analyzer).Should().Be(output);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user