Merge branch 'master' of https://github.com/rmcrackan/Libation
This commit is contained in:
commit
a4dfdf80e4
@ -28,6 +28,15 @@ To make upgrades and reinstalls easier, Libation separates all of its responsibi
|
||||
|
||||
* Allow Libation to fix up audiobook metadata. After decrypting a title, Libation attempts to fix details like chapters and cover art. Some power users and/or control freaks prefer to manage this themselves. By unchecking this setting, Libation will only decrypt the book and will leave metadata as-is, warts and all.
|
||||
|
||||
In addition to the options that are enabled if you allow Libation to "fix up" the audiobook, it does the following:
|
||||
|
||||
* Adds the `TCOM` metadata tag for the narrators.
|
||||
* Sets the `©gen` metadata tag for the genres.
|
||||
* Unescapes the copyright symbol (replaces the HTML entity `&#169;` with `©`)
|
||||
* Replaces the recording copyright `(P)` string with `℗`
|
||||
* Replaces the chapter markers embedded in the aax file with the chapter markers retrieved from Audible's API.
|
||||
* Sets the embedded cover art image with the 500x500 px cover art retrieved from Audible
|
||||
|
||||
### Command Line Interface
|
||||
|
||||
Libationcli.exe provides limited access to Libation's functionality from the command line.
|
||||
|
||||
@ -1,11 +1,10 @@
|
||||
using Avalonia.Controls;
|
||||
using Avalonia.Input;
|
||||
using Avalonia.Styling;
|
||||
using System;
|
||||
|
||||
namespace LibationAvalonia.Controls
|
||||
{
|
||||
public partial class WheelComboBox : ComboBox, IStyleable
|
||||
public partial class WheelComboBox : ComboBox
|
||||
{
|
||||
protected override Type StyleKeyOverride => typeof(ComboBox);
|
||||
|
||||
|
||||
@ -1,81 +0,0 @@
|
||||
using Lucene.Net.Analysis.Tokenattributes;
|
||||
using Lucene.Net.Analysis;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace LibationSearchEngine
|
||||
{
|
||||
/// <summary>
/// Analyzer for ASIN / product-id fields. Every field is tokenized by
/// <see cref="AsinFilter"/>, which emits the whole input as one token.
/// </summary>
internal class AsinAnalyzer : Analyzer
{
    /// <summary>Creates a new single-token tokenizer over <paramref name="reader"/>.</summary>
    /// <param name="fieldName">Field being analyzed; not used by this analyzer.</param>
    /// <param name="reader">Source of the text to tokenize.</param>
    public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
        => new AsinFilter(reader);

    /// <summary>
    /// Emits the entire input as a single token and removes
    /// trailing .00 from strings that parsed to numbers
    ///
    /// Based on Lucene.Net.Analysis.KeywordTokenizer
    /// </summary>
    private class AsinFilter : Tokenizer
    {
        private const int DEFAULT_BUFFER_SIZE = 256;

        private readonly ITermAttribute termAtt;
        private readonly IOffsetAttribute offsetAtt;

        // true once the single token has been produced for the current input
        private bool done;
        private int finalOffset;

        public AsinFilter(System.IO.TextReader input) : base(input)
        {
            offsetAtt = AddAttribute<IOffsetAttribute>();
            termAtt = AddAttribute<ITermAttribute>();
            termAtt.ResizeTermBuffer(DEFAULT_BUFFER_SIZE);
        }

        /// <summary>
        /// Produces the one and only token on the first call and returns false afterwards
        /// (until <see cref="Reset(System.IO.TextReader)"/> is called).
        /// </summary>
        public override bool IncrementToken()
        {
            if (done)
                return false;

            ClearAttributes();
            done = true;

            int upto = 0;
            char[] buffer = termAtt.TermBuffer();

            // Read through 'input' directly instead of 'input as CharReader':
            // the cast yields null whenever the current reader is not a CharReader
            // (NOTE(review): presumably the case after Reset(TextReader) — confirm against
            // the Lucene.Net Tokenizer source), which made the old code throw a
            // NullReferenceException. For a CharReader this is the same virtual Read call.
            while (true)
            {
                int length = input.Read(buffer, upto, buffer.Length - upto);
                if (length == 0)
                    break;

                upto += length;
                if (upto == buffer.Length)
                    buffer = termAtt.ResizeTermBuffer(1 + buffer.Length);
            }

            // Drop a trailing ".00" so an ASIN that was formatted as a number matches again.
            // Ordinal comparison: this is an identifier, not linguistic text.
            var termStr = new string(buffer, 0, upto);
            if (termStr.EndsWith(".00", StringComparison.Ordinal))
                upto -= 3;

            termAtt.SetTermLength(upto);
            finalOffset = CorrectOffset(upto);
            offsetAtt.SetOffset(CorrectOffset(0), finalOffset);
            return true;
        }

        public override void End()
        {
            // set final offset
            offsetAtt.SetOffset(finalOffset, finalOffset);
        }

        /// <summary>Re-arms the tokenizer so it emits one token for the new <paramref name="input"/>.</summary>
        public override void Reset(System.IO.TextReader input)
        {
            base.Reset(input);
            this.done = false;
        }
    }
}
|
||||
}
|
||||
@ -1,103 +0,0 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace LibationSearchEngine
|
||||
{
|
||||
internal static partial class LuceneRegex
|
||||
{
|
||||
#region pattern pieces
|
||||
// negative lookbehind: cannot be preceded by an escaping \
|
||||
const string NOT_ESCAPED = @"(?<!\\)";
|
||||
|
||||
// disallow spaces and lucene reserved characters
|
||||
// + - && || ! ( ) { } [ ] ^ " ~ * ? : \
|
||||
// define chars
|
||||
// escape and concat
|
||||
// create regex. also disallow spaces
|
||||
private static char[] disallowedChars { get; } = new[] {
|
||||
'+', '-', '&', '|', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '\\' };
|
||||
private static string disallowedCharsEscaped { get; } = disallowedChars.Select(c => $@"\{c}").Aggregate((a, b) => a + b);
|
||||
private static string WORD_CAPTURE { get; } = $@"([^\s{disallowedCharsEscaped}]+)";
|
||||
|
||||
// : with optional preceding spaces. capture these so i don't accidentally replace a non-field name
|
||||
const string FIELD_END = @"(\s*:)";
|
||||
|
||||
const string BEGIN_TAG = @"\[";
|
||||
const string END_TAG = @"\]";
|
||||
|
||||
// space is forgiven at beginning and end of tag but not in the middle
|
||||
// literal space character only. do NOT allow new lines, tabs, ...
|
||||
const string OPTIONAL_SPACE_LITERAL = @"\u0020*";
|
||||
#endregion
|
||||
|
||||
private static string tagPattern { get; } = NOT_ESCAPED + BEGIN_TAG + OPTIONAL_SPACE_LITERAL + WORD_CAPTURE + OPTIONAL_SPACE_LITERAL + END_TAG;
|
||||
public static Regex TagRegex { get; } = new Regex(tagPattern, RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
|
||||
|
||||
private static string fieldPattern { get; } = NOT_ESCAPED + WORD_CAPTURE + FIELD_END;
|
||||
public static Regex FieldRegex { get; } = new Regex(fieldPattern, RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
|
||||
|
||||
/// <summary>
|
||||
/// auto-pad numbers to 8 char.s. This will match int.s and dates (yyyyMMdd)
|
||||
/// positive look behind: beginning space { [ :
|
||||
/// positive look ahead: end space ] }
|
||||
/// </summary>
|
||||
|
||||
[GeneratedRegex(@"(?<=^|\s|\{|\[|:)(\d+\.?\d*)(?=$|\s|\]|\})", RegexOptions.Compiled)]
|
||||
public static partial Regex NumbersRegex();
|
||||
|
||||
/// <summary>
|
||||
/// proper bools are single keywords which are turned into keyword:True
|
||||
/// if bordered by colons or inside brackets, they are not stand-alone bool keywords
|
||||
/// the negative lookbehind and lookahead patterns prevent bugs where a bool keyword is also a user-defined tag:
|
||||
/// [israted]
|
||||
/// parseTag => tags:israted
|
||||
/// replaceBools => tags:israted:True
|
||||
/// or
|
||||
/// [israted]
|
||||
/// replaceBools => israted:True
|
||||
/// parseTag => [israted:True]
|
||||
/// also don't want to apply :True where the value already exists:
|
||||
/// israted:false => israted:false:True
|
||||
///
|
||||
/// despite using parens, lookahead and lookbehind are zero-length assertions which do not capture. therefore the bool search keyword is still $1 since it's the first and only capture
|
||||
/// </summary>
|
||||
private static string boolPattern_parameterized { get; }
|
||||
= @"
|
||||
### IMPORTANT: 'ignore whitespace' is only partially honored in character sets
|
||||
### - new lines are ok
|
||||
### - ANY leading whitespace is treated like actual matching spaces :(
|
||||
|
||||
### can't begin with colon. incorrect syntax
|
||||
### can't begin with open bracket: this signals the start of a tag
|
||||
(?<! # begin negative lookbehind
|
||||
[:\[] # char set: colon and open bracket, escaped
|
||||
\s* # optional space
|
||||
) # end negative lookbehind
|
||||
|
||||
\b # word boundary
|
||||
({0}) # captured bool search keyword. this is the $1 reference used in regex.Replace
|
||||
\b # word boundary
|
||||
|
||||
### can't end with colon. this signals that the bool's value already exists
|
||||
### can't begin with close bracket: this signals the end of a tag
|
||||
(?! # begin negative lookahead
|
||||
\s* # optional space
|
||||
[:\]] # char set: colon and close bracket, escaped
|
||||
) # end negative lookahead
|
||||
";
|
||||
private static Dictionary<string, Regex> boolRegexDic { get; } = new Dictionary<string, Regex>();
|
||||
public static Regex GetBoolRegex(string boolSearch)
|
||||
{
|
||||
if (boolRegexDic.TryGetValue(boolSearch, out var regex))
|
||||
return regex;
|
||||
|
||||
var boolPattern = string.Format(boolPattern_parameterized, boolSearch);
|
||||
regex = new Regex(boolPattern, RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||||
boolRegexDic.Add(boolSearch, regex);
|
||||
|
||||
return regex;
|
||||
}
|
||||
}
|
||||
}
|
||||
153
Source/LibationSearchEngine/QuerySanitizer.cs
Normal file
153
Source/LibationSearchEngine/QuerySanitizer.cs
Normal file
@ -0,0 +1,153 @@
|
||||
using Lucene.Net.Analysis.Standard;
|
||||
using Lucene.Net.Analysis.Tokenattributes;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
|
||||
namespace LibationSearchEngine
|
||||
{
|
||||
/// <summary>
/// Rewrites a user-entered search string into the text handed to the Lucene query parser:
/// upper-cases the " TO ", " AND " and " OR " operators, lower-cases known field names,
/// appends ":True" to bare bool fields, turns "[tag]" into "tags:tag" and zero-pads numbers.
/// All text that lies between analyzer tokens is copied through unchanged.
/// </summary>
internal static class QuerySanitizer
{
    // Lower-cased names of the ID (ASIN) fields.
    private static readonly HashSet<string> idTerms
        = SearchEngine.idIndexRules.Keys
        .Select(s => s.ToLowerInvariant())
        .ToHashSet();

    // Lower-cased names of the boolean fields.
    private static readonly HashSet<string> boolTerms
        = SearchEngine.boolIndexRules.Keys
        .Select(s => s.ToLowerInvariant())
        .ToHashSet();

    // Lower-cased names of every searchable field (string, number, ID and bool).
    private static readonly HashSet<string> fieldTerms
        = SearchEngine.stringIndexRules.Keys
        .Union(SearchEngine.numberIndexRules.Keys)
        .Select(s => s.ToLowerInvariant())
        .Union(idTerms)
        .Union(boolTerms)
        .ToHashSet();

    /// <summary>
    /// Sanitizes <paramref name="searchString"/> for the Lucene query parser.
    /// </summary>
    /// <param name="searchString">Raw query as typed by the user. May be null or blank.</param>
    /// <param name="analyzer">Analyzer used to split the query into tokens.</param>
    /// <returns>
    /// <see cref="SearchEngine.ALL_QUERY"/> when the input is null or white space;
    /// otherwise the rewritten query string.
    /// </returns>
    internal static string Sanitize(string searchString, StandardAnalyzer analyzer)
    {
        if (string.IsNullOrWhiteSpace(searchString))
            return SearchEngine.ALL_QUERY;

        // range operator " TO " and bool operators " AND " and " OR " must be uppercase
        searchString
            = searchString
            .Replace(" to ", " TO ", System.StringComparison.OrdinalIgnoreCase)
            .Replace(" and ", " AND ", System.StringComparison.OrdinalIgnoreCase)
            .Replace(" or ", " OR ", System.StringComparison.OrdinalIgnoreCase);

        using var tokenStream = analyzer.TokenStream(SearchEngine.ALL, new System.IO.StringReader(searchString));

        var partList = new List<string>();
        int previousEndOffset = 0;
        bool previousIsBool = false, previousIsTags = false, previousIsAsin = false;

        while (tokenStream.IncrementToken())
        {
            var term = tokenStream.GetAttribute<ITermAttribute>().Term;
            var offset = tokenStream.GetAttribute<IOffsetAttribute>();

            if (previousIsBool && !bool.TryParse(term, out _))
            {
                //The previous term was a boolean tag and this term is NOT a bool value
                //Add the default ":True" bool and continue parsing the current term
                partList.Add(":True");
                previousIsBool = false;
            }

            //Add all text between the current token and the previous token
            partList.Add(searchString.Substring(previousEndOffset, offset.StartOffset - previousEndOffset));

            if (previousIsBool)
            {
                //The previous term was a boolean tag and this term is a bool value
                addUnalteredToken(offset);
                previousIsBool = false;
            }
            else if (previousIsAsin)
            {
                //The previous term was an ASIN field ID, so this term is an ASIN.
                //Use the analyzer's term (not the raw text) and never pad it as a number.
                partList.Add(term);
                previousIsAsin = false;
            }
            else if (previousIsTags)
            {
                //This term is a tag. Do this check before checking if term is a defined field
                //so that "tags:israted" does not parse as a bool
                addUnalteredToken(offset);
                previousIsTags = false;
            }
            else if (tryParseBlockTag(offset, partList, searchString, out var tagName))
            {
                //The term is a block tag. add it to the part list
                partList.Add($"{SearchEngine.TAGS}:{tagName}");
            }
            else if (double.TryParse(
                term,
                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                System.Globalization.CultureInfo.InvariantCulture,
                out var num))
            {
                //Term is a number so pad it with zeros.
                //Parsed with the invariant culture so '.' is always the decimal separator,
                //whatever the machine's regional settings are.
                partList.Add(num.ToLuceneString());
            }
            else if (fieldTerms.Contains(term))
            {
                //Term is a defined search field, add it.
                //The StandardAnalyzer already converts all terms to lowercase
                partList.Add(term);
                previousIsBool = boolTerms.Contains(term);
                previousIsAsin = idTerms.Contains(term);
                previousIsTags = term == SearchEngine.TAGS;
            }
            else
            {
                //Term is any other user-defined constant value
                addUnalteredToken(offset);
            }

            //tryParseBlockTag may have extended the offset past the closing ']'
            previousEndOffset = offset.EndOffset;
        }

        //A bool field was the last token: give it the default value
        if (previousIsBool)
            partList.Add(":True");

        //Add ending non-token text
        partList.Add(searchString.Substring(previousEndOffset, searchString.Length - previousEndOffset));

        return string.Concat(partList);

        //Add the token exactly as it appears in the search string (original casing)
        void addUnalteredToken(IOffsetAttribute offset) =>
            partList.Add(searchString.Substring(offset.StartOffset, offset.EndOffset - offset.StartOffset));
    }

    /// <summary>
    /// Checks whether the current token is a block tag, i.e. it is enclosed in an
    /// unescaped '[' and a ']' (spaces between bracket and name are allowed).
    /// On success the '[' (and any white space after it) is removed from the last entry of
    /// <paramref name="partList"/> and <paramref name="offset"/> is extended to just past the ']'.
    /// </summary>
    /// <param name="offset">Offsets of the current token within <paramref name="searchString"/>.</param>
    /// <param name="partList">Output parts built so far; its last entry is the text preceding the token.</param>
    /// <param name="searchString">The query being sanitized.</param>
    /// <param name="tagName">The tag's text as found in the query; null when there is no preceding part or no opening bracket.</param>
    /// <returns>true if the token is a block tag; otherwise false.</returns>
    private static bool tryParseBlockTag(IOffsetAttribute offset, List<string> partList, string searchString, out string tagName)
    {
        tagName = null;
        if (partList.Count == 0) return false;

        var previous = partList[^1].TrimEnd();

        //must be directly preceded by '[' which cannot itself be preceded by an escaping \
        if (previous.Length == 0) return false;
        if (previous[^1] != '[' || (previous.Length > 1 && previous[^2] == '\\')) return false;

        var next = searchString.Substring(offset.EndOffset);
        if (next.Length == 0 || !next.TrimStart().StartsWith(']')) return false;

        tagName = searchString.Substring(offset.StartOffset, offset.EndOffset - offset.StartOffset);

        //Only legal tag characters are letters, numbers and underscores
        //Per DataLayer.UserDefinedItem.IllegalCharacterRegex()
        foreach (var c in tagName)
        {
            if (!char.IsLetterOrDigit(c) && c != '_')
                return false;
        }

        //Remove the leading '['
        partList[^1] = previous[..^1];
        //Ignore the trailing ']'
        offset.SetOffset(offset.StartOffset, searchString.IndexOf(']', offset.EndOffset) + 1);
        return true;
    }
}
|
||||
}
|
||||
@ -6,8 +6,8 @@ using System.Text.RegularExpressions;
|
||||
using DataLayer;
|
||||
using Dinah.Core;
|
||||
using LibationFileManager;
|
||||
using Lucene.Net.Analysis;
|
||||
using Lucene.Net.Analysis.Standard;
|
||||
using Lucene.Net.Analysis.Tokenattributes;
|
||||
using Lucene.Net.Documents;
|
||||
using Lucene.Net.Index;
|
||||
using Lucene.Net.Search;
|
||||
@ -32,18 +32,18 @@ namespace LibationSearchEngine
|
||||
public const string ALL_NARRATOR_NAMES = "NarratorNames";
|
||||
public const string ALL_SERIES_NAMES = "SeriesNames";
|
||||
|
||||
private static ReadOnlyDictionary<string, Func<LibraryBook, string>> idIndexRules { get; }
|
||||
internal static ReadOnlyDictionary<string, Func<LibraryBook, string>> idIndexRules { get; }
|
||||
= new ReadOnlyDictionary<string, Func<LibraryBook, string>>(
|
||||
new Dictionary<string, Func<LibraryBook, string>>
|
||||
{
|
||||
[nameof(Book.AudibleProductId)] = lb => lb.Book.AudibleProductId,
|
||||
["ProductId"] = lb => lb.Book.AudibleProductId,
|
||||
["Id"] = lb => lb.Book.AudibleProductId,
|
||||
["ASIN"] = lb => lb.Book.AudibleProductId
|
||||
}
|
||||
[nameof(Book.AudibleProductId)] = lb => lb.Book.AudibleProductId.ToLowerInvariant(),
|
||||
["ProductId"] = lb => lb.Book.AudibleProductId.ToLowerInvariant(),
|
||||
["Id"] = lb => lb.Book.AudibleProductId.ToLowerInvariant(),
|
||||
["ASIN"] = lb => lb.Book.AudibleProductId.ToLowerInvariant()
|
||||
}
|
||||
);
|
||||
|
||||
private static ReadOnlyDictionary<string, Func<LibraryBook, string>> stringIndexRules { get; }
|
||||
internal static ReadOnlyDictionary<string, Func<LibraryBook, string>> stringIndexRules { get; }
|
||||
= new ReadOnlyDictionary<string, Func<LibraryBook, string>>(
|
||||
new Dictionary<string, Func<LibraryBook, string>>
|
||||
{
|
||||
@ -75,7 +75,7 @@ namespace LibationSearchEngine
|
||||
}
|
||||
);
|
||||
|
||||
private static ReadOnlyDictionary<string, Func<LibraryBook, string>> numberIndexRules { get; }
|
||||
internal static ReadOnlyDictionary<string, Func<LibraryBook, string>> numberIndexRules { get; }
|
||||
= new ReadOnlyDictionary<string, Func<LibraryBook, string>>(
|
||||
new Dictionary<string, Func<LibraryBook, string>>
|
||||
{
|
||||
@ -99,7 +99,7 @@ namespace LibationSearchEngine
|
||||
}
|
||||
);
|
||||
|
||||
private static ReadOnlyDictionary<string, Func<LibraryBook, bool>> boolIndexRules { get; }
|
||||
internal static ReadOnlyDictionary<string, Func<LibraryBook, bool>> boolIndexRules { get; }
|
||||
= new ReadOnlyDictionary<string, Func<LibraryBook, bool>>(
|
||||
new Dictionary<string, Func<LibraryBook, bool>>
|
||||
{
|
||||
@ -353,112 +353,27 @@ namespace LibationSearchEngine
|
||||
|
||||
#region search
|
||||
public SearchResultSet Search(string searchString)
|
||||
{
|
||||
Serilog.Log.Logger.Debug("original search string: {@DebugInfo}", new { searchString });
|
||||
searchString = FormatSearchQuery(searchString);
|
||||
{
|
||||
using var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
|
||||
|
||||
Serilog.Log.Logger.Debug("original search string: {@DebugInfo}", new { searchString });
|
||||
searchString = QuerySanitizer.Sanitize(searchString, analyzer);
|
||||
Serilog.Log.Logger.Debug("formatted search string: {@DebugInfo}", new { searchString });
|
||||
|
||||
var results = generalSearch(searchString);
|
||||
var results = generalSearch(searchString, analyzer);
|
||||
Serilog.Log.Logger.Debug("Hit(s): {@DebugInfo}", new { count = results.Docs.Count() });
|
||||
displayResults(results);
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
internal static string FormatSearchQuery(string searchString)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(searchString))
|
||||
return ALL_QUERY;
|
||||
|
||||
searchString = replaceBools(searchString);
|
||||
|
||||
searchString = parseTag(searchString);
|
||||
|
||||
// in ranges " TO " must be uppercase
|
||||
searchString = searchString.Replace(" to ", " TO ");
|
||||
|
||||
searchString = padNumbers(searchString);
|
||||
|
||||
searchString = lowerFieldNames(searchString);
|
||||
|
||||
return searchString;
|
||||
}
|
||||
|
||||
#region format query string
|
||||
private static string parseTag(string tagSearchString)
|
||||
{
|
||||
var allMatches = LuceneRegex
|
||||
.TagRegex
|
||||
.Matches(tagSearchString)
|
||||
.Cast<Match>()
|
||||
.Select(a => a.ToString())
|
||||
.ToList();
|
||||
foreach (var match in allMatches)
|
||||
tagSearchString = tagSearchString.Replace(
|
||||
match,
|
||||
TAGS + ":" + match.Trim('[', ']').Trim()
|
||||
);
|
||||
|
||||
return tagSearchString;
|
||||
}
|
||||
|
||||
private static string replaceBools(string searchString)
|
||||
{
|
||||
foreach (var boolSearch in boolIndexRules.Keys)
|
||||
searchString =
|
||||
LuceneRegex.GetBoolRegex(boolSearch)
|
||||
.Replace(searchString, @"$1:True");
|
||||
|
||||
return searchString;
|
||||
}
|
||||
|
||||
private static string padNumbers(string searchString)
|
||||
{
|
||||
var matches = LuceneRegex
|
||||
.NumbersRegex()
|
||||
.Matches(searchString)
|
||||
.Cast<Match>()
|
||||
.OrderByDescending(m => m.Index);
|
||||
|
||||
foreach (var m in matches)
|
||||
{
|
||||
var replaceString = double.Parse(m.ToString()).ToLuceneString();
|
||||
searchString = LuceneRegex.NumbersRegex().Replace(searchString, replaceString, 1, m.Index);
|
||||
}
|
||||
|
||||
return searchString;
|
||||
}
|
||||
|
||||
private static string lowerFieldNames(string searchString)
|
||||
{
|
||||
// fields are case specific
|
||||
var allMatches = LuceneRegex
|
||||
.FieldRegex
|
||||
.Matches(searchString)
|
||||
.Cast<Match>()
|
||||
.Select(a => a.ToString())
|
||||
.ToList();
|
||||
|
||||
foreach (var match in allMatches)
|
||||
searchString = searchString.Replace(match, match.ToLowerInvariant());
|
||||
|
||||
return searchString;
|
||||
}
|
||||
#endregion
|
||||
|
||||
private SearchResultSet generalSearch(string searchString)
|
||||
private SearchResultSet generalSearch(string searchString, StandardAnalyzer analyzer)
|
||||
{
|
||||
var defaultField = ALL;
|
||||
|
||||
using var index = getIndex();
|
||||
using var searcher = new IndexSearcher(index);
|
||||
using var analyzer = new StandardAnalyzer(Version);
|
||||
using var asinAnalyzer = new AsinAnalyzer();
|
||||
|
||||
var dic = idIndexRules.Keys.Select(k => new KeyValuePair<string, Analyzer>(k.ToLowerInvariant(), asinAnalyzer));
|
||||
using var perFieldAnalyzer = new PerFieldAnalyzerWrapper(analyzer, dic);
|
||||
|
||||
var query = perFieldAnalyzer.GetQuery(defaultField, searchString);
|
||||
var query = analyzer.GetQuery(defaultField, searchString);
|
||||
|
||||
// lucene doesn't allow only negations. eg this returns nothing:
|
||||
// -tags:hidden
|
||||
|
||||
@ -10,6 +10,7 @@ using Dinah.Core;
|
||||
using FluentAssertions;
|
||||
using FluentAssertions.Common;
|
||||
using LibationSearchEngine;
|
||||
using Lucene.Net.Analysis.Standard;
|
||||
using Microsoft.VisualStudio.TestPlatform.Common.Filtering;
|
||||
using Microsoft.VisualStudio.TestTools.UnitTesting;
|
||||
using Moq;
|
||||
@ -31,6 +32,7 @@ namespace SearchEngineTests
|
||||
// tag surrounded by spaces
|
||||
[DataRow("[foo]", "tags:foo")]
|
||||
[DataRow(" [foo]", " tags:foo")]
|
||||
[DataRow(" [ foo ]", " tags:foo")]
|
||||
[DataRow("[foo] ", "tags:foo ")]
|
||||
[DataRow(" [foo] ", " tags:foo ")]
|
||||
[DataRow("-[foo]", "-tags:foo")]
|
||||
@ -51,15 +53,25 @@ namespace SearchEngineTests
|
||||
[DataRow("-israted ", "-israted:True ")]
|
||||
[DataRow(" -israted ", " -israted:True ")]
|
||||
|
||||
//ID Tags to lowercase and not parsed as numbers
|
||||
[DataRow("id:0000000123", "id:0000000123")]
|
||||
[DataRow("id:B000000123", "id:b000000123")]
|
||||
[DataRow("ASIN:B000000123", "asin:b000000123")]
|
||||
[DataRow("AudibleProductId:B000000123", "audibleproductid:b000000123")]
|
||||
[DataRow("ProductId:B000000123", "productid:b000000123")]
|
||||
|
||||
// bool keyword. Append :True
|
||||
[DataRow("israted", "israted:True")]
|
||||
|
||||
// bool keyword with [:bool]. Do not add :True
|
||||
[DataRow("israted:True", "israted:True")]
|
||||
[DataRow("isRated:false", "israted:false")]
|
||||
[DataRow("liberated AND isRated:false", "liberated:True AND israted:false")]
|
||||
|
||||
// tag which happens to be a bool keyword >> parse as tag
|
||||
[DataRow("[israted]", "tags:israted")]
|
||||
[DataRow("[tags] [israted] [tags] [tags] [isliberated] [israted] ", "tags:tags tags:israted tags:tags tags:tags tags:isliberated tags:israted ")]
|
||||
[DataRow("[tags][israted]", "tags:tagstags:israted")]
|
||||
|
||||
// numbers with "to". TO all caps, numbers [8.2] format
|
||||
[DataRow("1 to 10", "00000001.00 TO 00000010.00")]
|
||||
@ -72,6 +84,10 @@ namespace SearchEngineTests
|
||||
[DataRow("-isRATED", "-israted:True")]
|
||||
|
||||
public void FormattingTest(string input, string output)
|
||||
=> SearchEngine.FormatSearchQuery(input).Should().Be(output);
|
||||
{
|
||||
using var analyzer = new StandardAnalyzer(SearchEngine.Version);
|
||||
|
||||
QuerySanitizer.Sanitize(input, analyzer).Should().Be(output);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user