diff --git a/Libation.sln b/Libation.sln index ece018cc..52c0673f 100644 --- a/Libation.sln +++ b/Libation.sln @@ -86,6 +86,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "0 Libation Tests", "0 Libat EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "InternalUtilities.Tests", "_Tests\InternalUtilities.Tests\InternalUtilities.Tests.csproj", "{8447C956-B03E-4F59-9DD4-877793B849D9}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LibationSearchEngine.Tests", "_Tests\LibationSearchEngine.Tests\LibationSearchEngine.Tests.csproj", "{C5B21768-C7C9-4FCB-AC1E-187B223D5A98}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -208,6 +210,10 @@ Global {8447C956-B03E-4F59-9DD4-877793B849D9}.Debug|Any CPU.Build.0 = Debug|Any CPU {8447C956-B03E-4F59-9DD4-877793B849D9}.Release|Any CPU.ActiveCfg = Release|Any CPU {8447C956-B03E-4F59-9DD4-877793B849D9}.Release|Any CPU.Build.0 = Release|Any CPU + {C5B21768-C7C9-4FCB-AC1E-187B223D5A98}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {C5B21768-C7C9-4FCB-AC1E-187B223D5A98}.Debug|Any CPU.Build.0 = Debug|Any CPU + {C5B21768-C7C9-4FCB-AC1E-187B223D5A98}.Release|Any CPU.ActiveCfg = Release|Any CPU + {C5B21768-C7C9-4FCB-AC1E-187B223D5A98}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -242,6 +248,7 @@ Global {E7EFD64D-6630-4426-B09C-B6862A92E3FD} = {F0CBB7A7-D3FB-41FF-8F47-CF3F6A592249} {F3B04A3A-20C8-4582-A54A-715AF6A5D859} = {8679CAC8-9164-4007-BDD2-F004810EDA14} {8447C956-B03E-4F59-9DD4-877793B849D9} = {67E66E82-5532-4440-AFB3-9FB1DF9DEF53} + {C5B21768-C7C9-4FCB-AC1E-187B223D5A98} = {67E66E82-5532-4440-AFB3-9FB1DF9DEF53} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {615E00ED-BAEF-4E8E-A92A-9B82D87942A9} diff --git a/LibationLauncher/LibationLauncher.csproj b/LibationLauncher/LibationLauncher.csproj index c32d313d..c04fbbdd 100644 --- a/LibationLauncher/LibationLauncher.csproj +++ b/LibationLauncher/LibationLauncher.csproj @@ -13,7 +13,7 @@ win-x64 - 4.2.1.1 + 4.2.2.1 diff --git a/LibationSearchEngine/UNTESTED/LuceneRegex.cs b/LibationSearchEngine/UNTESTED/LuceneRegex.cs index 3eab17c6..326be325 100644 --- a/LibationSearchEngine/UNTESTED/LuceneRegex.cs +++ b/LibationSearchEngine/UNTESTED/LuceneRegex.cs @@ -42,5 +42,56 @@ namespace LibationSearchEngine // positive look behind: beginning space { [ : // positive look ahead: end space ] } public static Regex NumbersRegex { get; } = new Regex(@"(?<=^|\s|\{|\[|:)(\d+\.?\d*)(?=$|\s|\]|\})", RegexOptions.Compiled); + + /// + /// proper bools are single keywords which are turned into keyword:True + /// if bordered by colons or inside brackets, they are not stand-alone bool keywords + /// the negative lookbehind and lookahead patterns prevent bugs where a bool keyword is also a user-defined tag: + /// [israted] + /// parseTag => tags:israted + /// replaceBools => tags:israted:True + /// or + /// [israted] + /// replaceBools => israted:True + /// parseTag => [israted:True] + /// also don't want to apply :True where the value already exists: + /// israted:false => israted:false:True + /// + /// despite using parans, lookahead and lookbehind are zero-length assertions which do not capture. therefore the bool search keyword is still $1 since it's the first and only capture + /// + private static string boolPattern_parameterized { get; } + = @" +(? boolRegexDic { get; } = new Dictionary(); + public static Regex GetBoolRegex(string boolSearch) + { + if (boolRegexDic.TryGetValue(boolSearch, out var regex)) + return regex; + + var boolPattern = string.Format(boolPattern_parameterized, boolSearch); + regex = new Regex(boolPattern, RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase | RegexOptions.Compiled); + boolRegexDic.Add(boolSearch, regex); + + return regex; + } } } diff --git a/LibationSearchEngine/UNTESTED/SearchEngine.cs b/LibationSearchEngine/UNTESTED/SearchEngine.cs index 3ca208e6..9871fa28 100644 --- a/LibationSearchEngine/UNTESTED/SearchEngine.cs +++ b/LibationSearchEngine/UNTESTED/SearchEngine.cs @@ -347,32 +347,33 @@ namespace LibationSearchEngine public SearchResultSet Search(string searchString) { Serilog.Log.Logger.Debug("original search string: {@DebugInfo}", new { searchString }); + searchString = FormatSearchQuery(searchString); + Serilog.Log.Logger.Debug("formatted search string: {@DebugInfo}", new { searchString }); + var results = generalSearch(searchString); + Serilog.Log.Logger.Debug("Hit(s): {@DebugInfo}", new { count = results.Docs.Count() }); + displayResults(results); + + return results; + } + + public static string FormatSearchQuery(string searchString) + { if (string.IsNullOrWhiteSpace(searchString)) - searchString = ALL_QUERY; - - #region apply formatting - searchString = parseTag(searchString); + return ALL_QUERY; searchString = replaceBools(searchString); + searchString = parseTag(searchString); + // in ranges " TO " must be uppercase searchString = searchString.Replace(" to ", " TO "); searchString = padNumbers(searchString); searchString = lowerFieldNames(searchString); - #endregion - Serilog.Log.Logger.Debug("formatted search string: {@DebugInfo}", new { searchString }); - - var results = generalSearch(searchString); - - Serilog.Log.Logger.Debug("Hit(s): {@DebugInfo}", new { count = results.Docs.Count() }); - - displayResults(results); - - return results; + return searchString; } #region format query string @@ -395,9 +396,10 @@ namespace LibationSearchEngine private static string replaceBools(string searchString) { - // negative look-ahead for optional spaces then colon. don't want to double-up. eg:"israted:false" => "israted:false:True" foreach (var boolSearch in boolIndexRules.Keys) - searchString = Regex.Replace(searchString, $@"\b({boolSearch})\b(?!\s*:)", @"$1:True", RegexOptions.IgnoreCase); + searchString = + LuceneRegex.GetBoolRegex(boolSearch) + .Replace(searchString, @"$1:True"); return searchString; } @@ -434,7 +436,7 @@ namespace LibationSearchEngine return searchString; } - #endregion + #endregion private SearchResultSet generalSearch(string searchString) { diff --git a/_Tests/LibationSearchEngine.Tests/LibationSearchEngine.Tests.csproj b/_Tests/LibationSearchEngine.Tests/LibationSearchEngine.Tests.csproj new file mode 100644 index 00000000..42ec9ddf --- /dev/null +++ b/_Tests/LibationSearchEngine.Tests/LibationSearchEngine.Tests.csproj @@ -0,0 +1,24 @@ + + + + net5.0 + + false + + + + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + + + + diff --git a/_Tests/LibationSearchEngine.Tests/SearchEngineTests.cs b/_Tests/LibationSearchEngine.Tests/SearchEngineTests.cs new file mode 100644 index 00000000..e7fae71c --- /dev/null +++ b/_Tests/LibationSearchEngine.Tests/SearchEngineTests.cs @@ -0,0 +1,39 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Net; +using System.Net.Http; +using System.Threading; +using System.Threading.Tasks; +using Dinah.Core; +using FluentAssertions; +using FluentAssertions.Common; +using LibationSearchEngine; +using Microsoft.VisualStudio.TestPlatform.Common.Filtering; +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Moq; +using Moq.Protected; +using Newtonsoft.Json; +using Newtonsoft.Json.Linq; +using TestCommon; + +namespace SearchEngineTests +{ + [TestClass] + public class FormatSearchQuery + { + [TestMethod] + [DataRow(null, SearchEngine.ALL_QUERY)] + [DataRow("", SearchEngine.ALL_QUERY)] + [DataRow(" ", SearchEngine.ALL_QUERY)] + [DataRow("israted", "israted:True")] + [DataRow("israted:True", "israted:True")] + [DataRow("isRated:false", "israted:false")] + [DataRow("[israted]", "tags:israted")] + [DataRow("1 to 10", "00000001.00 TO 00000010.00")] + [DataRow("19990101 to 20001231", "19990101.00 TO 20001231.00")] + public void FormattingTest(string input, string output) + => SearchEngine.FormatSearchQuery(input).Should().Be(output); + } +}