* bug fix: when a user creates a tag that is also a reserved bool keyword (e.g. israted), searching for that tag breaks the search
* add unit tests for search engine
This commit is contained in:
parent
abd00ff1df
commit
726b36de4d
@ -86,6 +86,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "0 Libation Tests", "0 Libat
|
|||||||
EndProject
|
EndProject
|
||||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "InternalUtilities.Tests", "_Tests\InternalUtilities.Tests\InternalUtilities.Tests.csproj", "{8447C956-B03E-4F59-9DD4-877793B849D9}"
|
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "InternalUtilities.Tests", "_Tests\InternalUtilities.Tests\InternalUtilities.Tests.csproj", "{8447C956-B03E-4F59-9DD4-877793B849D9}"
|
||||||
EndProject
|
EndProject
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LibationSearchEngine.Tests", "_Tests\LibationSearchEngine.Tests\LibationSearchEngine.Tests.csproj", "{C5B21768-C7C9-4FCB-AC1E-187B223D5A98}"
|
||||||
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
Debug|Any CPU = Debug|Any CPU
|
Debug|Any CPU = Debug|Any CPU
|
||||||
@ -208,6 +210,10 @@ Global
|
|||||||
{8447C956-B03E-4F59-9DD4-877793B849D9}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
{8447C956-B03E-4F59-9DD4-877793B849D9}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
{8447C956-B03E-4F59-9DD4-877793B849D9}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
{8447C956-B03E-4F59-9DD4-877793B849D9}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
{8447C956-B03E-4F59-9DD4-877793B849D9}.Release|Any CPU.Build.0 = Release|Any CPU
|
{8447C956-B03E-4F59-9DD4-877793B849D9}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{C5B21768-C7C9-4FCB-AC1E-187B223D5A98}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{C5B21768-C7C9-4FCB-AC1E-187B223D5A98}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{C5B21768-C7C9-4FCB-AC1E-187B223D5A98}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{C5B21768-C7C9-4FCB-AC1E-187B223D5A98}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
EndGlobalSection
|
EndGlobalSection
|
||||||
GlobalSection(SolutionProperties) = preSolution
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
HideSolutionNode = FALSE
|
HideSolutionNode = FALSE
|
||||||
@ -242,6 +248,7 @@ Global
|
|||||||
{E7EFD64D-6630-4426-B09C-B6862A92E3FD} = {F0CBB7A7-D3FB-41FF-8F47-CF3F6A592249}
|
{E7EFD64D-6630-4426-B09C-B6862A92E3FD} = {F0CBB7A7-D3FB-41FF-8F47-CF3F6A592249}
|
||||||
{F3B04A3A-20C8-4582-A54A-715AF6A5D859} = {8679CAC8-9164-4007-BDD2-F004810EDA14}
|
{F3B04A3A-20C8-4582-A54A-715AF6A5D859} = {8679CAC8-9164-4007-BDD2-F004810EDA14}
|
||||||
{8447C956-B03E-4F59-9DD4-877793B849D9} = {67E66E82-5532-4440-AFB3-9FB1DF9DEF53}
|
{8447C956-B03E-4F59-9DD4-877793B849D9} = {67E66E82-5532-4440-AFB3-9FB1DF9DEF53}
|
||||||
|
{C5B21768-C7C9-4FCB-AC1E-187B223D5A98} = {67E66E82-5532-4440-AFB3-9FB1DF9DEF53}
|
||||||
EndGlobalSection
|
EndGlobalSection
|
||||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||||
SolutionGuid = {615E00ED-BAEF-4E8E-A92A-9B82D87942A9}
|
SolutionGuid = {615E00ED-BAEF-4E8E-A92A-9B82D87942A9}
|
||||||
|
|||||||
@ -13,7 +13,7 @@
|
|||||||
<!-- <PublishSingleFile>true</PublishSingleFile> -->
|
<!-- <PublishSingleFile>true</PublishSingleFile> -->
|
||||||
<RuntimeIdentifier>win-x64</RuntimeIdentifier>
|
<RuntimeIdentifier>win-x64</RuntimeIdentifier>
|
||||||
|
|
||||||
<Version>4.2.1.1</Version>
|
<Version>4.2.2.1</Version>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
|||||||
@ -42,5 +42,56 @@ namespace LibationSearchEngine
|
|||||||
// positive look behind: beginning space { [ :
|
// positive look behind: beginning space { [ :
|
||||||
// positive look ahead: end space ] }
|
// positive look ahead: end space ] }
|
||||||
public static Regex NumbersRegex { get; } = new Regex(@"(?<=^|\s|\{|\[|:)(\d+\.?\d*)(?=$|\s|\]|\})", RegexOptions.Compiled);
|
public static Regex NumbersRegex { get; } = new Regex(@"(?<=^|\s|\{|\[|:)(\d+\.?\d*)(?=$|\s|\]|\})", RegexOptions.Compiled);
|
||||||
|
|
||||||
|
/// <summary>
/// Parameterized pattern for stand-alone bool search keywords. The <c>{0}</c> placeholder is filled in with the keyword by <c>string.Format</c>.
/// proper bools are single keywords which are turned into keyword:True
/// if bordered by colons or inside brackets, they are not stand-alone bool keywords
/// the negative lookbehind and lookahead patterns prevent bugs where a bool keyword is also a user-defined tag:
///   [israted]
///   parseTag => tags:israted
///   replaceBools => tags:israted:True
/// or
///   [israted]
///   replaceBools => israted:True
///   parseTag => [israted:True]
/// also don't want to apply :True where the value already exists:
///   israted:false => israted:false:True
///
/// despite using parens, lookahead and lookbehind are zero-length assertions which do not capture. therefore the bool search keyword is still $1 since it's the first and only capture
///
/// IMPORTANT: this pattern is meant to be compiled with RegexOptions.IgnorePatternWhitespace, but that option is NOT honored inside a character set:
/// white space between [ and ] is matched literally, and comments should not be placed there either.
/// each character set must therefore stay on a single line with nothing but its members between the brackets.
/// splitting a set across lines makes it match spaces and newlines, so a keyword next to a space (eg: "israted foo") is never recognized as a stand-alone bool.
/// the pattern text must also not contain curly braces other than the placeholder, because it is passed through string.Format.
/// </summary>
private static string boolPattern_parameterized { get; }
    = @"
(?<!      # begin negative lookbehind
[:\[]     # char set, kept on one line: colon or escaped open bracket
\s*       # optional space
)         # end negative lookbehind

\b        # word boundary
({0})     # captured bool search keyword. this is the $1 reference used in regex.Replace
\b        # word boundary

(?!       # begin negative lookahead
\s*       # optional space
[:\]]     # char set, kept on one line: colon or escaped close bracket
)         # end negative lookahead
";
|
||||||
|
// cache of compiled bool-keyword regexes, keyed by the keyword they were built for
private static Dictionary<string, Regex> boolRegexDic { get; } = new Dictionary<string, Regex>();

/// <summary>
/// Get the regex which finds <paramref name="boolSearch"/> used as a stand-alone bool keyword.
/// The regex is built from boolPattern_parameterized the first time a keyword is requested and reused from the cache afterwards.
/// </summary>
/// <param name="boolSearch">bool search keyword. inserted into the pattern as-is</param>
/// <returns>compiled, case-insensitive regex whose $1 capture is the keyword</returns>
public static Regex GetBoolRegex(string boolSearch)
{
    if (!boolRegexDic.TryGetValue(boolSearch, out var cached))
    {
        var options = RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase | RegexOptions.Compiled;
        var pattern = string.Format(boolPattern_parameterized, boolSearch);

        cached = new Regex(pattern, options);
        boolRegexDic.Add(boolSearch, cached);
    }

    return cached;
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -347,32 +347,33 @@ namespace LibationSearchEngine
|
|||||||
public SearchResultSet Search(string searchString)
|
public SearchResultSet Search(string searchString)
|
||||||
{
|
{
|
||||||
Serilog.Log.Logger.Debug("original search string: {@DebugInfo}", new { searchString });
|
Serilog.Log.Logger.Debug("original search string: {@DebugInfo}", new { searchString });
|
||||||
|
searchString = FormatSearchQuery(searchString);
|
||||||
|
Serilog.Log.Logger.Debug("formatted search string: {@DebugInfo}", new { searchString });
|
||||||
|
|
||||||
|
var results = generalSearch(searchString);
|
||||||
|
Serilog.Log.Logger.Debug("Hit(s): {@DebugInfo}", new { count = results.Docs.Count() });
|
||||||
|
displayResults(results);
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static string FormatSearchQuery(string searchString)
|
||||||
|
{
|
||||||
if (string.IsNullOrWhiteSpace(searchString))
|
if (string.IsNullOrWhiteSpace(searchString))
|
||||||
searchString = ALL_QUERY;
|
return ALL_QUERY;
|
||||||
|
|
||||||
#region apply formatting
|
|
||||||
searchString = parseTag(searchString);
|
|
||||||
|
|
||||||
searchString = replaceBools(searchString);
|
searchString = replaceBools(searchString);
|
||||||
|
|
||||||
|
searchString = parseTag(searchString);
|
||||||
|
|
||||||
// in ranges " TO " must be uppercase
|
// in ranges " TO " must be uppercase
|
||||||
searchString = searchString.Replace(" to ", " TO ");
|
searchString = searchString.Replace(" to ", " TO ");
|
||||||
|
|
||||||
searchString = padNumbers(searchString);
|
searchString = padNumbers(searchString);
|
||||||
|
|
||||||
searchString = lowerFieldNames(searchString);
|
searchString = lowerFieldNames(searchString);
|
||||||
#endregion
|
|
||||||
|
|
||||||
Serilog.Log.Logger.Debug("formatted search string: {@DebugInfo}", new { searchString });
|
return searchString;
|
||||||
|
|
||||||
var results = generalSearch(searchString);
|
|
||||||
|
|
||||||
Serilog.Log.Logger.Debug("Hit(s): {@DebugInfo}", new { count = results.Docs.Count() });
|
|
||||||
|
|
||||||
displayResults(results);
|
|
||||||
|
|
||||||
return results;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#region format query string
|
#region format query string
|
||||||
@ -395,9 +396,10 @@ namespace LibationSearchEngine
|
|||||||
|
|
||||||
private static string replaceBools(string searchString)
|
private static string replaceBools(string searchString)
|
||||||
{
|
{
|
||||||
// negative look-ahead for optional spaces then colon. don't want to double-up. eg:"israted:false" => "israted:false:True"
|
|
||||||
foreach (var boolSearch in boolIndexRules.Keys)
|
foreach (var boolSearch in boolIndexRules.Keys)
|
||||||
searchString = Regex.Replace(searchString, $@"\b({boolSearch})\b(?!\s*:)", @"$1:True", RegexOptions.IgnoreCase);
|
searchString =
|
||||||
|
LuceneRegex.GetBoolRegex(boolSearch)
|
||||||
|
.Replace(searchString, @"$1:True");
|
||||||
|
|
||||||
return searchString;
|
return searchString;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -0,0 +1,24 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<TargetFramework>net5.0</TargetFramework>
|
||||||
|
|
||||||
|
<IsPackable>false</IsPackable>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.9.4" />
|
||||||
|
<PackageReference Include="MSTest.TestAdapter" Version="2.2.3" />
|
||||||
|
<PackageReference Include="MSTest.TestFramework" Version="2.2.3" />
|
||||||
|
<PackageReference Include="coverlet.collector" Version="3.0.3">
|
||||||
|
<PrivateAssets>all</PrivateAssets>
|
||||||
|
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||||
|
</PackageReference>
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\..\..\Dinah.Core\_Tests\TestCommon\TestCommon.csproj" />
|
||||||
|
<ProjectReference Include="..\..\LibationSearchEngine\LibationSearchEngine.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
|
</Project>
|
||||||
39
_Tests/LibationSearchEngine.Tests/SearchEngineTests.cs
Normal file
39
_Tests/LibationSearchEngine.Tests/SearchEngineTests.cs
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.IO;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Net;
|
||||||
|
using System.Net.Http;
|
||||||
|
using System.Threading;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
using Dinah.Core;
|
||||||
|
using FluentAssertions;
|
||||||
|
using FluentAssertions.Common;
|
||||||
|
using LibationSearchEngine;
|
||||||
|
using Microsoft.VisualStudio.TestPlatform.Common.Filtering;
|
||||||
|
using Microsoft.VisualStudio.TestTools.UnitTesting;
|
||||||
|
using Moq;
|
||||||
|
using Moq.Protected;
|
||||||
|
using Newtonsoft.Json;
|
||||||
|
using Newtonsoft.Json.Linq;
|
||||||
|
using TestCommon;
|
||||||
|
|
||||||
|
namespace SearchEngineTests
{
    /// <summary>
    /// Data-driven tests for <c>SearchEngine.FormatSearchQuery</c>: each row pairs a raw search string with the formatted query it is expected to produce.
    /// </summary>
    [TestClass]
    public class FormatSearchQuery
    {
        [TestMethod]
        // null, empty, and whitespace-only input
        [DataRow(null, SearchEngine.ALL_QUERY)]
        [DataRow("", SearchEngine.ALL_QUERY)]
        [DataRow(" ", SearchEngine.ALL_QUERY)]
        // bool keyword: alone, with an explicit value, and as a bracketed tag
        [DataRow("israted", "israted:True")]
        [DataRow("israted:True", "israted:True")]
        [DataRow("isRated:false", "israted:false")]
        [DataRow("[israted]", "tags:israted")]
        // number ranges
        [DataRow("1 to 10", "00000001.00 TO 00000010.00")]
        [DataRow("19990101 to 20001231", "19990101.00 TO 20001231.00")]
        public void FormattingTest(string input, string output)
        {
            var formatted = SearchEngine.FormatSearchQuery(input);

            formatted.Should().Be(output);
        }
    }
}
|
||||||
Loading…
x
Reference in New Issue
Block a user