* bug fix: when user creates a tag which is also a reserved bool word (eg: israted), searching for this tag breaks the search
* add unit tests for search engine
This commit is contained in:
parent
abd00ff1df
commit
726b36de4d
@ -86,6 +86,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "0 Libation Tests", "0 Libat
|
||||
EndProject
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "InternalUtilities.Tests", "_Tests\InternalUtilities.Tests\InternalUtilities.Tests.csproj", "{8447C956-B03E-4F59-9DD4-877793B849D9}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LibationSearchEngine.Tests", "_Tests\LibationSearchEngine.Tests\LibationSearchEngine.Tests.csproj", "{C5B21768-C7C9-4FCB-AC1E-187B223D5A98}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
@ -208,6 +210,10 @@ Global
|
||||
{8447C956-B03E-4F59-9DD4-877793B849D9}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{8447C956-B03E-4F59-9DD4-877793B849D9}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{8447C956-B03E-4F59-9DD4-877793B849D9}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{C5B21768-C7C9-4FCB-AC1E-187B223D5A98}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{C5B21768-C7C9-4FCB-AC1E-187B223D5A98}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{C5B21768-C7C9-4FCB-AC1E-187B223D5A98}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{C5B21768-C7C9-4FCB-AC1E-187B223D5A98}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
@ -242,6 +248,7 @@ Global
|
||||
{E7EFD64D-6630-4426-B09C-B6862A92E3FD} = {F0CBB7A7-D3FB-41FF-8F47-CF3F6A592249}
|
||||
{F3B04A3A-20C8-4582-A54A-715AF6A5D859} = {8679CAC8-9164-4007-BDD2-F004810EDA14}
|
||||
{8447C956-B03E-4F59-9DD4-877793B849D9} = {67E66E82-5532-4440-AFB3-9FB1DF9DEF53}
|
||||
{C5B21768-C7C9-4FCB-AC1E-187B223D5A98} = {67E66E82-5532-4440-AFB3-9FB1DF9DEF53}
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {615E00ED-BAEF-4E8E-A92A-9B82D87942A9}
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
<!-- <PublishSingleFile>true</PublishSingleFile> -->
|
||||
<RuntimeIdentifier>win-x64</RuntimeIdentifier>
|
||||
|
||||
<Version>4.2.1.1</Version>
|
||||
<Version>4.2.2.1</Version>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
||||
@ -42,5 +42,56 @@ namespace LibationSearchEngine
|
||||
// positive look behind: beginning space { [ :
|
||||
// positive look ahead: end space ] }
|
||||
public static Regex NumbersRegex { get; } = new Regex(@"(?<=^|\s|\{|\[|:)(\d+\.?\d*)(?=$|\s|\]|\})", RegexOptions.Compiled);
|
||||
|
||||
/// <summary>
|
||||
/// proper bools are single keywords which are turned into keyword:True
|
||||
/// if bordered by colons or inside brackets, they are not stand-alone bool keywords
|
||||
/// the negative lookbehind and lookahead patterns prevent bugs where a bool keyword is also a user-defined tag:
|
||||
/// [israted]
|
||||
/// parseTag => tags:israted
|
||||
/// replaceBools => tags:israted:True
|
||||
/// or
|
||||
/// [israted]
|
||||
/// replaceBools => israted:True
|
||||
/// parseTag => [israted:True]
|
||||
/// also don't want to apply :True where the value already exists:
|
||||
/// israted:false => israted:false:True
|
||||
///
|
||||
/// despite using parans, lookahead and lookbehind are zero-length assertions which do not capture. therefore the bool search keyword is still $1 since it's the first and only capture
|
||||
/// </summary>
|
||||
private static string boolPattern_parameterized { get; }
|
||||
= @"
|
||||
(?<! # begin negative lookbehind
|
||||
[ # begin char set
|
||||
: # colon
|
||||
\[ # open bracket, escaped
|
||||
] # end char set
|
||||
\s* # optional space
|
||||
) # end negative lookbehind
|
||||
|
||||
\b # word boundary
|
||||
({0}) # captured bool search keyword. this is the $1 reference used in regex.Replace
|
||||
\b # word boundary
|
||||
|
||||
(?! # begin negative lookahead
|
||||
\s* # optional space
|
||||
[ # begin char set
|
||||
: # colon
|
||||
\] # close bracket, escaped
|
||||
] # end char set
|
||||
) # end negative lookahead
|
||||
";
|
||||
private static Dictionary<string, Regex> boolRegexDic { get; } = new Dictionary<string, Regex>();
|
||||
public static Regex GetBoolRegex(string boolSearch)
|
||||
{
|
||||
if (boolRegexDic.TryGetValue(boolSearch, out var regex))
|
||||
return regex;
|
||||
|
||||
var boolPattern = string.Format(boolPattern_parameterized, boolSearch);
|
||||
regex = new Regex(boolPattern, RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase | RegexOptions.Compiled);
|
||||
boolRegexDic.Add(boolSearch, regex);
|
||||
|
||||
return regex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -347,32 +347,33 @@ namespace LibationSearchEngine
|
||||
public SearchResultSet Search(string searchString)
|
||||
{
|
||||
Serilog.Log.Logger.Debug("original search string: {@DebugInfo}", new { searchString });
|
||||
searchString = FormatSearchQuery(searchString);
|
||||
Serilog.Log.Logger.Debug("formatted search string: {@DebugInfo}", new { searchString });
|
||||
|
||||
var results = generalSearch(searchString);
|
||||
Serilog.Log.Logger.Debug("Hit(s): {@DebugInfo}", new { count = results.Docs.Count() });
|
||||
displayResults(results);
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
public static string FormatSearchQuery(string searchString)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(searchString))
|
||||
searchString = ALL_QUERY;
|
||||
|
||||
#region apply formatting
|
||||
searchString = parseTag(searchString);
|
||||
return ALL_QUERY;
|
||||
|
||||
searchString = replaceBools(searchString);
|
||||
|
||||
searchString = parseTag(searchString);
|
||||
|
||||
// in ranges " TO " must be uppercase
|
||||
searchString = searchString.Replace(" to ", " TO ");
|
||||
|
||||
searchString = padNumbers(searchString);
|
||||
|
||||
searchString = lowerFieldNames(searchString);
|
||||
#endregion
|
||||
|
||||
Serilog.Log.Logger.Debug("formatted search string: {@DebugInfo}", new { searchString });
|
||||
|
||||
var results = generalSearch(searchString);
|
||||
|
||||
Serilog.Log.Logger.Debug("Hit(s): {@DebugInfo}", new { count = results.Docs.Count() });
|
||||
|
||||
displayResults(results);
|
||||
|
||||
return results;
|
||||
return searchString;
|
||||
}
|
||||
|
||||
#region format query string
|
||||
@ -395,9 +396,10 @@ namespace LibationSearchEngine
|
||||
|
||||
private static string replaceBools(string searchString)
|
||||
{
|
||||
// negative look-ahead for optional spaces then colon. don't want to double-up. eg:"israted:false" => "israted:false:True"
|
||||
foreach (var boolSearch in boolIndexRules.Keys)
|
||||
searchString = Regex.Replace(searchString, $@"\b({boolSearch})\b(?!\s*:)", @"$1:True", RegexOptions.IgnoreCase);
|
||||
searchString =
|
||||
LuceneRegex.GetBoolRegex(boolSearch)
|
||||
.Replace(searchString, @"$1:True");
|
||||
|
||||
return searchString;
|
||||
}
|
||||
|
||||
@ -0,0 +1,24 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net5.0</TargetFramework>
|
||||
|
||||
<IsPackable>false</IsPackable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.9.4" />
|
||||
<PackageReference Include="MSTest.TestAdapter" Version="2.2.3" />
|
||||
<PackageReference Include="MSTest.TestFramework" Version="2.2.3" />
|
||||
<PackageReference Include="coverlet.collector" Version="3.0.3">
|
||||
<PrivateAssets>all</PrivateAssets>
|
||||
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
|
||||
</PackageReference>
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\..\Dinah.Core\_Tests\TestCommon\TestCommon.csproj" />
|
||||
<ProjectReference Include="..\..\LibationSearchEngine\LibationSearchEngine.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
39
_Tests/LibationSearchEngine.Tests/SearchEngineTests.cs
Normal file
39
_Tests/LibationSearchEngine.Tests/SearchEngineTests.cs
Normal file
@ -0,0 +1,39 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Net;
|
||||
using System.Net.Http;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using Dinah.Core;
|
||||
using FluentAssertions;
|
||||
using FluentAssertions.Common;
|
||||
using LibationSearchEngine;
|
||||
using Microsoft.VisualStudio.TestPlatform.Common.Filtering;
|
||||
using Microsoft.VisualStudio.TestTools.UnitTesting;
|
||||
using Moq;
|
||||
using Moq.Protected;
|
||||
using Newtonsoft.Json;
|
||||
using Newtonsoft.Json.Linq;
|
||||
using TestCommon;
|
||||
|
||||
namespace SearchEngineTests
|
||||
{
|
||||
[TestClass]
|
||||
public class FormatSearchQuery
|
||||
{
|
||||
[TestMethod]
|
||||
[DataRow(null, SearchEngine.ALL_QUERY)]
|
||||
[DataRow("", SearchEngine.ALL_QUERY)]
|
||||
[DataRow(" ", SearchEngine.ALL_QUERY)]
|
||||
[DataRow("israted", "israted:True")]
|
||||
[DataRow("israted:True", "israted:True")]
|
||||
[DataRow("isRated:false", "israted:false")]
|
||||
[DataRow("[israted]", "tags:israted")]
|
||||
[DataRow("1 to 10", "00000001.00 TO 00000010.00")]
|
||||
[DataRow("19990101 to 20001231", "19990101.00 TO 20001231.00")]
|
||||
public void FormattingTest(string input, string output)
|
||||
=> SearchEngine.FormatSearchQuery(input).Should().Be(output);
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user