Add Asin query tokenizer
This commit is contained in:
parent
359d082ffd
commit
c77fe5d561
81
Source/LibationSearchEngine/AsinAnalyzer.cs
Normal file
81
Source/LibationSearchEngine/AsinAnalyzer.cs
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
using Lucene.Net.Analysis.Tokenattributes;
|
||||||
|
using Lucene.Net.Analysis;
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Text;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace LibationSearchEngine
|
||||||
|
{
|
||||||
|
/// <summary>
/// Analyzer that treats the whole field value as one token
/// (see <see cref="AsinFilter"/>), so identifier-like values are not
/// split up the way a general-purpose analyzer would split them.
/// </summary>
internal class AsinAnalyzer : Analyzer
{
    /// <summary>
    /// Creates the token stream for <paramref name="reader"/>.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed. Not used: every field gets the same tokenizer.</param>
    /// <param name="reader">Source of the text to tokenize.</param>
    /// <returns>A new <see cref="AsinFilter"/> reading from <paramref name="reader"/>.</returns>
    public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
    {
        return new AsinFilter(reader);
    }

    /// <summary>
    /// Emits the entire input as a single token. If that token ends with
    /// the literal characters ".00", those three characters are dropped.
    /// The check is purely textual; the token is not verified to be numeric.
    ///
    /// Based on Lucene.Net.Analysis.KeywordTokenizer
    /// </summary>
    private class AsinFilter : Tokenizer
    {
        private const int DEFAULT_BUFFER_SIZE = 256;

        // Suffix stripped from the end of the emitted token.
        private const string TRAILING_SUFFIX = ".00";

        private readonly ITermAttribute termAtt;
        private readonly IOffsetAttribute offsetAtt;

        // True once the single token has been produced for the current reader.
        private bool done;

        // End offset of the emitted token, reported again by End().
        private int finalOffset;

        /// <summary>
        /// Registers the term and offset attributes and pre-sizes the term buffer.
        /// </summary>
        /// <param name="input">Source of the text to tokenize.</param>
        public AsinFilter(System.IO.TextReader input) : base(input)
        {
            offsetAtt = AddAttribute<IOffsetAttribute>();
            termAtt = AddAttribute<ITermAttribute>();
            termAtt.ResizeTermBuffer(DEFAULT_BUFFER_SIZE);
        }

        /// <summary>
        /// Produces the single token on the first call after construction or
        /// <see cref="Reset(System.IO.TextReader)"/>.
        /// </summary>
        /// <returns><c>true</c> when the token was produced by this call; <c>false</c> on every later call.</returns>
        public override bool IncrementToken()
        {
            if (done)
            {
                return false;
            }

            ClearAttributes();
            done = true;

            int upto = 0;
            char[] buffer = termAtt.TermBuffer();

            // Read straight from the tokenizer's reader. The reader is not
            // guaranteed to be a CharReader (e.g. a plain reader handed to
            // Reset), so casting it with 'as' could yield null and crash here.
            while (true)
            {
                int length = input.Read(buffer, upto, buffer.Length - upto);
                if (length <= 0)
                {
                    break;
                }

                upto += length;

                // Buffer is full: grow it so the next read has room.
                if (upto == buffer.Length)
                {
                    buffer = termAtt.ResizeTermBuffer(1 + buffer.Length);
                }
            }

            if (EndsWithSuffix(buffer, upto))
            {
                upto -= TRAILING_SUFFIX.Length;
            }

            termAtt.SetTermLength(upto);
            finalOffset = CorrectOffset(upto);
            offsetAtt.SetOffset(CorrectOffset(0), finalOffset);
            return true;
        }

        /// <summary>
        /// Sets both start and end offset to the final offset of the emitted token.
        /// </summary>
        public override void End()
        {
            // set final offset
            offsetAtt.SetOffset(finalOffset, finalOffset);
        }

        /// <summary>
        /// Switches to a new reader and re-arms the tokenizer so the next
        /// <see cref="IncrementToken"/> call emits a token again.
        /// </summary>
        /// <param name="input">The new source of text.</param>
        public override void Reset(System.IO.TextReader input)
        {
            base.Reset(input);
            done = false;
        }

        /// <summary>
        /// Ordinal (culture-independent) check that the first
        /// <paramref name="length"/> characters of <paramref name="buffer"/>
        /// end with <see cref="TRAILING_SUFFIX"/>; avoids allocating a string.
        /// </summary>
        private static bool EndsWithSuffix(char[] buffer, int length)
        {
            int start = length - TRAILING_SUFFIX.Length;
            if (start < 0)
            {
                return false;
            }

            for (int i = 0; i < TRAILING_SUFFIX.Length; i++)
            {
                if (buffer[start + i] != TRAILING_SUFFIX[i])
                {
                    return false;
                }
            }

            return true;
        }
    }
}
|
||||||
|
}
|
||||||
@ -6,6 +6,7 @@ using System.Text.RegularExpressions;
|
|||||||
using DataLayer;
|
using DataLayer;
|
||||||
using Dinah.Core;
|
using Dinah.Core;
|
||||||
using LibationFileManager;
|
using LibationFileManager;
|
||||||
|
using Lucene.Net.Analysis;
|
||||||
using Lucene.Net.Analysis.Standard;
|
using Lucene.Net.Analysis.Standard;
|
||||||
using Lucene.Net.Documents;
|
using Lucene.Net.Documents;
|
||||||
using Lucene.Net.Index;
|
using Lucene.Net.Index;
|
||||||
@ -452,8 +453,12 @@ namespace LibationSearchEngine
|
|||||||
using var index = getIndex();
|
using var index = getIndex();
|
||||||
using var searcher = new IndexSearcher(index);
|
using var searcher = new IndexSearcher(index);
|
||||||
using var analyzer = new StandardAnalyzer(Version);
|
using var analyzer = new StandardAnalyzer(Version);
|
||||||
var query = analyzer.GetQuery(defaultField, searchString);
|
using var asinAnalyzer = new AsinAnalyzer();
|
||||||
|
|
||||||
|
var dic = idIndexRules.Keys.Select(k => new KeyValuePair<string, Analyzer>(k.ToLowerInvariant(), asinAnalyzer));
|
||||||
|
using var perFieldAnalyzer = new PerFieldAnalyzerWrapper(analyzer, dic);
|
||||||
|
|
||||||
|
var query = perFieldAnalyzer.GetQuery(defaultField, searchString);
|
||||||
|
|
||||||
// lucene doesn't allow only negations. eg this returns nothing:
|
// lucene doesn't allow only negations. eg this returns nothing:
|
||||||
// -tags:hidden
|
// -tags:hidden
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user