using Lucene.Net.Analysis.Tokenattributes;
using Lucene.Net.Analysis;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace LibationSearchEngine
{
/// <summary>
/// Analyzer for ASIN-style identifier fields. The whole field value is indexed
/// as one token (no splitting, no case folding), with a trailing ".00" removed.
/// </summary>
internal class AsinAnalyzer : Analyzer
{
    /// <summary>
    /// Creates the token stream for a field. The same tokenizer is used for
    /// every field; <paramref name="fieldName"/> is not consulted.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed (unused).</param>
    /// <param name="reader">Reader supplying the raw field text.</param>
    /// <returns>A tokenizer that yields at most one token for the whole input.</returns>
    public override TokenStream TokenStream(string fieldName, System.IO.TextReader reader)
    {
        return new AsinFilter(reader);
    }

    /// <summary>
    /// Emits the entire input as a single token and removes a
    /// trailing ".00" (left over when a value was parsed as a number).
    ///
    /// Based on Lucene.Net.Analysis.KeywordTokenizer.
    /// </summary>
    private sealed class AsinFilter : Tokenizer
    {
        /// <summary>Initial capacity requested for the term buffer.</summary>
        private const int DefaultBufferSize = 256;

        /// <summary>Suffix stripped from the end of the token when present.</summary>
        private const string TrimmedSuffix = ".00";

        /// <summary>True once the single token has been produced for the current input.</summary>
        private bool done;

        /// <summary>End offset of the emitted token; reported again by <see cref="End"/>.</summary>
        private int finalOffset;

        private readonly ITermAttribute termAtt;
        private readonly IOffsetAttribute offsetAtt;

        /// <summary>
        /// Registers the term and offset attributes and pre-sizes the term buffer.
        /// </summary>
        /// <param name="input">Reader supplying the text to tokenize.</param>
        public AsinFilter(System.IO.TextReader input) : base(input)
        {
            offsetAtt = AddAttribute<IOffsetAttribute>();
            termAtt = AddAttribute<ITermAttribute>();
            termAtt.ResizeTermBuffer(DefaultBufferSize);
        }

        /// <summary>
        /// Reads the whole input into the term buffer and emits it as one token,
        /// dropping a trailing ".00" if there is one.
        /// </summary>
        /// <returns>
        /// <c>true</c> on the first call for a given input (a token was produced);
        /// <c>false</c> on subsequent calls until the tokenizer is reset.
        /// </returns>
        public override bool IncrementToken()
        {
            if (done)
            {
                return false;
            }

            ClearAttributes();
            done = true;

            int upto = 0;
            char[] buffer = termAtt.TermBuffer();
            while (true)
            {
                // Read through the base reader rather than `input as CharReader`:
                // the cast yields null for any reader that is not a CharReader,
                // which previously surfaced as a NullReferenceException here.
                int length = input.Read(buffer, upto, buffer.Length - upto);
                if (length <= 0)
                {
                    break;
                }

                upto += length;
                if (upto == buffer.Length)
                {
                    // Buffer is full; grow it so the next Read has room.
                    buffer = termAtt.ResizeTermBuffer(1 + buffer.Length);
                }
            }

            if (EndsWithTrimmedSuffix(buffer, upto))
            {
                upto -= TrimmedSuffix.Length;
            }

            termAtt.SetTermLength(upto);
            // Offsets are derived from the trimmed length, as in the original implementation.
            finalOffset = CorrectOffset(upto);
            offsetAtt.SetOffset(CorrectOffset(0), finalOffset);
            return true;
        }

        /// <summary>
        /// Sets both start and end offset to the end offset of the emitted token.
        /// </summary>
        public override void End()
        {
            // set final offset
            offsetAtt.SetOffset(finalOffset, finalOffset);
        }

        /// <summary>
        /// Points the tokenizer at a new reader and re-arms it so the next
        /// <see cref="IncrementToken"/> call produces a token again.
        /// </summary>
        /// <param name="input">Reader supplying the new text to tokenize.</param>
        public override void Reset(System.IO.TextReader input)
        {
            base.Reset(input);
            done = false;
        }

        /// <summary>
        /// Ordinal (char-by-char) check that the first <paramref name="length"/>
        /// characters of <paramref name="buffer"/> end with ".00". Avoids both the
        /// temporary string and the culture-sensitive comparison of
        /// <c>string.EndsWith(string)</c>.
        /// </summary>
        private static bool EndsWithTrimmedSuffix(char[] buffer, int length)
        {
            if (length < TrimmedSuffix.Length)
            {
                return false;
            }

            int start = length - TrimmedSuffix.Length;
            for (int i = 0; i < TrimmedSuffix.Length; i++)
            {
                if (buffer[start + i] != TrimmedSuffix[i])
                {
                    return false;
                }
            }

            return true;
        }
    }
}
}