Use new .NET regular expression source generators
This commit is contained in:
parent
5b4a4341ad
commit
ee8c0ae27b
@ -1,11 +1,11 @@
|
|||||||
using System;
|
using NPOI.XWPF.UserModel;
|
||||||
|
using System;
|
||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
|
|
||||||
namespace AppScaffolding
|
namespace AppScaffolding
|
||||||
{
|
{
|
||||||
public record UpgradeProperties
|
public partial record UpgradeProperties
|
||||||
{
|
{
|
||||||
private static readonly Regex linkstripper = new Regex(@"\[(.*)\]\(.*\)");
|
|
||||||
public string ZipUrl { get; }
|
public string ZipUrl { get; }
|
||||||
public string HtmlUrl { get; }
|
public string HtmlUrl { get; }
|
||||||
public string ZipName { get; }
|
public string ZipName { get; }
|
||||||
@ -18,17 +18,10 @@ namespace AppScaffolding
|
|||||||
HtmlUrl = htmlUrl;
|
HtmlUrl = htmlUrl;
|
||||||
ZipUrl = zipUrl;
|
ZipUrl = zipUrl;
|
||||||
LatestRelease = latestRelease;
|
LatestRelease = latestRelease;
|
||||||
Notes = stripMarkdownLinks(notes);
|
Notes = LinkStripRegex().Replace(notes, "$1");
|
||||||
}
|
}
|
||||||
private string stripMarkdownLinks(string body)
|
|
||||||
{
|
|
||||||
body = body.Replace(@"\", "");
|
|
||||||
var matches = linkstripper.Matches(body);
|
|
||||||
|
|
||||||
foreach (Match match in matches)
|
[GeneratedRegex(@"\[(.*)\]\(.*\)")]
|
||||||
body = body.Replace(match.Groups[0].Value, match.Groups[1].Value);
|
private static partial Regex LinkStripRegex();
|
||||||
|
|
||||||
return body;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -20,7 +20,7 @@ namespace DataLayer
|
|||||||
PartialDownload = 0x1000
|
PartialDownload = 0x1000
|
||||||
}
|
}
|
||||||
|
|
||||||
public class UserDefinedItem
|
public partial class UserDefinedItem
|
||||||
{
|
{
|
||||||
internal int BookId { get; private set; }
|
internal int BookId { get; private set; }
|
||||||
public Book Book { get; private set; }
|
public Book Book { get; private set; }
|
||||||
@ -51,18 +51,23 @@ namespace DataLayer
|
|||||||
public IEnumerable<string> TagsEnumerated => Tags == "" ? new string[0] : Tags.Split(null as char[], StringSplitOptions.RemoveEmptyEntries);
|
public IEnumerable<string> TagsEnumerated => Tags == "" ? new string[0] : Tags.Split(null as char[], StringSplitOptions.RemoveEmptyEntries);
|
||||||
|
|
||||||
#region sanitize tags: space delimited. Inline/denormalized. Lower case. Alpha numeric and hyphen
|
#region sanitize tags: space delimited. Inline/denormalized. Lower case. Alpha numeric and hyphen
|
||||||
// only legal chars are letters, numbers, underscores, and separating whitespace
|
|
||||||
//
|
/// <summary>
|
||||||
// technically, the only char.s which aren't easily supported are \ [ ]
|
/// only legal chars are letters, numbers, underscores, and separating whitespace
|
||||||
// however, whitelisting is far safer than blacklisting (eg: new lines, non-printable character)
|
///
|
||||||
// it's easy to expand whitelist as needed
|
/// technically, the only char.s which aren't easily supported are \ [ ]
|
||||||
// for lucene, ToLower() isn't needed because search is case-insensitive. for here, it prevents duplicates
|
/// however, whitelisting is far safer than blacklisting (eg: new lines, non-printable character)
|
||||||
//
|
/// it's easy to expand whitelist as needed
|
||||||
// there are also other allowed but misleading characters. eg: the ^ operator defines a 'boost' score
|
/// for lucene, ToLower() isn't needed because search is case-insensitive. for here, it prevents duplicates
|
||||||
// full list of characters which must be escaped:
|
///
|
||||||
// + - && || ! ( ) { } [ ] ^ " ~ * ? : \
|
/// there are also other allowed but misleading characters. eg: the ^ operator defines a 'boost' score
|
||||||
static Regex regex { get; } = new Regex(@"[^\w\d\s_]", RegexOptions.Compiled);
|
/// full list of characters which must be escaped:
|
||||||
private static string sanitize(string input)
|
/// + - && || ! ( ) { } [ ] ^ " ~ * ? : \
|
||||||
|
/// </summary>
|
||||||
|
|
||||||
|
[GeneratedRegex(@"[^\w\d\s_]")]
|
||||||
|
private static partial Regex IllegalCharacterRegex();
|
||||||
|
private static string sanitize(string input)
|
||||||
{
|
{
|
||||||
if (string.IsNullOrWhiteSpace(input))
|
if (string.IsNullOrWhiteSpace(input))
|
||||||
return "";
|
return "";
|
||||||
@ -73,9 +78,9 @@ namespace DataLayer
|
|||||||
// assume a hyphen is supposed to be an underscore
|
// assume a hyphen is supposed to be an underscore
|
||||||
.Replace("-", "_");
|
.Replace("-", "_");
|
||||||
|
|
||||||
var unique = regex
|
var unique = IllegalCharacterRegex()
|
||||||
// turn illegal characters into a space. this will also take care of turning new lines into spaces
|
// turn illegal characters into a space. this will also take care of turning new lines into spaces
|
||||||
.Replace(str, " ")
|
.Replace(str, " ")
|
||||||
// split and remove excess spaces
|
// split and remove excess spaces
|
||||||
.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
|
.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
|
||||||
// de-dup
|
// de-dup
|
||||||
|
|||||||
@ -19,7 +19,7 @@ namespace LibationFileManager
|
|||||||
static abstract IEnumerable<TagCollection> TagCollections { get; }
|
static abstract IEnumerable<TagCollection> TagCollections { get; }
|
||||||
}
|
}
|
||||||
|
|
||||||
public abstract class Templates
|
public abstract partial class Templates
|
||||||
{
|
{
|
||||||
public const string ERROR_FULL_PATH_IS_INVALID = @"No colons or full paths allowed. Eg: should not start with C:\";
|
public const string ERROR_FULL_PATH_IS_INVALID = @"No colons or full paths allowed. Eg: should not start with C:\";
|
||||||
public const string WARNING_NO_CHAPTER_NUMBER_TAG = "Should include chapter number tag in template used for naming files which are split by chapter. Ie: <ch#> or <ch# 0>";
|
public const string WARNING_NO_CHAPTER_NUMBER_TAG = "Should include chapter number tag in template used for naming files which are split by chapter. Ie: <ch#> or <ch# 0>";
|
||||||
@ -250,46 +250,31 @@ namespace LibationFileManager
|
|||||||
|
|
||||||
#endregion
|
#endregion
|
||||||
|
|
||||||
#region Tag Formatters
|
#region Tag Formatters
|
||||||
|
|
||||||
//Format must have at least one of the strings {T}, {F}, {M}, {L}, or {S}
|
/// <summary> Sort must have exactly one of the characters F, M, or L </summary>
|
||||||
private static readonly Regex FormatRegex = new(@"[Ff]ormat\((.*?(?:{[TFMLS]})+.*?)\)", RegexOptions.Compiled);
|
[GeneratedRegex(@"[Ss]ort\(\s*?([FML])\s*?\)")]
|
||||||
//Sort must have exactly one of the characters F, M, or L
|
private static partial Regex NamesSortRegex();
|
||||||
private static readonly Regex SortRegex = new(@"[Ss]ort\(\s*?([FML])\s*?\)", RegexOptions.Compiled);
|
/// <summary> Format must have at least one of the strings {T}, {F}, {M}, {L}, or {S} </summary>
|
||||||
//Max must have a 1 or 2-digit number
|
[GeneratedRegex(@"[Ff]ormat\((.*?(?:{[TFMLS]})+.*?)\)")]
|
||||||
private static readonly Regex MaxRegex = new(@"[Mm]ax\(\s*?(\d{1,2})\s*?\)", RegexOptions.Compiled);
|
private static partial Regex NamesFormatRegex();
|
||||||
//Separator can be anything
|
/// <summary> Separator can be anything </summary>
|
||||||
private static readonly Regex SeparatorRegex = new(@"[Ss]eparator\((.*?)\)", RegexOptions.Compiled);
|
[GeneratedRegex(@"[Ss]eparator\((.*?)\)")]
|
||||||
|
private static partial Regex NamesSeparatorRegex();
|
||||||
|
/// <summary> Max must have a 1 or 2-digit number </summary>
|
||||||
|
[GeneratedRegex(@"[Mm]ax\(\s*?(\d{1,2})\s*?\)")]
|
||||||
|
private static partial Regex NamesMaxRegex();
|
||||||
|
|
||||||
private static string NameListFormatter(ITemplateTag templateTag, IEnumerable<string> value, string formatString)
|
private static string NameListFormatter(ITemplateTag templateTag, IEnumerable<string> names, string formatString)
|
||||||
{
|
{
|
||||||
var names = value.Select(n => new HumanName(removeSuffix(n), Prefer.FirstOverPrefix));
|
var humanNames = names.Select(n => new HumanName(removeSuffix(n), Prefer.FirstOverPrefix));
|
||||||
|
|
||||||
var formatMatch = FormatRegex.Match(formatString);
|
|
||||||
string nameFormatString = formatMatch.Success ? formatMatch.Groups[1].Value : "{T} {F} {M} {L} {S}";
|
|
||||||
|
|
||||||
var maxMatch = MaxRegex.Match(formatString);
|
var sortedNames = sort(humanNames, formatString);
|
||||||
int maxNames = maxMatch.Success && int.TryParse(maxMatch.Groups[1].Value, out var max) ? int.Max(1, max) : int.MaxValue;
|
var nameFormatString = format(formatString, defaultValue: "{T} {F} {M} {L} {S}");
|
||||||
|
var separatorString = separator(formatString, defaultValue: ", ");
|
||||||
|
var maxNames = max(formatString, defaultValue: humanNames.Count());
|
||||||
|
|
||||||
var separatorMatch = SeparatorRegex.Match(formatString);
|
var formattedNames = string.Join(separatorString, sortedNames.Take(maxNames).Select(n => formatName(n, nameFormatString)));
|
||||||
var separatorString = separatorMatch.Success ? separatorMatch.Groups[1].Value : ", ";
|
|
||||||
|
|
||||||
var sortMatch = SortRegex.Match(formatString);
|
|
||||||
var sortedNames
|
|
||||||
= sortMatch.Success
|
|
||||||
? (
|
|
||||||
sortMatch.Groups[1].Value == "F" ? names.OrderBy(n => n.First)
|
|
||||||
: sortMatch.Groups[1].Value == "M" ? names.OrderBy(n => n.Middle)
|
|
||||||
: sortMatch.Groups[1].Value == "L" ? names.OrderBy(n => n.Last)
|
|
||||||
: names
|
|
||||||
)
|
|
||||||
: names;
|
|
||||||
|
|
||||||
var formattedNames = string.Join(
|
|
||||||
separatorString,
|
|
||||||
sortedNames
|
|
||||||
.Take(int.Min(sortedNames.Count(), maxNames))
|
|
||||||
.Select(n => formatName(n, nameFormatString)));
|
|
||||||
|
|
||||||
while (formattedNames.Contains(" "))
|
while (formattedNames.Contains(" "))
|
||||||
formattedNames = formattedNames.Replace(" ", " ");
|
formattedNames = formattedNames.Replace(" ", " ");
|
||||||
@ -299,12 +284,40 @@ namespace LibationFileManager
|
|||||||
static string removeSuffix(string namesString)
|
static string removeSuffix(string namesString)
|
||||||
{
|
{
|
||||||
namesString = namesString.Replace('’', '\'').Replace(" - Ret.", ", Ret.");
|
namesString = namesString.Replace('’', '\'').Replace(" - Ret.", ", Ret.");
|
||||||
|
|
||||||
int dashIndex = namesString.IndexOf(" - ");
|
int dashIndex = namesString.IndexOf(" - ");
|
||||||
|
|
||||||
return (dashIndex > 0 ? namesString[..dashIndex] : namesString).Trim();
|
return (dashIndex > 0 ? namesString[..dashIndex] : namesString).Trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static IEnumerable<HumanName> sort(IEnumerable<HumanName> humanNames, string formatString)
|
||||||
|
{
|
||||||
|
var sortMatch = NamesSortRegex().Match(formatString);
|
||||||
|
return
|
||||||
|
sortMatch.Success
|
||||||
|
? sortMatch.Groups[1].Value == "F" ? humanNames.OrderBy(n => n.First)
|
||||||
|
: sortMatch.Groups[1].Value == "M" ? humanNames.OrderBy(n => n.Middle)
|
||||||
|
: sortMatch.Groups[1].Value == "L" ? humanNames.OrderBy(n => n.Last)
|
||||||
|
: humanNames
|
||||||
|
: humanNames;
|
||||||
|
}
|
||||||
|
|
||||||
|
static string format(string formatString, string defaultValue)
|
||||||
|
{
|
||||||
|
var formatMatch = NamesFormatRegex().Match(formatString);
|
||||||
|
return formatMatch.Success ? formatMatch.Groups[1].Value : defaultValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
static string separator(string formatString, string defaultValue)
|
||||||
|
{
|
||||||
|
var separatorMatch = NamesSeparatorRegex().Match(formatString);
|
||||||
|
return separatorMatch.Success ? separatorMatch.Groups[1].Value : defaultValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int max(string formatString, int defaultValue)
|
||||||
|
{
|
||||||
|
var maxMatch = NamesMaxRegex().Match(formatString);
|
||||||
|
return maxMatch.Success && int.TryParse(maxMatch.Groups[1].Value, out var max) ? int.Max(1, max) : defaultValue;
|
||||||
|
}
|
||||||
|
|
||||||
static string formatName(HumanName humanName, string nameFormatString)
|
static string formatName(HumanName humanName, string nameFormatString)
|
||||||
{
|
{
|
||||||
//Single-word names parse as first names. Use it as last name.
|
//Single-word names parse as first names. Use it as last name.
|
||||||
|
|||||||
@ -5,7 +5,7 @@ using System.Text.RegularExpressions;
|
|||||||
|
|
||||||
namespace LibationSearchEngine
|
namespace LibationSearchEngine
|
||||||
{
|
{
|
||||||
internal static class LuceneRegex
|
internal static partial class LuceneRegex
|
||||||
{
|
{
|
||||||
#region pattern pieces
|
#region pattern pieces
|
||||||
// negative lookbehind: cannot be preceded by an escaping \
|
// negative lookbehind: cannot be preceded by an escaping \
|
||||||
@ -38,28 +38,32 @@ namespace LibationSearchEngine
|
|||||||
private static string fieldPattern { get; } = NOT_ESCAPED + WORD_CAPTURE + FIELD_END;
|
private static string fieldPattern { get; } = NOT_ESCAPED + WORD_CAPTURE + FIELD_END;
|
||||||
public static Regex FieldRegex { get; } = new Regex(fieldPattern, RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
|
public static Regex FieldRegex { get; } = new Regex(fieldPattern, RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
|
||||||
|
|
||||||
// auto-pad numbers to 8 char.s. This will match int.s and dates (yyyyMMdd)
|
/// <summary>
|
||||||
// positive look behind: beginning space { [ :
|
/// auto-pad numbers to 8 char.s. This will match int.s and dates (yyyyMMdd)
|
||||||
// positive look ahead: end space ] }
|
/// positive look behind: beginning space { [ :
|
||||||
public static Regex NumbersRegex { get; } = new Regex(@"(?<=^|\s|\{|\[|:)(\d+\.?\d*)(?=$|\s|\]|\})", RegexOptions.Compiled);
|
/// positive look ahead: end space ] }
|
||||||
|
/// </summary>
|
||||||
|
|
||||||
/// <summary>
|
[GeneratedRegex(@"(?<=^|\s|\{|\[|:)(\d+\.?\d*)(?=$|\s|\]|\})", RegexOptions.Compiled)]
|
||||||
/// proper bools are single keywords which are turned into keyword:True
|
public static partial Regex NumbersRegex();
|
||||||
/// if bordered by colons or inside brackets, they are not stand-alone bool keywords
|
|
||||||
/// the negative lookbehind and lookahead patterns prevent bugs where a bool keyword is also a user-defined tag:
|
/// <summary>
|
||||||
/// [israted]
|
/// proper bools are single keywords which are turned into keyword:True
|
||||||
/// parseTag => tags:israted
|
/// if bordered by colons or inside brackets, they are not stand-alone bool keywords
|
||||||
/// replaceBools => tags:israted:True
|
/// the negative lookbehind and lookahead patterns prevent bugs where a bool keyword is also a user-defined tag:
|
||||||
/// or
|
/// [israted]
|
||||||
/// [israted]
|
/// parseTag => tags:israted
|
||||||
/// replaceBools => israted:True
|
/// replaceBools => tags:israted:True
|
||||||
/// parseTag => [israted:True]
|
/// or
|
||||||
/// also don't want to apply :True where the value already exists:
|
/// [israted]
|
||||||
/// israted:false => israted:false:True
|
/// replaceBools => israted:True
|
||||||
///
|
/// parseTag => [israted:True]
|
||||||
/// despite using parens, lookahead and lookbehind are zero-length assertions which do not capture. therefore the bool search keyword is still $1 since it's the first and only capture
|
/// also don't want to apply :True where the value already exists:
|
||||||
/// </summary>
|
/// israted:false => israted:false:True
|
||||||
private static string boolPattern_parameterized { get; }
|
///
|
||||||
|
/// despite using parens, lookahead and lookbehind are zero-length assertions which do not capture. therefore the bool search keyword is still $1 since it's the first and only capture
|
||||||
|
/// </summary>
|
||||||
|
private static string boolPattern_parameterized { get; }
|
||||||
= @"
|
= @"
|
||||||
### IMPORTANT: 'ignore whitespace' is only partially honored in character sets
|
### IMPORTANT: 'ignore whitespace' is only partially honored in character sets
|
||||||
### - new lines are ok
|
### - new lines are ok
|
||||||
@ -95,5 +99,5 @@ namespace LibationSearchEngine
|
|||||||
|
|
||||||
return regex;
|
return regex;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -402,7 +402,7 @@ namespace LibationSearchEngine
|
|||||||
private static string padNumbers(string searchString)
|
private static string padNumbers(string searchString)
|
||||||
{
|
{
|
||||||
var matches = LuceneRegex
|
var matches = LuceneRegex
|
||||||
.NumbersRegex
|
.NumbersRegex()
|
||||||
.Matches(searchString)
|
.Matches(searchString)
|
||||||
.Cast<Match>()
|
.Cast<Match>()
|
||||||
.OrderByDescending(m => m.Index);
|
.OrderByDescending(m => m.Index);
|
||||||
@ -410,7 +410,7 @@ namespace LibationSearchEngine
|
|||||||
foreach (var m in matches)
|
foreach (var m in matches)
|
||||||
{
|
{
|
||||||
var replaceString = double.Parse(m.ToString()).ToLuceneString();
|
var replaceString = double.Parse(m.ToString()).ToLuceneString();
|
||||||
searchString = LuceneRegex.NumbersRegex.Replace(searchString, replaceString, 1, m.Index);
|
searchString = LuceneRegex.NumbersRegex().Replace(searchString, replaceString, 1, m.Index);
|
||||||
}
|
}
|
||||||
|
|
||||||
return searchString;
|
return searchString;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user