Use new .NET regular expression source generators
This commit is contained in:
parent
5b4a4341ad
commit
ee8c0ae27b
@ -1,11 +1,11 @@
|
||||
using System;
|
||||
using NPOI.XWPF.UserModel;
|
||||
using System;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace AppScaffolding
|
||||
{
|
||||
public record UpgradeProperties
|
||||
public partial record UpgradeProperties
|
||||
{
|
||||
private static readonly Regex linkstripper = new Regex(@"\[(.*)\]\(.*\)");
|
||||
public string ZipUrl { get; }
|
||||
public string HtmlUrl { get; }
|
||||
public string ZipName { get; }
|
||||
@ -18,17 +18,10 @@ namespace AppScaffolding
|
||||
HtmlUrl = htmlUrl;
|
||||
ZipUrl = zipUrl;
|
||||
LatestRelease = latestRelease;
|
||||
Notes = stripMarkdownLinks(notes);
|
||||
Notes = LinkStripRegex().Replace(notes, "$1");
|
||||
}
|
||||
private string stripMarkdownLinks(string body)
|
||||
{
|
||||
body = body.Replace(@"\", "");
|
||||
var matches = linkstripper.Matches(body);
|
||||
|
||||
foreach (Match match in matches)
|
||||
body = body.Replace(match.Groups[0].Value, match.Groups[1].Value);
|
||||
|
||||
return body;
|
||||
}
|
||||
[GeneratedRegex(@"\[(.*)\]\(.*\)")]
|
||||
private static partial Regex LinkStripRegex();
|
||||
}
|
||||
}
|
||||
|
||||
@ -20,7 +20,7 @@ namespace DataLayer
|
||||
PartialDownload = 0x1000
|
||||
}
|
||||
|
||||
public class UserDefinedItem
|
||||
public partial class UserDefinedItem
|
||||
{
|
||||
internal int BookId { get; private set; }
|
||||
public Book Book { get; private set; }
|
||||
@ -51,17 +51,22 @@ namespace DataLayer
|
||||
public IEnumerable<string> TagsEnumerated => Tags == "" ? new string[0] : Tags.Split(null as char[], StringSplitOptions.RemoveEmptyEntries);
|
||||
|
||||
#region sanitize tags: space delimited. Inline/denormalized. Lower case. Alpha numeric and hyphen
|
||||
// only legal chars are letters numbers underscores and separating whitespace
|
||||
//
|
||||
// technically, the only char.s which aren't easily supported are \ [ ]
|
||||
// however, whitelisting is far safer than blacklisting (eg: new lines, non-printable character)
|
||||
// it's easy to expand whitelist as needed
|
||||
// for lucene, ToLower() isn't needed because search is case-inspecific. for here, it prevents duplicates
|
||||
//
|
||||
// there are also other allowed but misleading characters. eg: the ^ operator defines a 'boost' score
|
||||
// full list of characters which must be escaped:
|
||||
// + - && || ! ( ) { } [ ] ^ " ~ * ? : \
|
||||
static Regex regex { get; } = new Regex(@"[^\w\d\s_]", RegexOptions.Compiled);
|
||||
|
||||
/// <summary>
|
||||
/// only legal chars are letters numbers underscores and separating whitespace
|
||||
///
|
||||
/// technically, the only char.s which aren't easily supported are \ [ ]
|
||||
/// however, whitelisting is far safer than blacklisting (eg: new lines, non-printable character)
|
||||
/// it's easy to expand whitelist as needed
|
||||
/// for lucene, ToLower() isn't needed because search is case-inspecific. for here, it prevents duplicates
|
||||
///
|
||||
/// there are also other allowed but misleading characters. eg: the ^ operator defines a 'boost' score
|
||||
/// full list of characters which must be escaped:
|
||||
/// + - && || ! ( ) { } [ ] ^ " ~ * ? : \
|
||||
/// </summary>
|
||||
|
||||
[GeneratedRegex(@"[^\w\d\s_]")]
|
||||
private static partial Regex IllegalCharacterRegex();
|
||||
private static string sanitize(string input)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(input))
|
||||
@ -73,7 +78,7 @@ namespace DataLayer
|
||||
// assume a hyphen is supposed to be an underscore
|
||||
.Replace("-", "_");
|
||||
|
||||
var unique = regex
|
||||
var unique = IllegalCharacterRegex()
|
||||
// turn illegal characters into a space. this will also take care of turning new lines into spaces
|
||||
.Replace(str, " ")
|
||||
// split and remove excess spaces
|
||||
|
||||
@ -19,7 +19,7 @@ namespace LibationFileManager
|
||||
static abstract IEnumerable<TagCollection> TagCollections { get; }
|
||||
}
|
||||
|
||||
public abstract class Templates
|
||||
public abstract partial class Templates
|
||||
{
|
||||
public const string ERROR_FULL_PATH_IS_INVALID = @"No colons or full paths allowed. Eg: should not start with C:\";
|
||||
public const string WARNING_NO_CHAPTER_NUMBER_TAG = "Should include chapter number tag in template used for naming files which are split by chapter. Ie: <ch#> or <ch# 0>";
|
||||
@ -252,44 +252,29 @@ namespace LibationFileManager
|
||||
|
||||
#region Tag Formatters
|
||||
|
||||
//Format must have at least one of the string {T}, {F}, {M}, {L}, or {S}
|
||||
private static readonly Regex FormatRegex = new(@"[Ff]ormat\((.*?(?:{[TFMLS]})+.*?)\)", RegexOptions.Compiled);
|
||||
//Sort must have exactly one of the characters F, M, or L
|
||||
private static readonly Regex SortRegex = new(@"[Ss]ort\(\s*?([FML])\s*?\)", RegexOptions.Compiled);
|
||||
//Max must have a 1 or 2-digit number
|
||||
private static readonly Regex MaxRegex = new(@"[Mm]ax\(\s*?(\d{1,2})\s*?\)", RegexOptions.Compiled);
|
||||
//Separator can be anything
|
||||
private static readonly Regex SeparatorRegex = new(@"[Ss]eparator\((.*?)\)", RegexOptions.Compiled);
|
||||
/// <summary> Sort must have exactly one of the characters F, M, or L </summary>
|
||||
[GeneratedRegex(@"[Ss]ort\(\s*?([FML])\s*?\)")]
|
||||
private static partial Regex NamesSortRegex();
|
||||
/// <summary> Format must have at least one of the string {T}, {F}, {M}, {L}, or {S} </summary>
|
||||
[GeneratedRegex(@"[Ff]ormat\((.*?(?:{[TFMLS]})+.*?)\)")]
|
||||
private static partial Regex NamesFormatRegex();
|
||||
/// <summary> Separator can be anything </summary>
|
||||
[GeneratedRegex(@"[Ss]eparator\((.*?)\)")]
|
||||
private static partial Regex NamesSeparatorRegex();
|
||||
/// <summary> Max must have a 1 or 2-digit number </summary>
|
||||
[GeneratedRegex(@"[Mm]ax\(\s*?(\d{1,2})\s*?\)")]
|
||||
private static partial Regex NamesMaxRegex();
|
||||
|
||||
private static string NameListFormatter(ITemplateTag templateTag, IEnumerable<string> value, string formatString)
|
||||
private static string NameListFormatter(ITemplateTag templateTag, IEnumerable<string> names, string formatString)
|
||||
{
|
||||
var names = value.Select(n => new HumanName(removeSuffix(n), Prefer.FirstOverPrefix));
|
||||
var humanNames = names.Select(n => new HumanName(removeSuffix(n), Prefer.FirstOverPrefix));
|
||||
|
||||
var formatMatch = FormatRegex.Match(formatString);
|
||||
string nameFormatString = formatMatch.Success ? formatMatch.Groups[1].Value : "{T} {F} {M} {L} {S}";
|
||||
var sortedNames = sort(humanNames, formatString);
|
||||
var nameFormatString = format(formatString, defaultValue: "{T} {F} {M} {L} {S}");
|
||||
var separatorString = separator(formatString, defaultValue: ", ");
|
||||
var maxNames = max(formatString, defaultValue: humanNames.Count());
|
||||
|
||||
var maxMatch = MaxRegex.Match(formatString);
|
||||
int maxNames = maxMatch.Success && int.TryParse(maxMatch.Groups[1].Value, out var max) ? int.Max(1, max) : int.MaxValue;
|
||||
|
||||
var separatorMatch = SeparatorRegex.Match(formatString);
|
||||
var separatorString = separatorMatch.Success ? separatorMatch.Groups[1].Value : ", ";
|
||||
|
||||
var sortMatch = SortRegex.Match(formatString);
|
||||
var sortedNames
|
||||
= sortMatch.Success
|
||||
? (
|
||||
sortMatch.Groups[1].Value == "F" ? names.OrderBy(n => n.First)
|
||||
: sortMatch.Groups[1].Value == "M" ? names.OrderBy(n => n.Middle)
|
||||
: sortMatch.Groups[1].Value == "L" ? names.OrderBy(n => n.Last)
|
||||
: names
|
||||
)
|
||||
: names;
|
||||
|
||||
var formattedNames = string.Join(
|
||||
separatorString,
|
||||
sortedNames
|
||||
.Take(int.Min(sortedNames.Count(), maxNames))
|
||||
.Select(n => formatName(n, nameFormatString)));
|
||||
var formattedNames = string.Join(separatorString, sortedNames.Take(maxNames).Select(n => formatName(n, nameFormatString)));
|
||||
|
||||
while (formattedNames.Contains(" "))
|
||||
formattedNames = formattedNames.Replace(" ", " ");
|
||||
@ -299,12 +284,40 @@ namespace LibationFileManager
|
||||
static string removeSuffix(string namesString)
|
||||
{
|
||||
namesString = namesString.Replace('’', '\'').Replace(" - Ret.", ", Ret.");
|
||||
|
||||
int dashIndex = namesString.IndexOf(" - ");
|
||||
|
||||
return (dashIndex > 0 ? namesString[..dashIndex] : namesString).Trim();
|
||||
}
|
||||
|
||||
static IEnumerable<HumanName> sort(IEnumerable<HumanName> humanNames, string formatString)
|
||||
{
|
||||
var sortMatch = NamesSortRegex().Match(formatString);
|
||||
return
|
||||
sortMatch.Success
|
||||
? sortMatch.Groups[1].Value == "F" ? humanNames.OrderBy(n => n.First)
|
||||
: sortMatch.Groups[1].Value == "M" ? humanNames.OrderBy(n => n.Middle)
|
||||
: sortMatch.Groups[1].Value == "L" ? humanNames.OrderBy(n => n.Last)
|
||||
: humanNames
|
||||
: humanNames;
|
||||
}
|
||||
|
||||
static string format(string formatString, string defaultValue)
|
||||
{
|
||||
var formatMatch = NamesFormatRegex().Match(formatString);
|
||||
return formatMatch.Success ? formatMatch.Groups[1].Value : defaultValue;
|
||||
}
|
||||
|
||||
static string separator(string formatString, string defaultValue)
|
||||
{
|
||||
var separatorMatch = NamesSeparatorRegex().Match(formatString);
|
||||
return separatorMatch.Success ? separatorMatch.Groups[1].Value : defaultValue;
|
||||
}
|
||||
|
||||
static int max(string formatString, int defaultValue)
|
||||
{
|
||||
var maxMatch = NamesMaxRegex().Match(formatString);
|
||||
return maxMatch.Success && int.TryParse(maxMatch.Groups[1].Value, out var max) ? int.Max(1, max) : defaultValue;
|
||||
}
|
||||
|
||||
static string formatName(HumanName humanName, string nameFormatString)
|
||||
{
|
||||
//Single-word names parse as first names. Use it as last name.
|
||||
|
||||
@ -5,7 +5,7 @@ using System.Text.RegularExpressions;
|
||||
|
||||
namespace LibationSearchEngine
|
||||
{
|
||||
internal static class LuceneRegex
|
||||
internal static partial class LuceneRegex
|
||||
{
|
||||
#region pattern pieces
|
||||
// negative lookbehind: cannot be preceeded by an escaping \
|
||||
@ -38,10 +38,14 @@ namespace LibationSearchEngine
|
||||
private static string fieldPattern { get; } = NOT_ESCAPED + WORD_CAPTURE + FIELD_END;
|
||||
public static Regex FieldRegex { get; } = new Regex(fieldPattern, RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
|
||||
|
||||
// auto-pad numbers to 8 char.s. This will match int.s and dates (yyyyMMdd)
|
||||
// positive look behind: beginning space { [ :
|
||||
// positive look ahead: end space ] }
|
||||
public static Regex NumbersRegex { get; } = new Regex(@"(?<=^|\s|\{|\[|:)(\d+\.?\d*)(?=$|\s|\]|\})", RegexOptions.Compiled);
|
||||
/// <summary>
|
||||
/// auto-pad numbers to 8 char.s. This will match int.s and dates (yyyyMMdd)
|
||||
/// positive look behind: beginning space { [ :
|
||||
/// positive look ahead: end space ] }
|
||||
/// </summary>
|
||||
|
||||
[GeneratedRegex(@"(?<=^|\s|\{|\[|:)(\d+\.?\d*)(?=$|\s|\]|\})", RegexOptions.Compiled)]
|
||||
public static partial Regex NumbersRegex();
|
||||
|
||||
/// <summary>
|
||||
/// proper bools are single keywords which are turned into keyword:True
|
||||
|
||||
@ -402,7 +402,7 @@ namespace LibationSearchEngine
|
||||
private static string padNumbers(string searchString)
|
||||
{
|
||||
var matches = LuceneRegex
|
||||
.NumbersRegex
|
||||
.NumbersRegex()
|
||||
.Matches(searchString)
|
||||
.Cast<Match>()
|
||||
.OrderByDescending(m => m.Index);
|
||||
@ -410,7 +410,7 @@ namespace LibationSearchEngine
|
||||
foreach (var m in matches)
|
||||
{
|
||||
var replaceString = double.Parse(m.ToString()).ToLuceneString();
|
||||
searchString = LuceneRegex.NumbersRegex.Replace(searchString, replaceString, 1, m.Index);
|
||||
searchString = LuceneRegex.NumbersRegex().Replace(searchString, replaceString, 1, m.Index);
|
||||
}
|
||||
|
||||
return searchString;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user