From 428fe84ab74044520043dd613b492c7cdc04d48c Mon Sep 17 00:00:00 2001 From: Elena Mokeeva <25465835+ryzheboka@users.noreply.github.com> Date: Mon, 26 Sep 2022 16:02:14 +0200 Subject: [PATCH] Add support for edge markers (#46) * support \b and \A * support \B and \z --- pkg/dialect/base/chars.go | 41 ++++++++++++++++++++++++++++++---- pkg/dialect/base/chars_test.go | 16 +++++++++++++ 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/pkg/dialect/base/chars.go b/pkg/dialect/base/chars.go index 9ae8964..676d98b 100644 --- a/pkg/dialect/base/chars.go +++ b/pkg/dialect/base/chars.go @@ -128,6 +128,13 @@ func (CharsBaseDialect) Begin() ClassToken { return newClassToken(helper.ByteToken('^')).withoutBrackets() } +// Begin of text (even if the flag EnableMultiline is set). +// +// Regex: `\A`. +func (CharsBaseDialect) BeginOfText() ClassToken { + return newClassToken(helper.StringToken(`\A`)).withoutBrackets() +} + // End of text or line if the flag EnableMultiline is set. // // Regex: `$`. @@ -135,6 +142,31 @@ func (CharsBaseDialect) End() ClassToken { return newClassToken(helper.ByteToken('$')).withoutBrackets() } +// End of text (even if the flag EnableMultiline is set). +// +// Regex: `\z`. +func (CharsBaseDialect) EndOfText() ClassToken { + return newClassToken(helper.StringToken(`\z`)).withoutBrackets() +} + +// A word boundary for ASCII words. Following positions count as word boundaries: +// - Beginning of string: If the first character is an ASCII word character. +// - End of string: If the last character is an ASCII word character. +// - Between a word and a non-word character. +// +// Regex: `\b`. +func (CharsBaseDialect) ASCIIWordBoundary() ClassToken { + return newClassToken(helper.StringToken(`\b`)).withoutBrackets() +} + +// A non-word boundary: +// A position between two word characters or two non-word characters. +// +// Regex: `\B`. 
+func (CharsBaseDialect) NotASCIIWordBoundary() ClassToken { + return newClassToken(helper.StringToken(`\B`)).withoutBrackets() +} + // Any character, possibly including newline if the flag AnyIncludeNewLine() is set. // // Regex: `.`. @@ -146,8 +178,9 @@ func (CharsBaseDialect) Any() ClassToken { // It is safe to pass unicode characters. // // Example usage: -// Runes("a") // == Chars.Single('a') -// Runes("ab") // == Common.Class(Chars.Single('a'), Chars.Single('b')) +// +// Runes("a") // == Chars.Single('a') +// Runes("ab") // == Common.Class(Chars.Single('a'), Chars.Single('b')) // // Regex: `[abc]`. func (CharsBaseDialect) Runes(val string) ClassToken { @@ -204,7 +237,7 @@ func (CharsBaseDialect) Single(r rune) ClassToken { // // Example usage: // -// Chars.Unicode(unicode.Greek) +// Chars.Unicode(unicode.Greek) // // Regex: `\p{Greek}`. func (d CharsBaseDialect) Unicode(table *unicode.RangeTable) ClassToken { @@ -230,7 +263,7 @@ func (d CharsBaseDialect) Unicode(table *unicode.RangeTable) ClassToken { // // Example usage: // -// Chars.UnicodeByName("Greek") +// Chars.UnicodeByName("Greek") // // Regex: `\p{Greek}`. 
func (CharsBaseDialect) UnicodeByName(name string) ClassToken { diff --git a/pkg/dialect/base/chars_test.go b/pkg/dialect/base/chars_test.go index 8c43c48..5c03bb1 100644 --- a/pkg/dialect/base/chars_test.go +++ b/pkg/dialect/base/chars_test.go @@ -78,10 +78,26 @@ func TestRexChars_base(t *testing.T) { Name: "begin", Chain: []dialect.Token{base.Chars.Begin()}, Expected: `^`, + }, { + Name: "beginOfText", + Chain: []dialect.Token{base.Chars.BeginOfText()}, + Expected: `\A`, }, { Name: "end", Chain: []dialect.Token{base.Chars.End()}, Expected: `$`, + }, { + Name: "endOfText", + Chain: []dialect.Token{base.Chars.EndOfText()}, + Expected: `\z`, + }, { + Name: "ASCIIWordBoundary", + Chain: []dialect.Token{base.Chars.ASCIIWordBoundary()}, + Expected: `\b`, + }, { + Name: "notASCIIWordBoundary", + Chain: []dialect.Token{base.Chars.NotASCIIWordBoundary()}, + Expected: `\B`, }, { Name: "single", Chain: []dialect.Token{base.Chars.Single('a')},