Skip to content

Commit

Permalink
Add support for edge markers (#46)
Browse files Browse the repository at this point in the history
* support \b and \A

* support \B and \z
  • Loading branch information
ryzheboka authored Sep 26, 2022
1 parent 510f3a9 commit 428fe84
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 4 deletions.
41 changes: 37 additions & 4 deletions pkg/dialect/base/chars.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,13 +128,45 @@ func (CharsBaseDialect) Begin() ClassToken {
return newClassToken(helper.ByteToken('^')).withoutBrackets()
}

// BeginOfText matches the beginning of the text, even if the flag
// EnableMultiline is set.
//
// Regex: `\A`.
func (CharsBaseDialect) BeginOfText() ClassToken {
	anchor := newClassToken(helper.StringToken(`\A`))

	return anchor.withoutBrackets()
}

// End matches the end of the text, or the end of a line if the flag
// EnableMultiline is set.
//
// Regex: `$`.
func (CharsBaseDialect) End() ClassToken {
	anchor := newClassToken(helper.ByteToken('$'))

	return anchor.withoutBrackets()
}

// EndOfText matches the end of the text, even if the flag EnableMultiline
// is set.
//
// Regex: `\z`.
func (CharsBaseDialect) EndOfText() ClassToken {
	anchor := newClassToken(helper.StringToken(`\z`))

	return anchor.withoutBrackets()
}

// ASCIIWordBoundary matches at a boundary of an ASCII word. The following
// positions count as word boundaries:
//   - The beginning of the string, if the first character is an ASCII word character.
//   - The end of the string, if the last character is an ASCII word character.
//   - Any position between a word character and a non-word character.
//
// Regex: `\b`.
func (CharsBaseDialect) ASCIIWordBoundary() ClassToken {
	boundary := newClassToken(helper.StringToken(`\b`))

	return boundary.withoutBrackets()
}

// NotASCIIWordBoundary matches at a position that is not an ASCII word
// boundary: a position between two word characters or between two non-word
// characters.
//
// Regex: `\B`.
func (CharsBaseDialect) NotASCIIWordBoundary() ClassToken {
	nonBoundary := newClassToken(helper.StringToken(`\B`))

	return nonBoundary.withoutBrackets()
}

// Any character, possibly including newline if the flag AnyIncludeNewLine() is set.
//
// Regex: `.`.
Expand All @@ -146,8 +178,9 @@ func (CharsBaseDialect) Any() ClassToken {
// It is safe to pass unicode characters.
//
// Example usage:
// Runes("a") // == Chars.Single('a')
// Runes("ab") // == Common.Class(Chars.Single('a'), Chars.Single('b'))
//
// Runes("a") // == Chars.Single('a')
// Runes("ab") // == Common.Class(Chars.Single('a'), Chars.Single('b'))
//
// Regex: `[abc]`.
func (CharsBaseDialect) Runes(val string) ClassToken {
Expand Down Expand Up @@ -204,7 +237,7 @@ func (CharsBaseDialect) Single(r rune) ClassToken {
//
// Example usage:
//
// Chars.Unicode(unicode.Greek)
// Chars.Unicode(unicode.Greek)
//
// Regex: `\p{Greek}`.
func (d CharsBaseDialect) Unicode(table *unicode.RangeTable) ClassToken {
Expand All @@ -230,7 +263,7 @@ func (d CharsBaseDialect) Unicode(table *unicode.RangeTable) ClassToken {
//
// Example usage:
//
// Chars.UnicodeByName("Greek")
// Chars.UnicodeByName("Greek")
//
// Regex: `\p{Greek}`.
func (CharsBaseDialect) UnicodeByName(name string) ClassToken {
Expand Down
16 changes: 16 additions & 0 deletions pkg/dialect/base/chars_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,26 @@ func TestRexChars_base(t *testing.T) {
Name: "begin",
Chain: []dialect.Token{base.Chars.Begin()},
Expected: `^`,
}, {
Name: "beginOfText",
Chain: []dialect.Token{base.Chars.BeginOfText()},
Expected: `\A`,
}, {
Name: "end",
Chain: []dialect.Token{base.Chars.End()},
Expected: `$`,
}, {
Name: "endOfText",
Chain: []dialect.Token{base.Chars.EndOfText()},
Expected: `\z`,
}, {
Name: "ASCIIWordBoundary",
Chain: []dialect.Token{base.Chars.ASCIIWordBoundary()},
Expected: `\b`,
}, {
Name: "notASCIIWordBoundary",
Chain: []dialect.Token{base.Chars.NotASCIIWordBoundary()},
Expected: `\B`,
}, {
Name: "single",
Chain: []dialect.Token{base.Chars.Single('a')},
Expand Down

0 comments on commit 428fe84

Please sign in to comment.