diff --git a/go.mod b/go.mod index c1a6c5f496..5700857baf 100644 --- a/go.mod +++ b/go.mod @@ -174,6 +174,6 @@ require ( replace ( github.com/pingcap/log => github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9 - github.com/pingcap/parser => github.com/sjjian/parser v0.0.0-20220614062700-e3219e3d6833 + github.com/pingcap/parser => github.com/sjjian/parser v0.0.0-20231020015929-c5d7ca486d80 google.golang.org/grpc => google.golang.org/grpc v1.29.0 ) diff --git a/go.sum b/go.sum index 9a6cdf9ceb..94a7230810 100644 --- a/go.sum +++ b/go.sum @@ -839,8 +839,8 @@ github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrf github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/sjjian/parser v0.0.0-20220614062700-e3219e3d6833 h1:0JHwfRBrBZr/QcLo7+q9C3xOt8oXRKLCnZOrHN+Bza0= -github.com/sjjian/parser v0.0.0-20220614062700-e3219e3d6833/go.mod h1:Qq2tnreUXwVo7NAKAHmbWFsgqpDUkxwhJCClY+ZCudA= +github.com/sjjian/parser v0.0.0-20231020015929-c5d7ca486d80 h1:2pFDv4Gd1vVqHwPEzWiTN4vgP9jLKZDuVP/jEcC0SE0= +github.com/sjjian/parser v0.0.0-20231020015929-c5d7ca486d80/go.mod h1:Qq2tnreUXwVo7NAKAHmbWFsgqpDUkxwhJCClY+ZCudA= github.com/skeema/knownhosts v1.2.0 h1:h9r9cf0+u7wSE+M183ZtMGgOJKiL96brpaz5ekfJCpM= github.com/skeema/knownhosts v1.2.0/go.mod h1:g4fPeYpque7P0xefxtGzV81ihjC8sX2IqpAoNkjxbMo= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= diff --git a/vendor/github.com/pingcap/parser/charset/charset.go b/vendor/github.com/pingcap/parser/charset/charset.go index 1758569ac3..6ddcd7aa8e 100644 --- a/vendor/github.com/pingcap/parser/charset/charset.go +++ b/vendor/github.com/pingcap/parser/charset/charset.go @@ -451,4 +451,6 @@ func init() { for id, name := range mysql.Collations { collationsNameMap[name] = collationsIDMap[int(id)] } + + InitAllCharset() } diff --git a/vendor/github.com/pingcap/parser/charset/charset_extra.go b/vendor/github.com/pingcap/parser/charset/charset_extra.go new file mode 100644 index 0000000000..887b06662c --- /dev/null +++ b/vendor/github.com/pingcap/parser/charset/charset_extra.go @@ -0,0 +1,40 @@ +package charset + +import "strings" + +// 1. support utf8mb3, 最新解析器代码是支持了 utf8mb3, 但是解析器库又挪回了TiDB仓库,整个改动太大了。因此目前还是基于老的独立解析器仓库进行定制。除非有其他新SQL不得不支持。 +// 2. 支持能成功解析出所有字符集和排序。当前的解析库解析SQL时,如果存在 TiDB 不支持的字符集和排序则会显式的报错,不符合预期。 +func InitAllCharset() { + // 将所有的字符集都放进 `charsets`, 这是个代表当前支持的字符集。 + for _, c := range collations { + if charset, ok := charsets[c.CharsetName]; ok { + charset.Collations[c.Name] = c + if c.IsDefault { + charset.DefaultCollation = c.Name + } + } else { + charsets[c.CharsetName] = &Charset{ + Name: c.CharsetName, + DefaultCollation: c.Name, + Collations: map[string]*Collation{ + c.Name: c, + }, + } + } + } + + // utf8mb3 直接引用 utf8 的 charset. + charsets["utf8mb3"] = charsets["utf8"] + + // 将所有 utf8 的字符集排序都使用 utf8mb3 别名重新引用一次,保证通过 utf8mb3 相关字符集排序解析器支持。 + utf8mb3Collations := []*Collation{} + for _, c := range collations { + if c.CharsetName == "utf8" { + aliasName := strings.Replace(c.Name, "utf8_", "utf8mb3_", 1) + collationsNameMap[aliasName] = c + utf8mb3Collations = append(utf8mb3Collations, c) + } + } + // collations 原始的记录表,此处的改动是为了保证单测 `TestGetDefaultCollation` 成功。 + collations = append(collations, utf8mb3Collations...) +} diff --git a/vendor/github.com/pingcap/parser/perfect_parser.go b/vendor/github.com/pingcap/parser/perfect_parser.go index 870aba416c..a2ff6717ad 100644 --- a/vendor/github.com/pingcap/parser/perfect_parser.go +++ b/vendor/github.com/pingcap/parser/perfect_parser.go @@ -2,6 +2,7 @@ package parser import ( "bytes" + "github.com/pingcap/parser/ast" ) @@ -70,6 +71,12 @@ ScanLoop: endOffset = l.lastScanOffset break ScanLoop } + case invalid: + // `Lex`内`scan`在进行token遍历时,当有特殊字符时返回invalid,此时未调用`inc`进行滑动,导致每次遍历同一个pos点位触发死循环。有多种情况会返回invalid。 + // 对于解析器本身没影响,因为 token 提取失败就退出了,但是我们需要继续遍历。 + if l.lastScanOffset == l.r.p.Offset { + l.r.inc() + } } } unparsedStmtBuf := bytes.Buffer{} diff --git a/vendor/modules.txt b/vendor/modules.txt index e84c2d3ab6..29bdb57907 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -512,7 +512,7 @@ github.com/pingcap/errors # github.com/pingcap/log v0.0.0-20210317133921-96f4fcab92a4 => github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9 ## explicit; go 1.13 github.com/pingcap/log -# github.com/pingcap/parser v3.0.12+incompatible => github.com/sjjian/parser v0.0.0-20220614062700-e3219e3d6833 +# github.com/pingcap/parser v3.0.12+incompatible => github.com/sjjian/parser v0.0.0-20231020015929-c5d7ca486d80 ## explicit; go 1.13 github.com/pingcap/parser github.com/pingcap/parser/ast @@ -887,5 +887,5 @@ vitess.io/vitess/go/vt/sysvars vitess.io/vitess/go/vt/vterrors vitess.io/vitess/go/vt/vtgate/evalengine # github.com/pingcap/log => github.com/pingcap/log v0.0.0-20191012051959-b742a5d432e9 -# github.com/pingcap/parser => github.com/sjjian/parser v0.0.0-20220614062700-e3219e3d6833 +# github.com/pingcap/parser => github.com/sjjian/parser v0.0.0-20231020015929-c5d7ca486d80 # google.golang.org/grpc => google.golang.org/grpc v1.29.0