-
Notifications
You must be signed in to change notification settings - Fork 0
/
mysql2sqlite
289 lines (259 loc) · 9.29 KB
/
mysql2sqlite
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
#!/usr/bin/awk -f
# Authors: @esperlu, @artemyk, @gkuenning, @dumblob
# FIXME detect empty input file and issue a warning
function printerr( s ){ print s | "cat >&2" }
BEGIN {
if( ARGC != 2 ){
printerr( \
"USAGE:\n"\
" mysql2sqlite dump_mysql.sql > dump_sqlite3.sql\n" \
" OR\n" \
" mysql2sqlite dump_mysql.sql | sqlite3 sqlite.db\n" \
"\n" \
"NOTES:\n" \
" Dash in filename is not supported, because dash (-) means stdin." )
no_END = 1
exit 1
}
# Find INT_MAX supported by both this AWK (usually an ISO C signed int)
# and SQlite.
# On non-8bit-based architectures, the additional bits are safely ignored.
# 8bit (lower precision should not exist)
s="127"
# "63" + 0 avoids potential parser misbehavior
if( (s + 0) "" == s ){ INT_MAX_HALF = "63" + 0 }
# 16bit
s="32767"
if( (s + 0) "" == s ){ INT_MAX_HALF = "16383" + 0 }
# 32bit
s="2147483647"
if( (s + 0) "" == s ){ INT_MAX_HALF = "1073741823" + 0 }
# 64bit (as INTEGER in SQlite3)
s="9223372036854775807"
if( (s + 0) "" == s ){ INT_MAX_HALF = "4611686018427387904" + 0 }
# # 128bit
# s="170141183460469231731687303715884105728"
# if( (s + 0) "" == s ){ INT_MAX_HALF = "85070591730234615865843651857942052864" + 0 }
# # 256bit
# s="57896044618658097711785492504343953926634992332820282019728792003956564819968"
# if( (s + 0) "" == s ){ INT_MAX_HALF = "28948022309329048855892746252171976963317496166410141009864396001978282409984" + 0 }
# # 512bit
# s="6703903964971298549787012499102923063739682910296196688861780721860882015036773488400937149083451713845015929093243025426876941405973284973216824503042048"
# if( (s + 0) "" == s ){ INT_MAX_HALF = "3351951982485649274893506249551461531869841455148098344430890360930441007518386744200468574541725856922507964546621512713438470702986642486608412251521024" + 0 }
# # 1024bit
# s="89884656743115795386465259539451236680898848947115328636715040578866337902750481566354238661203768010560056939935696678829394884407208311246423715319737062188883946712432742638151109800623047059726541476042502884419075341171231440736956555270413618581675255342293149119973622969239858152417678164812112068608"
# if( (s + 0) "" == s ){ INT_MAX_HALF = "44942328371557897693232629769725618340449424473557664318357520289433168951375240783177119330601884005280028469967848339414697442203604155623211857659868531094441973356216371319075554900311523529863270738021251442209537670585615720368478277635206809290837627671146574559986811484619929076208839082406056034304" + 0 }
# # higher precision probably not needed
FS=",$"
print "PRAGMA synchronous = OFF;"
print "PRAGMA journal_mode = MEMORY;"
print "BEGIN TRANSACTION;"
}
# historically 3 spaces separate non-argument local variables
function bit_to_int( str_bit, powtwo, i, res, bit, overflow ){
powtwo = 1
overflow = 0
# 011101 = 1*2^0 + 0*2^1 + 1*2^2 ...
for( i = length( str_bit ); i > 0; --i ){
bit = substr( str_bit, i, 1 )
if( overflow || ( bit == 1 && res > INT_MAX_HALF ) ){
printerr( \
NR ": WARN Bit field overflow, number truncated (LSBs saved, MSBs ignored)." )
break
}
res = res + bit * powtwo
# no warning here as it might be the last iteration
if( powtwo > INT_MAX_HALF ){ overflow = 1; continue }
powtwo = powtwo * 2
}
return res
}
# CREATE TRIGGER statements have funny commenting. Remember we are in trigger.
/^\/\*.*(CREATE.*TRIGGER|create.*trigger)/ {
gsub( /^.*(TRIGGER|trigger)/, "CREATE TRIGGER" )
print
inTrigger = 1
next
}
# The end of CREATE TRIGGER has a stray comment terminator
/(END|end) \*\/;;/ { gsub( /\*\//, "" ); print; inTrigger = 0; next }
# The rest of triggers just get passed through
inTrigger != 0 { print; next }
# CREATE VIEW looks like a TABLE in comments
/^\/\*.*(CREATE.*TABLE|create.*table)/ {
inView = 1
next
}
# end of CREATE VIEW
/^(\).*(ENGINE|engine).*\*\/;)/ {
inView = 0
next
}
# content of CREATE VIEW
inView != 0 { next }
# skip comments
/^\/\*/ { next }
# skip PARTITION statements
/^ *[(]?(PARTITION|partition) +[^ ]+/ { next }
# print all INSERT lines
( /^ *\(/ && /\) *[,;] *$/ ) || /^(INSERT|insert|REPLACE|replace)/ {
prev = ""
# first replace \\ by \_ that mysqldump never generates to deal with
# sequnces like \\n that should be translated into \n, not \<LF>.
# After we convert all escapes we replace \_ by backslashes.
gsub( /\\\\/, "\\_" )
# single quotes are escaped by another single quote
gsub( /\\'/, "''" )
gsub( /\\n/, "\n" )
gsub( /\\r/, "\r" )
gsub( /\\"/, "\"" )
gsub( /\\\032/, "\032" ) # substitute char
gsub( /\\_/, "\\" )
# sqlite3 is limited to 16 significant digits of precision
while( match( $0, /0x[0-9a-fA-F]{17}/ ) ){
hexIssue = 1
sub( /0x[0-9a-fA-F]+/, substr( $0, RSTART, RLENGTH-1 ), $0 )
}
if( hexIssue ){
printerr( \
NR ": WARN Hex number trimmed (length longer than 16 chars)." )
hexIssue = 0
}
print
next
}
# CREATE DATABASE is not supported
/^(CREATE DATABASE|create database)/ { next }
# print the CREATE line as is and capture the table name
/^(CREATE|create)/ {
if( $0 ~ /IF NOT EXISTS|if not exists/ || $0 ~ /TEMPORARY|temporary/ ){
caseIssue = 1
printerr( \
NR ": WARN Potential case sensitivity issues with table/column naming\n" \
" (see INFO at the end)." )
}
if( match( $0, /`[^`]+/ ) ){
tableName = substr( $0, RSTART+1, RLENGTH-1 )
}
aInc = 0
prev = ""
firstInTable = 1
print
next
}
# Replace `FULLTEXT KEY` (probably other `XXXXX KEY`)
/^ (FULLTEXT KEY|fulltext key)/ { gsub( /[A-Za-z ]+(KEY|key)/, " KEY" ) }
# Get rid of field lengths in KEY lines
/ (PRIMARY |primary )?(KEY|key)/ { gsub( /\([0-9]+\)/, "" ) }
aInc == 1 && /PRIMARY KEY|primary key/ { next }
# Replace COLLATE xxx_xxxx_xx statements with COLLATE BINARY
/ (COLLATE|collate) [a-z0-9_]*/ { gsub( /(COLLATE|collate) [a-z0-9_]*/, "COLLATE BINARY" ) }
# Print all fields definition lines except the `KEY` lines.
/^ / && !/^( (KEY|key)|\);)/ {
if( match( $0, /[^"`]AUTO_INCREMENT|auto_increment[^"`]/) ){
aInc = 1
gsub( /AUTO_INCREMENT|auto_increment/, "PRIMARY KEY AUTOINCREMENT" )
}
gsub( /(UNIQUE KEY|unique key) (`.*`|".*") /, "UNIQUE " )
gsub( /(CHARACTER SET|character set) [^ ]+[ ,]/, "" )
# FIXME
# CREATE TRIGGER [UpdateLastTime]
# AFTER UPDATE
# ON Package
# FOR EACH ROW
# BEGIN
# UPDATE Package SET LastUpdate = CURRENT_TIMESTAMP WHERE ActionId = old.ActionId;
# END
gsub( /(ON|on) (UPDATE|update) (CURRENT_TIMESTAMP|current_timestamp)(\(\))?/, "" )
gsub( /(DEFAULT|default) (CURRENT_TIMESTAMP|current_timestamp)(\(\))?/, "DEFAULT current_timestamp")
gsub( /(COLLATE|collate) [^ ]+ /, "" )
gsub( /(ENUM|enum)[^)]+\)/, "text " )
gsub( /(SET|set)\([^)]+\)/, "text " )
gsub( /UNSIGNED|unsigned/, "" )
gsub( /_utf8mb3/, "" )
gsub( /` [^ ]*(INT|int|BIT|bit)[^ ]*/, "` integer" )
gsub( /" [^ ]*(INT|int|BIT|bit)[^ ]*/, "\" integer" )
ere_bit_field = "[bB]'[10]+'"
if( match($0, ere_bit_field) ){
sub( ere_bit_field, bit_to_int( substr( $0, RSTART +2, RLENGTH -2 -1 ) ) )
}
# remove USING BTREE and other suffixes for USING, for example: "UNIQUE KEY
# `hostname_domain` (`hostname`,`domain`) USING BTREE,"
gsub( / USING [^, ]+/, "" )
# field comments are not supported
gsub( / (COMMENT|comment).+$/, "" )
# Get commas off end of line
gsub( /,.?$/, "" )
if( prev ){
if( firstInTable ){
print prev
firstInTable = 0
}
else {
print "," prev
}
}
else {
# FIXME check if this is correct in all cases
if( match( $1,
/(CONSTRAINT|constraint) ["].*["] (FOREIGN KEY|foreign key)/ ) ){
print ","
}
}
prev = $1
}
/ ENGINE| engine/ {
if( prev ){
if( firstInTable ){
print prev
firstInTable = 0
}
else {
print "," prev
}
}
prev=""
print ");"
next
}
# `KEY` lines are extracted from the `CREATE` block and stored in array for later print
# in a separate `CREATE KEY` command. The index name is prefixed by the table name to
# avoid a sqlite error for duplicate index name.
/^( (KEY|key)|\);)/ {
if( prev ){
if( firstInTable ){
print prev
firstInTable = 0
}
else {
print "," prev
}
}
prev = ""
if( $0 == ");" ){
print
}
else {
if( match( $0, /`[^`]+/ ) ){
indexName = substr( $0, RSTART+1, RLENGTH-1 )
}
if( match( $0, /\([^()]+/ ) ){
indexKey = substr( $0, RSTART+1, RLENGTH-1 )
}
# idx_ prefix to avoid name clashes (they really happen!)
key[tableName] = key[tableName] "CREATE INDEX \"idx_" \
tableName "_" indexName "\" ON \"" tableName "\" (" indexKey ");\n"
}
}
END {
if( no_END ){ exit 1}
# print all KEY creation lines.
for( table in key ){ printf key[table] }
print "END TRANSACTION;"
if( caseIssue ){
printerr( \
"INFO Pure sqlite identifiers are case insensitive (even if quoted\n" \
" or if ASCII) and doesnt cross-check TABLE and TEMPORARY TABLE\n" \
" identifiers. Thus expect errors like \"table T has no column named F\".")
}
}