|
#!/usr/bin/awk -f |
|
|
|
# Authors: @esperlu, @artemyk, @gkuenning, @dumblob |
|
|
|
# FIXME detect empty input file and issue a warning |
|
|
|
function printerr( s ){ print s | "cat >&2" } |
|
|
|
BEGIN { |
|
if( ARGC != 2 ){ |
|
printerr( \ |
|
"USAGE:\n"\ |
|
" mysql2sqlite dump_mysql.sql > dump_sqlite3.sql\n" \ |
|
" OR\n" \ |
|
" mysql2sqlite dump_mysql.sql | sqlite3 sqlite.db\n" \ |
|
"\n" \ |
|
"NOTES:\n" \ |
|
" Dash in filename is not supported, because dash (-) means stdin." ) |
|
no_END = 1 |
|
exit 1 |
|
} |
|
|
|
# Find INT_MAX supported by both this AWK (usually an ISO C signed int) |
|
# and SQlite. |
|
# On non-8bit-based architectures, the additional bits are safely ignored. |
|
|
|
# 8bit (lower precision should not exist) |
|
s="127" |
|
# "63" + 0 avoids potential parser misbehavior |
|
if( (s + 0) "" == s ){ INT_MAX_HALF = "63" + 0 } |
|
# 16bit |
|
s="32767" |
|
if( (s + 0) "" == s ){ INT_MAX_HALF = "16383" + 0 } |
|
# 32bit |
|
s="2147483647" |
|
if( (s + 0) "" == s ){ INT_MAX_HALF = "1073741823" + 0 } |
|
# 64bit (as INTEGER in SQlite3) |
|
s="9223372036854775807" |
|
if( (s + 0) "" == s ){ INT_MAX_HALF = "4611686018427387904" + 0 } |
|
# # 128bit |
|
# s="170141183460469231731687303715884105728" |
|
# if( (s + 0) "" == s ){ INT_MAX_HALF = "85070591730234615865843651857942052864" + 0 } |
|
# # 256bit |
|
# s="57896044618658097711785492504343953926634992332820282019728792003956564819968" |
|
# if( (s + 0) "" == s ){ INT_MAX_HALF = "28948022309329048855892746252171976963317496166410141009864396001978282409984" + 0 } |
|
# # 512bit |
|
# s="6703903964971298549787012499102923063739682910296196688861780721860882015036773488400937149083451713845015929093243025426876941405973284973216824503042048" |
|
# if( (s + 0) "" == s ){ INT_MAX_HALF = "3351951982485649274893506249551461531869841455148098344430890360930441007518386744200468574541725856922507964546621512713438470702986642486608412251521024" + 0 } |
|
# # 1024bit |
|
# s="89884656743115795386465259539451236680898848947115328636715040578866337902750481566354238661203768010560056939935696678829394884407208311246423715319737062188883946712432742638151109800623047059726541476042502884419075341171231440736956555270413618581675255342293149119973622969239858152417678164812112068608" |
|
# if( (s + 0) "" == s ){ INT_MAX_HALF = "44942328371557897693232629769725618340449424473557664318357520289433168951375240783177119330601884005280028469967848339414697442203604155623211857659868531094441973356216371319075554900311523529863270738021251442209537670585615720368478277635206809290837627671146574559986811484619929076208839082406056034304" + 0 } |
|
# # higher precision probably not needed |
|
|
|
FS=",$" |
|
print "PRAGMA synchronous = OFF;" |
|
print "PRAGMA journal_mode = MEMORY;" |
|
print "BEGIN TRANSACTION;" |
|
} |
|
|
|
# historically 3 spaces separate non-argument local variables |
|
function bit_to_int( str_bit, powtwo, i, res, bit, overflow ){ |
|
powtwo = 1 |
|
overflow = 0 |
|
# 011101 = 1*2^0 + 0*2^1 + 1*2^2 ... |
|
for( i = length( str_bit ); i > 0; --i ){ |
|
bit = substr( str_bit, i, 1 ) |
|
if( overflow || ( bit == 1 && res > INT_MAX_HALF ) ){ |
|
printerr( \ |
|
NR ": WARN Bit field overflow, number truncated (LSBs saved, MSBs ignored)." ) |
|
break |
|
} |
|
res = res + bit * powtwo |
|
# no warning here as it might be the last iteration |
|
if( powtwo > INT_MAX_HALF ){ overflow = 1; continue } |
|
powtwo = powtwo * 2 |
|
} |
|
return res |
|
} |
|
|
|
# CREATE TRIGGER statements have funny commenting. Remember we are in trigger. |
|
/^\/\*.*(CREATE.*TRIGGER|create.*trigger)/ { |
|
gsub( /^.*(TRIGGER|trigger)/, "CREATE TRIGGER" ) |
|
print |
|
inTrigger = 1 |
|
next |
|
} |
|
# The end of CREATE TRIGGER has a stray comment terminator |
|
/(END|end) \*\/;;/ { gsub( /\*\//, "" ); print; inTrigger = 0; next } |
|
# The rest of triggers just get passed through |
|
inTrigger != 0 { print; next } |
|
|
|
# CREATE VIEW looks like a TABLE in comments |
|
/^\/\*.*(CREATE.*TABLE|create.*table)/ { |
|
inView = 1 |
|
next |
|
} |
|
# end of CREATE VIEW |
|
/^(\).*(ENGINE|engine).*\*\/;)/ { |
|
inView = 0 |
|
next |
|
} |
|
# content of CREATE VIEW |
|
inView != 0 { next } |
|
|
|
# skip comments |
|
/^\/\*/ { next } |
|
|
|
# skip PARTITION statements |
|
/^ *[(]?(PARTITION|partition) +[^ ]+/ { next } |
|
|
|
# print all INSERT lines |
|
( /^ *\(/ && /\) *[,;] *$/ ) || /^(INSERT|insert|REPLACE|replace)/ { |
|
prev = "" |
|
|
|
# first replace \\ by \_ that mysqldump never generates to deal with |
|
# sequnces like \\n that should be translated into \n, not \<LF>. |
|
# After we convert all escapes we replace \_ by backslashes. |
|
gsub( /\\\\/, "\\_" ) |
|
|
|
# single quotes are escaped by another single quote |
|
gsub( /\\'/, "''" ) |
|
gsub( /\\n/, "\n" ) |
|
gsub( /\\r/, "\r" ) |
|
gsub( /\\"/, "\"" ) |
|
gsub( /\\\032/, "\032" ) # substitute char |
|
|
|
gsub( /\\_/, "\\" ) |
|
|
|
# sqlite3 is limited to 16 significant digits of precision |
|
while( match( $0, /0x[0-9a-fA-F]{17}/ ) ){ |
|
hexIssue = 1 |
|
sub( /0x[0-9a-fA-F]+/, substr( $0, RSTART, RLENGTH-1 ), $0 ) |
|
} |
|
if( hexIssue ){ |
|
printerr( \ |
|
NR ": WARN Hex number trimmed (length longer than 16 chars)." ) |
|
hexIssue = 0 |
|
} |
|
print |
|
next |
|
} |
|
|
|
# CREATE DATABASE is not supported |
|
/^(CREATE DATABASE|create database)/ { next } |
|
|
|
# print the CREATE line as is and capture the table name |
|
/^(CREATE|create)/ { |
|
if( $0 ~ /IF NOT EXISTS|if not exists/ || $0 ~ /TEMPORARY|temporary/ ){ |
|
caseIssue = 1 |
|
printerr( \ |
|
NR ": WARN Potential case sensitivity issues with table/column naming\n" \ |
|
" (see INFO at the end)." ) |
|
} |
|
if( match( $0, /`[^`]+/ ) ){ |
|
tableName = substr( $0, RSTART+1, RLENGTH-1 ) |
|
} |
|
aInc = 0 |
|
prev = "" |
|
firstInTable = 1 |
|
print |
|
next |
|
} |
|
|
|
# Replace `FULLTEXT KEY` (probably other `XXXXX KEY`) |
|
/^ (FULLTEXT KEY|fulltext key)/ { gsub( /[A-Za-z ]+(KEY|key)/, " KEY" ) } |
|
|
|
# Get rid of field lengths in KEY lines |
|
/ (PRIMARY |primary )?(KEY|key)/ { gsub( /\([0-9]+\)/, "" ) } |
|
|
|
aInc == 1 && /PRIMARY KEY|primary key/ { next } |
|
|
|
# Replace COLLATE xxx_xxxx_xx statements with COLLATE BINARY |
|
/ (COLLATE|collate) [a-z0-9_]*/ { gsub( /(COLLATE|collate) [a-z0-9_]*/, "COLLATE BINARY" ) } |
|
|
|
# Print all fields definition lines except the `KEY` lines. |
|
/^ / && !/^( (KEY|key)|\);)/ { |
|
if( match( $0, /[^"`]AUTO_INCREMENT|auto_increment[^"`]/) ){ |
|
aInc = 1 |
|
gsub( /AUTO_INCREMENT|auto_increment/, "PRIMARY KEY AUTOINCREMENT" ) |
|
} |
|
gsub( /(UNIQUE KEY|unique key) (`.*`|".*") /, "UNIQUE " ) |
|
gsub( /(CHARACTER SET|character set) [^ ]+[ ,]/, "" ) |
|
# FIXME |
|
# CREATE TRIGGER [UpdateLastTime] |
|
# AFTER UPDATE |
|
# ON Package |
|
# FOR EACH ROW |
|
# BEGIN |
|
# UPDATE Package SET LastUpdate = CURRENT_TIMESTAMP WHERE ActionId = old.ActionId; |
|
# END |
|
gsub( /(ON|on) (UPDATE|update) (CURRENT_TIMESTAMP|current_timestamp)(\(\))?/, "" ) |
|
gsub( /(DEFAULT|default) (CURRENT_TIMESTAMP|current_timestamp)(\(\))?/, "DEFAULT current_timestamp") |
|
gsub( /(COLLATE|collate) [^ ]+ /, "" ) |
|
gsub( /(ENUM|enum)[^)]+\)/, "text " ) |
|
gsub( /(SET|set)\([^)]+\)/, "text " ) |
|
gsub( /UNSIGNED|unsigned/, "" ) |
|
gsub( /_utf8mb3/, "" ) |
|
gsub( /` [^ ]*(INT|int|BIT|bit)[^ ]*/, "` integer" ) |
|
gsub( /" [^ ]*(INT|int|BIT|bit)[^ ]*/, "\" integer" ) |
|
ere_bit_field = "[bB]'[10]+'" |
|
if( match($0, ere_bit_field) ){ |
|
sub( ere_bit_field, bit_to_int( substr( $0, RSTART +2, RLENGTH -2 -1 ) ) ) |
|
} |
|
|
|
# remove USING BTREE and other suffixes for USING, for example: "UNIQUE KEY |
|
# `hostname_domain` (`hostname`,`domain`) USING BTREE," |
|
gsub( / USING [^, ]+/, "" ) |
|
|
|
# field comments are not supported |
|
gsub( / (COMMENT|comment).+$/, "" ) |
|
# Get commas off end of line |
|
gsub( /,.?$/, "" ) |
|
if( prev ){ |
|
if( firstInTable ){ |
|
print prev |
|
firstInTable = 0 |
|
} |
|
else { |
|
print "," prev |
|
} |
|
} |
|
else { |
|
# FIXME check if this is correct in all cases |
|
if( match( $1, |
|
/(CONSTRAINT|constraint) ["].*["] (FOREIGN KEY|foreign key)/ ) ){ |
|
print "," |
|
} |
|
} |
|
prev = $1 |
|
} |
|
|
|
/ ENGINE| engine/ { |
|
if( prev ){ |
|
if( firstInTable ){ |
|
print prev |
|
firstInTable = 0 |
|
} |
|
else { |
|
print "," prev |
|
} |
|
} |
|
prev="" |
|
print ");" |
|
next |
|
} |
|
# `KEY` lines are extracted from the `CREATE` block and stored in array for later print |
|
# in a separate `CREATE KEY` command. The index name is prefixed by the table name to |
|
# avoid a sqlite error for duplicate index name. |
|
/^( (KEY|key)|\);)/ { |
|
if( prev ){ |
|
if( firstInTable ){ |
|
print prev |
|
firstInTable = 0 |
|
} |
|
else { |
|
print "," prev |
|
} |
|
} |
|
prev = "" |
|
if( $0 == ");" ){ |
|
print |
|
} |
|
else { |
|
if( match( $0, /`[^`]+/ ) ){ |
|
indexName = substr( $0, RSTART+1, RLENGTH-1 ) |
|
} |
|
if( match( $0, /\([^()]+/ ) ){ |
|
indexKey = substr( $0, RSTART+1, RLENGTH-1 ) |
|
} |
|
# idx_ prefix to avoid name clashes (they really happen!) |
|
key[tableName] = key[tableName] "CREATE INDEX \"idx_" \ |
|
tableName "_" indexName "\" ON \"" tableName "\" (" indexKey ");\n" |
|
} |
|
} |
|
|
|
END { |
|
if( no_END ){ exit 1} |
|
# print all KEY creation lines. |
|
for( table in key ){ printf key[table] } |
|
|
|
print "END TRANSACTION;" |
|
|
|
if( caseIssue ){ |
|
printerr( \ |
|
"INFO Pure sqlite identifiers are case insensitive (even if quoted\n" \ |
|
" or if ASCII) and doesnt cross-check TABLE and TEMPORARY TABLE\n" \ |
|
" identifiers. Thus expect errors like \"table T has no column named F\".") |
|
} |
|
} |