diff --git a/README b/README index 8dca8a7..8e1ab3a 100644 --- a/README +++ b/README @@ -9,7 +9,20 @@ How to use: - Execute directly this script in the way: - /usr/bin/php fix-serialization.php my-sql-file.sql + - In place file processing + /usr/bin/php fix-serialization.php my-sql-file.sql + + - Stream processing Examples + + -- Uncompressed to uncompressed + cat my-sql-file.sql | /usr/bin/php fix-serialization.php --stream > my-sql-fixed-file.sql + + -- Compressed to compressed + gunzip < my-sql-file.sql.gz | /usr/bin/php fix-serialization.php --stream | gzip > my-sql-fixed-file.sql.gz + + -- Direct import into mysql from compressed + gunzip < my-sql-file.sql.gz | /usr/bin/php fix-serialization.php --stream | mysql -uuname -p "database name" + - Or use the shell script replace.sh that replaces with sed command each sql file in directory and call fix-serialization script diff --git a/fix-serialization.php b/fix-serialization.php index 2795801..76d64d0 100755 --- a/fix-serialization.php +++ b/fix-serialization.php @@ -2,7 +2,7 @@ my-sql-fixed-file.sql +* +* --Compressed to compressed +* gunzip < my-sql-file.sql.gz | /usr/bin/php fix-serialization.php --stream | gzip > my-sql-fixed-file.sql.gz +* +* --Direct import into mysql from compressed +* gunzip < my-sql-file.sql.gz | /usr/bin/php fix-serialization.php --stream | mysql -uuname -p "database name" +* * Versions: * * 1.0 2011-08-03 Initial release * 1.1 2011-08-18 Support for backslashed quotes, added some code warnings * 1.2 2011-09-29 Support for null or zero length strings after preg_replace is called, and explain how to handle these errors +* 1.2-1 2015-07-02 (jbrule) Added stream processing support to avoid the memory allocation issue when working with large database files (tested up to 6GB). * * Knowed errors: * @@ -37,8 +50,6 @@ * */ - - // Unescape to avoid dump-text issues function unescape_mysql($value) { return str_replace(array("\\\\", "\\0", "\\n", "\\r", "\Z", "\'", '\"'), @@ -46,17 +57,38 @@ function unescape_mysql($value) { $value); } - - // Fix strange behaviour if you have escaped quotes in your replacement function unescape_quotes($value) { return str_replace('\"', '"', $value); -} +} +function replace_serialized_string_values($data){ + // Replace serialized string values + + return preg_replace('!s:(\d+):([\\\\]?"[\\\\]?"|[\\\\]?"((.*?)[^\\\\])[\\\\]?");!e', "'s:'.strlen(unescape_mysql('$3')).':\"'.unescape_quotes('$3').'\";'", $data); +} +//Setup arguments to flag stream processing +$options = getopt("s::",array("stream::")); -// Check command line arguments -if (!(isset($argv) && isset($argv[1]))) { +//Check if data is being feed in via a pipe. Process line by line and output via stdout if that is the case. +if((key_exists("s",$options) || key_exists("stream",$options)) && $fh = fopen('php://stdin','r')) { + stream_set_blocking($fh, false); + + $stdin = ''; + + while (false !== ($stdin = fgets($fh))) { + + //Note. $stdin is a single line of the dumpfile. In testing this appears to be fine as the mysqldump field values do not appear to span past a single line. + + // Replace serialized string values + $data = replace_serialized_string_values($stdin); + + fwrite(STDOUT,$data); + + } + fclose($fh); +} elseif (!(isset($argv) && isset($argv[1]))) { // Check command line arguments // Error echo 'Error: no input file specified'."\n\n"; @@ -109,7 +141,7 @@ function unescape_quotes($value) { $do_preg_replace = true; // Replace serialized string values - $data = preg_replace('!s:(\d+):([\\\\]?"[\\\\]?"|[\\\\]?"((.*?)[^\\\\])[\\\\]?");!e', "'s:'.strlen(unescape_mysql('$3')).':\"'.unescape_quotes('$3').'\";'", $data); + $data = replace_serialized_string_values($data); } // Close file @@ -155,7 +187,3 @@ function unescape_quotes($value) { } } } - - - -?> \ No newline at end of file