From 1e88c9f6737e580c1d72603d07ec27ca922545fe Mon Sep 17 00:00:00 2001 From: Jason Smith Date: Thu, 12 Nov 2020 00:18:17 -0800 Subject: [PATCH 1/3] Fix longFilename NULL pointer dereference --- Marlin/src/sd/SdBaseFile.cpp | 64 +++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/Marlin/src/sd/SdBaseFile.cpp b/Marlin/src/sd/SdBaseFile.cpp index acc5ba17f27c..48ee94780e06 100644 --- a/Marlin/src/sd/SdBaseFile.cpp +++ b/Marlin/src/sd/SdBaseFile.cpp @@ -1125,41 +1125,43 @@ int8_t SdBaseFile::readDir(dir_t* dir, char* longFilename) { // Return if normal file or subdirectory if (DIR_IS_FILE_OR_SUBDIR(dir)) { #if ENABLED(UTF_FILENAME_SUPPORT) - // Convert filename from utf-16 to utf-8 as Marlin expects - #if LONG_FILENAME_CHARSIZE > 2 - // Add warning for developers for currently not supported 3-byte cases (Conversion series of 2-byte - // codepoints to 3-byte in-place will break the rest of filename) - #error "Currently filename re-encoding is done in-place. It may break the remaining chars to use 3-byte codepoints." - #endif - uint16_t currentPos = 0; - LOOP_L_N(i, (LONG_FILENAME_LENGTH / 2)) { - uint16_t idx = i * 2; // This is fixed as FAT LFN always contain UTF-16LE encoding - - uint16_t utf16_ch = longFilename[idx] | (longFilename[idx + 1] << 8); - if (0xD800 == (utf16_ch & 0xF800)) // Surrogate pair - encode as '_' - longFilename[currentPos++] = '_'; - else if (0 == (utf16_ch & 0xFF80)) // Encode as 1-byte utf-8 char - longFilename[currentPos++] = utf16_ch & 0x007F; - else if (0 == (utf16_ch & 0xF800)) { // Encode as 2-byte utf-8 char - longFilename[currentPos++] = 0xC0 | ((utf16_ch >> 6) & 0x1F); - longFilename[currentPos++] = 0x80 | (utf16_ch & 0x3F); - } - else { - #if LONG_FILENAME_CHARSIZE > 2 // Encode as 3-byte utf-8 char - longFilename[currentPos++] = 0xE0 | ((utf16_ch >> 12) & 0x0F); - longFilename[currentPos++] = 0xC0 | ((utf16_ch >> 6) & 0x3F); - longFilename[currentPos++] = 0xC0 | (utf16_ch & 0x3F); - #else // Encode as '_' + if (longFilename) { + // Convert filename from utf-16 to utf-8 as Marlin expects + #if LONG_FILENAME_CHARSIZE > 2 + // Add warning for developers for currently not supported 3-byte cases (Conversion series of 2-byte + // codepoints to 3-byte in-place will break the rest of filename) + #error "Currently filename re-encoding is done in-place. It may break the remaining chars to use 3-byte codepoints." + #endif + uint16_t currentPos = 0; + + LOOP_L_N(i, (LONG_FILENAME_LENGTH / 2)) { + uint16_t idx = i * 2; // This is fixed as FAT LFN always contain UTF-16LE encoding + + uint16_t utf16_ch = longFilename[idx] | (longFilename[idx + 1] << 8); + if (0xD800 == (utf16_ch & 0xF800)) // Surrogate pair - encode as '_' longFilename[currentPos++] = '_'; - #endif + else if (0 == (utf16_ch & 0xFF80)) // Encode as 1-byte utf-8 char + longFilename[currentPos++] = utf16_ch & 0x007F; + else if (0 == (utf16_ch & 0xF800)) { // Encode as 2-byte utf-8 char + longFilename[currentPos++] = 0xC0 | ((utf16_ch >> 6) & 0x1F); + longFilename[currentPos++] = 0x80 | (utf16_ch & 0x3F); + } + else { + #if LONG_FILENAME_CHARSIZE > 2 // Encode as 3-byte utf-8 char + longFilename[currentPos++] = 0xE0 | ((utf16_ch >> 12) & 0x0F); + longFilename[currentPos++] = 0xC0 | ((utf16_ch >> 6) & 0x3F); + longFilename[currentPos++] = 0xC0 | (utf16_ch & 0x3F); + #else // Encode as '_' + longFilename[currentPos++] = '_'; + #endif + } + + if (0 == utf16_ch) break; // End of filename } - - if (0 == utf16_ch) break; // End of filename + n = currentPos; } - return currentPos; - #else - return n; #endif + return n; } } } From 6eee9bbaa95c7ae6da04f87a56c467bcd7eda2c6 Mon Sep 17 00:00:00 2001 From: Scott Lahteine Date: Thu, 12 Nov 2020 22:12:55 -0600 Subject: [PATCH 2/3] Final touches --- Marlin/src/sd/SdBaseFile.cpp | 76 ++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 39 deletions(-) diff --git a/Marlin/src/sd/SdBaseFile.cpp b/Marlin/src/sd/SdBaseFile.cpp index 48ee94780e06..a40f20c4f2a1 100644 --- a/Marlin/src/sd/SdBaseFile.cpp +++ b/Marlin/src/sd/SdBaseFile.cpp @@ -1122,47 +1122,45 @@ int8_t SdBaseFile::readDir(dir_t* dir, char* longFilename) { } } - // Return if normal file or subdirectory - if (DIR_IS_FILE_OR_SUBDIR(dir)) { - #if ENABLED(UTF_FILENAME_SUPPORT) - if (longFilename) { - // Convert filename from utf-16 to utf-8 as Marlin expects - #if LONG_FILENAME_CHARSIZE > 2 - // Add warning for developers for currently not supported 3-byte cases (Conversion series of 2-byte - // codepoints to 3-byte in-place will break the rest of filename) - #error "Currently filename re-encoding is done in-place. It may break the remaining chars to use 3-byte codepoints." - #endif - uint16_t currentPos = 0; - - LOOP_L_N(i, (LONG_FILENAME_LENGTH / 2)) { - uint16_t idx = i * 2; // This is fixed as FAT LFN always contain UTF-16LE encoding - - uint16_t utf16_ch = longFilename[idx] | (longFilename[idx + 1] << 8); - if (0xD800 == (utf16_ch & 0xF800)) // Surrogate pair - encode as '_' - longFilename[currentPos++] = '_'; - else if (0 == (utf16_ch & 0xFF80)) // Encode as 1-byte utf-8 char - longFilename[currentPos++] = utf16_ch & 0x007F; - else if (0 == (utf16_ch & 0xF800)) { // Encode as 2-byte utf-8 char - longFilename[currentPos++] = 0xC0 | ((utf16_ch >> 6) & 0x1F); - longFilename[currentPos++] = 0x80 | (utf16_ch & 0x3F); - } - else { - #if LONG_FILENAME_CHARSIZE > 2 // Encode as 3-byte utf-8 char - longFilename[currentPos++] = 0xE0 | ((utf16_ch >> 12) & 0x0F); - longFilename[currentPos++] = 0xC0 | ((utf16_ch >> 6) & 0x3F); - longFilename[currentPos++] = 0xC0 | (utf16_ch & 0x3F); - #else // Encode as '_' - longFilename[currentPos++] = '_'; - #endif - } - - if (0 == utf16_ch) break; // End of filename + #if ENABLED(UTF_FILENAME_SUPPORT) + // Return if normal file or subdirectory + if (DIR_IS_FILE_OR_SUBDIR(dir)) { + // Convert filename from utf-16 to utf-8 as Marlin expects + #if LONG_FILENAME_CHARSIZE > 2 + // Add warning for developers for currently not supported 3-byte cases (Conversion series of 2-byte + // codepoints to 3-byte in-place will break the rest of filename) + #error "Currently filename re-encoding is done in-place. It may break the remaining chars to use 3-byte codepoints." + #endif + + // Is there a long filename to decode? + if (!longFilename) return n; // sizeof(dir_t) + + // Reset n to the start of the long name + n = 0; + for (uint16_t idx = 0; idx < (LONG_FILENAME_LENGTH) / 2; idx += 2) { // idx is fixed since FAT LFN always contains UTF-16LE encoding + uint16_t utf16_ch = longFilename[idx] | (longFilename[idx + 1] << 8); + if (0xD800 == (utf16_ch & 0xF800)) // Surrogate pair - encode as '_' + longFilename[n++] = '_'; + else if (0 == (utf16_ch & 0xFF80)) // Encode as 1-byte UTF-8 char + longFilename[n++] = utf16_ch & 0x007F; + else if (0 == (utf16_ch & 0xF800)) { // Encode as 2-byte UTF-8 char + longFilename[n++] = 0xC0 | ((utf16_ch >> 6) & 0x1F); + longFilename[n++] = 0x80 | ( utf16_ch & 0x3F); } - n = currentPos; + else { + #if LONG_FILENAME_CHARSIZE > 2 // Encode as 3-byte UTF-8 char + longFilename[n++] = 0xE0 | ((utf16_ch >> 12) & 0x0F); + longFilename[n++] = 0xC0 | ((utf16_ch >> 6) & 0x3F); + longFilename[n++] = 0xC0 | ( utf16_ch & 0x3F); + #else // Encode as '_' + longFilename[n++] = '_'; + #endif + } + if (0 == utf16_ch) break; // End of filename } - #endif - return n; - } + return n; + } + #endif } } From 8008c6c8973c1a55cb73253142419651030d51ec Mon Sep 17 00:00:00 2001 From: Scott Lahteine Date: Thu, 12 Nov 2020 22:24:26 -0600 Subject: [PATCH 3/3] =?UTF-8?q?and=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Marlin/src/sd/SdBaseFile.cpp | 65 ++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 33 deletions(-) diff --git a/Marlin/src/sd/SdBaseFile.cpp b/Marlin/src/sd/SdBaseFile.cpp index a40f20c4f2a1..db2a9e2de9c5 100644 --- a/Marlin/src/sd/SdBaseFile.cpp +++ b/Marlin/src/sd/SdBaseFile.cpp @@ -1109,8 +1109,8 @@ int8_t SdBaseFile::readDir(dir_t* dir, char* longFilename) { // We can't reconvert to UTF-8 here as UTF-8 is variable-size encoding, but joining LFN blocks // needs static bytes addressing. So here just store full UTF-16LE words to re-convert later. uint16_t idx = (n + i) * 2; // This is fixed as FAT LFN always contain UTF-16LE encoding - longFilename[idx] = utf16_ch & 0xFF; - longFilename[idx+1] = (utf16_ch >> 8) & 0xFF; + longFilename[idx] = utf16_ch & 0xFF; + longFilename[idx + 1] = (utf16_ch >> 8) & 0xFF; #else // Replace all multibyte characters to '_' longFilename[n + i] = (utf16_ch > 0xFF) ? '_' : (utf16_ch & 0xFF); @@ -1122,10 +1122,9 @@ int8_t SdBaseFile::readDir(dir_t* dir, char* longFilename) { } } - #if ENABLED(UTF_FILENAME_SUPPORT) - // Return if normal file or subdirectory - if (DIR_IS_FILE_OR_SUBDIR(dir)) { - // Convert filename from utf-16 to utf-8 as Marlin expects + // Post-process normal file or subdirectory longname, if any + if (DIR_IS_FILE_OR_SUBDIR(dir)) { + #if ENABLED(UTF_FILENAME_SUPPORT) #if LONG_FILENAME_CHARSIZE > 2 // Add warning for developers for currently not supported 3-byte cases (Conversion series of 2-byte // codepoints to 3-byte in-place will break the rest of filename) @@ -1133,34 +1132,34 @@ int8_t SdBaseFile::readDir(dir_t* dir, char* longFilename) { #endif // Is there a long filename to decode? - if (!longFilename) return n; // sizeof(dir_t) - - // Reset n to the start of the long name - n = 0; - for (uint16_t idx = 0; idx < (LONG_FILENAME_LENGTH) / 2; idx += 2) { // idx is fixed since FAT LFN always contains UTF-16LE encoding - uint16_t utf16_ch = longFilename[idx] | (longFilename[idx + 1] << 8); - if (0xD800 == (utf16_ch & 0xF800)) // Surrogate pair - encode as '_' - longFilename[n++] = '_'; - else if (0 == (utf16_ch & 0xFF80)) // Encode as 1-byte UTF-8 char - longFilename[n++] = utf16_ch & 0x007F; - else if (0 == (utf16_ch & 0xF800)) { // Encode as 2-byte UTF-8 char - longFilename[n++] = 0xC0 | ((utf16_ch >> 6) & 0x1F); - longFilename[n++] = 0x80 | ( utf16_ch & 0x3F); - } - else { - #if LONG_FILENAME_CHARSIZE > 2 // Encode as 3-byte UTF-8 char - longFilename[n++] = 0xE0 | ((utf16_ch >> 12) & 0x0F); - longFilename[n++] = 0xC0 | ((utf16_ch >> 6) & 0x3F); - longFilename[n++] = 0xC0 | ( utf16_ch & 0x3F); - #else // Encode as '_' + if (longFilename) { + // Reset n to the start of the long name + n = 0; + for (uint16_t idx = 0; idx < (LONG_FILENAME_LENGTH) / 2; idx += 2) { // idx is fixed since FAT LFN always contains UTF-16LE encoding + uint16_t utf16_ch = longFilename[idx] | (longFilename[idx + 1] << 8); + if (0xD800 == (utf16_ch & 0xF800)) // Surrogate pair - encode as '_' longFilename[n++] = '_'; - #endif - } - if (0 == utf16_ch) break; // End of filename - } - return n; - } - #endif + else if (0 == (utf16_ch & 0xFF80)) // Encode as 1-byte UTF-8 char + longFilename[n++] = utf16_ch & 0x007F; + else if (0 == (utf16_ch & 0xF800)) { // Encode as 2-byte UTF-8 char + longFilename[n++] = 0xC0 | ((utf16_ch >> 6) & 0x1F); + longFilename[n++] = 0x80 | ( utf16_ch & 0x3F); + } + else { + #if LONG_FILENAME_CHARSIZE > 2 // Encode as 3-byte UTF-8 char + longFilename[n++] = 0xE0 | ((utf16_ch >> 12) & 0x0F); + longFilename[n++] = 0xC0 | ((utf16_ch >> 6) & 0x3F); + longFilename[n++] = 0xC0 | ( utf16_ch & 0x3F); + #else // Encode as '_' + longFilename[n++] = '_'; + #endif + } + if (0 == utf16_ch) break; // End of filename + } // idx + } // longFilename + #endif + return n; + } // DIR_IS_FILE_OR_SUBDIR } }