diff --git a/src/dict/utils/indexedzip.cc b/src/dict/utils/indexedzip.cc index f0e2caa57..494127a28 100644 --- a/src/dict/utils/indexedzip.cc +++ b/src/dict/utils/indexedzip.cc @@ -85,16 +85,19 @@ bool IndexedZip::loadFile( uint32_t offset, vector< char > & data ) case ZipFile::Deflated: { // Decompress the data using the zlib library - - QByteArray compressedData = zip.read( header.compressedSize ); - - if ( compressedData.size() != (int)header.compressedSize ) { + // Reject unusually large compressed sizes (over 100 MB) before reading them into memory. + if ( header.compressedSize > 100000000 ) { // Example threshold + qDebug() << "Unusually large compressed size:" << header.compressedSize; return false; } - - if ( header.uncompressedSize == 0 ) { + if ( header.compressedSize == 0 ) { //the compress data should have some issue. - qDebug() << "uncompressed size is 0;"; + qDebug() << "compressed size is 0;"; + return false; + } + QByteArray compressedData = zip.read( header.compressedSize ); + + if ( compressedData.size() != (int)header.compressedSize ) { return false; } diff --git a/src/dict/utils/zipfile.cc b/src/dict/utils/zipfile.cc index c568fd3c6..134f094f3 100644 --- a/src/dict/utils/zipfile.cc +++ b/src/dict/utils/zipfile.cc @@ -48,11 +48,23 @@ __attribute__( ( packed ) ) #endif ; +struct DataDescriptor +{ + quint32 crc32; + quint32 compressedSize; + quint32 uncompressedSize; +} +#ifndef _MSC_VER +__attribute__( ( packed ) ) +#endif +; + #pragma pack( pop ) static quint32 const endOfCdirRecordSignatureValue = qToLittleEndian( 0x06054b50 ); static quint32 const centralFileHeaderSignature = qToLittleEndian( 0x02014b50 ); static quint32 const localFileHeaderSignature = qToLittleEndian( 0x04034b50 ); +static quint32 const dataDescriptorHeaderSignature = qToLittleEndian( 0x08074b50 ); static CompressionMethod getCompressionMethod( quint16 compressionMethod ) { @@ -196,9 +208,56 @@ bool readLocalHeader( SplitZipFile & zip, LocalFileHeader & entry ) if ( !zip.seek( zip.pos() + qFromLittleEndian( 
record.extraFieldLength ) ) ) { return false; } + // Check if the data descriptor is present + quint16 gpBits = qFromLittleEndian( record.gpBits ); + + // Bit 3 of the general-purpose flags means a data descriptor is present, which is usual for streamed archives. + // The data descriptor follows the file data, so skip over the data and look for the data descriptor signature. + // NOTE: per the ZIP format specification the signature is optional; the scan below only handles descriptors that carry it. + bool hasDataDescriptor = ( gpBits & 0x0008 ) != 0; + + if ( hasDataDescriptor && ( record.compressedSize == 0 ) ) { + auto current_pos = zip.pos(); + // If compressedSize is 0, we need to find the data descriptor + QByteArray dataDescriptorSignature( (char const *)&dataDescriptorHeaderSignature, sizeof( quint32 ) ); + + QByteArray buffer; + while ( true ) { + char byte; + if ( zip.read( &byte, sizeof( byte ) ) != sizeof( byte ) ) { + return false; + } + buffer.append( byte ); + + if ( buffer.size() >= dataDescriptorSignature.size() ) { + QByteArray lastBytes = buffer.right( dataDescriptorSignature.size() ); + if ( lastBytes == dataDescriptorSignature ) { + // Found the data descriptor signature + break; + } + buffer.remove( 0, 1 ); + } + } + + DataDescriptor dataDescriptor; + + if ( zip.read( (char *)&dataDescriptor, sizeof( dataDescriptor ) ) != sizeof( dataDescriptor ) ) { + return false; + } + + entry.compressedSize = qFromLittleEndian( dataDescriptor.compressedSize ); + entry.uncompressedSize = qFromLittleEndian( dataDescriptor.uncompressedSize ); + + // Restore the read position to where it was before the scan; fail if that is not possible. + if ( !zip.seek( current_pos ) ) { + return false; + } + } + else { + entry.compressedSize = qFromLittleEndian( record.compressedSize ); + entry.uncompressedSize = qFromLittleEndian( record.uncompressedSize ); + } - entry.compressedSize = qFromLittleEndian( record.compressedSize ); - entry.uncompressedSize = qFromLittleEndian( record.uncompressedSize ); entry.compressionMethod = getCompressionMethod( record.compressionMethod ); return true;