Skip to content

Commit

Permalink
fix: dsl zip can not read the stream file (#2055)
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaoyifang authored Jan 3, 2025
1 parent f13e223 commit 74f247d
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 9 deletions.
17 changes: 10 additions & 7 deletions src/dict/utils/indexedzip.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,16 +85,19 @@ bool IndexedZip::loadFile( uint32_t offset, vector< char > & data )

case ZipFile::Deflated: {
// Decompress the data using the zlib library

QByteArray compressedData = zip.read( header.compressedSize );

if ( compressedData.size() != (int)header.compressedSize ) {
// Reject unusually large compressed sizes (limit: 100 MB)
if ( header.compressedSize > 100000000 ) { // Example threshold
qDebug() << "Unusually large compressed size:" << header.compressedSize;
return false;
}

if ( header.uncompressedSize == 0 ) {
if ( header.compressedSize == 0 ) {
//the compress data should have some issue.
qDebug() << "uncompressed size is 0;";
qDebug() << "compressed size is 0;";
return false;
}
QByteArray compressedData = zip.read( header.compressedSize );

if ( compressedData.size() != (int)header.compressedSize ) {
return false;
}

Expand Down
61 changes: 59 additions & 2 deletions src/dict/utils/zipfile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,23 @@ __attribute__( ( packed ) )
#endif
;

// Fixed-size record read straight from the archive by readLocalHeader() once
// the data descriptor signature has been located; the signature itself is not
// part of this struct. The struct is packed so that its in-memory layout
// matches the bytes read from the file (the "#pragma pack( pop )" below
// presumably pairs with a push earlier in the file, outside this view --
// TODO confirm).
// NOTE(review): all three fields are 32-bit; a descriptor using 64-bit sizes
// would not fit this layout -- confirm whether such archives must be handled.
struct DataDescriptor
{
// CRC-32 field of the descriptor; read together with the sizes, but not
// consulted by the code visible here.
quint32 crc32;
// Size of the stored (compressed) data; stored little-endian, converted with
// qFromLittleEndian() by the reader.
quint32 compressedSize;
// Size of the data after decompression; stored little-endian, converted with
// qFromLittleEndian() by the reader.
quint32 uncompressedSize;
}
#ifndef _MSC_VER
__attribute__( ( packed ) )
#endif
;

#pragma pack( pop )

// Record signatures. Each value is passed through qToLittleEndian() so that the
// constant's in-memory bytes are in little-endian order regardless of host
// byte order, which lets it be compared against raw bytes read from the archive.
// Signature of the end-of-central-directory record.
static quint32 const endOfCdirRecordSignatureValue = qToLittleEndian( 0x06054b50 );
// Signature of a central directory file header.
static quint32 const centralFileHeaderSignature = qToLittleEndian( 0x02014b50 );
// Signature of a local file header.
static quint32 const localFileHeaderSignature = qToLittleEndian( 0x04034b50 );
// Signature preceding a data descriptor; readLocalHeader() scans the file data
// byte by byte for this 4-byte pattern when the local header carries no sizes.
static quint32 const dataDescriptorHeaderSignature = qToLittleEndian( 0x08074b50 );

static CompressionMethod getCompressionMethod( quint16 compressionMethod )
{
Expand Down Expand Up @@ -196,9 +208,54 @@ bool readLocalHeader( SplitZipFile & zip, LocalFileHeader & entry )
if ( !zip.seek( zip.pos() + qFromLittleEndian( record.extraFieldLength ) ) ) {
return false;
}
// Check if the data descriptor is present
quint16 gpBits = qFromLittleEndian( record.gpBits );

// Bit 3 of the general-purpose flags means a data descriptor is present, which is typical of streamed archives.
// The data descriptor follows the actual file data, so skip the file data and look for the data descriptor signature.
// NOTE: according to the ZIP format specification (not zlib), the signature is optional!
bool hasDataDescriptor = ( gpBits & 0x0008 ) != 0;

if ( hasDataDescriptor && ( record.compressedSize == 0 ) ) {
auto current_pos = zip.pos();
// If compressedSize is 0, we need to find the data descriptor
QByteArray dataDescriptorSignature( (char const *)&dataDescriptorHeaderSignature, sizeof( quint32 ) );

QByteArray buffer;
while ( true ) {
char byte;
if ( zip.read( &byte, sizeof( byte ) ) != sizeof( byte ) ) {
return false;
}
buffer.append( byte );

if ( buffer.size() >= dataDescriptorSignature.size() ) {
QByteArray lastBytes = buffer.right( sizeof( dataDescriptorSignature ) );
if ( lastBytes == dataDescriptorSignature ) {
// Found the data descriptor signature
break;
}
buffer.remove( 0, 1 );
}
}

DataDescriptor dataDescriptor;

if ( zip.read( (char *)&dataDescriptor, sizeof( dataDescriptor ) ) != sizeof( dataDescriptor ) ) {
return false;
}

entry.compressedSize = qFromLittleEndian( dataDescriptor.compressedSize );
entry.uncompressedSize = qFromLittleEndian( dataDescriptor.uncompressedSize );

//restore
zip.seek( current_pos );
}
else {
entry.compressedSize = qFromLittleEndian( record.compressedSize );
entry.uncompressedSize = qFromLittleEndian( record.uncompressedSize );
}

entry.compressedSize = qFromLittleEndian( record.compressedSize );
entry.uncompressedSize = qFromLittleEndian( record.uncompressedSize );
entry.compressionMethod = getCompressionMethod( record.compressionMethod );

return true;
Expand Down

0 comments on commit 74f247d

Please sign in to comment.