Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: dsl zip can not read the stream file #2055

Merged
merged 8 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions src/dict/utils/indexedzip.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,16 +85,19 @@ bool IndexedZip::loadFile( uint32_t offset, vector< char > & data )

case ZipFile::Deflated: {
// Decompress the data using the zlib library

QByteArray compressedData = zip.read( header.compressedSize );

if ( compressedData.size() != (int)header.compressedSize ) {
// Reject an unusually large compressed size (more than 100 MB)
if ( header.compressedSize > 100000000 ) { // Example threshold
qDebug() << "Unusually large compressed size:" << header.compressedSize;
return false;
}

if ( header.uncompressedSize == 0 ) {
if ( header.compressedSize == 0 ) {
// The compressed data is probably invalid.
qDebug() << "uncompressed size is 0;";
qDebug() << "compressed size is 0;";
return false;
}
QByteArray compressedData = zip.read( header.compressedSize );

if ( compressedData.size() != (int)header.compressedSize ) {
return false;
}

Expand Down
61 changes: 59 additions & 2 deletions src/dict/utils/zipfile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,23 @@ __attribute__( ( packed ) )
#endif
;

// Fixed-size data descriptor record of a ZIP entry.
// It is filled by reading sizeof( DataDescriptor ) raw bytes from the archive,
// which is why the layout is packed: no padding may be inserted between fields.
// The reader consults it for the entry sizes when the local file header has
// general-purpose bit 3 set and reports a compressed size of 0.
// Size fields are converted with qFromLittleEndian() by the reader before use.
struct DataDescriptor
{
// CRC-32 field of the record; presumably the checksum of the entry data
// (not consulted by the visible reader code — TODO confirm).
quint32 crc32;
// Size of the entry data as stored in the archive.
quint32 compressedSize;
// Size of the entry data once decompressed.
quint32 uncompressedSize;
}
// Non-MSVC compilers get the packed attribute; MSVC presumably relies on the
// enclosing #pragma pack region instead — TODO confirm.
#ifndef _MSC_VER
__attribute__( ( packed ) )
#endif
;

#pragma pack( pop )

// Record signatures used to recognise the individual ZIP structures.
// Each value goes through qToLittleEndian(); the raw bytes of the data
// descriptor signature are matched directly against bytes read from the archive.

// Signature of the end-of-central-directory record.
static quint32 const endOfCdirRecordSignatureValue = qToLittleEndian( 0x06054b50 );
// Signature of a central directory file header.
static quint32 const centralFileHeaderSignature = qToLittleEndian( 0x02014b50 );
// Signature of a local file header.
static quint32 const localFileHeaderSignature = qToLittleEndian( 0x04034b50 );
// Signature that may precede a data descriptor record.
static quint32 const dataDescriptorHeaderSignature = qToLittleEndian( 0x08074b50 );

static CompressionMethod getCompressionMethod( quint16 compressionMethod )
{
Expand Down Expand Up @@ -196,9 +208,54 @@ bool readLocalHeader( SplitZipFile & zip, LocalFileHeader & entry )
if ( !zip.seek( zip.pos() + qFromLittleEndian( record.extraFieldLength ) ) ) {
return false;
}
// Check if the data descriptor is present
quint16 gpBits = qFromLittleEndian( record.gpBits );

// Bit 3 of the general-purpose flags means a data descriptor is present, which is usual for streamed files.
// The data descriptor follows the actual file data, so skip the file data and look for the data descriptor signature.
// Note: according to the ZIP format description, the signature is optional!
bool hasDataDescriptor = ( gpBits & 0x0008 ) != 0;

if ( hasDataDescriptor && ( record.compressedSize == 0 ) ) {
auto current_pos = zip.pos();
// If compressedSize is 0, we need to find the data descriptor
QByteArray dataDescriptorSignature( (char const *)&dataDescriptorHeaderSignature, sizeof( quint32 ) );

QByteArray buffer;
while ( true ) {
char byte;
if ( zip.read( &byte, sizeof( byte ) ) != sizeof( byte ) ) {
return false;
}
buffer.append( byte );

if ( buffer.size() >= dataDescriptorSignature.size() ) {
QByteArray lastBytes = buffer.right( sizeof( dataDescriptorSignature ) );
if ( lastBytes == dataDescriptorSignature ) {
// Found the data descriptor signature
break;
}
buffer.remove( 0, 1 );
}
}

DataDescriptor dataDescriptor;

if ( zip.read( (char *)&dataDescriptor, sizeof( dataDescriptor ) ) != sizeof( dataDescriptor ) ) {
return false;
}

entry.compressedSize = qFromLittleEndian( dataDescriptor.compressedSize );
entry.uncompressedSize = qFromLittleEndian( dataDescriptor.uncompressedSize );

//restore
zip.seek( current_pos );
}
else {
entry.compressedSize = qFromLittleEndian( record.compressedSize );
entry.uncompressedSize = qFromLittleEndian( record.uncompressedSize );
}

entry.compressedSize = qFromLittleEndian( record.compressedSize );
entry.uncompressedSize = qFromLittleEndian( record.uncompressedSize );
entry.compressionMethod = getCompressionMethod( record.compressionMethod );

return true;
Expand Down
Loading