Skip to content

Commit

Permalink
Add support to read zlib compressed files, like vmlinux.gz
Browse files Browse the repository at this point in the history
This is useful for decompressing vmlinux files and kernel modules
compressed using zlib/gzip.

Signed-off-by: Marcos Paulo de Souza <mpdesouza@suse.com>
  • Loading branch information
marcosps committed Aug 12, 2024
1 parent bd031b8 commit f57a78f
Show file tree
Hide file tree
Showing 9 changed files with 109 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/testsuite.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
libLLVM${{ matrix.version }}
llvm${{ matrix.version }}
llvm${{ matrix.version }}-devel
meson ninja clang-tools gcc findutils bash libelf-devel
meson ninja clang-tools gcc findutils bash libelf-devel zlib-devel
- uses: actions/checkout@v2
- name: meson
run: meson setup build --buildtype=${{ matrix.build-type }} --native-file ce-native.ini
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@ A tool to extract code content from source files using the clang and LLVM infras

### Compiling clang-extract

clang-extract requires clang, LLVM, libelf, meson and ninja in order to build.
clang-extract requires clang, LLVM, libelf, zlib, meson and ninja in order to build.
On openSUSE, you can install them by running:
```
$ sudo zypper install clang18 clang18-devel libclang-cpp18 \
clang-tools libLLVM18 llvm18 llvm18-devel libelf-devel meson ninja
clang-tools libLLVM18 llvm18 llvm18-devel libelf-devel meson ninja \
zlib-devel
```
It's advised to use LLVM 18 or higher, since it's well tested. There is
support for LLVM 16 and 17 as well, but you might find issues with them.
Expand Down
87 changes: 87 additions & 0 deletions libcextract/ElfCXX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
#include <iostream>
#include <string.h>

#include <zlib.h>

const char *ElfSymbol::Get_Name(void)
{
struct Elf *elf = ElfObj.Get_Wrapped_Object();
Expand Down Expand Up @@ -105,6 +107,12 @@ ElfObject::ElfObject(const char *path)
break;
}

/* gzip magic number (zlib) */
case FileHandling::FILE_TYPE_GZ: {
ElfObj = decompress_gz(ElfFd);
break;
}

default:
close(ElfFd);
throw std::runtime_error("Format not recognized: " + parser_path + "\n");
Expand All @@ -126,6 +134,85 @@ ElfObject::~ElfObject(void)
}
}

/** Inflate a gzip-compressed file into memory and open the result with libelf.
 *
 * The whole file readable through `fd` is decompressed into a heap buffer
 * which is then handed to elf_memory().
 *
 * @param fd  Open file descriptor positioned at the start of the gzip data.
 * @return    ELF descriptor created from the decompressed image.
 * @throws std::runtime_error if allocation, reading, inflating or ELF
 *         parsing fails. All resources acquired here are released before
 *         throwing.
 */
Elf *ElfObject::decompress_gz(int fd)
{
  const size_t CHUNK = 16384;

  unsigned have;
  unsigned char in[CHUNK];
  unsigned char out[CHUNK];

  size_t dest_size = CHUNK;
  size_t dest_current = 0;
  unsigned char *dest = static_cast<unsigned char *>(malloc(dest_size));
  if (!dest)
    throw std::runtime_error("zlib dest malloc failed\n");

  z_stream strm;
  memset(&strm, 0, sizeof(strm));

  /* Allocate inflate state. Adding 16 to the window bits makes zlib expect a
     gzip header and trailer instead of a raw zlib stream. */
  int ret = inflateInit2(&strm, 16+MAX_WBITS);
  if (ret != Z_OK) {
    free(dest);
    throw std::runtime_error("zlib inflateInit failed\n");
  }

  /* decompress until deflate stream ends or end of file */
  do {
    /* Keep the result of read() in a signed variable: strm.avail_in is
       unsigned, so a -1 stored there could never be detected as an error. */
    ssize_t nread = read(fd, in, CHUNK);
    if (nread < 0) {
      inflateEnd(&strm);
      free(dest);
      throw std::runtime_error("zlib read failed: " + std::to_string(nread) + "\n");
    }

    if (nread == 0)
      break;
    strm.avail_in = static_cast<unsigned>(nread);
    strm.next_in = in;

    /* run inflate() on input until output buffer not full */
    do {
      strm.avail_out = CHUNK;
      strm.next_out = out;
      ret = inflate(&strm, Z_NO_FLUSH);

      /* A missing preset dictionary is reported as a data error. */
      if (ret == Z_NEED_DICT)
        ret = Z_DATA_ERROR;

      if (ret == Z_DATA_ERROR || ret == Z_MEM_ERROR) {
        inflateEnd(&strm);
        free(dest);
        throw std::runtime_error("zlib inflate error: " + std::to_string(ret) + "\n");
      }

      have = CHUNK - strm.avail_out;

      /* Double the buffer when needed. One doubling is always enough since
         `have` never exceeds CHUNK and the buffer starts at CHUNK bytes. */
      if (have > dest_size - dest_current) {
        size_t grown_size = dest_size * 2;
        unsigned char *grown = static_cast<unsigned char *>(realloc(dest, grown_size));
        if (!grown) {
          /* realloc leaves the old block untouched on failure. */
          inflateEnd(&strm);
          free(dest);
          throw std::runtime_error("zlib dest realloc failed\n");
        }
        dest = grown;
        dest_size = grown_size;
      }

      memcpy(dest + dest_current, out, have);
      dest_current += have;
    } while (strm.avail_out == 0);

    /* done when inflate() says it's done */
  } while (ret != Z_STREAM_END);

  inflateEnd(&strm);

  /* Reached end of file before the gzip stream was complete. */
  if (ret != Z_STREAM_END) {
    free(dest);
    throw std::runtime_error("zlib inflateEnd error: " + std::to_string(ret) + "\n");
  }

  /* The image size is the number of decompressed bytes, not the zlib
     status code. */
  Elf *elf = elf_memory(reinterpret_cast<char *>(dest), dest_current);
  if (elf == nullptr) {
    free(dest);
    throw std::runtime_error("libelf elf_memory error: " + std::string(elf_errmsg(elf_errno())));
  }

  /* The descriptor returned by elf_memory() keeps referring to `dest`
     instead of copying it, so the buffer must not be freed here.
     NOTE(review): the buffer is currently never released; ideally ElfObject
     should keep the pointer and free it after elf_end(). */
  return elf;
}

/** Get the next ELF section. ELF is a multisection file and we need to
iterate if we want to know certain information. */
ElfSection ElfSection::Get_Next_Section(void)
Expand Down
2 changes: 2 additions & 0 deletions libcextract/ElfCXX.hh
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,8 @@ class ElfObject : public Parser
return ElfObj;
}

/** Inflate the gzip-compressed file readable through `fd` and create an ELF
 *  descriptor from the decompressed bytes. Throws std::runtime_error when
 *  reading, inflating or creating the ELF descriptor fails.  */
static Elf *decompress_gz(int fd);

/** Iterator class for ELF sections. With this one can use C++ iterators
* to iterate through all sections of the ELF file. Like this:
* ```
Expand Down
5 changes: 3 additions & 2 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ clang_dep += cpp.find_library('clang-cpp', dirs : llvm_libdir)
clang_dep += cpp.find_library('LLVM', dirs : llvm_libdir)
############################# #########################
elf_dep = dependency('libelf') # libelf
zlib_dep = dependency('zlib')

subdir('libcextract')

Expand All @@ -81,14 +82,14 @@ executable('ce-inline', 'Inline.cpp',
include_directories : incdir,
install : true,
link_with : libcextract_static,
dependencies : elf_dep
dependencies : [elf_dep, zlib_dep]
)

executable('clang-extract', 'Main.cpp',
include_directories : incdir,
install : true,
link_with : libcextract_static,
dependencies : [elf_dep, clang_dep]
dependencies : [elf_dep, clang_dep, zlib_dep]
)

#########
Expand Down
Empty file.
Binary file added testsuite/decompress/test.gz
Binary file not shown.
12 changes: 12 additions & 0 deletions testsuite/decompress/test.gz.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/* { dg-options "-DCE_EXTRACT_FUNCTIONS=f -DCE_DEBUGINFO_PATH=../testsuite/decompress/test.gz" } */
/* Function named in CE_EXTRACT_FUNCTIONS above; its signature is what the
   dg-final scan at the end of this file looks for.  */
int f(void)
{
return 0;
}

/* Caller of f; returns whatever f returns.  */
int main(void)
{
return f();
}

/* { dg-final { scan-tree-dump "int f\(void\)" } } */
2 changes: 1 addition & 1 deletion testsuite/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
# Author: Giuliano Belinassi

runtest = find_program('lib/runtest.py')
ordinary_test_dirs = [ 'small/', 'includes/', 'ccp/', 'linux', 'lateext' ]
ordinary_test_dirs = [ 'small/', 'includes/', 'ccp/', 'decompress', 'linux', 'lateext' ]

returncode_to_bool = [ true, false ]

Expand Down

0 comments on commit f57a78f

Please sign in to comment.