Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support to read zlib compressed files, like vmlinux.gz
Browse files Browse the repository at this point in the history
This is useful for decompressing vmlinux files and kernel modules
compressed using zlib/gzip.

Signed-off-by: Marcos Paulo de Souza <mpdesouza@suse.com>
marcosps committed Aug 12, 2024
1 parent 610c3b9 commit 16a68ce
Showing 9 changed files with 120 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/testsuite.yml
Original file line number Diff line number Diff line change
@@ -29,7 +29,7 @@ jobs:
libLLVM${{ matrix.version }}
llvm${{ matrix.version }}
llvm${{ matrix.version }}-devel
meson ninja clang-tools gcc findutils bash libelf-devel
meson ninja clang-tools gcc findutils bash libelf-devel zlib-devel
- uses: actions/checkout@v2
- name: meson
run: meson setup build --buildtype=${{ matrix.build-type }} --native-file ce-native.ini
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -6,11 +6,12 @@ A tool to extract code content from source files using the clang and LLVM infras

### Compiling clang-extract

clang-extract requires clang, LLVM, libelf, meson and ninja in order to build.
clang-extract requires clang, LLVM, libelf, zlib, meson and ninja in order to build.
On openSUSE, you can install them by running:
```
$ sudo zypper install clang18 clang18-devel libclang-cpp18 \
clang-tools libLLVM18 llvm18 llvm18-devel libelf-devel meson ninja
clang-tools libLLVM18 llvm18 llvm18-devel libelf-devel meson ninja \
zlib-devel
```
It's advised to use LLVM 18 or higher, since it's well tested. There is
support for LLVM 16 and 17 as well, but you might find issues with them.
98 changes: 98 additions & 0 deletions libcextract/ElfCXX.cpp
Original file line number Diff line number Diff line change
@@ -22,6 +22,8 @@
#include <iostream>
#include <string.h>

#include <zlib.h>

const char *ElfSymbol::Get_Name(void)
{
struct Elf *elf = ElfObj.Get_Wrapped_Object();
@@ -105,6 +107,14 @@ ElfObject::ElfObject(const char *path)
break;
}

/* gzip magic number (zlib) */
case FileHandling::FILE_TYPE_GZ: {
ElfObj = decompress_gz(ElfFd);
/* decompress_gz should be closing the fd. */
ElfFd = -1;
break;
}

default:
close(ElfFd);
throw std::runtime_error("Format not recognized: " + parser_path + "\n");
@@ -126,6 +136,94 @@ ElfObject::~ElfObject(void)
}
}

/** Inflate a gzip-compressed ELF image read from `fd` and open it with libelf.
 *
 * The whole file is decompressed into a heap buffer which is then handed to
 * elf_memory().
 *
 * @param fd  Open file descriptor positioned at the start of the gzip
 *            stream.  It is always closed by this function, on success and
 *            on every error path.
 * @return    ELF descriptor backed by the decompressed in-memory image.
 * @throws std::runtime_error on allocation failure, read failure, corrupted
 *            or truncated gzip data, or if libelf rejects the image.
 *
 * NOTE: elf_memory() does not copy the image; the descriptor keeps pointing
 * into the buffer.  For that reason the buffer is intentionally NOT freed on
 * success: it must stay valid for as long as the returned descriptor is used.
 */
Elf *ElfObject::decompress_gz(int fd)
{
  const size_t CHUNK = 16384;

  unsigned char in[CHUNK];
  unsigned char out[CHUNK];

  size_t dest_size = CHUNK;
  size_t dest_current = 0;
  unsigned char *dest = (unsigned char *)malloc(dest_size);
  if (!dest) {
    close(fd);
    throw std::runtime_error("zlib dest malloc failed\n");
  }

  z_stream strm;
  memset(&strm, 0, sizeof(strm));

  /* Allocate inflate state.  Adding 16 to the window bits tells zlib to
     expect a gzip header and trailer instead of a raw zlib stream.  */
  int ret = inflateInit2(&strm, 16+MAX_WBITS);
  if (ret != Z_OK) {
    free(dest);
    close(fd);
    throw std::runtime_error("zlib inflateInit failed\n");
  }

  /* From here on every failure has to release the same three resources.  */
  auto release_all = [&](void) {
    inflateEnd(&strm);
    free(dest);
    close(fd);
  };

  /* Decompress until the deflate stream ends or we hit end of file.  */
  do {
    /* Keep the result in a signed variable: strm.avail_in is unsigned, so
       storing read()'s -1 there would hide the error.  */
    ssize_t nread = read(fd, in, CHUNK);
    if (nread < 0) {
      release_all();
      throw std::runtime_error("zlib read failed: " + std::to_string(nread) + "\n");
    }

    if (nread == 0)
      break;

    strm.avail_in = (uInt)nread;
    strm.next_in = in;

    /* Run inflate() on this input until the output buffer is not full.  */
    do {
      strm.avail_out = CHUNK;
      strm.next_out = out;
      ret = inflate(&strm, Z_NO_FLUSH);

      switch (ret) {
        case Z_NEED_DICT:
          ret = Z_DATA_ERROR; /* and fall through */
        case Z_DATA_ERROR:
        case Z_MEM_ERROR:
        case Z_STREAM_ERROR:
          release_all();
          throw std::runtime_error("zlib inflate error: " + std::to_string(ret) + "\n");
      }

      size_t have = CHUNK - strm.avail_out;

      /* Double the buffer when needed.  `have` never exceeds CHUNK and
         dest_size is at least CHUNK, so doubling once is always enough.  */
      if (have > dest_size - dest_current) {
        size_t grown_size = dest_size * 2;
        unsigned char *grown = (unsigned char *)realloc(dest, grown_size);
        if (!grown) {
          /* realloc leaves the old block untouched on failure.  */
          release_all();
          throw std::runtime_error("zlib dest realloc failed\n");
        }
        dest = grown;
        dest_size = grown_size;
      }

      memcpy(dest + dest_current, out, have);
      dest_current += have;
    } while (strm.avail_out == 0);

    /* Done when inflate() says it's done.  */
  } while (ret != Z_STREAM_END);

  close(fd);

  inflateEnd(&strm);

  /* EOF before the end of the gzip stream: the file is truncated.  */
  if (ret != Z_STREAM_END) {
    free(dest);
    throw std::runtime_error("zlib inflateEnd error: " + std::to_string(ret) + "\n");
  }

  /* Pass the number of decompressed bytes, not the zlib status code.  */
  Elf *elf = elf_memory((char *)dest, dest_current);
  if (elf == nullptr) {
    free(dest);
    throw std::runtime_error("libelf elf_memory error: " + std::string(elf_errmsg(elf_errno())));
  }

  /* Do not free `dest` here: the descriptor references this memory.  */
  return elf;
}

/** Get the next ELF section. ELF is a multisection file and we need to
iterate if we want to know certain information. */
ElfSection ElfSection::Get_Next_Section(void)
2 changes: 2 additions & 0 deletions libcextract/ElfCXX.hh
Original file line number Diff line number Diff line change
@@ -239,6 +239,8 @@ class ElfObject : public Parser
return ElfObj;
}

static Elf *decompress_gz(int fd);

/** Iterator class for ELF sections. With this one can use C++ iterators
* to iterate through all sections of the ELF file. Like this:
* ```
5 changes: 3 additions & 2 deletions meson.build
Original file line number Diff line number Diff line change
@@ -72,6 +72,7 @@ clang_dep += cpp.find_library('clang-cpp', dirs : llvm_libdir)
clang_dep += cpp.find_library('LLVM', dirs : llvm_libdir)
############################# #########################
elf_dep = dependency('libelf') # libelf
zlib_dep = dependency('zlib')

subdir('libcextract')

@@ -81,14 +82,14 @@ executable('ce-inline', 'Inline.cpp',
include_directories : incdir,
install : true,
link_with : libcextract_static,
dependencies : elf_dep
dependencies : [elf_dep, zlib_dep]
)

executable('clang-extract', 'Main.cpp',
include_directories : incdir,
install : true,
link_with : libcextract_static,
dependencies : [elf_dep, clang_dep]
dependencies : [elf_dep, clang_dep, zlib_dep]
)

#########
Empty file.
Binary file added testsuite/decompress/test.gz
Binary file not shown.
12 changes: 12 additions & 0 deletions testsuite/decompress/test.gz.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/* { dg-options "-DCE_EXTRACT_FUNCTIONS=f -DCE_DEBUGINFO_PATH=../testsuite/decompress/test.gz" } */
/* Function named by CE_EXTRACT_FUNCTIONS in this test's dg-options.
   Takes no arguments and always returns 0.  */
int f(void)
{
return 0;
}

/* Program entry point: calls f() and returns its result.  */
int main(void)
{
return f();
}

/* { dg-final { scan-tree-dump "int f\(void\)" } } */
2 changes: 1 addition & 1 deletion testsuite/meson.build
Original file line number Diff line number Diff line change
@@ -14,7 +14,7 @@
# Author: Giuliano Belinassi

runtest = find_program('lib/runtest.py')
ordinary_test_dirs = [ 'small/', 'includes/', 'ccp/', 'linux', 'lateext' ]
ordinary_test_dirs = [ 'small/', 'includes/', 'ccp/', 'decompress', 'linux', 'lateext' ]

returncode_to_bool = [ true, false ]

0 comments on commit 16a68ce

Please sign in to comment.