-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
x86_64: Add vDSO for x86-64 with gettimeofday/clock_gettime/getcpu
This implements a new vDSO for x86-64. The concept is similar to the existing vDSOs on i386 and PPC. x86-64 has had static vsyscalls before, but these are not flexible enough anymore. A vDSO is an ELF shared library supplied by the kernel that is mapped into user address space. The vDSO mapping is randomized for each process for security reasons. Doing this was needed for clock_gettime, because clock_gettime always needs a syscall fallback and having one at a fixed address would have made buffer overflow exploits too easy to write. The vdso can be disabled with vdso=0. It currently includes a new gettimeofday implementation and an optimized clock_gettime(). The gettimeofday implementation is slightly faster than the one in the old vsyscall. clock_gettime is significantly faster than the syscall for CLOCK_MONOTONIC and CLOCK_REALTIME. The new calls are generally faster than the old vsyscall. Advantages over the old x86-64 vsyscalls: - Extensible - Randomized - Cleaner - Easier to virtualize (the old static address range previously caused overhead, e.g. for Xen, because it has to create special page tables for it) Weak points: - glibc support still to be written The VM interface is partly based on Ingo Molnar's i386 version. Includes compile fix from Joachim Deguara Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
- Loading branch information
Andi Kleen
authored and
Linus Torvalds
committed
Jul 22, 2007
1 parent
a586df0
commit 2aae950
Showing
23 changed files
with
554 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# | ||
# x86-64 vDSO. | ||
# | ||
|
||
# files to link into the vdso | ||
# vdso-start.o has to be first | ||
vobjs-y := vdso-start.o vdso-note.o vclock_gettime.o vgetcpu.o vvar.o | ||
|
||
# files to link into kernel | ||
obj-y := vma.o vdso.o vdso-syms.o | ||
|
||
# Prefix every vDSO object name with $(obj)/ so the list can be used | ||
# directly as rule prerequisites. | ||
vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) | ||
|
||
# vdso.o must be rebuilt whenever the linked vdso.so image changes. | ||
$(obj)/vdso.o: $(obj)/vdso.so | ||
|
||
# Add the generated files to the "targets" list. | ||
# NOTE(review): presumably required so if_changed can track these files - confirm against the kbuild documentation. | ||
targets += vdso.so vdso.lds $(vobjs-y) vdso-syms.o | ||
|
||
# The DSO images are built using a special linker script. | ||
# cmd_syscall links through $(CC). Per-target options come from | ||
# SYSCFLAGS_<file name of the target>. Every prerequisite except FORCE is | ||
# appended after "-Wl,-T,", so the first prerequisite is consumed as the | ||
# linker script and the remaining ones are ordinary link inputs. | ||
quiet_cmd_syscall = SYSCALL $@ | ||
cmd_syscall = $(CC) -m elf_x86_64 -nostdlib $(SYSCFLAGS_$(@F)) \ | ||
-Wl,-T,$(filter-out FORCE,$^) -o $@ | ||
|
||
# Extra preprocessor flags used when vdso.lds is generated. | ||
# NOTE(review): -U$(ARCH) presumably keeps cpp from rewriting the bare architecture token inside the script - confirm. | ||
export CPPFLAGS_vdso.lds += -P -C -U$(ARCH) | ||
|
||
# Options for linking vdso.so: position-independent shared object with a | ||
# fixed soname and 4096-byte page sizes. | ||
# NOTE(review): ld-option presumably adds --hash-style=sysv only when the linker accepts it - confirm. | ||
vdso-flags = -fPIC -shared -Wl,-soname=linux-vdso.so.1 \ | ||
$(call ld-option, -Wl$(comma)--hash-style=sysv) \ | ||
-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 | ||
SYSCFLAGS_vdso.so = $(vdso-flags) | ||
|
||
# Declare vdso.S and the linked vdso.so as prerequisites of vdso.o. | ||
$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so | ||
|
||
# Link the vDSO image from the linker script plus all vDSO objects, using | ||
# cmd_syscall. | ||
$(obj)/vdso.so: $(src)/vdso.lds $(vobjs) FORCE | ||
$(call if_changed,syscall) | ||
|
||
# Compiler flags for the C objects that are linked into the vDSO. | ||
CF := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m64 | ||
|
||
# Target-specific assignment: these two objects are compiled with $(CF) | ||
# in place of the usual CFLAGS. | ||
$(obj)/vclock_gettime.o: CFLAGS = $(CF) | ||
$(obj)/vgetcpu.o: CFLAGS = $(CF) | ||
|
||
# We also create a special relocatable object that should mirror the symbol | ||
# table and layout of the linked DSO. With ld -R we can then refer to | ||
# these symbols in the kernel code rather than hand-coded addresses. | ||
extra-y += vdso-syms.o | ||
# built-in.o depends on vdso-syms.o; the -R option is appended to ld_flags | ||
# for that target only. | ||
$(obj)/built-in.o: $(obj)/vdso-syms.o | ||
$(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o | ||
|
||
# vdso-syms.o is linked from the same linker script and objects as vdso.so, | ||
# but with "-r -d" instead of the shared-object flags. | ||
# NOTE(review): presumably -r requests relocatable output and -d allocates common symbols - confirm against the ld manual. | ||
SYSCFLAGS_vdso-syms.o = -r -d | ||
$(obj)/vdso-syms.o: $(src)/vdso.lds $(vobjs) FORCE | ||
$(call if_changed,syscall) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
/* | ||
* Copyright 2006 Andi Kleen, SUSE Labs. | ||
* Subject to the GNU Public License, v.2 | ||
* | ||
* Fast user context implementation of clock_gettime and gettimeofday. | ||
* | ||
* The code should have no internal unresolved relocations. | ||
* Check with readelf after changing. | ||
* Also alternative() doesn't work. | ||
*/ | ||
|
||
#include <linux/kernel.h> | ||
#include <linux/posix-timers.h> | ||
#include <linux/time.h> | ||
#include <linux/string.h> | ||
#include <asm/vsyscall.h> | ||
#include <asm/vgtod.h> | ||
#include <asm/timex.h> | ||
#include <asm/hpet.h> | ||
#include <asm/unistd.h> | ||
#include <asm/io.h> | ||
#include <asm/vgtod.h> | ||
#include "vextern.h" | ||
|
||
#define gtod vdso_vsyscall_gtod_data | ||
|
||
static long vdso_fallback_gettime(long clock, struct timespec *ts) | ||
{ | ||
long ret; | ||
asm("syscall" : "=a" (ret) : | ||
"0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory"); | ||
return ret; | ||
} | ||
|
||
static inline long vgetns(void) | ||
{ | ||
cycles_t (*vread)(void); | ||
vread = gtod->clock.vread; | ||
return ((vread() - gtod->clock.cycle_last) * gtod->clock.mult) >> | ||
gtod->clock.shift; | ||
} | ||
|
||
static noinline int do_realtime(struct timespec *ts) | ||
{ | ||
unsigned long seq, ns; | ||
do { | ||
seq = read_seqbegin(>od->lock); | ||
ts->tv_sec = gtod->wall_time_sec; | ||
ts->tv_nsec = gtod->wall_time_nsec; | ||
ns = vgetns(); | ||
} while (unlikely(read_seqretry(>od->lock, seq))); | ||
timespec_add_ns(ts, ns); | ||
return 0; | ||
} | ||
|
||
/* | ||
 * Local copy of the helper in kernel/time.c, which we cannot directly | ||
 * access. Carries whole seconds between nsec and sec until | ||
 * 0 <= nsec < NSEC_PER_SEC, then stores both fields into *ts. | ||
 */ | ||
static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec) | ||
{ | ||
	for (; nsec >= NSEC_PER_SEC; nsec -= NSEC_PER_SEC) | ||
		sec++; | ||
	for (; nsec < 0; nsec += NSEC_PER_SEC) | ||
		sec--; | ||
	ts->tv_nsec = nsec; | ||
	ts->tv_sec = sec; | ||
} | ||
|
||
static noinline int do_monotonic(struct timespec *ts) | ||
{ | ||
unsigned long seq, ns, secs; | ||
do { | ||
seq = read_seqbegin(>od->lock); | ||
secs = gtod->wall_time_sec; | ||
ns = gtod->wall_time_nsec + vgetns(); | ||
secs += gtod->wall_to_monotonic.tv_sec; | ||
ns += gtod->wall_to_monotonic.tv_nsec; | ||
} while (unlikely(read_seqretry(>od->lock, seq))); | ||
vset_normalized_timespec(ts, secs, ns); | ||
return 0; | ||
} | ||
|
||
/* | ||
 * vDSO entry point for clock_gettime(). | ||
 * | ||
 * CLOCK_REALTIME and CLOCK_MONOTONIC are answered in user space when the | ||
 * fast path is enabled and the clock provides a vread hook; every other | ||
 * case goes through the system call fallback. | ||
 */ | ||
int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) | ||
{ | ||
	if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { | ||
		if (clock == CLOCK_REALTIME) | ||
			return do_realtime(ts); | ||
		if (clock == CLOCK_MONOTONIC) | ||
			return do_monotonic(ts); | ||
	} | ||
	return vdso_fallback_gettime(clock, ts); | ||
} | ||
/* Weak alias so the plain name resolves to the vDSO implementation. */ | ||
int clock_gettime(clockid_t, struct timespec *) | ||
__attribute__((weak, alias("__vdso_clock_gettime"))); | ||
|
||
int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) | ||
{ | ||
long ret; | ||
if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { | ||
BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != | ||
offsetof(struct timespec, tv_nsec) || | ||
sizeof(*tv) != sizeof(struct timespec)); | ||
do_realtime((struct timespec *)tv); | ||
tv->tv_usec /= 1000; | ||
if (unlikely(tz != NULL)) { | ||
/* This relies on gcc inlining the memcpy. We'll notice | ||
if it ever fails to do so. */ | ||
memcpy(tz, >od->sys_tz, sizeof(struct timezone)); | ||
} | ||
return 0; | ||
} | ||
asm("syscall" : "=a" (ret) : | ||
"0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); | ||
return ret; | ||
} | ||
int gettimeofday(struct timeval *, struct timezone *) | ||
__attribute__((weak, alias("__vdso_gettimeofday"))); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
/* | ||
* This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. | ||
* Here we can supply some information useful to userland. | ||
*/ | ||
|
||
#include <linux/uts.h> | ||
#include <linux/version.h> | ||
#include <linux/elfnote.h> | ||
|
||
ELFNOTE_START(Linux, 0, "a") /* open a note entry; NOTE(review): arguments presumably are name, type and section flags per <linux/elfnote.h> - confirm */ | ||
.long LINUX_VERSION_CODE /* note payload: the kernel version code */ | ||
ELFNOTE_END |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
.globl vdso_kernel_start /* make the label visible to other objects */ | ||
vdso_kernel_start: /* NOTE(review): presumably marks the start of the vDSO image, since the Makefile requires vdso-start.o to be linked first - confirm */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
.section ".vdso","a" /* put the image in its own allocatable ".vdso" section */ | ||
.incbin "arch/x86_64/vdso/vdso.so" /* embed the linked vDSO image byte for byte */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
/* | ||
* Linker script for vsyscall DSO. The vsyscall page is an ELF shared | ||
* object prelinked to its virtual address, and with only one read-only | ||
* segment (that fits in one page). This script controls its layout. | ||
*/ | ||
#include <asm/asm-offsets.h> | ||
#include "voffset.h" | ||
|
||
#define VDSO_PRELINK 0xffffffffff700000 | ||
|
||
SECTIONS | ||
{ | ||
. = VDSO_PRELINK + SIZEOF_HEADERS; /* start right after the ELF/program headers at the prelink address */ | ||
|
||
.hash : { *(.hash) } :text | ||
.gnu.hash : { *(.gnu.hash) } | ||
.dynsym : { *(.dynsym) } | ||
.dynstr : { *(.dynstr) } | ||
.gnu.version : { *(.gnu.version) } | ||
.gnu.version_d : { *(.gnu.version_d) } | ||
.gnu.version_r : { *(.gnu.version_r) } | ||
|
||
/* This linker script is used both with -r and with -shared. | ||
For the layouts to match, we need to skip more than enough | ||
space for the dynamic symbol table et al. If this amount | ||
is insufficient, ld -shared will barf. Just increase it here. */ | ||
. = VDSO_PRELINK + VDSO_TEXT_OFFSET; | ||
|
||
.text : { *(.text) } :text | ||
.text.ptr : { *(.text.ptr) } :text | ||
. = VDSO_PRELINK + 0x900; /* fixed offset for the data that follows; NOTE(review): hard-coded - presumably must stay above the end of .text, confirm when code grows */ | ||
.data : { *(.data) } :text | ||
.bss : { *(.bss) } :text | ||
|
||
.altinstructions : { *(.altinstructions) } :text | ||
.altinstr_replacement : { *(.altinstr_replacement) } :text | ||
|
||
.note : { *(.note.*) } :text :note | ||
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr | ||
.eh_frame : { KEEP (*(.eh_frame)) } :text | ||
.dynamic : { *(.dynamic) } :text :dynamic | ||
.useless : { /* collects GOT and linkonce/dynbss input sections into one output section */ | ||
*(.got.plt) *(.got) | ||
*(.gnu.linkonce.d.*) | ||
*(.dynbss) | ||
*(.gnu.linkonce.b.*) | ||
} :text | ||
} | ||
|
||
/* | ||
* We must supply the ELF program headers explicitly to get just one | ||
* PT_LOAD segment, and set the flags explicitly to make segments read-only. | ||
*/ | ||
PHDRS | ||
{ | ||
text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X; FILEHDR and PHDRS place the ELF header and program headers inside this segment */ | ||
dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ | ||
note PT_NOTE FLAGS(4); /* PF_R */ | ||
eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ | ||
} | ||
|
||
/* | ||
* This controls what symbols we export from the DSO. | ||
*/ | ||
VERSION | ||
{ | ||
LINUX_2.6 { | ||
global: /* each entry point is exported under its plain name and its __vdso_ name */ | ||
clock_gettime; | ||
__vdso_clock_gettime; | ||
gettimeofday; | ||
__vdso_gettimeofday; | ||
getcpu; | ||
__vdso_getcpu; | ||
local: *; /* every other symbol stays local to the DSO */ | ||
}; | ||
} |
Oops, something went wrong.