Diffstat (limited to 'arch/x86/vdso')
-rw-r--r--  arch/x86/vdso/.gitignore                  |   5
-rw-r--r--  arch/x86/vdso/Makefile                    | 165
-rw-r--r--  arch/x86/vdso/vclock_gettime.c            | 249
-rw-r--r--  arch/x86/vdso/vdso-fakesections.c         |  21
-rw-r--r--  arch/x86/vdso/vdso-layout.lds.S           |  96
-rw-r--r--  arch/x86/vdso/vdso.S                      |  22
-rw-r--r--  arch/x86/vdso/vdso.lds.S                  |   9
-rw-r--r--  arch/x86/vdso/vdso2c.c                    | 185
-rw-r--r--  arch/x86/vdso/vdso2c.h                    | 318
-rw-r--r--  arch/x86/vdso/vdso32-setup.c              | 354
-rw-r--r--  arch/x86/vdso/vdso32.S                    |  22
-rw-r--r--  arch/x86/vdso/vdso32/vclock_gettime.c     |  30
-rw-r--r--  arch/x86/vdso/vdso32/vdso-fakesections.c  |   1
-rw-r--r--  arch/x86/vdso/vdso32/vdso32.lds.S         |  24
-rw-r--r--  arch/x86/vdso/vdsox32.S                   |  22
-rw-r--r--  arch/x86/vdso/vdsox32.lds.S               |   9
-rw-r--r--  arch/x86/vdso/vma.c                       | 242
17 files changed, 1054 insertions, 720 deletions
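
As context for the diff below: the old incbin wrappers (vdso.S, vdso32.S, vdsox32.S) are deleted and replaced by C image files that the new vdso2c host tool generates from each vdso*.so.dbg. Going by the fprintf calls in vdso2c.h further down, a generated vdso-image-*.c has roughly the shape sketched here; struct vdso_image comes from asm/vdso.h, and the byte contents, sizes, and symbol offsets shown are made-up placeholders rather than real generated output.

/* Illustrative sketch only -- sizes and offsets are placeholders. */
#include <linux/linkage.h>
#include <asm/page_types.h>
#include <asm/vdso.h>

static unsigned char raw_data[8192] __page_aligned_data = {
	0x7F, 0x45, 0x4C, 0x46,	/* ELF magic, then the rest of the stripped vDSO image */
	/* ... */
};

static struct page *pages[2];	/* one entry per page of raw_data */

const struct vdso_image vdso_image_64 = {
	.data = raw_data,
	.size = 8192,
	.text_mapping = {
		.name = "[vdso]",
		.pages = pages,
	},
	.alt = 0x350,			/* offset of .altinstructions (placeholder) */
	.alt_len = 0x40,
	.sym_vvar_page = 0x2000,	/* placeholder offsets past the text mapping */
	.sym_hpet_page = 0x3000,
	.sym_end_mapping = 0x4000,
};

vma.c then consumes such an image: init_vdso_image() fills in the pages array and applies alternatives, and map_vdso() installs the text and vvar/hpet mappings at runtime.
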
diff --git a/arch/x86/vdso/.gitignore b/arch/x86/vdso/.gitignore index 3282874bc61..aae8ffdd588 100644 --- a/arch/x86/vdso/.gitignore +++ b/arch/x86/vdso/.gitignore @@ -1,8 +1,7 @@  vdso.lds -vdso-syms.lds  vdsox32.lds -vdsox32-syms.lds -vdso32-syms.lds  vdso32-syscall-syms.lds  vdso32-sysenter-syms.lds  vdso32-int80-syms.lds +vdso-image-*.c +vdso2c diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index fd14be1d147..61b04fe36e6 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -2,49 +2,63 @@  # Building vDSO images for x86.  # +KBUILD_CFLAGS += $(DISABLE_LTO) +  VDSO64-$(CONFIG_X86_64)		:= y  VDSOX32-$(CONFIG_X86_X32_ABI)	:= y  VDSO32-$(CONFIG_X86_32)		:= y  VDSO32-$(CONFIG_COMPAT)		:= y -vdso-install-$(VDSO64-y)	+= vdso.so -vdso-install-$(VDSOX32-y)	+= vdsox32.so -vdso-install-$(VDSO32-y)	+= $(vdso32-images) - -  # files to link into the vdso -vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o +vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vdso-fakesections.o -vobjs-$(VDSOX32-y) += $(vobjx32s-compat) +# files to link into kernel +obj-y				+= vma.o -# Filter out x32 objects. -vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y)) +# vDSO images to build +vdso_img-$(VDSO64-y)		+= 64 +vdso_img-$(VDSOX32-y)		+= x32 +vdso_img-$(VDSO32-y)		+= 32-int80 +vdso_img-$(CONFIG_COMPAT)	+= 32-syscall +vdso_img-$(VDSO32-y)		+= 32-sysenter -# files to link into kernel -obj-$(VDSO64-y)			+= vma.o vdso.o -obj-$(VDSOX32-y)		+= vdsox32.o -obj-$(VDSO32-y)			+= vdso32.o vdso32-setup.o +obj-$(VDSO32-y)			+= vdso32-setup.o -vobjs := $(foreach F,$(vobj64s),$(obj)/$F) +vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)  $(obj)/vdso.o: $(obj)/vdso.so -targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y) +targets += vdso.lds $(vobjs-y) + +# Build the vDSO image C files and link them in. 
+vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o) +vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c) +vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg) +obj-y += $(vdso_img_objs) +targets += $(vdso_img_cfiles) +targets += $(vdso_img_sodbg) +.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c)  export CPPFLAGS_vdso.lds += -P -C  VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \  			-Wl,--no-undefined \ -		      	-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 +			-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \ +			$(DISABLE_LTO) -$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so - -$(obj)/vdso.so.dbg: $(src)/vdso.lds $(vobjs) FORCE +$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE  	$(call if_changed,vdso) -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE -	$(call if_changed,objcopy) +HOST_EXTRACFLAGS += -I$(srctree)/tools/include +hostprogs-y			+= vdso2c + +quiet_cmd_vdso2c = VDSO2C  $@ +define cmd_vdso2c +	$(obj)/vdso2c $< $@ +endef + +$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso2c FORCE +	$(call if_changed,vdso2c)  #  # Don't omit frame pointers for ease of userspace debugging, but do @@ -52,7 +66,8 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE  #  CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \         $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ -       -fno-omit-frame-pointer -foptimize-sibling-calls +       -fno-omit-frame-pointer -foptimize-sibling-calls \ +       -DDISABLE_BRANCH_PROFILING  $(vobjs): KBUILD_CFLAGS += $(CFL) @@ -64,22 +79,6 @@ CFLAGS_REMOVE_vclock_gettime.o = -pg  CFLAGS_REMOVE_vgetcpu.o = -pg  CFLAGS_REMOVE_vvar.o = -pg -targets += vdso-syms.lds -obj-$(VDSO64-y)			+= vdso-syms.lds - -# -# Match symbols in the DSO that look like VDSO*; produce a file of constants. -# -sed-vdsosym := -e 's/^00*/0/' \ -	-e 's/^\([0-9a-fA-F]*\) . \(VDSO[a-zA-Z0-9_]*\)$$/\2 = 0x\1;/p' -quiet_cmd_vdsosym = VDSOSYM $@ -define cmd_vdsosym -	$(NM) $< | LC_ALL=C sed -n $(sed-vdsosym) | LC_ALL=C sort > $@ -endef - -$(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE -	$(call if_changed,vdsosym) -  #  # X32 processes use x32 vDSO to access 64bit kernel data.  # @@ -90,16 +89,19 @@ $(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE  # so that it can reach 64bit address space with 64bit pointers.  # -targets += vdsox32-syms.lds -obj-$(VDSOX32-y)		+= vdsox32-syms.lds -  CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds)  VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \  			   -Wl,-soname=linux-vdso.so.1 \  			   -Wl,-z,max-page-size=4096 \  			   -Wl,-z,common-page-size=4096 -vobjx32s-y := $(vobj64s:.o=-x32.o) +# 64-bit objects to re-brand as x32 +vobjs64-for-x32 := $(filter-out $(vobjs-nox32),$(vobjs-y)) + +# x32-rebranded versions +vobjx32s-y := $(vobjs64-for-x32:.o=-x32.o) + +# same thing, but in the output directory  vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F)  # Convert 64bit object file to x32 for x32 vDSO. @@ -109,9 +111,7 @@ quiet_cmd_x32 = X32     $@  $(obj)/%-x32.o: $(obj)/%.o FORCE  	$(call if_changed,x32) -targets += vdsox32.so vdsox32.so.dbg vdsox32.lds $(vobjx32s-y) - -$(obj)/vdsox32.o: $(src)/vdsox32.S $(obj)/vdsox32.so +targets += vdsox32.lds $(vobjx32s-y)  $(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE  	$(call if_changed,vdso) @@ -119,7 +119,6 @@ $(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE  #  # Build multiple 32-bit vDSO images to choose from at boot time.  
# -obj-$(VDSO32-y)			+= vdso32-syms.lds  vdso32.so-$(VDSO32-y)		+= int80  vdso32.so-$(CONFIG_COMPAT)	+= syscall  vdso32.so-$(VDSO32-y)		+= sysenter @@ -127,17 +126,15 @@ vdso32.so-$(VDSO32-y)		+= sysenter  vdso32-images			= $(vdso32.so-y:%=vdso32-%.so)  CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) -VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-soname=linux-gate.so.1 +VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1  # This makes sure the $(obj) subdirectory exists even though vdso32/  # is not a kbuild sub-make subdirectory.  override obj-dirs = $(dir $(obj)) $(obj)/vdso32/  targets += vdso32/vdso32.lds -targets += $(vdso32-images) $(vdso32-images:=.dbg) -targets += vdso32/note.o $(vdso32.so-y:%=vdso32/%.o) - -extra-y	+= $(vdso32-images) +targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o) +targets += vdso32/vclock_gettime.o vdso32/vdso-fakesections.o  $(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%) @@ -145,33 +142,25 @@ KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))  $(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)  $(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32 +KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic +KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) +KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) +KBUILD_CFLAGS_32 += -fno-omit-frame-pointer +KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING +$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) +  $(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \  				 $(obj)/vdso32/vdso32.lds \ +				 $(obj)/vdso32/vclock_gettime.o \ +				 $(obj)/vdso32/vdso-fakesections.o \  				 $(obj)/vdso32/note.o \  				 $(obj)/vdso32/%.o  	$(call if_changed,vdso) -# Make vdso32-*-syms.lds from each image, and then make sure they match. -# The only difference should be that some do not define VDSO32_SYSENTER_RETURN. - -targets += vdso32-syms.lds $(vdso32.so-y:%=vdso32-%-syms.lds) - -quiet_cmd_vdso32sym = VDSOSYM $@ -define cmd_vdso32sym -	if LC_ALL=C sort -u $(filter-out FORCE,$^) > $(@D)/.tmp_$(@F) && \ -	   $(foreach H,$(filter-out FORCE,$^),\ -		     if grep -q VDSO32_SYSENTER_RETURN $H; \ -		     then diff -u $(@D)/.tmp_$(@F) $H; \ -		     else sed /VDSO32_SYSENTER_RETURN/d $(@D)/.tmp_$(@F) | \ -			  diff -u - $H; fi &&) : ;\ -	then mv -f $(@D)/.tmp_$(@F) $@; \ -	else rm -f $(@D)/.tmp_$(@F); exit 1; \ -	fi -endef - -$(obj)/vdso32-syms.lds: $(vdso32.so-y:%=$(obj)/vdso32-%-syms.lds) FORCE -	$(call if_changed,vdso32sym) -  #  # The DSO images are built using a special linker script.  # @@ -181,19 +170,35 @@ quiet_cmd_vdso = VDSO    $@  		       -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \  		 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' -VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) +VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \ +	$(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS)  GCOV_PROFILE := n  # -# Install the unstripped copy of vdso*.so listed in $(vdso-install-y). +# Install the unstripped copies of vdso*.so.  If our toolchain supports +# build-id, install .build-id links as well.  
# -quiet_cmd_vdso_install = INSTALL $@ -      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ -$(vdso-install-y): %.so: $(obj)/%.so.dbg FORCE +quiet_cmd_vdso_install = INSTALL $(@:install_%=%) +define cmd_vdso_install +	cp $< "$(MODLIB)/vdso/$(@:install_%=%)"; \ +	if readelf -n $< |grep -q 'Build ID'; then \ +	  buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \ +	  first=`echo $$buildid | cut -b-2`; \ +	  last=`echo $$buildid | cut -b3-`; \ +	  mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \ +	  ln -sf "../../$(@:install_%=%)" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \ +	fi +endef + +vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%) + +$(MODLIB)/vdso: FORCE  	@mkdir -p $(MODLIB)/vdso + +$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE  	$(call cmd,vdso_install) -PHONY += vdso_install $(vdso-install-y) -vdso_install: $(vdso-install-y) +PHONY += vdso_install $(vdso_img_insttargets) +vdso_install: $(vdso_img_insttargets) FORCE  clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 72074d52840..9793322751e 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -4,63 +4,60 @@   *   * Fast user context implementation of clock_gettime, gettimeofday, and time.   * + * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> + *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany + *   * The code should have no internal unresolved relocations.   * Check with readelf after changing.   */ -/* Disable profiling for userspace code: */ -#define DISABLE_BRANCH_PROFILING - -#include <linux/kernel.h> -#include <linux/posix-timers.h> -#include <linux/time.h> -#include <linux/string.h> -#include <asm/vsyscall.h> -#include <asm/fixmap.h> +#include <uapi/linux/time.h>  #include <asm/vgtod.h> -#include <asm/timex.h>  #include <asm/hpet.h> +#include <asm/vvar.h>  #include <asm/unistd.h> -#include <asm/io.h> -#include <asm/pvclock.h> +#include <asm/msr.h> +#include <linux/math64.h> +#include <linux/time.h>  #define gtod (&VVAR(vsyscall_gtod_data)) -notrace static cycle_t vread_tsc(void) -{ -	cycle_t ret; -	u64 last; +extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); +extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); +extern time_t __vdso_time(time_t *t); -	/* -	 * Empirically, a fence (of type that depends on the CPU) -	 * before rdtsc is enough to ensure that rdtsc is ordered -	 * with respect to loads.  The various CPU manuals are unclear -	 * as to whether rdtsc can be reordered with later loads, -	 * but no one has ever seen it happen. -	 */ -	rdtsc_barrier(); -	ret = (cycle_t)vget_cycles(); +#ifdef CONFIG_HPET_TIMER +extern u8 hpet_page +	__attribute__((visibility("hidden"))); -	last = VVAR(vsyscall_gtod_data).clock.cycle_last; +static notrace cycle_t vread_hpet(void) +{ +	return *(const volatile u32 *)(&hpet_page + HPET_COUNTER); +} +#endif -	if (likely(ret >= last)) -		return ret; +#ifndef BUILD_VDSO32 -	/* -	 * GCC likes to generate cmov here, but this branch is extremely -	 * predictable (it's just a funciton of time and the likely is -	 * very likely) and there's a data dependence, so force GCC -	 * to generate a branch instead.  I don't barrier() because -	 * we don't actually need a barrier, and if this function -	 * ever gets inlined it will generate worse code. 
-	 */ -	asm volatile (""); -	return last; +#include <linux/kernel.h> +#include <asm/vsyscall.h> +#include <asm/fixmap.h> +#include <asm/pvclock.h> + +notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) +{ +	long ret; +	asm("syscall" : "=a" (ret) : +	    "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); +	return ret;  } -static notrace cycle_t vread_hpet(void) +notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)  { -	return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER); +	long ret; + +	asm("syscall" : "=a" (ret) : +	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); +	return ret;  }  #ifdef CONFIG_PARAVIRT_CLOCK @@ -124,7 +121,7 @@ static notrace cycle_t vread_pvclock(int *mode)  		*mode = VCLOCK_NONE;  	/* refer to tsc.c read_tsc() comment for rationale */ -	last = VVAR(vsyscall_gtod_data).clock.cycle_last; +	last = gtod->cycle_last;  	if (likely(ret >= last))  		return ret; @@ -133,11 +130,20 @@ static notrace cycle_t vread_pvclock(int *mode)  }  #endif +#else +  notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)  {  	long ret; -	asm("syscall" : "=a" (ret) : -	    "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory"); + +	asm( +		"mov %%ebx, %%edx \n" +		"mov %2, %%ebx \n" +		"call __kernel_vsyscall \n" +		"mov %%edx, %%ebx \n" +		: "=a" (ret) +		: "0" (__NR_clock_gettime), "g" (clock), "c" (ts) +		: "memory", "edx");  	return ret;  } @@ -145,28 +151,79 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)  {  	long ret; -	asm("syscall" : "=a" (ret) : -	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); +	asm( +		"mov %%ebx, %%edx \n" +		"mov %2, %%ebx \n" +		"call __kernel_vsyscall \n" +		"mov %%edx, %%ebx \n" +		: "=a" (ret) +		: "0" (__NR_gettimeofday), "g" (tv), "c" (tz) +		: "memory", "edx");  	return ret;  } +#ifdef CONFIG_PARAVIRT_CLOCK + +static notrace cycle_t vread_pvclock(int *mode) +{ +	*mode = VCLOCK_NONE; +	return 0; +} +#endif + +#endif + +notrace static cycle_t vread_tsc(void) +{ +	cycle_t ret; +	u64 last; + +	/* +	 * Empirically, a fence (of type that depends on the CPU) +	 * before rdtsc is enough to ensure that rdtsc is ordered +	 * with respect to loads.  The various CPU manuals are unclear +	 * as to whether rdtsc can be reordered with later loads, +	 * but no one has ever seen it happen. +	 */ +	rdtsc_barrier(); +	ret = (cycle_t)__native_read_tsc(); + +	last = gtod->cycle_last; + +	if (likely(ret >= last)) +		return ret; + +	/* +	 * GCC likes to generate cmov here, but this branch is extremely +	 * predictable (it's just a funciton of time and the likely is +	 * very likely) and there's a data dependence, so force GCC +	 * to generate a branch instead.  I don't barrier() because +	 * we don't actually need a barrier, and if this function +	 * ever gets inlined it will generate worse code. 
+	 */ +	asm volatile (""); +	return last; +}  notrace static inline u64 vgetsns(int *mode)  { -	long v; +	u64 v;  	cycles_t cycles; -	if (gtod->clock.vclock_mode == VCLOCK_TSC) + +	if (gtod->vclock_mode == VCLOCK_TSC)  		cycles = vread_tsc(); -	else if (gtod->clock.vclock_mode == VCLOCK_HPET) +#ifdef CONFIG_HPET_TIMER +	else if (gtod->vclock_mode == VCLOCK_HPET)  		cycles = vread_hpet(); +#endif  #ifdef CONFIG_PARAVIRT_CLOCK -	else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK) +	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)  		cycles = vread_pvclock(mode);  #endif  	else  		return 0; -	v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; -	return v * gtod->clock.mult; +	v = (cycles - gtod->cycle_last) & gtod->mask; +	return v * gtod->mult;  }  /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */ @@ -176,106 +233,102 @@ notrace static int __always_inline do_realtime(struct timespec *ts)  	u64 ns;  	int mode; -	ts->tv_nsec = 0;  	do { -		seq = read_seqcount_begin(>od->seq); -		mode = gtod->clock.vclock_mode; +		seq = gtod_read_begin(gtod); +		mode = gtod->vclock_mode;  		ts->tv_sec = gtod->wall_time_sec;  		ns = gtod->wall_time_snsec;  		ns += vgetsns(&mode); -		ns >>= gtod->clock.shift; -	} while (unlikely(read_seqcount_retry(>od->seq, seq))); +		ns >>= gtod->shift; +	} while (unlikely(gtod_read_retry(gtod, seq))); + +	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); +	ts->tv_nsec = ns; -	timespec_add_ns(ts, ns);  	return mode;  } -notrace static int do_monotonic(struct timespec *ts) +notrace static int __always_inline do_monotonic(struct timespec *ts)  {  	unsigned long seq;  	u64 ns;  	int mode; -	ts->tv_nsec = 0;  	do { -		seq = read_seqcount_begin(>od->seq); -		mode = gtod->clock.vclock_mode; +		seq = gtod_read_begin(gtod); +		mode = gtod->vclock_mode;  		ts->tv_sec = gtod->monotonic_time_sec;  		ns = gtod->monotonic_time_snsec;  		ns += vgetsns(&mode); -		ns >>= gtod->clock.shift; -	} while (unlikely(read_seqcount_retry(>od->seq, seq))); -	timespec_add_ns(ts, ns); +		ns >>= gtod->shift; +	} while (unlikely(gtod_read_retry(gtod, seq))); + +	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); +	ts->tv_nsec = ns;  	return mode;  } -notrace static int do_realtime_coarse(struct timespec *ts) +notrace static void do_realtime_coarse(struct timespec *ts)  {  	unsigned long seq;  	do { -		seq = read_seqcount_begin(>od->seq); -		ts->tv_sec = gtod->wall_time_coarse.tv_sec; -		ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; -	} while (unlikely(read_seqcount_retry(>od->seq, seq))); -	return 0; +		seq = gtod_read_begin(gtod); +		ts->tv_sec = gtod->wall_time_coarse_sec; +		ts->tv_nsec = gtod->wall_time_coarse_nsec; +	} while (unlikely(gtod_read_retry(gtod, seq)));  } -notrace static int do_monotonic_coarse(struct timespec *ts) +notrace static void do_monotonic_coarse(struct timespec *ts)  {  	unsigned long seq;  	do { -		seq = read_seqcount_begin(>od->seq); -		ts->tv_sec = gtod->monotonic_time_coarse.tv_sec; -		ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec; -	} while (unlikely(read_seqcount_retry(>od->seq, seq))); - -	return 0; +		seq = gtod_read_begin(gtod); +		ts->tv_sec = gtod->monotonic_time_coarse_sec; +		ts->tv_nsec = gtod->monotonic_time_coarse_nsec; +	} while (unlikely(gtod_read_retry(gtod, seq)));  }  notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)  { -	int ret = VCLOCK_NONE; -  	switch (clock) {  	case CLOCK_REALTIME: -		ret = do_realtime(ts); +		if (do_realtime(ts) == VCLOCK_NONE) +			goto fallback;  		break;  	case 
CLOCK_MONOTONIC: -		ret = do_monotonic(ts); +		if (do_monotonic(ts) == VCLOCK_NONE) +			goto fallback;  		break;  	case CLOCK_REALTIME_COARSE: -		return do_realtime_coarse(ts); +		do_realtime_coarse(ts); +		break;  	case CLOCK_MONOTONIC_COARSE: -		return do_monotonic_coarse(ts); +		do_monotonic_coarse(ts); +		break; +	default: +		goto fallback;  	} -	if (ret == VCLOCK_NONE) -		return vdso_fallback_gettime(clock, ts);  	return 0; +fallback: +	return vdso_fallback_gettime(clock, ts);  }  int clock_gettime(clockid_t, struct timespec *)  	__attribute__((weak, alias("__vdso_clock_gettime")));  notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)  { -	long ret = VCLOCK_NONE; -  	if (likely(tv != NULL)) { -		BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != -			     offsetof(struct timespec, tv_nsec) || -			     sizeof(*tv) != sizeof(struct timespec)); -		ret = do_realtime((struct timespec *)tv); +		if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE)) +			return vdso_fallback_gtod(tv, tz);  		tv->tv_usec /= 1000;  	}  	if (unlikely(tz != NULL)) { -		/* Avoid memcpy. Some old compilers fail to inline it */ -		tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest; -		tz->tz_dsttime = gtod->sys_tz.tz_dsttime; +		tz->tz_minuteswest = gtod->tz_minuteswest; +		tz->tz_dsttime = gtod->tz_dsttime;  	} -	if (ret == VCLOCK_NONE) -		return vdso_fallback_gtod(tv, tz);  	return 0;  }  int gettimeofday(struct timeval *, struct timezone *) @@ -287,8 +340,8 @@ int gettimeofday(struct timeval *, struct timezone *)   */  notrace time_t __vdso_time(time_t *t)  { -	/* This is atomic on x86_64 so we don't need any locks. */ -	time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec); +	/* This is atomic on x86 so we don't need any locks. */ +	time_t result = ACCESS_ONCE(gtod->wall_time_sec);  	if (t)  		*t = result; diff --git a/arch/x86/vdso/vdso-fakesections.c b/arch/x86/vdso/vdso-fakesections.c new file mode 100644 index 00000000000..aa5fbfab20a --- /dev/null +++ b/arch/x86/vdso/vdso-fakesections.c @@ -0,0 +1,21 @@ +/* + * Copyright 2014 Andy Lutomirski + * Subject to the GNU Public License, v.2 + * + * String table for loadable section headers.  See vdso2c.h for why + * this exists. + */ + +const char fake_shstrtab[] __attribute__((section(".fake_shstrtab"))) = +	".hash\0" +	".dynsym\0" +	".dynstr\0" +	".gnu.version\0" +	".gnu.version_d\0" +	".dynamic\0" +	".rodata\0" +	".fake_shstrtab\0"  /* Yay, self-referential code. */ +	".note\0" +	".eh_frame_hdr\0" +	".eh_frame\0" +	".text"; diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S index 634a2cf6204..9197544eea9 100644 --- a/arch/x86/vdso/vdso-layout.lds.S +++ b/arch/x86/vdso/vdso-layout.lds.S @@ -1,12 +1,24 @@ +#include <asm/vdso.h> +  /*   * Linker script for vDSO.  This is an ELF shared object prelinked to   * its virtual address, and with only one read-only segment.   * This script controls its layout.   */ +#if defined(BUILD_VDSO64) +# define SHDR_SIZE 64 +#elif defined(BUILD_VDSO32) || defined(BUILD_VDSOX32) +# define SHDR_SIZE 40 +#else +# error unknown VDSO target +#endif + +#define NUM_FAKE_SHDRS 13 +  SECTIONS  { -	. = VDSO_PRELINK + SIZEOF_HEADERS; +	. 
= SIZEOF_HEADERS;  	.hash		: { *(.hash) }			:text  	.gnu.hash	: { *(.gnu.hash) } @@ -16,34 +28,82 @@ SECTIONS  	.gnu.version_d	: { *(.gnu.version_d) }  	.gnu.version_r	: { *(.gnu.version_r) } +	.dynamic	: { *(.dynamic) }		:text	:dynamic + +	.rodata		: { +		*(.rodata*) +		*(.data*) +		*(.sdata*) +		*(.got.plt) *(.got) +		*(.gnu.linkonce.d.*) +		*(.bss*) +		*(.dynbss*) +		*(.gnu.linkonce.b.*) + +		/* +		 * Ideally this would live in a C file, but that won't +		 * work cleanly for x32 until we start building the x32 +		 * C code using an x32 toolchain. +		 */ +		VDSO_FAKE_SECTION_TABLE_START = .; +		. = . + NUM_FAKE_SHDRS * SHDR_SIZE; +		VDSO_FAKE_SECTION_TABLE_END = .; +	}						:text + +	.fake_shstrtab	: { *(.fake_shstrtab) }		:text + +  	.note		: { *(.note.*) }		:text	:note  	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr  	.eh_frame	: { KEEP (*(.eh_frame)) }	:text -	.dynamic	: { *(.dynamic) }		:text	:dynamic -	.rodata		: { *(.rodata*) }		:text -	.data		: { -	      *(.data*) -	      *(.sdata*) -	      *(.got.plt) *(.got) -	      *(.gnu.linkonce.d.*) -	      *(.bss*) -	      *(.dynbss*) -	      *(.gnu.linkonce.b.*) -	} +	/* +	 * Text is well-separated from actual data: there's plenty of +	 * stuff that isn't used at runtime in between. +	 */ + +	.text		: { *(.text*) }			:text	=0x90909090, -	.altinstructions	: { *(.altinstructions) } -	.altinstr_replacement	: { *(.altinstr_replacement) } +	/* +	 * At the end so that eu-elflint stays happy when vdso2c strips +	 * these.  A better implementation would avoid allocating space +	 * for these. +	 */ +	.altinstructions	: { *(.altinstructions) }	:text +	.altinstr_replacement	: { *(.altinstr_replacement) }	:text  	/* -	 * Align the actual code well away from the non-instruction data. -	 * This is the best thing for the I-cache. +	 * The remainder of the vDSO consists of special pages that are +	 * shared between the kernel and userspace.  It needs to be at the +	 * end so that it doesn't overlap the mapping of the actual +	 * vDSO image.  	 */ -	. = ALIGN(0x100); -	.text		: { *(.text*) }			:text	=0x90909090 +	. = ALIGN(PAGE_SIZE); +	vvar_page = .; + +	/* Place all vvars at the offsets in asm/vvar.h. */ +#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset; +#define __VVAR_KERNEL_LDS +#include <asm/vvar.h> +#undef __VVAR_KERNEL_LDS +#undef EMIT_VVAR + +	. = vvar_page + PAGE_SIZE; + +	hpet_page = .; +	. = . + PAGE_SIZE; + +	. = ALIGN(PAGE_SIZE); +	end_mapping = .; + +	/DISCARD/ : { +		*(.discard) +		*(.discard.*) +		*(__bug_table) +	}  }  /* diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S deleted file mode 100644 index 01f5e3b4613..00000000000 --- a/arch/x86/vdso/vdso.S +++ /dev/null @@ -1,22 +0,0 @@ -#include <asm/page_types.h> -#include <linux/linkage.h> -#include <linux/init.h> - -__PAGE_ALIGNED_DATA - -	.globl vdso_start, vdso_end -	.align PAGE_SIZE -vdso_start: -	.incbin "arch/x86/vdso/vdso.so" -vdso_end: -	.align PAGE_SIZE /* extra data here leaks to userspace. */ - -.previous - -	.globl vdso_pages -	.bss -	.align 8 -	.type vdso_pages, @object -vdso_pages: -	.zero (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE * 8 -	.size vdso_pages, .-vdso_pages diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/vdso/vdso.lds.S index b96b2677cad..6807932643c 100644 --- a/arch/x86/vdso/vdso.lds.S +++ b/arch/x86/vdso/vdso.lds.S @@ -1,14 +1,13 @@  /*   * Linker script for 64-bit vDSO.   * We #include the file to define the layout details. - * Here we only choose the prelinked virtual address.   
*   * This file defines the version script giving the user-exported symbols in - * the DSO.  We can define local symbols here called VDSO* to make their - * values visible using the asm-x86/vdso.h macros from the kernel proper. + * the DSO.   */ -#define VDSO_PRELINK 0xffffffffff700000 +#define BUILD_VDSO64 +  #include "vdso-layout.lds.S"  /* @@ -28,5 +27,3 @@ VERSION {  	local: *;  	};  } - -VDSO64_PRELINK = VDSO_PRELINK; diff --git a/arch/x86/vdso/vdso2c.c b/arch/x86/vdso/vdso2c.c new file mode 100644 index 00000000000..238dbe82776 --- /dev/null +++ b/arch/x86/vdso/vdso2c.c @@ -0,0 +1,185 @@ +#include <inttypes.h> +#include <stdint.h> +#include <unistd.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <fcntl.h> +#include <err.h> + +#include <sys/mman.h> +#include <sys/types.h> + +#include <tools/le_byteshift.h> + +#include <linux/elf.h> +#include <linux/types.h> + +const char *outfilename; + +/* Symbols that we need in vdso2c. */ +enum { +	sym_vvar_page, +	sym_hpet_page, +	sym_end_mapping, +	sym_VDSO_FAKE_SECTION_TABLE_START, +	sym_VDSO_FAKE_SECTION_TABLE_END, +}; + +const int special_pages[] = { +	sym_vvar_page, +	sym_hpet_page, +}; + +struct vdso_sym { +	const char *name; +	bool export; +}; + +struct vdso_sym required_syms[] = { +	[sym_vvar_page] = {"vvar_page", true}, +	[sym_hpet_page] = {"hpet_page", true}, +	[sym_end_mapping] = {"end_mapping", true}, +	[sym_VDSO_FAKE_SECTION_TABLE_START] = { +		"VDSO_FAKE_SECTION_TABLE_START", false +	}, +	[sym_VDSO_FAKE_SECTION_TABLE_END] = { +		"VDSO_FAKE_SECTION_TABLE_END", false +	}, +	{"VDSO32_NOTE_MASK", true}, +	{"VDSO32_SYSENTER_RETURN", true}, +	{"__kernel_vsyscall", true}, +	{"__kernel_sigreturn", true}, +	{"__kernel_rt_sigreturn", true}, +}; + +__attribute__((format(printf, 1, 2))) __attribute__((noreturn)) +static void fail(const char *format, ...) 
+{ +	va_list ap; +	va_start(ap, format); +	fprintf(stderr, "Error: "); +	vfprintf(stderr, format, ap); +	unlink(outfilename); +	exit(1); +	va_end(ap); +} + +/* + * Evil macros for little-endian reads and writes + */ +#define GLE(x, bits, ifnot)						\ +	__builtin_choose_expr(						\ +		(sizeof(*(x)) == bits/8),				\ +		(__typeof__(*(x)))get_unaligned_le##bits(x), ifnot) + +extern void bad_get_le(void); +#define LAST_GLE(x)							\ +	__builtin_choose_expr(sizeof(*(x)) == 1, *(x), bad_get_le()) + +#define GET_LE(x)							\ +	GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_GLE(x)))) + +#define PLE(x, val, bits, ifnot)					\ +	__builtin_choose_expr(						\ +		(sizeof(*(x)) == bits/8),				\ +		put_unaligned_le##bits((val), (x)), ifnot) + +extern void bad_put_le(void); +#define LAST_PLE(x, val)						\ +	__builtin_choose_expr(sizeof(*(x)) == 1, *(x) = (val), bad_put_le()) + +#define PUT_LE(x, val)					\ +	PLE(x, val, 64, PLE(x, val, 32, PLE(x, val, 16, LAST_PLE(x, val)))) + + +#define NSYMS (sizeof(required_syms) / sizeof(required_syms[0])) + +#define BITSFUNC3(name, bits) name##bits +#define BITSFUNC2(name, bits) BITSFUNC3(name, bits) +#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS) + +#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x +#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x) +#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x) + +#define ELF_BITS 64 +#include "vdso2c.h" +#undef ELF_BITS + +#define ELF_BITS 32 +#include "vdso2c.h" +#undef ELF_BITS + +static void go(void *addr, size_t len, FILE *outfile, const char *name) +{ +	Elf64_Ehdr *hdr = (Elf64_Ehdr *)addr; + +	if (hdr->e_ident[EI_CLASS] == ELFCLASS64) { +		go64(addr, len, outfile, name); +	} else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) { +		go32(addr, len, outfile, name); +	} else { +		fail("unknown ELF class\n"); +	} +} + +int main(int argc, char **argv) +{ +	int fd; +	off_t len; +	void *addr; +	FILE *outfile; +	char *name, *tmp; +	int namelen; + +	if (argc != 3) { +		printf("Usage: vdso2c INPUT OUTPUT\n"); +		return 1; +	} + +	/* +	 * Figure out the struct name.  If we're writing to a .so file, +	 * generate raw output insted. +	 */ +	name = strdup(argv[2]); +	namelen = strlen(name); +	if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) { +		name = NULL; +	} else { +		tmp = strrchr(name, '/'); +		if (tmp) +			name = tmp + 1; +		tmp = strchr(name, '.'); +		if (tmp) +			*tmp = '\0'; +		for (tmp = name; *tmp; tmp++) +			if (*tmp == '-') +				*tmp = '_'; +	} + +	fd = open(argv[1], O_RDONLY); +	if (fd == -1) +		err(1, "%s", argv[1]); + +	len = lseek(fd, 0, SEEK_END); +	if (len == (off_t)-1) +		err(1, "lseek"); + +	addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); +	if (addr == MAP_FAILED) +		err(1, "mmap"); + +	outfilename = argv[2]; +	outfile = fopen(outfilename, "w"); +	if (!outfile) +		err(1, "%s", argv[2]); + +	go(addr, (size_t)len, outfile, name); + +	munmap(addr, len); +	fclose(outfile); + +	return 0; +} diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h new file mode 100644 index 00000000000..11b65d4f941 --- /dev/null +++ b/arch/x86/vdso/vdso2c.h @@ -0,0 +1,318 @@ +/* + * This file is included twice from vdso2c.c.  It generates code for 32-bit + * and 64-bit vDSOs.  We need both for 64-bit builds, since 32-bit vDSOs + * are built for 32-bit userspace. 
+ */ + +/* + * We're writing a section table for a few reasons: + * + * The Go runtime had a couple of bugs: it would read the section + * table to try to figure out how many dynamic symbols there were (it + * shouldn't have looked at the section table at all) and, if there + * were no SHT_SYNDYM section table entry, it would use an + * uninitialized value for the number of symbols.  An empty DYNSYM + * table would work, but I see no reason not to write a valid one (and + * keep full performance for old Go programs).  This hack is only + * needed on x86_64. + * + * The bug was introduced on 2012-08-31 by: + * https://code.google.com/p/go/source/detail?r=56ea40aac72b + * and was fixed on 2014-06-13 by: + * https://code.google.com/p/go/source/detail?r=fc1cd5e12595 + * + * Binutils has issues debugging the vDSO: it reads the section table to + * find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which + * would break build-id if we removed the section table.  Binutils + * also requires that shstrndx != 0.  See: + * https://sourceware.org/bugzilla/show_bug.cgi?id=17064 + * + * elfutils might not look for PT_NOTE if there is a section table at + * all.  I don't know whether this matters for any practical purpose. + * + * For simplicity, rather than hacking up a partial section table, we + * just write a mostly complete one.  We omit non-dynamic symbols, + * though, since they're rather large. + * + * Once binutils gets fixed, we might be able to drop this for all but + * the 64-bit vdso, since build-id only works in kernel RPMs, and + * systems that update to new enough kernel RPMs will likely update + * binutils in sync.  build-id has never worked for home-built kernel + * RPMs without manual symlinking, and I suspect that no one ever does + * that. + */ +struct BITSFUNC(fake_sections) +{ +	ELF(Shdr) *table; +	unsigned long table_offset; +	int count, max_count; + +	int in_shstrndx; +	unsigned long shstr_offset; +	const char *shstrtab; +	size_t shstrtab_len; + +	int out_shstrndx; +}; + +static unsigned int BITSFUNC(find_shname)(struct BITSFUNC(fake_sections) *out, +					  const char *name) +{ +	const char *outname = out->shstrtab; +	while (outname - out->shstrtab < out->shstrtab_len) { +		if (!strcmp(name, outname)) +			return (outname - out->shstrtab) + out->shstr_offset; +		outname += strlen(outname) + 1; +	} + +	if (*name) +		printf("Warning: could not find output name \"%s\"\n", name); +	return out->shstr_offset + out->shstrtab_len - 1;  /* Use a null. 
*/ +} + +static void BITSFUNC(init_sections)(struct BITSFUNC(fake_sections) *out) +{ +	if (!out->in_shstrndx) +		fail("didn't find the fake shstrndx\n"); + +	memset(out->table, 0, out->max_count * sizeof(ELF(Shdr))); + +	if (out->max_count < 1) +		fail("we need at least two fake output sections\n"); + +	PUT_LE(&out->table[0].sh_type, SHT_NULL); +	PUT_LE(&out->table[0].sh_name, BITSFUNC(find_shname)(out, "")); + +	out->count = 1; +} + +static void BITSFUNC(copy_section)(struct BITSFUNC(fake_sections) *out, +				   int in_idx, const ELF(Shdr) *in, +				   const char *name) +{ +	uint64_t flags = GET_LE(&in->sh_flags); + +	bool copy = flags & SHF_ALLOC && +		(GET_LE(&in->sh_size) || +		 (GET_LE(&in->sh_type) != SHT_RELA && +		  GET_LE(&in->sh_type) != SHT_REL)) && +		strcmp(name, ".altinstructions") && +		strcmp(name, ".altinstr_replacement"); + +	if (!copy) +		return; + +	if (out->count >= out->max_count) +		fail("too many copied sections (max = %d)\n", out->max_count); + +	if (in_idx == out->in_shstrndx) +		out->out_shstrndx = out->count; + +	out->table[out->count] = *in; +	PUT_LE(&out->table[out->count].sh_name, +	       BITSFUNC(find_shname)(out, name)); + +	/* elfutils requires that a strtab have the correct type. */ +	if (!strcmp(name, ".fake_shstrtab")) +		PUT_LE(&out->table[out->count].sh_type, SHT_STRTAB); + +	out->count++; +} + +static void BITSFUNC(go)(void *addr, size_t len, +			 FILE *outfile, const char *name) +{ +	int found_load = 0; +	unsigned long load_size = -1;  /* Work around bogus warning */ +	unsigned long data_size; +	ELF(Ehdr) *hdr = (ELF(Ehdr) *)addr; +	int i; +	unsigned long j; +	ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr, +		*alt_sec = NULL; +	ELF(Dyn) *dyn = 0, *dyn_end = 0; +	const char *secstrings; +	uint64_t syms[NSYMS] = {}; + +	struct BITSFUNC(fake_sections) fake_sections = {}; + +	ELF(Phdr) *pt = (ELF(Phdr) *)(addr + GET_LE(&hdr->e_phoff)); + +	/* Walk the segment table. 
*/ +	for (i = 0; i < GET_LE(&hdr->e_phnum); i++) { +		if (GET_LE(&pt[i].p_type) == PT_LOAD) { +			if (found_load) +				fail("multiple PT_LOAD segs\n"); + +			if (GET_LE(&pt[i].p_offset) != 0 || +			    GET_LE(&pt[i].p_vaddr) != 0) +				fail("PT_LOAD in wrong place\n"); + +			if (GET_LE(&pt[i].p_memsz) != GET_LE(&pt[i].p_filesz)) +				fail("cannot handle memsz != filesz\n"); + +			load_size = GET_LE(&pt[i].p_memsz); +			found_load = 1; +		} else if (GET_LE(&pt[i].p_type) == PT_DYNAMIC) { +			dyn = addr + GET_LE(&pt[i].p_offset); +			dyn_end = addr + GET_LE(&pt[i].p_offset) + +				GET_LE(&pt[i].p_memsz); +		} +	} +	if (!found_load) +		fail("no PT_LOAD seg\n"); +	data_size = (load_size + 4095) / 4096 * 4096; + +	/* Walk the dynamic table */ +	for (i = 0; dyn + i < dyn_end && +		     GET_LE(&dyn[i].d_tag) != DT_NULL; i++) { +		typeof(dyn[i].d_tag) tag = GET_LE(&dyn[i].d_tag); +		if (tag == DT_REL || tag == DT_RELSZ || tag == DT_RELA || +		    tag == DT_RELENT || tag == DT_TEXTREL) +			fail("vdso image contains dynamic relocations\n"); +	} + +	/* Walk the section table */ +	secstrings_hdr = addr + GET_LE(&hdr->e_shoff) + +		GET_LE(&hdr->e_shentsize)*GET_LE(&hdr->e_shstrndx); +	secstrings = addr + GET_LE(&secstrings_hdr->sh_offset); +	for (i = 0; i < GET_LE(&hdr->e_shnum); i++) { +		ELF(Shdr) *sh = addr + GET_LE(&hdr->e_shoff) + +			GET_LE(&hdr->e_shentsize) * i; +		if (GET_LE(&sh->sh_type) == SHT_SYMTAB) +			symtab_hdr = sh; + +		if (!strcmp(secstrings + GET_LE(&sh->sh_name), +			    ".altinstructions")) +			alt_sec = sh; +	} + +	if (!symtab_hdr) +		fail("no symbol table\n"); + +	strtab_hdr = addr + GET_LE(&hdr->e_shoff) + +		GET_LE(&hdr->e_shentsize) * GET_LE(&symtab_hdr->sh_link); + +	/* Walk the symbol table */ +	for (i = 0; +	     i < GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize); +	     i++) { +		int k; +		ELF(Sym) *sym = addr + GET_LE(&symtab_hdr->sh_offset) + +			GET_LE(&symtab_hdr->sh_entsize) * i; +		const char *name = addr + GET_LE(&strtab_hdr->sh_offset) + +			GET_LE(&sym->st_name); + +		for (k = 0; k < NSYMS; k++) { +			if (!strcmp(name, required_syms[k].name)) { +				if (syms[k]) { +					fail("duplicate symbol %s\n", +					     required_syms[k].name); +				} +				syms[k] = GET_LE(&sym->st_value); +			} +		} + +		if (!strcmp(name, "fake_shstrtab")) { +			ELF(Shdr) *sh; + +			fake_sections.in_shstrndx = GET_LE(&sym->st_shndx); +			fake_sections.shstrtab = addr + GET_LE(&sym->st_value); +			fake_sections.shstrtab_len = GET_LE(&sym->st_size); +			sh = addr + GET_LE(&hdr->e_shoff) + +				GET_LE(&hdr->e_shentsize) * +				fake_sections.in_shstrndx; +			fake_sections.shstr_offset = GET_LE(&sym->st_value) - +				GET_LE(&sh->sh_addr); +		} +	} + +	/* Build the output section table. 
*/ +	if (!syms[sym_VDSO_FAKE_SECTION_TABLE_START] || +	    !syms[sym_VDSO_FAKE_SECTION_TABLE_END]) +		fail("couldn't find fake section table\n"); +	if ((syms[sym_VDSO_FAKE_SECTION_TABLE_END] - +	     syms[sym_VDSO_FAKE_SECTION_TABLE_START]) % sizeof(ELF(Shdr))) +		fail("fake section table size isn't a multiple of sizeof(Shdr)\n"); +	fake_sections.table = addr + syms[sym_VDSO_FAKE_SECTION_TABLE_START]; +	fake_sections.table_offset = syms[sym_VDSO_FAKE_SECTION_TABLE_START]; +	fake_sections.max_count = (syms[sym_VDSO_FAKE_SECTION_TABLE_END] - +				   syms[sym_VDSO_FAKE_SECTION_TABLE_START]) / +		sizeof(ELF(Shdr)); + +	BITSFUNC(init_sections)(&fake_sections); +	for (i = 0; i < GET_LE(&hdr->e_shnum); i++) { +		ELF(Shdr) *sh = addr + GET_LE(&hdr->e_shoff) + +			GET_LE(&hdr->e_shentsize) * i; +		BITSFUNC(copy_section)(&fake_sections, i, sh, +				       secstrings + GET_LE(&sh->sh_name)); +	} +	if (!fake_sections.out_shstrndx) +		fail("didn't generate shstrndx?!?\n"); + +	PUT_LE(&hdr->e_shoff, fake_sections.table_offset); +	PUT_LE(&hdr->e_shentsize, sizeof(ELF(Shdr))); +	PUT_LE(&hdr->e_shnum, fake_sections.count); +	PUT_LE(&hdr->e_shstrndx, fake_sections.out_shstrndx); + +	/* Validate mapping addresses. */ +	for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) { +		if (!syms[i]) +			continue;  /* The mapping isn't used; ignore it. */ + +		if (syms[i] % 4096) +			fail("%s must be a multiple of 4096\n", +			     required_syms[i].name); +		if (syms[i] < data_size) +			fail("%s must be after the text mapping\n", +			     required_syms[i].name); +		if (syms[sym_end_mapping] < syms[i] + 4096) +			fail("%s overruns end_mapping\n", +			     required_syms[i].name); +	} +	if (syms[sym_end_mapping] % 4096) +		fail("end_mapping must be a multiple of 4096\n"); + +	if (!name) { +		fwrite(addr, load_size, 1, outfile); +		return; +	} + +	fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n"); +	fprintf(outfile, "#include <linux/linkage.h>\n"); +	fprintf(outfile, "#include <asm/page_types.h>\n"); +	fprintf(outfile, "#include <asm/vdso.h>\n"); +	fprintf(outfile, "\n"); +	fprintf(outfile, +		"static unsigned char raw_data[%lu] __page_aligned_data = {", +		data_size); +	for (j = 0; j < load_size; j++) { +		if (j % 10 == 0) +			fprintf(outfile, "\n\t"); +		fprintf(outfile, "0x%02X, ", (int)((unsigned char *)addr)[j]); +	} +	fprintf(outfile, "\n};\n\n"); + +	fprintf(outfile, "static struct page *pages[%lu];\n\n", +		data_size / 4096); + +	fprintf(outfile, "const struct vdso_image %s = {\n", name); +	fprintf(outfile, "\t.data = raw_data,\n"); +	fprintf(outfile, "\t.size = %lu,\n", data_size); +	fprintf(outfile, "\t.text_mapping = {\n"); +	fprintf(outfile, "\t\t.name = \"[vdso]\",\n"); +	fprintf(outfile, "\t\t.pages = pages,\n"); +	fprintf(outfile, "\t},\n"); +	if (alt_sec) { +		fprintf(outfile, "\t.alt = %lu,\n", +			(unsigned long)GET_LE(&alt_sec->sh_offset)); +		fprintf(outfile, "\t.alt_len = %lu,\n", +			(unsigned long)GET_LE(&alt_sec->sh_size)); +	} +	for (i = 0; i < NSYMS; i++) { +		if (required_syms[i].export && syms[i]) +			fprintf(outfile, "\t.sym_%s = 0x%" PRIx64 ",\n", +				required_syms[i].name, syms[i]); +	} +	fprintf(outfile, "};\n"); +} diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index d6bfb876cfb..e4f7781ee16 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -8,57 +8,31 @@  #include <linux/init.h>  #include <linux/smp.h> -#include <linux/thread_info.h> -#include <linux/sched.h> -#include <linux/gfp.h> -#include 
<linux/string.h> -#include <linux/elf.h> -#include <linux/mm.h> -#include <linux/err.h> -#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/mm_types.h>  #include <asm/cpufeature.h> -#include <asm/msr.h> -#include <asm/pgtable.h> -#include <asm/unistd.h> -#include <asm/elf.h> -#include <asm/tlbflush.h> +#include <asm/processor.h>  #include <asm/vdso.h> -#include <asm/proto.h> - -enum { -	VDSO_DISABLED = 0, -	VDSO_ENABLED = 1, -	VDSO_COMPAT = 2, -};  #ifdef CONFIG_COMPAT_VDSO -#define VDSO_DEFAULT	VDSO_COMPAT +#define VDSO_DEFAULT	0  #else -#define VDSO_DEFAULT	VDSO_ENABLED -#endif - -#ifdef CONFIG_X86_64 -#define vdso_enabled			sysctl_vsyscall32 -#define arch_setup_additional_pages	syscall32_setup_pages +#define VDSO_DEFAULT	1  #endif  /* - * This is the difference between the prelinked addresses in the vDSO images - * and the VDSO_HIGH_BASE address where CONFIG_COMPAT_VDSO places the vDSO - * in the user address space. - */ -#define VDSO_ADDR_ADJUST	(VDSO_HIGH_BASE - (unsigned long)VDSO32_PRELINK) - -/*   * Should the kernel map a VDSO page into processes and pass its   * address down to glibc upon exec()?   */ -unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT; +unsigned int __read_mostly vdso32_enabled = VDSO_DEFAULT; -static int __init vdso_setup(char *s) +static int __init vdso32_setup(char *s)  { -	vdso_enabled = simple_strtoul(s, NULL, 0); +	vdso32_enabled = simple_strtoul(s, NULL, 0); + +	if (vdso32_enabled > 1) +		pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n");  	return 1;  } @@ -68,300 +42,43 @@ static int __init vdso_setup(char *s)   * behavior on both 64-bit and 32-bit kernels.   * On 32-bit kernels, vdso=[012] means the same thing.   */ -__setup("vdso32=", vdso_setup); +__setup("vdso32=", vdso32_setup);  #ifdef CONFIG_X86_32 -__setup_param("vdso=", vdso32_setup, vdso_setup, 0); - -EXPORT_SYMBOL_GPL(vdso_enabled); +__setup_param("vdso=", vdso_setup, vdso32_setup, 0);  #endif -static __init void reloc_symtab(Elf32_Ehdr *ehdr, -				unsigned offset, unsigned size) -{ -	Elf32_Sym *sym = (void *)ehdr + offset; -	unsigned nsym = size / sizeof(*sym); -	unsigned i; - -	for(i = 0; i < nsym; i++, sym++) { -		if (sym->st_shndx == SHN_UNDEF || -		    sym->st_shndx == SHN_ABS) -			continue;  /* skip */ - -		if (sym->st_shndx > SHN_LORESERVE) { -			printk(KERN_INFO "VDSO: unexpected st_shndx %x\n", -			       sym->st_shndx); -			continue; -		} - -		switch(ELF_ST_TYPE(sym->st_info)) { -		case STT_OBJECT: -		case STT_FUNC: -		case STT_SECTION: -		case STT_FILE: -			sym->st_value += VDSO_ADDR_ADJUST; -		} -	} -} - -static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) -{ -	Elf32_Dyn *dyn = (void *)ehdr + offset; - -	for(; dyn->d_tag != DT_NULL; dyn++) -		switch(dyn->d_tag) { -		case DT_PLTGOT: -		case DT_HASH: -		case DT_STRTAB: -		case DT_SYMTAB: -		case DT_RELA: -		case DT_INIT: -		case DT_FINI: -		case DT_REL: -		case DT_DEBUG: -		case DT_JMPREL: -		case DT_VERSYM: -		case DT_VERDEF: -		case DT_VERNEED: -		case DT_ADDRRNGLO ... DT_ADDRRNGHI: -			/* definitely pointers needing relocation */ -			dyn->d_un.d_ptr += VDSO_ADDR_ADJUST; -			break; - -		case DT_ENCODING ... OLD_DT_LOOS-1: -		case DT_LOOS ... 
DT_HIOS-1: -			/* Tags above DT_ENCODING are pointers if -			   they're even */ -			if (dyn->d_tag >= DT_ENCODING && -			    (dyn->d_tag & 1) == 0) -				dyn->d_un.d_ptr += VDSO_ADDR_ADJUST; -			break; - -		case DT_VERDEFNUM: -		case DT_VERNEEDNUM: -		case DT_FLAGS_1: -		case DT_RELACOUNT: -		case DT_RELCOUNT: -		case DT_VALRNGLO ... DT_VALRNGHI: -			/* definitely not pointers */ -			break; - -		case OLD_DT_LOOS ... DT_LOOS-1: -		case DT_HIOS ... DT_VALRNGLO-1: -		default: -			if (dyn->d_tag > DT_ENCODING) -				printk(KERN_INFO "VDSO: unexpected DT_tag %x\n", -				       dyn->d_tag); -			break; -		} -} - -static __init void relocate_vdso(Elf32_Ehdr *ehdr) -{ -	Elf32_Phdr *phdr; -	Elf32_Shdr *shdr; -	int i; - -	BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 || -	       !elf_check_arch_ia32(ehdr) || -	       ehdr->e_type != ET_DYN); - -	ehdr->e_entry += VDSO_ADDR_ADJUST; - -	/* rebase phdrs */ -	phdr = (void *)ehdr + ehdr->e_phoff; -	for (i = 0; i < ehdr->e_phnum; i++) { -		phdr[i].p_vaddr += VDSO_ADDR_ADJUST; - -		/* relocate dynamic stuff */ -		if (phdr[i].p_type == PT_DYNAMIC) -			reloc_dyn(ehdr, phdr[i].p_offset); -	} - -	/* rebase sections */ -	shdr = (void *)ehdr + ehdr->e_shoff; -	for(i = 0; i < ehdr->e_shnum; i++) { -		if (!(shdr[i].sh_flags & SHF_ALLOC)) -			continue; - -		shdr[i].sh_addr += VDSO_ADDR_ADJUST; - -		if (shdr[i].sh_type == SHT_SYMTAB || -		    shdr[i].sh_type == SHT_DYNSYM) -			reloc_symtab(ehdr, shdr[i].sh_offset, -				     shdr[i].sh_size); -	} -} - -static struct page *vdso32_pages[1]; -  #ifdef CONFIG_X86_64  #define	vdso32_sysenter()	(boot_cpu_has(X86_FEATURE_SYSENTER32))  #define	vdso32_syscall()	(boot_cpu_has(X86_FEATURE_SYSCALL32)) -/* May not be __init: called during resume */ -void syscall32_cpu_init(void) -{ -	/* Load these always in case some future AMD CPU supports -	   SYSENTER from compat mode too. */ -	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); -	wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); -	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target); - -	wrmsrl(MSR_CSTAR, ia32_cstar_target); -} - -#define compat_uses_vma		1 - -static inline void map_compat_vdso(int map) -{ -} -  #else  /* CONFIG_X86_32 */  #define vdso32_sysenter()	(boot_cpu_has(X86_FEATURE_SEP))  #define vdso32_syscall()	(0) -void enable_sep_cpu(void) -{ -	int cpu = get_cpu(); -	struct tss_struct *tss = &per_cpu(init_tss, cpu); - -	if (!boot_cpu_has(X86_FEATURE_SEP)) { -		put_cpu(); -		return; -	} - -	tss->x86_tss.ss1 = __KERNEL_CS; -	tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss; -	wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); -	wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0); -	wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0); -	put_cpu();	 -} - -static struct vm_area_struct gate_vma; - -static int __init gate_vma_init(void) -{ -	gate_vma.vm_mm = NULL; -	gate_vma.vm_start = FIXADDR_USER_START; -	gate_vma.vm_end = FIXADDR_USER_END; -	gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; -	gate_vma.vm_page_prot = __P101; - -	return 0; -} - -#define compat_uses_vma		0 - -static void map_compat_vdso(int map) -{ -	static int vdso_mapped; - -	if (map == vdso_mapped) -		return; - -	vdso_mapped = map; - -	__set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[0]) << PAGE_SHIFT, -		     map ? 
PAGE_READONLY_EXEC : PAGE_NONE); - -	/* flush stray tlbs */ -	flush_tlb_all(); -} -  #endif	/* CONFIG_X86_64 */ -int __init sysenter_setup(void) -{ -	void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); -	const void *vsyscall; -	size_t vsyscall_len; - -	vdso32_pages[0] = virt_to_page(syscall_page); - -#ifdef CONFIG_X86_32 -	gate_vma_init(); +#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT) +const struct vdso_image *selected_vdso32;  #endif -	if (vdso32_syscall()) { -		vsyscall = &vdso32_syscall_start; -		vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start; -	} else if (vdso32_sysenter()){ -		vsyscall = &vdso32_sysenter_start; -		vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start; -	} else { -		vsyscall = &vdso32_int80_start; -		vsyscall_len = &vdso32_int80_end - &vdso32_int80_start; -	} - -	memcpy(syscall_page, vsyscall, vsyscall_len); -	relocate_vdso(syscall_page); - -	return 0; -} - -/* Setup a VMA at program startup for the vsyscall page */ -int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +int __init sysenter_setup(void)  { -	struct mm_struct *mm = current->mm; -	unsigned long addr; -	int ret = 0; -	bool compat; - -#ifdef CONFIG_X86_X32_ABI -	if (test_thread_flag(TIF_X32)) -		return x32_setup_additional_pages(bprm, uses_interp); +#ifdef CONFIG_COMPAT +	if (vdso32_syscall()) +		selected_vdso32 = &vdso_image_32_syscall; +	else  #endif +	if (vdso32_sysenter()) +		selected_vdso32 = &vdso_image_32_sysenter; +	else +		selected_vdso32 = &vdso_image_32_int80; -	if (vdso_enabled == VDSO_DISABLED) -		return 0; - -	down_write(&mm->mmap_sem); - -	/* Test compat mode once here, in case someone -	   changes it via sysctl */ -	compat = (vdso_enabled == VDSO_COMPAT); - -	map_compat_vdso(compat); - -	if (compat) -		addr = VDSO_HIGH_BASE; -	else { -		addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); -		if (IS_ERR_VALUE(addr)) { -			ret = addr; -			goto up_fail; -		} -	} - -	current->mm->context.vdso = (void *)addr; - -	if (compat_uses_vma || !compat) { -		/* -		 * MAYWRITE to allow gdb to COW and set breakpoints -		 */ -		ret = install_special_mapping(mm, addr, PAGE_SIZE, -					      VM_READ|VM_EXEC| -					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, -					      vdso32_pages); +	init_vdso_image(selected_vdso32); -		if (ret) -			goto up_fail; -	} - -	current_thread_info()->sysenter_return = -		VDSO32_SYMBOL(addr, SYSENTER_RETURN); - -  up_fail: -	if (ret) -		current->mm->context.vdso = NULL; - -	up_write(&mm->mmap_sem); - -	return ret; +	return 0;  }  #ifdef CONFIG_X86_64 @@ -375,7 +92,7 @@ subsys_initcall(sysenter_setup);  static struct ctl_table abi_table2[] = {  	{  		.procname	= "vsyscall32", -		.data		= &sysctl_vsyscall32, +		.data		= &vdso32_enabled,  		.maxlen		= sizeof(int),  		.mode		= 0644,  		.proc_handler	= proc_dointvec @@ -402,29 +119,14 @@ __initcall(ia32_binfmt_init);  #else  /* CONFIG_X86_32 */ -const char *arch_vma_name(struct vm_area_struct *vma) -{ -	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) -		return "[vdso]"; -	return NULL; -} -  struct vm_area_struct *get_gate_vma(struct mm_struct *mm)  { -	/* -	 * Check to see if the corresponding task was created in compat vdso -	 * mode. 
-	 */ -	if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) -		return &gate_vma;  	return NULL;  }  int in_gate_area(struct mm_struct *mm, unsigned long addr)  { -	const struct vm_area_struct *vma = get_gate_vma(mm); - -	return vma && addr >= vma->vm_start && addr < vma->vm_end; +	return 0;  }  int in_gate_area_no_mm(unsigned long addr) diff --git a/arch/x86/vdso/vdso32.S b/arch/x86/vdso/vdso32.S deleted file mode 100644 index 2ce5f82c333..00000000000 --- a/arch/x86/vdso/vdso32.S +++ /dev/null @@ -1,22 +0,0 @@ -#include <linux/init.h> - -__INITDATA - -	.globl vdso32_int80_start, vdso32_int80_end -vdso32_int80_start: -	.incbin "arch/x86/vdso/vdso32-int80.so" -vdso32_int80_end: - -	.globl vdso32_syscall_start, vdso32_syscall_end -vdso32_syscall_start: -#ifdef CONFIG_COMPAT -	.incbin "arch/x86/vdso/vdso32-syscall.so" -#endif -vdso32_syscall_end: - -	.globl vdso32_sysenter_start, vdso32_sysenter_end -vdso32_sysenter_start: -	.incbin "arch/x86/vdso/vdso32-sysenter.so" -vdso32_sysenter_end: - -__FINIT diff --git a/arch/x86/vdso/vdso32/vclock_gettime.c b/arch/x86/vdso/vdso32/vclock_gettime.c new file mode 100644 index 00000000000..175cc72c0f6 --- /dev/null +++ b/arch/x86/vdso/vdso32/vclock_gettime.c @@ -0,0 +1,30 @@ +#define BUILD_VDSO32 + +#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE +#undef CONFIG_OPTIMIZE_INLINING +#endif + +#undef CONFIG_X86_PPRO_FENCE + +#ifdef CONFIG_X86_64 + +/* + * in case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel + * configuration + */ +#undef CONFIG_64BIT +#undef CONFIG_X86_64 +#undef CONFIG_ILLEGAL_POINTER_VALUE +#undef CONFIG_SPARSEMEM_VMEMMAP +#undef CONFIG_NR_CPUS + +#define CONFIG_X86_32 1 +#define CONFIG_PAGE_OFFSET 0 +#define CONFIG_ILLEGAL_POINTER_VALUE 0 +#define CONFIG_NR_CPUS 1 + +#define BUILD_VDSO32_64 + +#endif + +#include "../vclock_gettime.c" diff --git a/arch/x86/vdso/vdso32/vdso-fakesections.c b/arch/x86/vdso/vdso32/vdso-fakesections.c new file mode 100644 index 00000000000..541468e2526 --- /dev/null +++ b/arch/x86/vdso/vdso32/vdso-fakesections.c @@ -0,0 +1 @@ +#include "../vdso-fakesections.c" diff --git a/arch/x86/vdso/vdso32/vdso32.lds.S b/arch/x86/vdso/vdso32/vdso32.lds.S index 976124bb5f9..31056cf294b 100644 --- a/arch/x86/vdso/vdso32/vdso32.lds.S +++ b/arch/x86/vdso/vdso32/vdso32.lds.S @@ -1,14 +1,15 @@  /*   * Linker script for 32-bit vDSO.   * We #include the file to define the layout details. - * Here we only choose the prelinked virtual address.   *   * This file defines the version script giving the user-exported symbols in - * the DSO.  We can define local symbols here called VDSO* to make their - * values visible using the asm-x86/vdso.h macros from the kernel proper. + * the DSO.   */ -#define VDSO_PRELINK 0 +#include <asm/page.h> + +#define BUILD_VDSO32 +  #include "../vdso-layout.lds.S"  /* The ELF entry point can be used to set the AT_SYSINFO value.  */ @@ -19,6 +20,13 @@ ENTRY(__kernel_vsyscall);   */  VERSION  { +	LINUX_2.6 { +	global: +		__vdso_clock_gettime; +		__vdso_gettimeofday; +		__vdso_time; +	}; +  	LINUX_2.5 {  	global:  		__kernel_vsyscall; @@ -27,11 +35,3 @@ VERSION  	local: *;  	};  } - -/* - * Symbols we define here called VDSO* get their values into vdso32-syms.h. 
- */ -VDSO32_PRELINK		= VDSO_PRELINK; -VDSO32_vsyscall		= __kernel_vsyscall; -VDSO32_sigreturn	= __kernel_sigreturn; -VDSO32_rt_sigreturn	= __kernel_rt_sigreturn; diff --git a/arch/x86/vdso/vdsox32.S b/arch/x86/vdso/vdsox32.S deleted file mode 100644 index d6b9a7f42a8..00000000000 --- a/arch/x86/vdso/vdsox32.S +++ /dev/null @@ -1,22 +0,0 @@ -#include <asm/page_types.h> -#include <linux/linkage.h> -#include <linux/init.h> - -__PAGE_ALIGNED_DATA - -	.globl vdsox32_start, vdsox32_end -	.align PAGE_SIZE -vdsox32_start: -	.incbin "arch/x86/vdso/vdsox32.so" -vdsox32_end: -	.align PAGE_SIZE /* extra data here leaks to userspace. */ - -.previous - -	.globl vdsox32_pages -	.bss -	.align 8 -	.type vdsox32_pages, @object -vdsox32_pages: -	.zero (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE * 8 -	.size vdsox32_pages, .-vdsox32_pages diff --git a/arch/x86/vdso/vdsox32.lds.S b/arch/x86/vdso/vdsox32.lds.S index 62272aa2ae0..697c11ece90 100644 --- a/arch/x86/vdso/vdsox32.lds.S +++ b/arch/x86/vdso/vdsox32.lds.S @@ -1,14 +1,13 @@  /*   * Linker script for x32 vDSO.   * We #include the file to define the layout details. - * Here we only choose the prelinked virtual address.   *   * This file defines the version script giving the user-exported symbols in - * the DSO.  We can define local symbols here called VDSO* to make their - * values visible using the asm-x86/vdso.h macros from the kernel proper. + * the DSO.   */ -#define VDSO_PRELINK 0 +#define BUILD_VDSOX32 +  #include "vdso-layout.lds.S"  /* @@ -24,5 +23,3 @@ VERSION {  	local: *;  	};  } - -VDSOX32_PRELINK = VDSO_PRELINK; diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 431e8754441..5a5176de8d0 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c @@ -15,114 +15,56 @@  #include <asm/proto.h>  #include <asm/vdso.h>  #include <asm/page.h> +#include <asm/hpet.h> -unsigned int __read_mostly vdso_enabled = 1; +#if defined(CONFIG_X86_64) +unsigned int __read_mostly vdso64_enabled = 1; -extern char vdso_start[], vdso_end[];  extern unsigned short vdso_sync_cpuid; - -extern struct page *vdso_pages[]; -static unsigned vdso_size; - -#ifdef CONFIG_X86_X32_ABI -extern char vdsox32_start[], vdsox32_end[]; -extern struct page *vdsox32_pages[]; -static unsigned vdsox32_size; - -static void __init patch_vdsox32(void *vdso, size_t len) -{ -	Elf32_Ehdr *hdr = vdso; -	Elf32_Shdr *sechdrs, *alt_sec = 0; -	char *secstrings; -	void *alt_data; -	int i; - -	BUG_ON(len < sizeof(Elf32_Ehdr)); -	BUG_ON(memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0); - -	sechdrs = (void *)hdr + hdr->e_shoff; -	secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - -	for (i = 1; i < hdr->e_shnum; i++) { -		Elf32_Shdr *shdr = &sechdrs[i]; -		if (!strcmp(secstrings + shdr->sh_name, ".altinstructions")) { -			alt_sec = shdr; -			goto found; -		} -	} - -	/* If we get here, it's probably a bug. 
*/ -	pr_warning("patch_vdsox32: .altinstructions not found\n"); -	return;  /* nothing to patch */ - -found: -	alt_data = (void *)hdr + alt_sec->sh_offset; -	apply_alternatives(alt_data, alt_data + alt_sec->sh_size); -}  #endif -static void __init patch_vdso64(void *vdso, size_t len) +void __init init_vdso_image(const struct vdso_image *image)  { -	Elf64_Ehdr *hdr = vdso; -	Elf64_Shdr *sechdrs, *alt_sec = 0; -	char *secstrings; -	void *alt_data;  	int i; +	int npages = (image->size) / PAGE_SIZE; -	BUG_ON(len < sizeof(Elf64_Ehdr)); -	BUG_ON(memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0); - -	sechdrs = (void *)hdr + hdr->e_shoff; -	secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - -	for (i = 1; i < hdr->e_shnum; i++) { -		Elf64_Shdr *shdr = &sechdrs[i]; -		if (!strcmp(secstrings + shdr->sh_name, ".altinstructions")) { -			alt_sec = shdr; -			goto found; -		} -	} - -	/* If we get here, it's probably a bug. */ -	pr_warning("patch_vdso64: .altinstructions not found\n"); -	return;  /* nothing to patch */ +	BUG_ON(image->size % PAGE_SIZE != 0); +	for (i = 0; i < npages; i++) +		image->text_mapping.pages[i] = +			virt_to_page(image->data + i*PAGE_SIZE); -found: -	alt_data = (void *)hdr + alt_sec->sh_offset; -	apply_alternatives(alt_data, alt_data + alt_sec->sh_size); +	apply_alternatives((struct alt_instr *)(image->data + image->alt), +			   (struct alt_instr *)(image->data + image->alt + +						image->alt_len));  } +#if defined(CONFIG_X86_64)  static int __init init_vdso(void)  { -	int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE; -	int i; - -	patch_vdso64(vdso_start, vdso_end - vdso_start); - -	vdso_size = npages << PAGE_SHIFT; -	for (i = 0; i < npages; i++) -		vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE); +	init_vdso_image(&vdso_image_64);  #ifdef CONFIG_X86_X32_ABI -	patch_vdsox32(vdsox32_start, vdsox32_end - vdsox32_start); -	npages = (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE; -	vdsox32_size = npages << PAGE_SHIFT; -	for (i = 0; i < npages; i++) -		vdsox32_pages[i] = virt_to_page(vdsox32_start + i*PAGE_SIZE); +	init_vdso_image(&vdso_image_x32);  #endif  	return 0;  }  subsys_initcall(init_vdso); +#endif  struct linux_binprm;  /* Put the vdso above the (randomized) stack with another randomized offset.     This way there is no hole in the middle of address space.     To save memory make sure it is still in the same PTE as the stack top. -   This doesn't give that many random bits */ +   This doesn't give that many random bits. + +   Only used for the 64-bit and x32 vdsos. */  static unsigned long vdso_addr(unsigned long start, unsigned len)  { +#ifdef CONFIG_X86_32 +	return 0; +#else  	unsigned long addr, end;  	unsigned offset;  	end = (start + PMD_SIZE - 1) & PMD_MASK; @@ -144,63 +86,153 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)  	addr = align_vdso_addr(addr);  	return addr; +#endif  } -/* Setup a VMA at program startup for the vsyscall page. 
-   Not called for compat tasks */ -static int setup_additional_pages(struct linux_binprm *bprm, -				  int uses_interp, -				  struct page **pages, -				  unsigned size) +static int map_vdso(const struct vdso_image *image, bool calculate_addr)  {  	struct mm_struct *mm = current->mm; +	struct vm_area_struct *vma;  	unsigned long addr; -	int ret; - -	if (!vdso_enabled) -		return 0; +	int ret = 0; +	static struct page *no_pages[] = {NULL}; +	static struct vm_special_mapping vvar_mapping = { +		.name = "[vvar]", +		.pages = no_pages, +	}; + +	if (calculate_addr) { +		addr = vdso_addr(current->mm->start_stack, +				 image->sym_end_mapping); +	} else { +		addr = 0; +	}  	down_write(&mm->mmap_sem); -	addr = vdso_addr(mm->start_stack, size); -	addr = get_unmapped_area(NULL, addr, size, 0, 0); + +	addr = get_unmapped_area(NULL, addr, image->sym_end_mapping, 0, 0);  	if (IS_ERR_VALUE(addr)) {  		ret = addr;  		goto up_fail;  	} -	current->mm->context.vdso = (void *)addr; +	current->mm->context.vdso = (void __user *)addr; -	ret = install_special_mapping(mm, addr, size, -				      VM_READ|VM_EXEC| -				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, -				      pages); -	if (ret) { -		current->mm->context.vdso = NULL; +	/* +	 * MAYWRITE to allow gdb to COW and set breakpoints +	 */ +	vma = _install_special_mapping(mm, +				       addr, +				       image->size, +				       VM_READ|VM_EXEC| +				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, +				       &image->text_mapping); + +	if (IS_ERR(vma)) { +		ret = PTR_ERR(vma);  		goto up_fail;  	} +	vma = _install_special_mapping(mm, +				       addr + image->size, +				       image->sym_end_mapping - image->size, +				       VM_READ, +				       &vvar_mapping); + +	if (IS_ERR(vma)) { +		ret = PTR_ERR(vma); +		goto up_fail; +	} + +	if (image->sym_vvar_page) +		ret = remap_pfn_range(vma, +				      addr + image->sym_vvar_page, +				      __pa_symbol(&__vvar_page) >> PAGE_SHIFT, +				      PAGE_SIZE, +				      PAGE_READONLY); + +	if (ret) +		goto up_fail; + +#ifdef CONFIG_HPET_TIMER +	if (hpet_address && image->sym_hpet_page) { +		ret = io_remap_pfn_range(vma, +			addr + image->sym_hpet_page, +			hpet_address >> PAGE_SHIFT, +			PAGE_SIZE, +			pgprot_noncached(PAGE_READONLY)); + +		if (ret) +			goto up_fail; +	} +#endif +  up_fail: +	if (ret) +		current->mm->context.vdso = NULL; +  	up_write(&mm->mmap_sem);  	return ret;  } +#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT) +static int load_vdso32(void) +{ +	int ret; + +	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */ +		return 0; + +	ret = map_vdso(selected_vdso32, false); +	if (ret) +		return ret; + +	if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN) +		current_thread_info()->sysenter_return = +			current->mm->context.vdso + +			selected_vdso32->sym_VDSO32_SYSENTER_RETURN; + +	return 0; +} +#endif + +#ifdef CONFIG_X86_64  int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)  { -	return setup_additional_pages(bprm, uses_interp, vdso_pages, -				      vdso_size); +	if (!vdso64_enabled) +		return 0; + +	return map_vdso(&vdso_image_64, true);  } +#ifdef CONFIG_COMPAT +int compat_arch_setup_additional_pages(struct linux_binprm *bprm, +				       int uses_interp) +{  #ifdef CONFIG_X86_X32_ABI -int x32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +	if (test_thread_flag(TIF_X32)) { +		if (!vdso64_enabled) +			return 0; + +		return map_vdso(&vdso_image_x32, true); +	} +#endif + +	return load_vdso32(); +} +#endif +#else +int arch_setup_additional_pages(struct 
linux_binprm *bprm, int uses_interp)
 {
-	return setup_additional_pages(bprm, uses_interp, vdsox32_pages,
-				      vdsox32_size);
+	return load_vdso32();
 }
 #endif
 
+#ifdef CONFIG_X86_64
 static __init int vdso_setup(char *s)
 {
-	vdso_enabled = simple_strtoul(s, NULL, 0);
+	vdso64_enabled = simple_strtoul(s, NULL, 0);
 	return 0;
 }
 __setup("vdso=", vdso_setup);
+#endif
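For reference: the reworked map_vdso() above installs the image text as one special mapping and a read-only "[vvar]" mapping directly after it (at addr + image->size), with the vvar page and, if present, the HPET page remapped into that second area. Below is a minimal userspace sketch, not part of this patch, that simply lists both regions from /proc/self/maps (assuming the text mapping shows up under its usual "[vdso]" name):

/* Print the [vdso] and [vvar] lines from this process's memory map. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *maps = fopen("/proc/self/maps", "r");
	char line[512];

	if (!maps) {
		perror("fopen");
		return 1;
	}

	while (fgets(line, sizeof(line), maps))
		if (strstr(line, "[vdso]") || strstr(line, "[vvar]"))
			fputs(line, stdout);

	fclose(maps);
	return 0;
}

On a kernel with this series applied, the [vvar] line should appear immediately after the [vdso] line, matching the layout set up in map_vdso().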

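The 32-bit vDSO now exports __vdso_clock_gettime, __vdso_gettimeofday and __vdso_time under a new LINUX_2.6 version node, and each image is still advertised to userspace via the AT_SYSINFO_EHDR auxiliary-vector entry. A small sketch, again not part of the patch, that checks a vDSO is mapped and exercises clock_gettime(); whether the C library actually routes the call through the 32-bit vDSO fast path depends on the glibc version in use:

#include <elf.h>
#include <stdio.h>
#include <sys/auxv.h>
#include <time.h>

int main(void)
{
	/* Base address of the vDSO ELF image; 0 if none was mapped. */
	unsigned long vdso = getauxval(AT_SYSINFO_EHDR);
	struct timespec ts;

	printf("vDSO base: %#lx\n", vdso);

	/* With a vDSO mapped, this can normally be satisfied without a syscall. */
	if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
		printf("CLOCK_MONOTONIC: %ld.%09ld\n",
		       (long)ts.tv_sec, ts.tv_nsec);

	return 0;
}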