diff options
| -rw-r--r-- | arch/x86/syscalls/syscall_32.tbl | 1 | ||||
| -rw-r--r-- | arch/x86/syscalls/syscall_64.tbl | 2 | ||||
| -rw-r--r-- | include/linux/kcmp.h | 17 | ||||
| -rw-r--r-- | include/linux/syscalls.h | 2 | ||||
| -rw-r--r-- | kernel/Makefile | 3 | ||||
| -rw-r--r-- | kernel/kcmp.c | 196 | ||||
| -rw-r--r-- | kernel/sys_ni.c | 3 | ||||
| -rw-r--r-- | tools/testing/selftests/Makefile | 2 | ||||
| -rw-r--r-- | tools/testing/selftests/kcmp/Makefile | 29 | ||||
| -rw-r--r-- | tools/testing/selftests/kcmp/kcmp_test.c | 94 | 
10 files changed, 348 insertions, 1 deletions
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index 29f9f0554f7..7a35a6e71d4 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -355,3 +355,4 @@  346	i386	setns			sys_setns  347	i386	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv  348	i386	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev +349	i386	kcmp			sys_kcmp diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index dd29a9ea27c..51171aeff0d 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -318,6 +318,8 @@  309	common	getcpu			sys_getcpu  310	64	process_vm_readv	sys_process_vm_readv  311	64	process_vm_writev	sys_process_vm_writev +312	64	kcmp			sys_kcmp +  #  # x32-specific system call numbers start at 512 to avoid cache impact  # for native 64-bit operation. diff --git a/include/linux/kcmp.h b/include/linux/kcmp.h new file mode 100644 index 00000000000..2dcd1b3aafc --- /dev/null +++ b/include/linux/kcmp.h @@ -0,0 +1,17 @@ +#ifndef _LINUX_KCMP_H +#define _LINUX_KCMP_H + +/* Comparison type */ +enum kcmp_type { +	KCMP_FILE, +	KCMP_VM, +	KCMP_FILES, +	KCMP_FS, +	KCMP_SIGHAND, +	KCMP_IO, +	KCMP_SYSVSEM, + +	KCMP_TYPES, +}; + +#endif /* _LINUX_KCMP_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 3de3acb84a9..19439c75c5b 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -858,4 +858,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid,  				      unsigned long riovcnt,  				      unsigned long flags); +asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, +			 unsigned long idx1, unsigned long idx2);  #endif diff --git a/kernel/Makefile b/kernel/Makefile index 6c07f30fa9b..80be6ca0cc7 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -25,6 +25,9 @@ endif  obj-y += sched/  obj-y += power/ +ifeq ($(CONFIG_CHECKPOINT_RESTORE),y) +obj-$(CONFIG_X86) += kcmp.o +endif  
obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
new file mode 100644
index 00000000000..30b7b225306
--- /dev/null
+++ b/kernel/kcmp.c
@@ -0,0 +1,196 @@
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/fdtable.h>
+#include <linux/string.h>
+#include <linux/random.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/cache.h>
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/kcmp.h>
+
+#include <asm/unistd.h>
+
+/*
+ * We don't expose the real in-memory order of objects for security reasons.
+ * But still the comparison results should be suitable for sorting. So we
+ * obfuscate kernel pointer values and compare the products instead.
+ *
+ * The obfuscation is done in two steps. First we xor the kernel pointer with
+ * a random value, which puts pointer into a new position in a reordered space.
+ * Secondly we multiply the xor product with a large odd random number to
+ * permute its bits even more (the odd multiplier guarantees that the product
+ * is unique even after the high bits are truncated, since any odd number is
+ * relatively prime to 2^n).
+ *
+ * Note also that the obfuscation itself is invisible to userspace and if needed
+ * it can be changed to an alternate scheme.
+ */
+static unsigned long cookies[KCMP_TYPES][2] __read_mostly;
+
+static long kptr_obfuscate(long v, int type)
+{
+	return (v ^ cookies[type][0]) * cookies[type][1];
+}
+
+/*
+ * 0 - equal, i.e. v1 = v2
+ * 1 - less than, i.e. v1 < v2
+ * 2 - greater than, i.e. 
v1 > v2
+ * 3 - not equal but ordering unavailable (reserved for future)
+ */
+static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type)
+{
+	long t1 = kptr_obfuscate((long)v1, type);
+	long t2 = kptr_obfuscate((long)v2, type);
+
+	/* Compare directly: the sign of a wrapped difference is not an order */
+	return (t1 < t2) | ((t1 > t2) << 1);
+}
+
+/* The caller must have pinned the task */
+static struct file *
+get_file_raw_ptr(struct task_struct *task, unsigned int idx)
+{
+	struct file *file = NULL;
+
+	task_lock(task);
+	rcu_read_lock();
+
+	if (task->files)
+		file = fcheck_files(task->files, idx);
+
+	rcu_read_unlock();
+	task_unlock(task);
+
+	return file;
+}
+
+static void kcmp_unlock(struct mutex *m1, struct mutex *m2)
+{
+	if (likely(m2 != m1))
+		mutex_unlock(m2);
+	mutex_unlock(m1);
+}
+
+static int kcmp_lock(struct mutex *m1, struct mutex *m2)
+{
+	int err;
+
+	if (m2 > m1)
+		swap(m1, m2);
+
+	err = mutex_lock_killable(m1);
+	if (!err && likely(m1 != m2)) {
+		err = mutex_lock_killable_nested(m2, SINGLE_DEPTH_NESTING);
+		if (err)
+			mutex_unlock(m1);
+	}
+
+	return err;
+}
+
+SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
+		unsigned long, idx1, unsigned long, idx2)
+{
+	struct task_struct *task1, *task2;
+	int ret;
+
+	rcu_read_lock();
+
+	/*
+	 * Tasks are looked up in caller's PID namespace only.
+	 */
+	task1 = find_task_by_vpid(pid1);
+	task2 = find_task_by_vpid(pid2);
+	if (!task1 || !task2)
+		goto err_no_task;
+
+	get_task_struct(task1);
+	get_task_struct(task2);
+
+	rcu_read_unlock();
+
+	/*
+	 * One should have enough rights to inspect task details. 
+	 */ +	ret = kcmp_lock(&task1->signal->cred_guard_mutex, +			&task2->signal->cred_guard_mutex); +	if (ret) +		goto err; +	if (!ptrace_may_access(task1, PTRACE_MODE_READ) || +	    !ptrace_may_access(task2, PTRACE_MODE_READ)) { +		ret = -EPERM; +		goto err_unlock; +	} + +	switch (type) { +	case KCMP_FILE: { +		struct file *filp1, *filp2; + +		filp1 = get_file_raw_ptr(task1, idx1); +		filp2 = get_file_raw_ptr(task2, idx2); + +		if (filp1 && filp2) +			ret = kcmp_ptr(filp1, filp2, KCMP_FILE); +		else +			ret = -EBADF; +		break; +	} +	case KCMP_VM: +		ret = kcmp_ptr(task1->mm, task2->mm, KCMP_VM); +		break; +	case KCMP_FILES: +		ret = kcmp_ptr(task1->files, task2->files, KCMP_FILES); +		break; +	case KCMP_FS: +		ret = kcmp_ptr(task1->fs, task2->fs, KCMP_FS); +		break; +	case KCMP_SIGHAND: +		ret = kcmp_ptr(task1->sighand, task2->sighand, KCMP_SIGHAND); +		break; +	case KCMP_IO: +		ret = kcmp_ptr(task1->io_context, task2->io_context, KCMP_IO); +		break; +	case KCMP_SYSVSEM: +#ifdef CONFIG_SYSVIPC +		ret = kcmp_ptr(task1->sysvsem.undo_list, +			       task2->sysvsem.undo_list, +			       KCMP_SYSVSEM); +#else +		ret = -EOPNOTSUPP; +#endif +		break; +	default: +		ret = -EINVAL; +		break; +	} + +err_unlock: +	kcmp_unlock(&task1->signal->cred_guard_mutex, +		    &task2->signal->cred_guard_mutex); +err: +	put_task_struct(task1); +	put_task_struct(task2); + +	return ret; + +err_no_task: +	rcu_read_unlock(); +	return -ESRCH; +} + +static __init int kcmp_cookies_init(void) +{ +	int i; + +	get_random_bytes(cookies, sizeof(cookies)); + +	for (i = 0; i < KCMP_TYPES; i++) +		cookies[i][1] |= (~(~0UL >>  1) | 1); + +	return 0; +} +arch_initcall(kcmp_cookies_init); diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 47bfa16430d..dbff751e408 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -203,3 +203,6 @@ cond_syscall(sys_fanotify_mark);  cond_syscall(sys_name_to_handle_at);  cond_syscall(sys_open_by_handle_at);  cond_syscall(compat_sys_open_by_handle_at); + +/* compare kernel 
pointers */
+cond_syscall(sys_kcmp);
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 14972017a43..a4162e15c25 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -1,4 +1,4 @@
-TARGETS = breakpoints mqueue vm
+TARGETS = breakpoints kcmp mqueue vm
 all:
 	for TARGET in $(TARGETS); do \
diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile
new file mode 100644
index 00000000000..dc79b86ea65
--- /dev/null
+++ b/tools/testing/selftests/kcmp/Makefile
@@ -0,0 +1,29 @@
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
+ifeq ($(ARCH),i386)
+        ARCH := X86
+	CFLAGS := -DCONFIG_X86_32 -D__i386__
+endif
+ifeq ($(ARCH),x86_64)
+	ARCH := X86
+	CFLAGS := -DCONFIG_X86_64 -D__x86_64__
+endif
+
+CFLAGS += -I../../../../arch/x86/include/generated/
+CFLAGS += -I../../../../include/
+CFLAGS += -I../../../../usr/include/
+CFLAGS += -I../../../../arch/x86/include/
+
+all:
+ifeq ($(ARCH),X86)
+	gcc $(CFLAGS) kcmp_test.c -o run_test
+else
+	echo "Not an x86 target, can't build kcmp selftest"
+endif
+
+run-tests: all
+	./run_test
+
+clean:
+	rm -fr ./run_test
+	rm -fr ./kcmp-test-file
diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c
new file mode 100644
index 00000000000..358cc6bfa35
--- /dev/null
+++ b/tools/testing/selftests/kcmp/kcmp_test.c
@@ -0,0 +1,94 @@
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <limits.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+
+#include <linux/unistd.h>
+#include <linux/kcmp.h>
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+static long sys_kcmp(int pid1, int pid2, int type, int fd1, int fd2)
+{
+	return syscall(__NR_kcmp, pid1, pid2, type, fd1, fd2);
+}
+
+int main(int argc, char **argv)
+{
+	const 
char kpath[] = "kcmp-test-file";
+	int pid1, pid2;
+	int fd1, fd2;
+	int status;
+
+	fd1 = open(kpath, O_RDWR | O_CREAT | O_TRUNC, 0644);
+	pid1 = getpid();
+
+	if (fd1 < 0) {
+		perror("Can't create file");
+		exit(1);
+	}
+
+	pid2 = fork();
+	if (pid2 < 0) {
+		perror("fork failed");
+		exit(1);
+	}
+
+	if (!pid2) {
+		int pid2 = getpid();
+		int ret, fail = 0;
+
+		fd2 = open(kpath, O_RDWR, 0644);
+		if (fd2 < 0) {
+			perror("Can't open file");
+			exit(1);
+		}
+
+		/* An example of output and arguments */
+		printf("pid1: %6d pid2: %6d FD: %2ld FILES: %2ld VM: %2ld "
+		       "FS: %2ld SIGHAND: %2ld IO: %2ld SYSVSEM: %2ld "
+		       "INV: %2ld\n",
+		       pid1, pid2,
+		       sys_kcmp(pid1, pid2, KCMP_FILE,		fd1, fd2),
+		       sys_kcmp(pid1, pid2, KCMP_FILES,		0, 0),
+		       sys_kcmp(pid1, pid2, KCMP_VM,		0, 0),
+		       sys_kcmp(pid1, pid2, KCMP_FS,		0, 0),
+		       sys_kcmp(pid1, pid2, KCMP_SIGHAND,	0, 0),
+		       sys_kcmp(pid1, pid2, KCMP_IO,		0, 0),
+		       sys_kcmp(pid1, pid2, KCMP_SYSVSEM,	0, 0),
+
+			/* This one should fail */
+		       sys_kcmp(pid1, pid2, KCMP_TYPES + 1,	0, 0));
+
+		/* Same descriptor in both tasks: must compare equal */
+		ret = sys_kcmp(pid1, pid2, KCMP_FILE, fd1, fd1);
+		if (ret) {
+			printf("FAIL: 0 expected but %d returned\n", ret);
+			fail = -1;
+		} else
+			printf("PASS: 0 returned as expected\n");
+
+		/* Compare with self; keep any earlier failure in 'fail' */
+		ret = sys_kcmp(pid1, pid1, KCMP_VM, 0, 0);
+		if (ret) {
+			printf("FAIL: 0 expected but %d returned\n", ret);
+			fail = -1;
+		} else
+			printf("PASS: 0 returned as expected\n");
+
+		exit(fail);
+	}
+
+	waitpid(pid2, &status, 0);
+
+	return 0;
+}  |