From 556d2f055bf6d79ce81587dfe774d4dd10da473f Mon Sep 17 00:00:00 2001 From: Yalin Wang Date: Mon, 3 Nov 2014 03:01:03 +0100 Subject: [PATCH 001/601] ARM: 8187/1: add CONFIG_HAVE_ARCH_BITREVERSE to support rbit instruction this change add CONFIG_HAVE_ARCH_BITREVERSE config option, so that we can use some architecture's bitrev hardware instruction to do bitrev operation. Introduce __constant_bitrev* macro for constant bitrev operation. Change __bitrev16() __bitrev32() to be inline function, don't need export symbol for these tiny functions. Signed-off-by: Yalin Wang Acked-by: Will Deacon Signed-off-by: Russell King --- include/linux/bitrev.h | 77 +++++++++++++++++++++++++++++++++++++++--- lib/Kconfig | 9 +++++ lib/bitrev.c | 17 ++-------- 3 files changed, 84 insertions(+), 19 deletions(-) diff --git a/include/linux/bitrev.h b/include/linux/bitrev.h index 7ffe03f4693d9c..fb790b8449c1ff 100644 --- a/include/linux/bitrev.h +++ b/include/linux/bitrev.h @@ -3,14 +3,83 @@ #include -extern u8 const byte_rev_table[256]; +#ifdef CONFIG_HAVE_ARCH_BITREVERSE +#include + +#define __bitrev32 __arch_bitrev32 +#define __bitrev16 __arch_bitrev16 +#define __bitrev8 __arch_bitrev8 -static inline u8 bitrev8(u8 byte) +#else +extern u8 const byte_rev_table[256]; +static inline u8 __bitrev8(u8 byte) { return byte_rev_table[byte]; } -extern u16 bitrev16(u16 in); -extern u32 bitrev32(u32 in); +static inline u16 __bitrev16(u16 x) +{ + return (__bitrev8(x & 0xff) << 8) | __bitrev8(x >> 8); +} + +static inline u32 __bitrev32(u32 x) +{ + return (__bitrev16(x & 0xffff) << 16) | __bitrev16(x >> 16); +} + +#endif /* CONFIG_HAVE_ARCH_BITREVERSE */ + +#define __constant_bitrev32(x) \ +({ \ + u32 __x = x; \ + __x = (__x >> 16) | (__x << 16); \ + __x = ((__x & (u32)0xFF00FF00UL) >> 8) | ((__x & (u32)0x00FF00FFUL) << 8); \ + __x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4); \ + __x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2); \ + __x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1); \ + __x; \ +}) + +#define __constant_bitrev16(x) \ +({ \ + u16 __x = x; \ + __x = (__x >> 8) | (__x << 8); \ + __x = ((__x & (u16)0xF0F0U) >> 4) | ((__x & (u16)0x0F0FU) << 4); \ + __x = ((__x & (u16)0xCCCCU) >> 2) | ((__x & (u16)0x3333U) << 2); \ + __x = ((__x & (u16)0xAAAAU) >> 1) | ((__x & (u16)0x5555U) << 1); \ + __x; \ +}) + +#define __constant_bitrev8(x) \ +({ \ + u8 __x = x; \ + __x = (__x >> 4) | (__x << 4); \ + __x = ((__x & (u8)0xCCU) >> 2) | ((__x & (u8)0x33U) << 2); \ + __x = ((__x & (u8)0xAAU) >> 1) | ((__x & (u8)0x55U) << 1); \ + __x; \ +}) + +#define bitrev32(x) \ +({ \ + u32 __x = x; \ + __builtin_constant_p(__x) ? \ + __constant_bitrev32(__x) : \ + __bitrev32(__x); \ +}) + +#define bitrev16(x) \ +({ \ + u16 __x = x; \ + __builtin_constant_p(__x) ? \ + __constant_bitrev16(__x) : \ + __bitrev16(__x); \ + }) +#define bitrev8(x) \ +({ \ + u8 __x = x; \ + __builtin_constant_p(__x) ? \ + __constant_bitrev8(__x) : \ + __bitrev8(__x) ; \ + }) #endif /* _LINUX_BITREV_H */ diff --git a/lib/Kconfig b/lib/Kconfig index 54cf309a92a5ed..cd177caf38761a 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -13,6 +13,15 @@ config RAID6_PQ config BITREVERSE tristate +config HAVE_ARCH_BITREVERSE + boolean + default n + depends on BITREVERSE + help + This option provides an config for the architecture which have instruction + can do bitreverse operation, we use the hardware instruction if the architecture + have this capability. + config RATIONAL boolean diff --git a/lib/bitrev.c b/lib/bitrev.c index 3956203456d468..40ffda94cc5dbf 100644 --- a/lib/bitrev.c +++ b/lib/bitrev.c @@ -1,3 +1,4 @@ +#ifndef CONFIG_HAVE_ARCH_BITREVERSE #include #include #include @@ -42,18 +43,4 @@ const u8 byte_rev_table[256] = { }; EXPORT_SYMBOL_GPL(byte_rev_table); -u16 bitrev16(u16 x) -{ - return (bitrev8(x & 0xff) << 8) | bitrev8(x >> 8); -} -EXPORT_SYMBOL(bitrev16); - -/** - * bitrev32 - reverse the order of bits in a u32 value - * @x: value to be bit-reversed - */ -u32 bitrev32(u32 x) -{ - return (bitrev16(x & 0xffff) << 16) | bitrev16(x >> 16); -} -EXPORT_SYMBOL(bitrev32); +#endif /* CONFIG_HAVE_ARCH_BITREVERSE */ From 8e7a4cef71790d9406a7bd85aea6cfb12bc07d3c Mon Sep 17 00:00:00 2001 From: Yalin Wang Date: Mon, 3 Nov 2014 03:02:23 +0100 Subject: [PATCH 002/601] ARM: 8189/1: arm64:add bitrev.h file to support rbit instruction This patch add bitrev.h file to support rbit instruction, so that we can do bitrev operation by hardware. Signed-off-by: Yalin Wang Acked-by: Will Deacon Signed-off-by: Russell King --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/bitrev.h | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 arch/arm64/include/asm/bitrev.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b1f9a20a367746..0ed36d1f536d54 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -39,6 +39,7 @@ config ARM64 select HARDIRQS_SW_RESEND select HAVE_ALIGNED_STRUCT_PAGE if SLUB select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_BITREVERSE select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KGDB select HAVE_ARCH_SECCOMP_FILTER diff --git a/arch/arm64/include/asm/bitrev.h b/arch/arm64/include/asm/bitrev.h new file mode 100644 index 00000000000000..a5a0c366013773 --- /dev/null +++ b/arch/arm64/include/asm/bitrev.h @@ -0,0 +1,19 @@ +#ifndef __ASM_BITREV_H +#define __ASM_BITREV_H +static __always_inline __attribute_const__ u32 __arch_bitrev32(u32 x) +{ + __asm__ ("rbit %w0, %w1" : "=r" (x) : "r" (x)); + return x; +} + +static __always_inline __attribute_const__ u16 __arch_bitrev16(u16 x) +{ + return __arch_bitrev32((u32)x) >> 16; +} + +static __always_inline __attribute_const__ u8 __arch_bitrev8(u8 x) +{ + return __arch_bitrev32((u32)x) >> 24; +} + +#endif From cf6ab6d9143b157786bf29bca5c32e55234bb07d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 15 Dec 2014 20:13:31 -0500 Subject: [PATCH 003/601] tracing: Add ref count to tracer for when they are being read by pipe When one of the trace pipe files are being read (by either the trace_pipe or trace_pipe_raw), do not allow the current_trace to change. By adding a ref count that is incremented when the pipe files are opened, will prevent the current_trace from being changed. This will allow for the removal of the global trace_types_lock from reading the pipe buffers (which is currently a bottle neck). Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 16 +++++++++++++++- kernel/trace/trace.h | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2e767972e99c2e..ed3fba1d657052 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4140,6 +4140,12 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) goto out; } + /* If trace pipe files are being read, we can't change the tracer */ + if (tr->current_trace->ref) { + ret = -EBUSY; + goto out; + } + trace_branch_disable(); tr->current_trace->enabled--; @@ -4363,6 +4369,8 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) iter->trace->pipe_open(iter); nonseekable_open(inode, filp); + + tr->current_trace->ref++; out: mutex_unlock(&trace_types_lock); return ret; @@ -4382,6 +4390,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) mutex_lock(&trace_types_lock); + tr->current_trace->ref--; + if (iter->trace->pipe_close) iter->trace->pipe_close(iter); @@ -5331,6 +5341,8 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) filp->private_data = info; + tr->current_trace->ref++; + mutex_unlock(&trace_types_lock); ret = nonseekable_open(inode, filp); @@ -5437,6 +5449,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) mutex_lock(&trace_types_lock); + iter->tr->current_trace->ref--; + __trace_array_put(iter->tr); if (info->spare) @@ -6416,7 +6430,7 @@ static int instance_delete(const char *name) goto out_unlock; ret = -EBUSY; - if (tr->ref) + if (tr->ref || (tr->current_trace && tr->current_trace->ref)) goto out_unlock; list_del(&tr->list); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8de48bac1ce2a5..0eddfeb05fee13 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -388,6 +388,7 @@ struct tracer { struct tracer *next; struct tracer_flags *flags; int enabled; + int ref; bool print_max; bool allow_instances; #ifdef CONFIG_TRACER_MAX_TRACE From d716ff71dd12bc6328f84a9ec1c3647daf01c827 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 15 Dec 2014 22:31:07 -0500 Subject: [PATCH 004/601] tracing: Remove taking of trace_types_lock in pipe files Taking the global mutex "trace_types_lock" in the trace_pipe files causes a bottle neck as most the pipe files can be read per cpu and there's no reason to serialize them. The current_trace variable was given a ref count and it can not change when the ref count is not zero. Opening the trace_pipe files will up the ref count (and decremented on close), so that the lock no longer needs to be taken when accessing the current_trace variable. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 110 +++++++++++-------------------------------- 1 file changed, 28 insertions(+), 82 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ed3fba1d657052..7669b1f3234e0d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4332,17 +4332,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) } trace_seq_init(&iter->seq); - - /* - * We make a copy of the current tracer to avoid concurrent - * changes on it while we are reading. - */ - iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL); - if (!iter->trace) { - ret = -ENOMEM; - goto fail; - } - *iter->trace = *tr->current_trace; + iter->trace = tr->current_trace; if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { ret = -ENOMEM; @@ -4399,7 +4389,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) free_cpumask_var(iter->started); mutex_destroy(&iter->mutex); - kfree(iter->trace); kfree(iter); trace_array_put(tr); @@ -4432,7 +4421,7 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table) return trace_poll(iter, filp, poll_table); } -/* Must be called with trace_types_lock mutex held. */ +/* Must be called with iter->mutex held. */ static int tracing_wait_pipe(struct file *filp) { struct trace_iterator *iter = filp->private_data; @@ -4477,7 +4466,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct trace_iterator *iter = filp->private_data; - struct trace_array *tr = iter->tr; ssize_t sret; /* return any leftover data */ @@ -4487,12 +4475,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf, trace_seq_init(&iter->seq); - /* copy the tracer to avoid using a global lock all around */ - mutex_lock(&trace_types_lock); - if (unlikely(iter->trace->name != tr->current_trace->name)) - *iter->trace = *tr->current_trace; - mutex_unlock(&trace_types_lock); - /* * Avoid more than one consumer on a single file descriptor * This is just a matter of traces coherency, the ring buffer itself @@ -4652,7 +4634,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, .ops = &tracing_pipe_buf_ops, .spd_release = tracing_spd_release_pipe, }; - struct trace_array *tr = iter->tr; ssize_t ret; size_t rem; unsigned int i; @@ -4660,12 +4641,6 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, if (splice_grow_spd(pipe, &spd)) return -ENOMEM; - /* copy the tracer to avoid using a global lock all around */ - mutex_lock(&trace_types_lock); - if (unlikely(iter->trace->name != tr->current_trace->name)) - *iter->trace = *tr->current_trace; - mutex_unlock(&trace_types_lock); - mutex_lock(&iter->mutex); if (iter->trace->splice_read) { @@ -5373,21 +5348,16 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, if (!count) return 0; - mutex_lock(&trace_types_lock); - #ifdef CONFIG_TRACER_MAX_TRACE - if (iter->snapshot && iter->tr->current_trace->use_max_tr) { - size = -EBUSY; - goto out_unlock; - } + if (iter->snapshot && iter->tr->current_trace->use_max_tr) + return -EBUSY; #endif if (!info->spare) info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer, iter->cpu_file); - size = -ENOMEM; if (!info->spare) - goto out_unlock; + return -ENOMEM; /* Do we have previous read data to read? */ if (info->read < PAGE_SIZE) @@ -5403,21 +5373,16 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, if (ret < 0) { if (trace_empty(iter)) { - if ((filp->f_flags & O_NONBLOCK)) { - size = -EAGAIN; - goto out_unlock; - } - mutex_unlock(&trace_types_lock); + if ((filp->f_flags & O_NONBLOCK)) + return -EAGAIN; + ret = wait_on_pipe(iter, false); - mutex_lock(&trace_types_lock); - if (ret) { - size = ret; - goto out_unlock; - } + if (ret) + return ret; + goto again; } - size = 0; - goto out_unlock; + return 0; } info->read = 0; @@ -5427,18 +5392,14 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, size = count; ret = copy_to_user(ubuf, info->spare + info->read, size); - if (ret == size) { - size = -EFAULT; - goto out_unlock; - } + if (ret == size) + return -EFAULT; + size -= ret; *ppos += size; info->read += size; - out_unlock: - mutex_unlock(&trace_types_lock); - return size; } @@ -5536,30 +5497,20 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, int entries, size, i; ssize_t ret = 0; - mutex_lock(&trace_types_lock); - #ifdef CONFIG_TRACER_MAX_TRACE - if (iter->snapshot && iter->tr->current_trace->use_max_tr) { - ret = -EBUSY; - goto out; - } + if (iter->snapshot && iter->tr->current_trace->use_max_tr) + return -EBUSY; #endif - if (splice_grow_spd(pipe, &spd)) { - ret = -ENOMEM; - goto out; - } + if (splice_grow_spd(pipe, &spd)) + return -ENOMEM; - if (*ppos & (PAGE_SIZE - 1)) { - ret = -EINVAL; - goto out; - } + if (*ppos & (PAGE_SIZE - 1)) + return -EINVAL; if (len & (PAGE_SIZE - 1)) { - if (len < PAGE_SIZE) { - ret = -EINVAL; - goto out; - } + if (len < PAGE_SIZE) + return -EINVAL; len &= PAGE_MASK; } @@ -5620,25 +5571,20 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, /* did we read anything? */ if (!spd.nr_pages) { if (ret) - goto out; + return ret; + + if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) + return -EAGAIN; - if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) { - ret = -EAGAIN; - goto out; - } - mutex_unlock(&trace_types_lock); ret = wait_on_pipe(iter, true); - mutex_lock(&trace_types_lock); if (ret) - goto out; + return ret; goto again; } ret = splice_to_pipe(pipe, &spd); splice_shrink_spd(&spd); -out: - mutex_unlock(&trace_types_lock); return ret; } From e3a8446a6a1bff8c2345cacd4f3b8bdea0ba36d8 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Mon, 12 May 2014 20:09:55 -0700 Subject: [PATCH 005/601] powerpc/pseries: relocate "config DTL" so kconfig nests properly Moving config DTL up so it is below config PPC_SPLPAR means that menuconfig will show config DTL nicely indented right below config PPC_SPLPAR when PPC_SPLPAR is enabled. To contrast that, right now if I enable PPC_SPLPAR in menuconfig, all I can immediately tell is that "something showed up further down the list where I wasn't looking", and I end up having to toggle the option a few times to figure out what showed up, or look at the KConfig to find out that config DTL depends on config PPC_SPLPAR. Signed-off-by: Cody P Schafer Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/Kconfig | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 756b482f819a42..a758a9c3bbbab5 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -34,6 +34,16 @@ config PPC_SPLPAR processors, that is, which share physical processors between two or more partitions. +config DTL + bool "Dispatch Trace Log" + depends on PPC_SPLPAR && DEBUG_FS + help + SPLPAR machines can log hypervisor preempt & dispatch events to a + kernel buffer. Saying Y here will enable logging these events, + which are accessible through a debugfs file. + + Say N if you are unsure. + config PSERIES_MSI bool depends on PCI_MSI && PPC_PSERIES && EEH @@ -123,13 +133,3 @@ config HV_PERF_CTRS systems. 24x7 is available on Power 8 systems. If unsure, select Y. - -config DTL - bool "Dispatch Trace Log" - depends on PPC_SPLPAR && DEBUG_FS - help - SPLPAR machines can log hypervisor preempt & dispatch events to a - kernel buffer. Saying Y here will enable logging these events, - which are accessible through a debugfs file. - - Say N if you are unsure. From 05b981f493e07856371ecd4a9294f187f5b50cf6 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Tue, 15 Jul 2014 13:43:47 +0200 Subject: [PATCH 006/601] powerpc/xmon: use isspace/isxdigit/isalnum from linux/ctype.h isxdigit() macro definition is the same. isalnum() from linux/ctype.h will accept additional latin non-ASCII characters. This is harmless since this macro is used in scanhex() which parses user input. isspace() from linux/ctype.h will accept vertical tab and form feed but not NULL. The use of this macro is modified to accept NULL as well. Additional characters are harmless since this macro is also only used in scanhex(). Signed-off-by: Vincent Bernat Signed-off-by: Michael Ellerman --- arch/powerpc/xmon/xmon.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 5b150f0c5df94a..e66ace703a6945 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -183,14 +184,6 @@ extern void xmon_leave(void); #define GETWORD(v) (((v)[0] << 24) + ((v)[1] << 16) + ((v)[2] << 8) + (v)[3]) #endif -#define isxdigit(c) (('0' <= (c) && (c) <= '9') \ - || ('a' <= (c) && (c) <= 'f') \ - || ('A' <= (c) && (c) <= 'F')) -#define isalnum(c) (('0' <= (c) && (c) <= '9') \ - || ('a' <= (c) && (c) <= 'z') \ - || ('A' <= (c) && (c) <= 'Z')) -#define isspace(c) (c == ' ' || c == '\t' || c == 10 || c == 13 || c == 0) - static char *help_string = "\ Commands:\n\ b show breakpoints\n\ @@ -2164,9 +2157,6 @@ static void dump_pacas(void) } #endif -#define isxdigit(c) (('0' <= (c) && (c) <= '9') \ - || ('a' <= (c) && (c) <= 'f') \ - || ('A' <= (c) && (c) <= 'F')) static void dump(void) { @@ -2569,7 +2559,7 @@ scanhex(unsigned long *vp) int i; for (i=0; i<63; i++) { c = inchar(); - if (isspace(c)) { + if (isspace(c) || c == '\0') { termch = c; break; } From e144d4edbce67ea269d181f81b9c5ef631743430 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 20 Jul 2014 15:20:59 +0800 Subject: [PATCH 007/601] powerpc/4xx: Fix return value check in hsta_msi_probe() In case of error, the function ioremap() returns NULL not ERR_PTR(). The IS_ERR() test in the return value check should be replaced with NULL test. Signed-off-by: Wei Yongjun Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/ppc4xx_hsta_msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c index ed9970ff8d94ab..f366d2d4c07906 100644 --- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c @@ -145,7 +145,7 @@ static int hsta_msi_probe(struct platform_device *pdev) ppc4xx_hsta_msi.address = mem->start; ppc4xx_hsta_msi.data = ioremap(mem->start, resource_size(mem)); ppc4xx_hsta_msi.irq_count = irq_count; - if (IS_ERR(ppc4xx_hsta_msi.data)) { + if (!ppc4xx_hsta_msi.data) { dev_err(dev, "Unable to map memory\n"); return -ENOMEM; } From 456295e284beb7b61a55ead9500d30f94ab06d52 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Mon, 8 Dec 2014 19:17:57 +1100 Subject: [PATCH 008/601] cxl: Fix leaking interrupts if attach process fails In this particular error path we have already allocated the AFU interrupts, but have not yet set the status to STARTED. The detach context code will only attempt to release the interrupts if the context is in state STARTED, so in this case the interrupts would remain allocated. This patch releases the AFU interrupts immediately if the attach call fails to prevent them leaking. Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index e9f2f10dbb3734..b09be4462294e0 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -185,8 +185,10 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, ctx->pid = get_pid(get_task_pid(current, PIDTYPE_PID)); if ((rc = cxl_attach_process(ctx, false, work.work_element_descriptor, - amr))) + amr))) { + afu_release_irqs(ctx); goto out; + } ctx->status = STARTED; rc = 0; From 13da704682471669685ccc3fe111fd6c0127b2eb Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Mon, 8 Dec 2014 19:17:58 +1100 Subject: [PATCH 009/601] cxl: Early return from cxl_handle_fault for a shut down context If a context is being detached and we get a translation fault for it there is little point getting it's mm and handling the fault, so just respond with an address error and return earlier. Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/fault.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index f8684bca2d79dd..e010302a192bc3 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -180,6 +180,12 @@ void cxl_handle_fault(struct work_struct *fault_work) return; } + /* Early return if the context is being / has been detached */ + if (ctx->status == CLOSED) { + cxl_ack_ae(ctx); + return; + } + pr_devel("CXL BOTTOM HALF handling fault for afu pe: %i. " "DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar); From d6a6af2c181400aade59417e698c7cd9bec8804e Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Mon, 8 Dec 2014 19:17:59 +1100 Subject: [PATCH 010/601] cxl: Disable AFU debug flag Upon inspection of the implementation specific registers, it was discovered that the high bit of the implementation specific RXCTL register was enabled, which enables the DEADB00F debug feature. The debug feature causes MMIO reads to a disabled AFU to respond with 0xDEADB00F instead of all Fs. In general this should not be visible as the kernel will only allow MMIO access to enabled AFUs, but there may be some circumstances where an AFU may become disabled while it is use. One such case would be an AFU designed to only be used in the dedicated process mode and to disable itself after it has completed it's work (however even in that case the effects of this debug flag would be limited as the userspace application must have completed any required MMIO accesses before the AFU disables itself with or without the flag). This patch removes the debug flag and replaces the magic value programmed into this register with a preprocessor define so it is clearer what the rest of this initialisation does. Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/cxl.h | 7 +++++++ drivers/misc/cxl/pci.c | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 28078f8894a5be..0df04380bfd63b 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -287,6 +287,13 @@ static const cxl_p2n_reg_t CXL_PSL_WED_An = {0x0A0}; #define CXL_PE_SOFTWARE_STATE_S (1ul << (31 - 30)) /* Suspend */ #define CXL_PE_SOFTWARE_STATE_T (1ul << (31 - 31)) /* Terminate */ +/****** CXL_PSL_RXCTL_An (Implementation Specific) ************************** + * Controls AFU Hang Pulse, which sets the timeout for the AFU to respond to + * the PSL for any response (except MMIO). Timeouts will occur between 1x to 2x + * of the hang pulse frequency. + */ +#define CXL_PSL_RXCTL_AFUHP_4S 0x7000000000000000ULL + /* SPA->sw_command_status */ #define CXL_SPA_SW_CMD_MASK 0xffff000000000000ULL #define CXL_SPA_SW_CMD_TERMINATE 0x0001000000000000ULL diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 0f2cc9f8b4dbcd..2ccd0a91d486e1 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -348,7 +348,7 @@ static int init_implementation_afu_regs(struct cxl_afu *afu) cxl_p1n_write(afu, CXL_PSL_COALLOC_A, 0xFF000000FEFEFEFEULL); /* for debugging with trace arrays */ cxl_p1n_write(afu, CXL_PSL_SLICE_TRACE, 0x0000FFFF00000000ULL); - cxl_p1n_write(afu, CXL_PSL_RXCTL_A, 0xF000000000000000ULL); + cxl_p1n_write(afu, CXL_PSL_RXCTL_A, CXL_PSL_RXCTL_AFUHP_4S); return 0; } From db7933f392ac4d9719d41d3f203a5f6a1c40f300 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Mon, 8 Dec 2014 19:18:00 +1100 Subject: [PATCH 011/601] cxl: Disable SPAP register when freeing SPA When we deactivate the AFU directed mode we free the scheduled process area, but did not clear the register in the hardware that has a pointer to it. This should be fine since we will have already cleared out every context and we won't do anything that would cause the hardware to access it until after we have allocated a new one, but just to be safe this patch clears out the register when we free the page. Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/native.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index f2b37b41a0da6e..0f24fa5b0de38f 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -185,6 +185,7 @@ static int alloc_spa(struct cxl_afu *afu) static void release_spa(struct cxl_afu *afu) { + cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0); free_pages((unsigned long) afu->spa, afu->spa_order); } From 01b14505c35cdc47f5abc1f6799cee23f2f10149 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Tue, 23 Dec 2014 11:32:07 +1100 Subject: [PATCH 012/601] powerpc/44x/Akebono: Remove select of IBM_EMAC_RGMII_WOL Commit 2a2c74b2efcb ("IBM Akebono: Add the Akebono platform") added a select of IBM_EMAC_RGMII_WOL. But that Kconfig symbol isn't (yet) part of the tree. So this select has been a nop since that commit was included in v3.16-rc1. Signed-off-by: Paul Bolle Acked-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/44x/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index d2ac1c1164546c..5538e57c36c1ca 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -214,7 +214,6 @@ config AKEBONO select ETHERNET select NET_VENDOR_IBM select IBM_EMAC_EMAC4 - select IBM_EMAC_RGMII_WOL select USB if USB_SUPPORT select USB_OHCI_HCD_PLATFORM if USB_OHCI_HCD select USB_EHCI_HCD_PLATFORM if USB_EHCI_HCD From 803d57de2b27000ed4400d16561c75821efe8333 Mon Sep 17 00:00:00 2001 From: Andreas Ruprecht Date: Wed, 17 Dec 2014 12:05:13 +0100 Subject: [PATCH 013/601] powerpc/lib: Do not include string.o in obj-y twice In the Makefile, string.o (which is generated from string.S) is included into the list of objects being built unconditionally (obj-y) in line 12. Additionally, if CONFIG_PPC64 is set, it is included again in line 17. This patch removes the latter unnecessary inclusion. Signed-off-by: Andreas Ruprecht Signed-off-by: Michael Ellerman --- arch/powerpc/lib/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 597562f69b2df4..1b01159b81f3ea 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -14,8 +14,7 @@ obj-y := string.o alloc.o \ obj-$(CONFIG_PPC32) += div64.o copy_32.o obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \ - usercopy_64.o mem_64.o string.o \ - hweight_64.o \ + usercopy_64.o mem_64.o hweight_64.o \ copyuser_power7.o string_64.o copypage_power7.o ifeq ($(CONFIG_GENERIC_CSUM),) obj-y += checksum_$(CONFIG_WORD_SIZE).o From 9a4fc4eaf111ca960c9f524b850598e9dbc9697f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 10 Jul 2014 19:34:31 +1000 Subject: [PATCH 014/601] powerpc/kvm: Create proper names for the kvm_host_state PMU fields We have two arrays in kvm_host_state that contain register values for the PMU. Currently we only create an asm-offsets symbol for the base of the arrays, and do the array offset in the assembly code. Creating an asm-offsets symbol for each field individually makes the code much nicer to read, particularly for the MMCRx/SIxR/SDAR fields, and might have helped us notice the recent double restore bug we had in this code. Signed-off-by: Michael Ellerman Acked-by: Alexander Graf --- arch/powerpc/kernel/asm-offsets.c | 15 +++++++++++-- arch/powerpc/kvm/book3s_hv_interrupts.S | 26 +++++++++++------------ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 28 ++++++++++++------------- 3 files changed, 40 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index e624f9646350cc..4717859fdd0440 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -644,8 +644,19 @@ int main(void) HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_PTID, ptid); - HSTATE_FIELD(HSTATE_MMCR, host_mmcr); - HSTATE_FIELD(HSTATE_PMC, host_pmc); + HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]); + HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]); + HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]); + HSTATE_FIELD(HSTATE_SIAR, host_mmcr[3]); + HSTATE_FIELD(HSTATE_SDAR, host_mmcr[4]); + HSTATE_FIELD(HSTATE_MMCR2, host_mmcr[5]); + HSTATE_FIELD(HSTATE_SIER, host_mmcr[6]); + HSTATE_FIELD(HSTATE_PMC1, host_pmc[0]); + HSTATE_FIELD(HSTATE_PMC2, host_pmc[1]); + HSTATE_FIELD(HSTATE_PMC3, host_pmc[2]); + HSTATE_FIELD(HSTATE_PMC4, host_pmc[3]); + HSTATE_FIELD(HSTATE_PMC5, host_pmc[4]); + HSTATE_FIELD(HSTATE_PMC6, host_pmc[5]); HSTATE_FIELD(HSTATE_PURR, host_purr); HSTATE_FIELD(HSTATE_SPURR, host_spurr); HSTATE_FIELD(HSTATE_DSCR, host_dscr); diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S index 36540a99d17849..0fdc4a28970b3c 100644 --- a/arch/powerpc/kvm/book3s_hv_interrupts.S +++ b/arch/powerpc/kvm/book3s_hv_interrupts.S @@ -93,15 +93,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r5, SPRN_MMCR1 mfspr r9, SPRN_SIAR mfspr r10, SPRN_SDAR - std r7, HSTATE_MMCR(r13) - std r5, HSTATE_MMCR + 8(r13) - std r6, HSTATE_MMCR + 16(r13) - std r9, HSTATE_MMCR + 24(r13) - std r10, HSTATE_MMCR + 32(r13) + std r7, HSTATE_MMCR0(r13) + std r5, HSTATE_MMCR1(r13) + std r6, HSTATE_MMCRA(r13) + std r9, HSTATE_SIAR(r13) + std r10, HSTATE_SDAR(r13) BEGIN_FTR_SECTION mfspr r9, SPRN_SIER - std r8, HSTATE_MMCR + 40(r13) - std r9, HSTATE_MMCR + 48(r13) + std r8, HSTATE_MMCR2(r13) + std r9, HSTATE_SIER(r13) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r3, SPRN_PMC1 mfspr r5, SPRN_PMC2 @@ -109,12 +109,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfspr r7, SPRN_PMC4 mfspr r8, SPRN_PMC5 mfspr r9, SPRN_PMC6 - stw r3, HSTATE_PMC(r13) - stw r5, HSTATE_PMC + 4(r13) - stw r6, HSTATE_PMC + 8(r13) - stw r7, HSTATE_PMC + 12(r13) - stw r8, HSTATE_PMC + 16(r13) - stw r9, HSTATE_PMC + 20(r13) + stw r3, HSTATE_PMC1(r13) + stw r5, HSTATE_PMC2(r13) + stw r6, HSTATE_PMC3(r13) + stw r7, HSTATE_PMC4(r13) + stw r8, HSTATE_PMC5(r13) + stw r9, HSTATE_PMC6(r13) 31: /* diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 10554df13852fe..bb94e6f20c813b 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -83,35 +83,35 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) cmpwi r4, 0 beq 23f /* skip if not */ BEGIN_FTR_SECTION - ld r3, HSTATE_MMCR(r13) + ld r3, HSTATE_MMCR0(r13) andi. r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO cmpwi r4, MMCR0_PMAO beql kvmppc_fix_pmao END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG) - lwz r3, HSTATE_PMC(r13) - lwz r4, HSTATE_PMC + 4(r13) - lwz r5, HSTATE_PMC + 8(r13) - lwz r6, HSTATE_PMC + 12(r13) - lwz r8, HSTATE_PMC + 16(r13) - lwz r9, HSTATE_PMC + 20(r13) + lwz r3, HSTATE_PMC1(r13) + lwz r4, HSTATE_PMC2(r13) + lwz r5, HSTATE_PMC3(r13) + lwz r6, HSTATE_PMC4(r13) + lwz r8, HSTATE_PMC5(r13) + lwz r9, HSTATE_PMC6(r13) mtspr SPRN_PMC1, r3 mtspr SPRN_PMC2, r4 mtspr SPRN_PMC3, r5 mtspr SPRN_PMC4, r6 mtspr SPRN_PMC5, r8 mtspr SPRN_PMC6, r9 - ld r3, HSTATE_MMCR(r13) - ld r4, HSTATE_MMCR + 8(r13) - ld r5, HSTATE_MMCR + 16(r13) - ld r6, HSTATE_MMCR + 24(r13) - ld r7, HSTATE_MMCR + 32(r13) + ld r3, HSTATE_MMCR0(r13) + ld r4, HSTATE_MMCR1(r13) + ld r5, HSTATE_MMCRA(r13) + ld r6, HSTATE_SIAR(r13) + ld r7, HSTATE_SDAR(r13) mtspr SPRN_MMCR1, r4 mtspr SPRN_MMCRA, r5 mtspr SPRN_SIAR, r6 mtspr SPRN_SDAR, r7 BEGIN_FTR_SECTION - ld r8, HSTATE_MMCR + 40(r13) - ld r9, HSTATE_MMCR + 48(r13) + ld r8, HSTATE_MMCR2(r13) + ld r9, HSTATE_SIER(r13) mtspr SPRN_MMCR2, r8 mtspr SPRN_SIER, r9 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) From 58498ee3e573dc03be651b6839dbdf865ae7ee38 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 9 Dec 2014 10:18:49 +0100 Subject: [PATCH 015/601] s390/ftrace: add code replacement sanity checks Always verify that the to be replaced code matches what we expect to see. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ftrace.c | 95 ++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 46 deletions(-) diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index b86bb8823f1519..3dabcae40e0421 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -59,62 +59,65 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - struct ftrace_insn insn; - unsigned short op; - void *from, *to; - size_t size; - - ftrace_generate_nop_insn(&insn); - size = sizeof(insn); - from = &insn; - to = (void *) rec->ip; - if (probe_kernel_read(&op, (void *) rec->ip, sizeof(op))) + struct ftrace_insn orig, new, old; + + if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; - /* - * If we find a breakpoint instruction, a kprobe has been placed - * at the beginning of the function. We write the constant - * KPROBE_ON_FTRACE_NOP into the remaining four bytes of the original - * instruction so that the kprobes handler can execute a nop, if it - * reaches this breakpoint. - */ - if (op == BREAKPOINT_INSTRUCTION) { - size -= 2; - from += 2; - to += 2; - insn.disp = KPROBE_ON_FTRACE_NOP; + if (addr == MCOUNT_ADDR) { + /* Initial code replacement; we expect to see stg r14,8(r15) */ + orig.opc = 0xe3e0; + orig.disp = 0xf0080024; + ftrace_generate_nop_insn(&new); + } else if (old.opc == BREAKPOINT_INSTRUCTION) { + /* + * If we find a breakpoint instruction, a kprobe has been + * placed at the beginning of the function. We write the + * constant KPROBE_ON_FTRACE_NOP into the remaining four + * bytes of the original instruction so that the kprobes + * handler can execute a nop, if it reaches this breakpoint. + */ + new.opc = orig.opc = BREAKPOINT_INSTRUCTION; + orig.disp = KPROBE_ON_FTRACE_CALL; + new.disp = KPROBE_ON_FTRACE_NOP; + } else { + /* Replace ftrace call with a nop. */ + ftrace_generate_call_insn(&orig, rec->ip); + ftrace_generate_nop_insn(&new); } - if (probe_kernel_write(to, from, size)) + /* Verify that the to be replaced code matches what we expect. */ + if (memcmp(&orig, &old, sizeof(old))) + return -EINVAL; + if (probe_kernel_write((void *) rec->ip, &new, sizeof(new))) return -EPERM; return 0; } int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - struct ftrace_insn insn; - unsigned short op; - void *from, *to; - size_t size; - - ftrace_generate_call_insn(&insn, rec->ip); - size = sizeof(insn); - from = &insn; - to = (void *) rec->ip; - if (probe_kernel_read(&op, (void *) rec->ip, sizeof(op))) + struct ftrace_insn orig, new, old; + + if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; - /* - * If we find a breakpoint instruction, a kprobe has been placed - * at the beginning of the function. We write the constant - * KPROBE_ON_FTRACE_CALL into the remaining four bytes of the original - * instruction so that the kprobes handler can execute a brasl if it - * reaches this breakpoint. - */ - if (op == BREAKPOINT_INSTRUCTION) { - size -= 2; - from += 2; - to += 2; - insn.disp = KPROBE_ON_FTRACE_CALL; + if (old.opc == BREAKPOINT_INSTRUCTION) { + /* + * If we find a breakpoint instruction, a kprobe has been + * placed at the beginning of the function. We write the + * constant KPROBE_ON_FTRACE_CALL into the remaining four + * bytes of the original instruction so that the kprobes + * handler can execute a brasl if it reaches this breakpoint. + */ + new.opc = orig.opc = BREAKPOINT_INSTRUCTION; + orig.disp = KPROBE_ON_FTRACE_NOP; + new.disp = KPROBE_ON_FTRACE_CALL; + } else { + /* Replace nop with an ftrace call. */ + ftrace_generate_nop_insn(&orig); + ftrace_generate_call_insn(&new, rec->ip); } - if (probe_kernel_write(to, from, size)) + /* Verify that the to be replaced code matches what we expect. */ + if (memcmp(&orig, &old, sizeof(old))) + return -EINVAL; + if (probe_kernel_write((void *) rec->ip, &new, sizeof(new))) return -EPERM; return 0; } From eba8452525e3fd0b982f78365dea8bd2ce11a20a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 12 Dec 2014 12:46:35 +0100 Subject: [PATCH 016/601] s390/pci: add missing address space annotation Signed-off-by: Heiko Carstens --- arch/s390/pci/pci_mmio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 62c5ea6d8682a5..8aa271b3d1ad95 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -55,7 +55,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, ret = get_pfn(mmio_addr, VM_WRITE, &pfn); if (ret) goto out; - io_addr = (void *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); + io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); ret = -EFAULT; if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) @@ -96,7 +96,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, ret = get_pfn(mmio_addr, VM_READ, &pfn); if (ret) goto out; - io_addr = (void *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); + io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); ret = -EFAULT; if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) From 8d1f211ebbdfd57843a52fa7efe34251530beec1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 12 Dec 2014 12:52:00 +0100 Subject: [PATCH 017/601] s390/disassembler: remove indentical initializer Remove one of the two identical initializer entries. Signed-off-by: Heiko Carstens --- arch/s390/kernel/dis.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index f3762937dd82ee..d46d0b0b2cdab0 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -226,7 +226,6 @@ static const struct s390_operand operands[] = [U16_32] = { 16, 32, 0 }, [J16_16] = { 16, 16, OPERAND_PCREL }, [J16_32] = { 16, 32, OPERAND_PCREL }, - [I16_32] = { 16, 32, OPERAND_SIGNED }, [I24_24] = { 24, 24, OPERAND_SIGNED }, [J32_16] = { 32, 16, OPERAND_PCREL }, [I32_16] = { 32, 16, OPERAND_SIGNED }, From 925dfc020a41ce484172a43b603437e58aecd1c1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 12 Dec 2014 13:04:21 +0100 Subject: [PATCH 018/601] s390/pgtable: add unsigned long casts Get rid of warnings like this one: warning: constant 0xffe0000000000000 is so big it is unsigned long Signed-off-by: Heiko Carstens --- arch/s390/mm/pgtable.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 601deb85d2a058..47cbca079740ae 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -527,7 +527,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) table += (gaddr >> 53) & 0x7ff; if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, - gaddr & 0xffe0000000000000)) + gaddr & 0xffe0000000000000UL)) return -ENOMEM; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); } @@ -535,7 +535,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) table += (gaddr >> 42) & 0x7ff; if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, - gaddr & 0xfffffc0000000000)) + gaddr & 0xfffffc0000000000UL)) return -ENOMEM; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); } @@ -543,7 +543,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) table += (gaddr >> 31) & 0x7ff; if ((*table & _REGION_ENTRY_INVALID) && gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, - gaddr & 0xffffffff80000000)) + gaddr & 0xffffffff80000000UL)) return -ENOMEM; table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); } From e0a50545480de0936ab867168d9bd086e56f465c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 12 Dec 2014 13:11:08 +0100 Subject: [PATCH 019/601] s390/signal: add sys_sigreturn and sys_rt_sigreturn declarations Get rid of sparse warnings like this one: arch/s390/kernel/signal.c:244:1: warning: symbol 'sys_sigreturn' was not declared. Should it be static? Signed-off-by: Heiko Carstens --- arch/s390/kernel/entry.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 8e61393c827577..834df047d35f67 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -71,9 +71,11 @@ struct s390_mmap_arg_struct; struct fadvise64_64_args; struct old_sigaction; +long sys_rt_sigreturn(void); +long sys_sigreturn(void); + long sys_s390_personality(unsigned int personality); long sys_s390_runtime_instr(int command, int signum); - long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t); long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); #endif /* _ENTRY_H */ From 8ebd51a705c56520481f2b813790dc5afdb0a751 Mon Sep 17 00:00:00 2001 From: Rickard Strandqvist Date: Sat, 20 Dec 2014 13:27:49 +0100 Subject: [PATCH 020/601] s390/cio: idset.c: remove some unused functions Removes some functions that are not used anywhere: idset_clear() idset_sch_get_first() This was partially found by using a static code analysis program called cppcheck. Signed-off-by: Rickard Strandqvist Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/idset.c | 20 -------------------- drivers/s390/cio/idset.h | 2 -- 2 files changed, 22 deletions(-) diff --git a/drivers/s390/cio/idset.c b/drivers/s390/cio/idset.c index 5a999084a22954..b3e06a7b9480f3 100644 --- a/drivers/s390/cio/idset.c +++ b/drivers/s390/cio/idset.c @@ -38,11 +38,6 @@ void idset_free(struct idset *set) vfree(set); } -void idset_clear(struct idset *set) -{ - memset(set->bitmap, 0, bitmap_size(set->num_ssid, set->num_id)); -} - void idset_fill(struct idset *set) { memset(set->bitmap, 0xff, bitmap_size(set->num_ssid, set->num_id)); @@ -103,21 +98,6 @@ int idset_sch_contains(struct idset *set, struct subchannel_id schid) return idset_contains(set, schid.ssid, schid.sch_no); } -int idset_sch_get_first(struct idset *set, struct subchannel_id *schid) -{ - int ssid = 0; - int id = 0; - int rc; - - rc = idset_get_first(set, &ssid, &id); - if (rc) { - init_subchannel_id(schid); - schid->ssid = ssid; - schid->sch_no = id; - } - return rc; -} - int idset_is_empty(struct idset *set) { return bitmap_empty(set->bitmap, set->num_ssid * set->num_id); diff --git a/drivers/s390/cio/idset.h b/drivers/s390/cio/idset.h index 06d3bc01bb0934..22b58104683be8 100644 --- a/drivers/s390/cio/idset.h +++ b/drivers/s390/cio/idset.h @@ -11,7 +11,6 @@ struct idset; void idset_free(struct idset *set); -void idset_clear(struct idset *set); void idset_fill(struct idset *set); struct idset *idset_sch_new(void); @@ -19,7 +18,6 @@ void idset_sch_add(struct idset *set, struct subchannel_id id); void idset_sch_del(struct idset *set, struct subchannel_id id); void idset_sch_del_subseq(struct idset *set, struct subchannel_id schid); int idset_sch_contains(struct idset *set, struct subchannel_id id); -int idset_sch_get_first(struct idset *set, struct subchannel_id *id); int idset_is_empty(struct idset *set); void idset_add_set(struct idset *to, struct idset *from); From 47523c983f55448c3a09cc3f1a885bf81cd422e3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 22 Dec 2014 10:07:04 +0100 Subject: [PATCH 021/601] s390: keep Kconfig sorted Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 68b68d755fdfe4..e79c3eab40b9e8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -66,6 +66,7 @@ config S390 select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_SG_CHAIN select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_INLINE_READ_LOCK select ARCH_INLINE_READ_LOCK_BH @@ -151,7 +152,6 @@ config S390 select TTY select VIRT_CPU_ACCOUNTING select VIRT_TO_BUS - select ARCH_HAS_SG_CHAIN config SCHED_OMIT_FRAME_POINTER def_bool y From fbf87dff6706d412fe69b8158f7ae415e5e7380b Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Sat, 3 Jan 2015 17:29:07 +0800 Subject: [PATCH 022/601] s390/sclp: fix declaration of _sclp_print_early() _sclp_print_early() has return value: at present, return 0 for OK, 1 for failure. It returns '%r2', so use 'long' as return value (upper caller can check '%r2' directly). The related warning: CC arch/s390/boot/compressed/misc.o arch/s390/boot/compressed/misc.c:66:8: warning: type defaults to 'int' in declaration of '_sclp_print_early' [-Wimplicit-int] extern _sclp_print_early(const char *); ^ At present, _sclp_print_early() is only used by puts(), so can still remain its declaration in 'misc.c' file. [heiko.carstens@de.ibm.com]: move declaration to sclp.h header file Signed-off-by: Chen Gang Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/boot/compressed/misc.c | 3 +-- arch/s390/include/asm/sclp.h | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c index 57cbaff1f39778..42506b371b7414 100644 --- a/arch/s390/boot/compressed/misc.c +++ b/arch/s390/boot/compressed/misc.c @@ -8,6 +8,7 @@ #include #include +#include #include #include "sizes.h" @@ -63,8 +64,6 @@ static unsigned long free_mem_end_ptr; #include "../../../../lib/decompress_unxz.c" #endif -extern _sclp_print_early(const char *); - static int puts(const char *s) { _sclp_print_early(s); diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 1aba89b53cb9e1..b6f8066789c1cc 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -68,4 +68,6 @@ void sclp_early_detect(void); int sclp_has_siif(void); unsigned int sclp_get_ibc(void); +long _sclp_print_early(const char *); + #endif /* _ASM_S390_SCLP_H */ From a7e75d434b53b45ae60779903904d4fbdbd145a5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 5 Jan 2015 10:10:14 +0100 Subject: [PATCH 023/601] s390/sclp: sign extend return value of _sclp_print_early() _sclp_print_early() has a return value, but misses to sign extend it if called from 64 bit code. This is not really a bug, since currently no caller cares what the return value is. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/sclp.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/sclp.S b/arch/s390/kernel/sclp.S index a41f2c99dcc85d..7e77e03378f38f 100644 --- a/arch/s390/kernel/sclp.S +++ b/arch/s390/kernel/sclp.S @@ -294,7 +294,8 @@ ENTRY(_sclp_print_early) #ifdef CONFIG_64BIT tm LC_AR_MODE_ID,1 jno .Lesa3 - lmh %r6,%r15,96(%r15) # store upper register halves + lgfr %r2,%r2 # sign extend return value + lmh %r6,%r15,96(%r15) # restore upper register halves ahi %r15,80 .Lesa3: #endif From 91c0837e6dee8c694c8849c70e1f0f770d92d072 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 5 Jan 2015 04:29:18 -0800 Subject: [PATCH 024/601] s390: remove unnecessary KERN_CONT This has no effect as KERN_CONT is an empty string, It's probably just a missing conversion artifact as the other pr_cont uses in the same file don't have this prefix. Signed-off-by: Joe Perches Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 811937bb90be69..232c14ea42695b 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -171,7 +171,7 @@ static void dump_pagetable(unsigned long asce, unsigned long address) table = table + ((address >> 20) & 0x7ff); if (bad_address(table)) goto bad; - pr_cont(KERN_CONT "S:%016lx ", *table); + pr_cont("S:%016lx ", *table); if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE)) goto out; table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); From e6a67ad0e29087201536792f7d5cecec4ff6fc64 Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Thu, 1 Jan 2015 22:56:02 +0800 Subject: [PATCH 025/601] s390/crypto: remove 'const' to avoid compiler warnings In aes_encrypt() and aes_decrypt(), need let 'sctx->key' be modified, so remove 'const' for it. The related warnings: CC [M] arch/s390/crypto/aes_s390.o arch/s390/crypto/aes_s390.c: In function 'aes_encrypt': arch/s390/crypto/aes_s390.c:146:37: warning: passing argument 2 of 'crypt_s390_km' discards 'const' qualifier from pointer target type [-Wdiscarded-array-qualifiers] crypt_s390_km(KM_AES_128_ENCRYPT, &sctx->key, out, in, ^ ... In file included from arch/s390/crypto/aes_s390.c:29:0: arch/s390/crypto/crypt_s390.h:154:19: note: expected 'void *' but argument is of type 'const u8 (*)[32] {aka const unsigned char (*)[32]}' static inline int crypt_s390_km(long func, void *param, ^ Signed-off-by: Chen Gang Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/aes_s390.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 1f272b24fc0bf9..5566ce80abdbf7 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -134,7 +134,7 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - const struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); if (unlikely(need_fallback(sctx->key_len))) { crypto_cipher_encrypt_one(sctx->fallback.cip, out, in); @@ -159,7 +159,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { - const struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); if (unlikely(need_fallback(sctx->key_len))) { crypto_cipher_decrypt_one(sctx->fallback.cip, out, in); From d97d929f06d0e072cd36fba6bd9d25b29bae34fd Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 8 Jan 2015 07:41:52 +0000 Subject: [PATCH 026/601] s390: move cacheinfo sysfs to generic cacheinfo infrastructure This patch removes the redundant sysfs cacheinfo code by reusing the newly introduced generic cacheinfo infrastructure through the commit 246246cbde5e ("drivers: base: support cpu cache information interface to userspace via sysfs") Signed-off-by: Sudeep Holla Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/cache.c | 388 ++++++++++----------------------------- 1 file changed, 92 insertions(+), 296 deletions(-) diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index c0b03c28d15717..fe21f074cf9f7f 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -5,37 +5,11 @@ * Author(s): Heiko Carstens */ -#include #include -#include -#include -#include #include +#include #include -struct cache { - unsigned long size; - unsigned int line_size; - unsigned int associativity; - unsigned int nr_sets; - unsigned int level : 3; - unsigned int type : 2; - unsigned int private : 1; - struct list_head list; -}; - -struct cache_dir { - struct kobject *kobj; - struct cache_index_dir *index; -}; - -struct cache_index_dir { - struct kobject kobj; - int cpu; - struct cache *cache; - struct cache_index_dir *next; -}; - enum { CACHE_SCOPE_NOTEXISTS, CACHE_SCOPE_PRIVATE, @@ -44,10 +18,10 @@ enum { }; enum { - CACHE_TYPE_SEPARATE, - CACHE_TYPE_DATA, - CACHE_TYPE_INSTRUCTION, - CACHE_TYPE_UNIFIED, + CTYPE_SEPARATE, + CTYPE_DATA, + CTYPE_INSTRUCTION, + CTYPE_UNIFIED, }; enum { @@ -70,39 +44,59 @@ struct cache_info { }; #define CACHE_MAX_LEVEL 8 - union cache_topology { struct cache_info ci[CACHE_MAX_LEVEL]; unsigned long long raw; }; static const char * const cache_type_string[] = { - "Data", + "", "Instruction", + "Data", + "", "Unified", }; -static struct cache_dir *cache_dir_cpu[NR_CPUS]; -static LIST_HEAD(cache_list); +static const enum cache_type cache_type_map[] = { + [CTYPE_SEPARATE] = CACHE_TYPE_SEPARATE, + [CTYPE_DATA] = CACHE_TYPE_DATA, + [CTYPE_INSTRUCTION] = CACHE_TYPE_INST, + [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED, +}; void show_cacheinfo(struct seq_file *m) { - struct cache *cache; - int index = 0; + int cpu = smp_processor_id(), idx; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cacheinfo *cache; - list_for_each_entry(cache, &cache_list, list) { - seq_printf(m, "cache%-11d: ", index); + for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) { + cache = this_cpu_ci->info_list + idx; + seq_printf(m, "cache%-11d: ", idx); seq_printf(m, "level=%d ", cache->level); seq_printf(m, "type=%s ", cache_type_string[cache->type]); - seq_printf(m, "scope=%s ", cache->private ? "Private" : "Shared"); - seq_printf(m, "size=%luK ", cache->size >> 10); - seq_printf(m, "line_size=%u ", cache->line_size); - seq_printf(m, "associativity=%d", cache->associativity); + seq_printf(m, "scope=%s ", + cache->disable_sysfs ? "Shared" : "Private"); + seq_printf(m, "size=%dK ", cache->size >> 10); + seq_printf(m, "line_size=%u ", cache->coherency_line_size); + seq_printf(m, "associativity=%d", cache->ways_of_associativity); seq_puts(m, "\n"); - index++; } } +static inline enum cache_type get_cache_type(struct cache_info *ci, int level) +{ + if (level >= CACHE_MAX_LEVEL) + return CACHE_TYPE_NOCACHE; + + ci += level; + + if (ci->scope != CACHE_SCOPE_SHARED && ci->scope != CACHE_SCOPE_PRIVATE) + return CACHE_TYPE_NOCACHE; + + return cache_type_map[ci->type]; +} + static inline unsigned long ecag(int ai, int li, int ti) { unsigned long cmd, val; @@ -113,277 +107,79 @@ static inline unsigned long ecag(int ai, int li, int ti) return val; } -static int __init cache_add(int level, int private, int type) +static void ci_leaf_init(struct cacheinfo *this_leaf, int private, + enum cache_type type, unsigned int level) { - struct cache *cache; - int ti; + int ti, num_sets; + int cpu = smp_processor_id(); - cache = kzalloc(sizeof(*cache), GFP_KERNEL); - if (!cache) - return -ENOMEM; - if (type == CACHE_TYPE_INSTRUCTION) + if (type == CACHE_TYPE_INST) ti = CACHE_TI_INSTRUCTION; else ti = CACHE_TI_UNIFIED; - cache->size = ecag(EXTRACT_SIZE, level, ti); - cache->line_size = ecag(EXTRACT_LINE_SIZE, level, ti); - cache->associativity = ecag(EXTRACT_ASSOCIATIVITY, level, ti); - cache->nr_sets = cache->size / cache->associativity; - cache->nr_sets /= cache->line_size; - cache->private = private; - cache->level = level + 1; - cache->type = type - 1; - list_add_tail(&cache->list, &cache_list); - return 0; -} - -static void __init cache_build_info(void) -{ - struct cache *cache, *next; - union cache_topology ct; - int level, private, rc; - - ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); - for (level = 0; level < CACHE_MAX_LEVEL; level++) { - switch (ct.ci[level].scope) { - case CACHE_SCOPE_SHARED: - private = 0; - break; - case CACHE_SCOPE_PRIVATE: - private = 1; - break; - default: - return; - } - if (ct.ci[level].type == CACHE_TYPE_SEPARATE) { - rc = cache_add(level, private, CACHE_TYPE_DATA); - rc |= cache_add(level, private, CACHE_TYPE_INSTRUCTION); - } else { - rc = cache_add(level, private, ct.ci[level].type); - } - if (rc) - goto error; - } - return; -error: - list_for_each_entry_safe(cache, next, &cache_list, list) { - list_del(&cache->list); - kfree(cache); - } -} - -static struct cache_dir *cache_create_cache_dir(int cpu) -{ - struct cache_dir *cache_dir; - struct kobject *kobj = NULL; - struct device *dev; - - dev = get_cpu_device(cpu); - if (!dev) - goto out; - kobj = kobject_create_and_add("cache", &dev->kobj); - if (!kobj) - goto out; - cache_dir = kzalloc(sizeof(*cache_dir), GFP_KERNEL); - if (!cache_dir) - goto out; - cache_dir->kobj = kobj; - cache_dir_cpu[cpu] = cache_dir; - return cache_dir; -out: - kobject_put(kobj); - return NULL; -} - -static struct cache_index_dir *kobj_to_cache_index_dir(struct kobject *kobj) -{ - return container_of(kobj, struct cache_index_dir, kobj); -} - -static void cache_index_release(struct kobject *kobj) -{ - struct cache_index_dir *index; - - index = kobj_to_cache_index_dir(kobj); - kfree(index); -} - -static ssize_t cache_index_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct kobj_attribute *kobj_attr; - - kobj_attr = container_of(attr, struct kobj_attribute, attr); - return kobj_attr->show(kobj, kobj_attr, buf); -} - -#define DEFINE_CACHE_ATTR(_name, _format, _value) \ -static ssize_t cache_##_name##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ - char *buf) \ -{ \ - struct cache_index_dir *index; \ - \ - index = kobj_to_cache_index_dir(kobj); \ - return sprintf(buf, _format, _value); \ -} \ -static struct kobj_attribute cache_##_name##_attr = \ - __ATTR(_name, 0444, cache_##_name##_show, NULL); -DEFINE_CACHE_ATTR(size, "%luK\n", index->cache->size >> 10); -DEFINE_CACHE_ATTR(coherency_line_size, "%u\n", index->cache->line_size); -DEFINE_CACHE_ATTR(number_of_sets, "%u\n", index->cache->nr_sets); -DEFINE_CACHE_ATTR(ways_of_associativity, "%u\n", index->cache->associativity); -DEFINE_CACHE_ATTR(type, "%s\n", cache_type_string[index->cache->type]); -DEFINE_CACHE_ATTR(level, "%d\n", index->cache->level); + this_leaf->level = level + 1; + this_leaf->type = type; + this_leaf->coherency_line_size = ecag(EXTRACT_LINE_SIZE, level, ti); + this_leaf->ways_of_associativity = ecag(EXTRACT_ASSOCIATIVITY, + level, ti); + this_leaf->size = ecag(EXTRACT_SIZE, level, ti); -static ssize_t shared_cpu_map_func(struct kobject *kobj, int type, char *buf) -{ - struct cache_index_dir *index; - int len; - - index = kobj_to_cache_index_dir(kobj); - len = type ? - cpulist_scnprintf(buf, PAGE_SIZE - 2, cpumask_of(index->cpu)) : - cpumask_scnprintf(buf, PAGE_SIZE - 2, cpumask_of(index->cpu)); - len += sprintf(&buf[len], "\n"); - return len; -} - -static ssize_t shared_cpu_map_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return shared_cpu_map_func(kobj, 0, buf); + num_sets = this_leaf->size / this_leaf->coherency_line_size; + num_sets /= this_leaf->ways_of_associativity; + this_leaf->number_of_sets = num_sets; + cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); + if (!private) + this_leaf->disable_sysfs = true; } -static struct kobj_attribute cache_shared_cpu_map_attr = - __ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL); -static ssize_t shared_cpu_list_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +int init_cache_level(unsigned int cpu) { - return shared_cpu_map_func(kobj, 1, buf); -} -static struct kobj_attribute cache_shared_cpu_list_attr = - __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL); - -static struct attribute *cache_index_default_attrs[] = { - &cache_type_attr.attr, - &cache_size_attr.attr, - &cache_number_of_sets_attr.attr, - &cache_ways_of_associativity_attr.attr, - &cache_level_attr.attr, - &cache_coherency_line_size_attr.attr, - &cache_shared_cpu_map_attr.attr, - &cache_shared_cpu_list_attr.attr, - NULL, -}; - -static const struct sysfs_ops cache_index_ops = { - .show = cache_index_show, -}; - -static struct kobj_type cache_index_type = { - .sysfs_ops = &cache_index_ops, - .release = cache_index_release, - .default_attrs = cache_index_default_attrs, -}; - -static int cache_create_index_dir(struct cache_dir *cache_dir, - struct cache *cache, int index, int cpu) -{ - struct cache_index_dir *index_dir; - int rc; - - index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL); - if (!index_dir) - return -ENOMEM; - index_dir->cache = cache; - index_dir->cpu = cpu; - rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type, - cache_dir->kobj, "index%d", index); - if (rc) - goto out; - index_dir->next = cache_dir->index; - cache_dir->index = index_dir; - return 0; -out: - kfree(index_dir); - return rc; -} + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + unsigned int level = 0, leaves = 0; + union cache_topology ct; + enum cache_type ctype; -static int cache_add_cpu(int cpu) -{ - struct cache_dir *cache_dir; - struct cache *cache; - int rc, index = 0; + if (!this_cpu_ci) + return -EINVAL; - if (list_empty(&cache_list)) - return 0; - cache_dir = cache_create_cache_dir(cpu); - if (!cache_dir) - return -ENOMEM; - list_for_each_entry(cache, &cache_list, list) { - if (!cache->private) + ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); + do { + ctype = get_cache_type(&ct.ci[0], level); + if (ctype == CACHE_TYPE_NOCACHE) break; - rc = cache_create_index_dir(cache_dir, cache, index, cpu); - if (rc) - return rc; - index++; - } - return 0; -} + /* Separate instruction and data caches */ + leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1; + } while (++level < CACHE_MAX_LEVEL); -static void cache_remove_cpu(int cpu) -{ - struct cache_index_dir *index, *next; - struct cache_dir *cache_dir; + this_cpu_ci->num_levels = level; + this_cpu_ci->num_leaves = leaves; - cache_dir = cache_dir_cpu[cpu]; - if (!cache_dir) - return; - index = cache_dir->index; - while (index) { - next = index->next; - kobject_put(&index->kobj); - index = next; - } - kobject_put(cache_dir->kobj); - kfree(cache_dir); - cache_dir_cpu[cpu] = NULL; + return 0; } -static int cache_hotplug(struct notifier_block *nfb, unsigned long action, - void *hcpu) +int populate_cache_leaves(unsigned int cpu) { - int cpu = (long)hcpu; - int rc = 0; + unsigned int level, idx, pvt; + union cache_topology ct; + enum cache_type ctype; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cacheinfo *this_leaf = this_cpu_ci->info_list; - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_ONLINE: - rc = cache_add_cpu(cpu); - if (rc) - cache_remove_cpu(cpu); - break; - case CPU_DEAD: - cache_remove_cpu(cpu); - break; + ct.raw = ecag(EXTRACT_TOPOLOGY, 0, 0); + for (idx = 0, level = 0; level < this_cpu_ci->num_levels && + idx < this_cpu_ci->num_leaves; idx++, level++) { + if (!this_leaf) + return -EINVAL; + + pvt = (ct.ci[level].scope == CACHE_SCOPE_PRIVATE) ? 1 : 0; + ctype = get_cache_type(&ct.ci[0], level); + if (ctype == CACHE_TYPE_SEPARATE) { + ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_DATA, level); + ci_leaf_init(this_leaf++, pvt, CACHE_TYPE_INST, level); + } else { + ci_leaf_init(this_leaf++, pvt, ctype, level); + } } - return rc ? NOTIFY_BAD : NOTIFY_OK; -} - -static int __init cache_init(void) -{ - int cpu; - - if (!test_facility(34)) - return 0; - cache_build_info(); - - cpu_notifier_register_begin(); - for_each_online_cpu(cpu) - cache_add_cpu(cpu); - __hotcpu_notifier(cache_hotplug, 0); - cpu_notifier_register_done(); return 0; } -device_initcall(cache_init); From fca08f326ae0423f03b097ff54de432fe77b95d0 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 9 Jan 2015 10:19:49 +0800 Subject: [PATCH 027/601] ARM: probes: move all probe code to dedicate directory In discussion on LKML (https://lkml.org/lkml/2014/11/28/158), Russell King suggests to move all probe related code to arch/arm/probes. This patch does the work. Due to dependency on 'arch/arm/kernel/patch.h', this patch also moves patch.h to 'arch/arm/include/asm/patch.h', and related '#include' directives are also midified to '#include '. Following is an overview of this patch: ./arch/arm/kernel/ ./arch/arm/probes/ |-- Makefile |-- Makefile |-- probes-arm.c ==> |-- decode-arm.c |-- probes-arm.h ==> |-- decode-arm.h |-- probes-thumb.c ==> |-- decode-thumb.c |-- probes-thumb.h ==> |-- decode-thumb.h |-- probes.c ==> |-- decode.c |-- probes.h ==> |-- decode.h | |-- kprobes | | |-- Makefile |-- kprobes-arm.c ==> | |-- actions-arm.c |-- kprobes-common.c ==> | |-- actions-common.c |-- kprobes-thumb.c ==> | |-- actions-thumb.c |-- kprobes.c ==> | |-- core.c |-- kprobes.h ==> | |-- core.h |-- kprobes-test-arm.c ==> | |-- test-arm.c |-- kprobes-test.c ==> | |-- test-core.c |-- kprobes-test.h ==> | |-- test-core.h |-- kprobes-test-thumb.c ==> | `-- test-thumb.c | `-- uprobes | |-- Makefile |-- uprobes-arm.c ==> |-- actions-arm.c |-- uprobes.c ==> |-- core.c |-- uprobes.h ==> `-- core.h | `-- patch.h ==> arch/arm/include/asm/patch.h Signed-off-by: Wang Nan Acked-by: Masami Hiramatsu Signed-off-by: Jon Medhurst --- arch/arm/Makefile | 1 + arch/arm/{kernel => include/asm}/patch.h | 0 arch/arm/kernel/Makefile | 16 ++-------------- arch/arm/kernel/jump_label.c | 2 +- arch/arm/kernel/kgdb.c | 3 +-- arch/arm/kernel/patch.c | 3 +-- arch/arm/probes/Makefile | 7 +++++++ .../{kernel/probes-arm.c => probes/decode-arm.c} | 7 ++++--- .../{kernel/probes-arm.h => probes/decode-arm.h} | 4 +++- .../probes-thumb.c => probes/decode-thumb.c} | 6 +++--- .../probes-thumb.h => probes/decode-thumb.h} | 4 +++- arch/arm/{kernel/probes.c => probes/decode.c} | 4 ++-- arch/arm/{kernel/probes.h => probes/decode.h} | 2 +- arch/arm/probes/kprobes/Makefile | 11 +++++++++++ .../kprobes/actions-arm.c} | 6 +++--- .../kprobes/actions-common.c} | 4 ++-- .../kprobes/actions-thumb.c} | 6 +++--- .../{kernel/kprobes.c => probes/kprobes/core.c} | 8 ++++---- .../{kernel/kprobes.h => probes/kprobes/core.h} | 3 ++- .../kprobes/test-arm.c} | 2 +- .../kprobes/test-core.c} | 8 ++++---- .../kprobes/test-core.h} | 2 +- .../kprobes/test-thumb.c} | 4 ++-- arch/arm/probes/uprobes/Makefile | 1 + .../uprobes/actions-arm.c} | 6 +++--- .../{kernel/uprobes.c => probes/uprobes/core.c} | 6 +++--- .../{kernel/uprobes.h => probes/uprobes/core.h} | 0 27 files changed, 69 insertions(+), 57 deletions(-) rename arch/arm/{kernel => include/asm}/patch.h (100%) create mode 100644 arch/arm/probes/Makefile rename arch/arm/{kernel/probes-arm.c => probes/decode-arm.c} (99%) rename arch/arm/{kernel/probes-arm.h => probes/decode-arm.h} (97%) rename arch/arm/{kernel/probes-thumb.c => probes/decode-thumb.c} (99%) rename arch/arm/{kernel/probes-thumb.h => probes/decode-thumb.h} (97%) rename arch/arm/{kernel/probes.c => probes/decode.c} (99%) rename arch/arm/{kernel/probes.h => probes/decode.h} (99%) create mode 100644 arch/arm/probes/kprobes/Makefile rename arch/arm/{kernel/kprobes-arm.c => probes/kprobes/actions-arm.c} (99%) rename arch/arm/{kernel/kprobes-common.c => probes/kprobes/actions-common.c} (98%) rename arch/arm/{kernel/kprobes-thumb.c => probes/kprobes/actions-thumb.c} (99%) rename arch/arm/{kernel/kprobes.c => probes/kprobes/core.c} (99%) rename arch/arm/{kernel/kprobes.h => probes/kprobes/core.h} (96%) rename arch/arm/{kernel/kprobes-test-arm.c => probes/kprobes/test-arm.c} (99%) rename arch/arm/{kernel/kprobes-test.c => probes/kprobes/test-core.c} (99%) rename arch/arm/{kernel/kprobes-test.h => probes/kprobes/test-core.h} (99%) rename arch/arm/{kernel/kprobes-test-thumb.c => probes/kprobes/test-thumb.c} (99%) create mode 100644 arch/arm/probes/uprobes/Makefile rename arch/arm/{kernel/uprobes-arm.c => probes/uprobes/actions-arm.c} (98%) rename arch/arm/{kernel/uprobes.c => probes/uprobes/core.c} (98%) rename arch/arm/{kernel/uprobes.h => probes/uprobes/core.h} (100%) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index c1785eec2cf772..7f99cd652203ce 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -266,6 +266,7 @@ core-$(CONFIG_KVM_ARM_HOST) += arch/arm/kvm/ # If we have a machine-specific directory, then include it in the build. core-y += arch/arm/kernel/ arch/arm/mm/ arch/arm/common/ +core-y += arch/arm/probes/ core-y += arch/arm/net/ core-y += arch/arm/crypto/ core-y += arch/arm/firmware/ diff --git a/arch/arm/kernel/patch.h b/arch/arm/include/asm/patch.h similarity index 100% rename from arch/arm/kernel/patch.h rename to arch/arm/include/asm/patch.h diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index fb2b71ebe3f22a..9c51a433e02552 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -51,20 +51,8 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o insn.o patch.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o -obj-$(CONFIG_UPROBES) += probes.o probes-arm.o uprobes.o uprobes-arm.o -obj-$(CONFIG_KPROBES) += probes.o kprobes.o kprobes-common.o patch.o -ifdef CONFIG_THUMB2_KERNEL -obj-$(CONFIG_KPROBES) += kprobes-thumb.o probes-thumb.o -else -obj-$(CONFIG_KPROBES) += kprobes-arm.o probes-arm.o -endif -obj-$(CONFIG_ARM_KPROBES_TEST) += test-kprobes.o -test-kprobes-objs := kprobes-test.o -ifdef CONFIG_THUMB2_KERNEL -test-kprobes-objs += kprobes-test-thumb.o -else -test-kprobes-objs += kprobes-test-arm.o -endif +# Main staffs in KPROBES are in arch/arm/probes/ . +obj-$(CONFIG_KPROBES) += patch.o obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o obj-$(CONFIG_ARM_THUMBEE) += thumbee.o obj-$(CONFIG_KGDB) += kgdb.o patch.o diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c index afeeb9ea6f439e..d8da075959bf38 100644 --- a/arch/arm/kernel/jump_label.c +++ b/arch/arm/kernel/jump_label.c @@ -1,8 +1,8 @@ #include #include +#include #include "insn.h" -#include "patch.h" #ifdef HAVE_JUMP_LABEL diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 07db2f8a1b4505..a6ad93c9bce35e 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -14,10 +14,9 @@ #include #include +#include #include -#include "patch.h" - struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { { "r0", 4, offsetof(struct pt_regs, ARM_r0)}, diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c index 5038960e3c55ab..69bda1a5707ee8 100644 --- a/arch/arm/kernel/patch.c +++ b/arch/arm/kernel/patch.c @@ -8,8 +8,7 @@ #include #include #include - -#include "patch.h" +#include struct patch { void *addr; diff --git a/arch/arm/probes/Makefile b/arch/arm/probes/Makefile new file mode 100644 index 00000000000000..aa1f8590dcdd4d --- /dev/null +++ b/arch/arm/probes/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_UPROBES) += decode.o decode-arm.o uprobes/ +obj-$(CONFIG_KPROBES) += decode.o kprobes/ +ifdef CONFIG_THUMB2_KERNEL +obj-$(CONFIG_KPROBES) += decode-thumb.o +else +obj-$(CONFIG_KPROBES) += decode-arm.o +endif diff --git a/arch/arm/kernel/probes-arm.c b/arch/arm/probes/decode-arm.c similarity index 99% rename from arch/arm/kernel/probes-arm.c rename to arch/arm/probes/decode-arm.c index 8eaef81d834495..e39cc75952f279 100644 --- a/arch/arm/kernel/probes-arm.c +++ b/arch/arm/probes/decode-arm.c @@ -1,5 +1,6 @@ /* - * arch/arm/kernel/probes-arm.c + * + * arch/arm/probes/decode-arm.c * * Some code moved here from arch/arm/kernel/kprobes-arm.c * @@ -20,8 +21,8 @@ #include #include -#include "probes.h" -#include "probes-arm.h" +#include "decode.h" +#include "decode-arm.h" #define sign_extend(x, signbit) ((x) | (0 - ((x) & (1 << (signbit))))) diff --git a/arch/arm/kernel/probes-arm.h b/arch/arm/probes/decode-arm.h similarity index 97% rename from arch/arm/kernel/probes-arm.h rename to arch/arm/probes/decode-arm.h index ace6572f6e26a2..9c56b40d6a5713 100644 --- a/arch/arm/kernel/probes-arm.h +++ b/arch/arm/probes/decode-arm.h @@ -1,5 +1,5 @@ /* - * arch/arm/kernel/probes-arm.h + * arch/arm/probes/decode-arm.h * * Copyright 2013 Linaro Ltd. * Written by: David A. Long @@ -15,6 +15,8 @@ #ifndef _ARM_KERNEL_PROBES_ARM_H #define _ARM_KERNEL_PROBES_ARM_H +#include "decode.h" + enum probes_arm_action { PROBES_EMULATE_NONE, PROBES_SIMULATE_NOP, diff --git a/arch/arm/kernel/probes-thumb.c b/arch/arm/probes/decode-thumb.c similarity index 99% rename from arch/arm/kernel/probes-thumb.c rename to arch/arm/probes/decode-thumb.c index 4131351e812f47..2f0453a895dc61 100644 --- a/arch/arm/kernel/probes-thumb.c +++ b/arch/arm/probes/decode-thumb.c @@ -1,5 +1,5 @@ /* - * arch/arm/kernel/probes-thumb.c + * arch/arm/probes/decode-thumb.c * * Copyright (C) 2011 Jon Medhurst . * @@ -12,8 +12,8 @@ #include #include -#include "probes.h" -#include "probes-thumb.h" +#include "decode.h" +#include "decode-thumb.h" static const union decode_item t32_table_1110_100x_x0xx[] = { diff --git a/arch/arm/kernel/probes-thumb.h b/arch/arm/probes/decode-thumb.h similarity index 97% rename from arch/arm/kernel/probes-thumb.h rename to arch/arm/probes/decode-thumb.h index 7c6f6ebe514fee..039013c7131d65 100644 --- a/arch/arm/kernel/probes-thumb.h +++ b/arch/arm/probes/decode-thumb.h @@ -1,5 +1,5 @@ /* - * arch/arm/kernel/probes-thumb.h + * arch/arm/probes/decode-thumb.h * * Copyright 2013 Linaro Ltd. * Written by: David A. Long @@ -15,6 +15,8 @@ #ifndef _ARM_KERNEL_PROBES_THUMB_H #define _ARM_KERNEL_PROBES_THUMB_H +#include "decode.h" + /* * True if current instruction is in an IT block. */ diff --git a/arch/arm/kernel/probes.c b/arch/arm/probes/decode.c similarity index 99% rename from arch/arm/kernel/probes.c rename to arch/arm/probes/decode.c index a8ab540d7e73a0..3b05d574235922 100644 --- a/arch/arm/kernel/probes.c +++ b/arch/arm/probes/decode.c @@ -1,5 +1,5 @@ /* - * arch/arm/kernel/probes.c + * arch/arm/probes/decode.c * * Copyright (C) 2011 Jon Medhurst . * @@ -17,7 +17,7 @@ #include #include -#include "probes.h" +#include "decode.h" #ifndef find_str_pc_offset diff --git a/arch/arm/kernel/probes.h b/arch/arm/probes/decode.h similarity index 99% rename from arch/arm/kernel/probes.h rename to arch/arm/probes/decode.h index dba9f2466a93fc..1d0b5316908032 100644 --- a/arch/arm/kernel/probes.h +++ b/arch/arm/probes/decode.h @@ -1,5 +1,5 @@ /* - * arch/arm/kernel/probes.h + * arch/arm/probes/decode.h * * Copyright (C) 2011 Jon Medhurst . * diff --git a/arch/arm/probes/kprobes/Makefile b/arch/arm/probes/kprobes/Makefile new file mode 100644 index 00000000000000..eb38a428ecd617 --- /dev/null +++ b/arch/arm/probes/kprobes/Makefile @@ -0,0 +1,11 @@ +obj-$(CONFIG_KPROBES) += core.o actions-common.o +obj-$(CONFIG_ARM_KPROBES_TEST) += test-kprobes.o +test-kprobes-objs := test-core.o + +ifdef CONFIG_THUMB2_KERNEL +obj-$(CONFIG_KPROBES) += actions-thumb.o +test-kprobes-objs += test-thumb.o +else +obj-$(CONFIG_KPROBES) += actions-arm.o +test-kprobes-objs += test-arm.o +endif diff --git a/arch/arm/kernel/kprobes-arm.c b/arch/arm/probes/kprobes/actions-arm.c similarity index 99% rename from arch/arm/kernel/kprobes-arm.c rename to arch/arm/probes/kprobes/actions-arm.c index ac300c60d65698..8797879f7b3ab6 100644 --- a/arch/arm/kernel/kprobes-arm.c +++ b/arch/arm/probes/kprobes/actions-arm.c @@ -1,5 +1,5 @@ /* - * arch/arm/kernel/kprobes-decode.c + * arch/arm/probes/kprobes/actions-arm.c * * Copyright (C) 2006, 2007 Motorola Inc. * @@ -62,8 +62,8 @@ #include #include -#include "kprobes.h" -#include "probes-arm.h" +#include "../decode-arm.h" +#include "core.h" #if __LINUX_ARM_ARCH__ >= 6 #define BLX(reg) "blx "reg" \n\t" diff --git a/arch/arm/kernel/kprobes-common.c b/arch/arm/probes/kprobes/actions-common.c similarity index 98% rename from arch/arm/kernel/kprobes-common.c rename to arch/arm/probes/kprobes/actions-common.c index 0bf5d64eba1d26..bd20a71cd34a0f 100644 --- a/arch/arm/kernel/kprobes-common.c +++ b/arch/arm/probes/kprobes/actions-common.c @@ -1,5 +1,5 @@ /* - * arch/arm/kernel/kprobes-common.c + * arch/arm/probes/kprobes/actions-common.c * * Copyright (C) 2011 Jon Medhurst . * @@ -15,7 +15,7 @@ #include #include -#include "kprobes.h" +#include "core.h" static void __kprobes simulate_ldm1stm1(probes_opcode_t insn, diff --git a/arch/arm/kernel/kprobes-thumb.c b/arch/arm/probes/kprobes/actions-thumb.c similarity index 99% rename from arch/arm/kernel/kprobes-thumb.c rename to arch/arm/probes/kprobes/actions-thumb.c index 9495d7f3516fca..6c4e60b6282618 100644 --- a/arch/arm/kernel/kprobes-thumb.c +++ b/arch/arm/probes/kprobes/actions-thumb.c @@ -1,5 +1,5 @@ /* - * arch/arm/kernel/kprobes-thumb.c + * arch/arm/probes/kprobes/actions-thumb.c * * Copyright (C) 2011 Jon Medhurst . * @@ -13,8 +13,8 @@ #include #include -#include "kprobes.h" -#include "probes-thumb.h" +#include "../decode-thumb.h" +#include "core.h" /* These emulation encodings are functionally equivalent... */ #define t32_emulate_rd8rn16rm0ra12_noflags \ diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/probes/kprobes/core.c similarity index 99% rename from arch/arm/kernel/kprobes.c rename to arch/arm/probes/kprobes/core.c index 6d644202c8dcb1..701f49d74c35d3 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/probes/kprobes/core.c @@ -30,11 +30,11 @@ #include #include #include +#include -#include "kprobes.h" -#include "probes-arm.h" -#include "probes-thumb.h" -#include "patch.h" +#include "../decode-arm.h" +#include "../decode-thumb.h" +#include "core.h" #define MIN_STACK_SIZE(addr) \ min((unsigned long)MAX_STACK_SIZE, \ diff --git a/arch/arm/kernel/kprobes.h b/arch/arm/probes/kprobes/core.h similarity index 96% rename from arch/arm/kernel/kprobes.h rename to arch/arm/probes/kprobes/core.h index 9a2712ecefc32d..2e1e5a3d915587 100644 --- a/arch/arm/kernel/kprobes.h +++ b/arch/arm/probes/kprobes/core.h @@ -19,7 +19,8 @@ #ifndef _ARM_KERNEL_KPROBES_H #define _ARM_KERNEL_KPROBES_H -#include "probes.h" +#include +#include "../decode.h" /* * These undefined instructions must be unique and diff --git a/arch/arm/kernel/kprobes-test-arm.c b/arch/arm/probes/kprobes/test-arm.c similarity index 99% rename from arch/arm/kernel/kprobes-test-arm.c rename to arch/arm/probes/kprobes/test-arm.c index cb1424240ff652..d9a1255f3043ae 100644 --- a/arch/arm/kernel/kprobes-test-arm.c +++ b/arch/arm/probes/kprobes/test-arm.c @@ -13,7 +13,7 @@ #include #include -#include "kprobes-test.h" +#include "test-core.h" #define TEST_ISA "32" diff --git a/arch/arm/kernel/kprobes-test.c b/arch/arm/probes/kprobes/test-core.c similarity index 99% rename from arch/arm/kernel/kprobes-test.c rename to arch/arm/probes/kprobes/test-core.c index b206d7790c7790..7ab633d51954ab 100644 --- a/arch/arm/kernel/kprobes-test.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -209,10 +209,10 @@ #include #include -#include "kprobes.h" -#include "probes-arm.h" -#include "probes-thumb.h" -#include "kprobes-test.h" +#include "core.h" +#include "test-core.h" +#include "../decode-arm.h" +#include "../decode-thumb.h" #define BENCHMARKING 1 diff --git a/arch/arm/kernel/kprobes-test.h b/arch/arm/probes/kprobes/test-core.h similarity index 99% rename from arch/arm/kernel/kprobes-test.h rename to arch/arm/probes/kprobes/test-core.h index 4430990e90e7aa..9991754947bc1f 100644 --- a/arch/arm/kernel/kprobes-test.h +++ b/arch/arm/probes/kprobes/test-core.h @@ -1,5 +1,5 @@ /* - * arch/arm/kernel/kprobes-test.h + * arch/arm/probes/kprobes/test-core.h * * Copyright (C) 2011 Jon Medhurst . * diff --git a/arch/arm/kernel/kprobes-test-thumb.c b/arch/arm/probes/kprobes/test-thumb.c similarity index 99% rename from arch/arm/kernel/kprobes-test-thumb.c rename to arch/arm/probes/kprobes/test-thumb.c index 844dd10d859396..6c6e9a9bb6757d 100644 --- a/arch/arm/kernel/kprobes-test-thumb.c +++ b/arch/arm/probes/kprobes/test-thumb.c @@ -1,5 +1,5 @@ /* - * arch/arm/kernel/kprobes-test-thumb.c + * arch/arm/probes/kprobes/test-thumb.c * * Copyright (C) 2011 Jon Medhurst . * @@ -12,7 +12,7 @@ #include #include -#include "kprobes-test.h" +#include "test-core.h" #define TEST_ISA "16" diff --git a/arch/arm/probes/uprobes/Makefile b/arch/arm/probes/uprobes/Makefile new file mode 100644 index 00000000000000..e1dc3d0f6d5a14 --- /dev/null +++ b/arch/arm/probes/uprobes/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_UPROBES) += core.o actions-arm.o diff --git a/arch/arm/kernel/uprobes-arm.c b/arch/arm/probes/uprobes/actions-arm.c similarity index 98% rename from arch/arm/kernel/uprobes-arm.c rename to arch/arm/probes/uprobes/actions-arm.c index d3b655ff17da20..1dd4916ba8aaa9 100644 --- a/arch/arm/kernel/uprobes-arm.c +++ b/arch/arm/probes/uprobes/actions-arm.c @@ -13,9 +13,9 @@ #include #include -#include "probes.h" -#include "probes-arm.h" -#include "uprobes.h" +#include "../decode.h" +#include "../decode-arm.h" +#include "core.h" static int uprobes_substitute_pc(unsigned long *pinsn, u32 oregs) { diff --git a/arch/arm/kernel/uprobes.c b/arch/arm/probes/uprobes/core.c similarity index 98% rename from arch/arm/kernel/uprobes.c rename to arch/arm/probes/uprobes/core.c index 56adf9c1fde067..b2954f6d3abef3 100644 --- a/arch/arm/kernel/uprobes.c +++ b/arch/arm/probes/uprobes/core.c @@ -17,9 +17,9 @@ #include #include -#include "probes.h" -#include "probes-arm.h" -#include "uprobes.h" +#include "../decode.h" +#include "../decode-arm.h" +#include "core.h" #define UPROBE_TRAP_NR UINT_MAX diff --git a/arch/arm/kernel/uprobes.h b/arch/arm/probes/uprobes/core.h similarity index 100% rename from arch/arm/kernel/uprobes.h rename to arch/arm/probes/uprobes/core.h From 832607e79d7423c67ef8a3de8dfa3d25b5b0bf86 Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Wed, 7 Jan 2015 11:42:30 +0000 Subject: [PATCH 028/601] ARM: probes: Use correct action types for MOVW, SEV and WFI This doesn't correct any bugs when probing these instructions but makes MOVW slightly faster and makes everything more symmetric with the Thumb instruction cases. We can also remove the now redundant PROBES_EMULATE_NONE and PROBES_SIMULATE_NOP actions. Signed-off-by: Jon Medhurst --- arch/arm/probes/decode-arm.c | 6 +++--- arch/arm/probes/decode-arm.h | 2 -- arch/arm/probes/kprobes/actions-arm.c | 2 -- arch/arm/probes/uprobes/actions-arm.c | 2 -- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/arm/probes/decode-arm.c b/arch/arm/probes/decode-arm.c index e39cc75952f279..04114f74a2d2e1 100644 --- a/arch/arm/probes/decode-arm.c +++ b/arch/arm/probes/decode-arm.c @@ -370,17 +370,17 @@ static const union decode_item arm_cccc_001x_table[] = { /* MOVW cccc 0011 0000 xxxx xxxx xxxx xxxx xxxx */ /* MOVT cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx */ - DECODE_EMULATEX (0x0fb00000, 0x03000000, PROBES_DATA_PROCESSING_IMM, + DECODE_EMULATEX (0x0fb00000, 0x03000000, PROBES_MOV_HALFWORD, REGS(0, NOPC, 0, 0, 0)), /* YIELD cccc 0011 0010 0000 xxxx xxxx 0000 0001 */ DECODE_OR (0x0fff00ff, 0x03200001), /* SEV cccc 0011 0010 0000 xxxx xxxx 0000 0100 */ - DECODE_EMULATE (0x0fff00ff, 0x03200004, PROBES_EMULATE_NONE), + DECODE_EMULATE (0x0fff00ff, 0x03200004, PROBES_SEV), /* NOP cccc 0011 0010 0000 xxxx xxxx 0000 0000 */ /* WFE cccc 0011 0010 0000 xxxx xxxx 0000 0010 */ /* WFI cccc 0011 0010 0000 xxxx xxxx 0000 0011 */ - DECODE_SIMULATE (0x0fff00fc, 0x03200000, PROBES_SIMULATE_NOP), + DECODE_SIMULATE (0x0fff00fc, 0x03200000, PROBES_WFE), /* DBG cccc 0011 0010 0000 xxxx xxxx ffff xxxx */ /* unallocated hints cccc 0011 0010 0000 xxxx xxxx xxxx xxxx */ /* MSR (immediate) cccc 0011 0x10 xxxx xxxx xxxx xxxx xxxx */ diff --git a/arch/arm/probes/decode-arm.h b/arch/arm/probes/decode-arm.h index 9c56b40d6a5713..cb0b2633193009 100644 --- a/arch/arm/probes/decode-arm.h +++ b/arch/arm/probes/decode-arm.h @@ -18,8 +18,6 @@ #include "decode.h" enum probes_arm_action { - PROBES_EMULATE_NONE, - PROBES_SIMULATE_NOP, PROBES_PRELOAD_IMM, PROBES_PRELOAD_REG, PROBES_BRANCH_IMM, diff --git a/arch/arm/probes/kprobes/actions-arm.c b/arch/arm/probes/kprobes/actions-arm.c index 8797879f7b3ab6..2206f2d80c7601 100644 --- a/arch/arm/probes/kprobes/actions-arm.c +++ b/arch/arm/probes/kprobes/actions-arm.c @@ -302,8 +302,6 @@ emulate_rdlo12rdhi16rn0rm8_rwflags_nopc(probes_opcode_t insn, } const union decode_action kprobes_arm_actions[NUM_PROBES_ARM_ACTIONS] = { - [PROBES_EMULATE_NONE] = {.handler = probes_emulate_none}, - [PROBES_SIMULATE_NOP] = {.handler = probes_simulate_nop}, [PROBES_PRELOAD_IMM] = {.handler = probes_simulate_nop}, [PROBES_PRELOAD_REG] = {.handler = probes_simulate_nop}, [PROBES_BRANCH_IMM] = {.handler = simulate_blx1}, diff --git a/arch/arm/probes/uprobes/actions-arm.c b/arch/arm/probes/uprobes/actions-arm.c index 1dd4916ba8aaa9..76eb44972ebeaf 100644 --- a/arch/arm/probes/uprobes/actions-arm.c +++ b/arch/arm/probes/uprobes/actions-arm.c @@ -195,8 +195,6 @@ uprobe_decode_ldmstm(probes_opcode_t insn, } const union decode_action uprobes_probes_actions[] = { - [PROBES_EMULATE_NONE] = {.handler = probes_simulate_nop}, - [PROBES_SIMULATE_NOP] = {.handler = probes_simulate_nop}, [PROBES_PRELOAD_IMM] = {.handler = probes_simulate_nop}, [PROBES_PRELOAD_REG] = {.handler = probes_simulate_nop}, [PROBES_BRANCH_IMM] = {.handler = simulate_blx1}, From 83803d97dae1eaf6850a45ef8ee179cc66e147dc Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 5 Jan 2015 19:29:18 +0800 Subject: [PATCH 029/601] ARM: kprobes: introduces checker This patch introdces 'checker' to decoding phase, and calls checkers when instruction decoding. This allows further decoding for specific instructions. This patch introduces a stub call of checkers in kprobe arch_prepare_kprobe() as an example and for further expansion. Signed-off-by: Wang Nan Reviewed-by: Jon Medhurst Reviewed-by: Masami Hiramatsu Signed-off-by: Jon Medhurst --- arch/arm/probes/decode-arm.c | 5 ++- arch/arm/probes/decode-arm.h | 3 +- arch/arm/probes/decode-thumb.c | 10 +++-- arch/arm/probes/decode-thumb.h | 6 ++- arch/arm/probes/decode.c | 60 ++++++++++++++++++++++--- arch/arm/probes/decode.h | 11 ++++- arch/arm/probes/kprobes/actions-arm.c | 2 + arch/arm/probes/kprobes/actions-thumb.c | 3 ++ arch/arm/probes/kprobes/core.c | 6 ++- arch/arm/probes/kprobes/core.h | 7 ++- arch/arm/probes/uprobes/core.c | 2 +- 11 files changed, 95 insertions(+), 20 deletions(-) diff --git a/arch/arm/probes/decode-arm.c b/arch/arm/probes/decode-arm.c index 04114f74a2d2e1..f72c33a2dcfbe6 100644 --- a/arch/arm/probes/decode-arm.c +++ b/arch/arm/probes/decode-arm.c @@ -726,10 +726,11 @@ static void __kprobes arm_singlestep(probes_opcode_t insn, */ enum probes_insn __kprobes arm_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi, - bool emulate, const union decode_action *actions) + bool emulate, const union decode_action *actions, + const struct decode_checker *checkers[]) { asi->insn_singlestep = arm_singlestep; asi->insn_check_cc = probes_condition_checks[insn>>28]; return probes_decode_insn(insn, asi, probes_decode_arm_table, false, - emulate, actions); + emulate, actions, checkers); } diff --git a/arch/arm/probes/decode-arm.h b/arch/arm/probes/decode-arm.h index cb0b2633193009..b3b80f6d414b46 100644 --- a/arch/arm/probes/decode-arm.h +++ b/arch/arm/probes/decode-arm.h @@ -68,6 +68,7 @@ extern const union decode_item probes_decode_arm_table[]; enum probes_insn arm_probes_decode_insn(probes_opcode_t, struct arch_probes_insn *, bool emulate, - const union decode_action *actions); + const union decode_action *actions, + const struct decode_checker *checkers[]); #endif diff --git a/arch/arm/probes/decode-thumb.c b/arch/arm/probes/decode-thumb.c index 2f0453a895dc61..985e7dd4cac6b7 100644 --- a/arch/arm/probes/decode-thumb.c +++ b/arch/arm/probes/decode-thumb.c @@ -863,20 +863,22 @@ static void __kprobes thumb32_singlestep(probes_opcode_t opcode, enum probes_insn __kprobes thumb16_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi, - bool emulate, const union decode_action *actions) + bool emulate, const union decode_action *actions, + const struct decode_checker *checkers[]) { asi->insn_singlestep = thumb16_singlestep; asi->insn_check_cc = thumb_check_cc; return probes_decode_insn(insn, asi, probes_decode_thumb16_table, true, - emulate, actions); + emulate, actions, checkers); } enum probes_insn __kprobes thumb32_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi, - bool emulate, const union decode_action *actions) + bool emulate, const union decode_action *actions, + const struct decode_checker *checkers[]) { asi->insn_singlestep = thumb32_singlestep; asi->insn_check_cc = thumb_check_cc; return probes_decode_insn(insn, asi, probes_decode_thumb32_table, true, - emulate, actions); + emulate, actions, checkers); } diff --git a/arch/arm/probes/decode-thumb.h b/arch/arm/probes/decode-thumb.h index 039013c7131d65..8457add0a2d800 100644 --- a/arch/arm/probes/decode-thumb.h +++ b/arch/arm/probes/decode-thumb.h @@ -91,9 +91,11 @@ extern const union decode_item probes_decode_thumb16_table[]; enum probes_insn __kprobes thumb16_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi, - bool emulate, const union decode_action *actions); + bool emulate, const union decode_action *actions, + const struct decode_checker *checkers[]); enum probes_insn __kprobes thumb32_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi, - bool emulate, const union decode_action *actions); + bool emulate, const union decode_action *actions, + const struct decode_checker *checkers[]); #endif diff --git a/arch/arm/probes/decode.c b/arch/arm/probes/decode.c index 3b05d574235922..c7d442018902ac 100644 --- a/arch/arm/probes/decode.c +++ b/arch/arm/probes/decode.c @@ -342,6 +342,31 @@ static const int decode_struct_sizes[NUM_DECODE_TYPES] = { [DECODE_TYPE_REJECT] = sizeof(struct decode_reject) }; +static int run_checkers(const struct decode_checker *checkers[], + int action, probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *h) +{ + const struct decode_checker **p; + + if (!checkers) + return INSN_GOOD; + + p = checkers; + while (*p != NULL) { + int retval; + probes_check_t *checker_func = (*p)[action].checker; + + retval = INSN_GOOD; + if (checker_func) + retval = checker_func(insn, asi, h); + if (retval == INSN_REJECTED) + return retval; + p++; + } + return INSN_GOOD; +} + /* * probes_decode_insn operates on data tables in order to decode an ARM * architecture instruction onto which a kprobe has been placed. @@ -388,11 +413,17 @@ static const int decode_struct_sizes[NUM_DECODE_TYPES] = { int __kprobes probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi, const union decode_item *table, bool thumb, - bool emulate, const union decode_action *actions) + bool emulate, const union decode_action *actions, + const struct decode_checker *checkers[]) { const struct decode_header *h = (struct decode_header *)table; const struct decode_header *next; bool matched = false; + /* + * @insn can be modified by decode_regs. Save its original + * value for checkers. + */ + probes_opcode_t origin_insn = insn; if (emulate) insn = prepare_emulated_insn(insn, asi, thumb); @@ -422,24 +453,41 @@ probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi, } case DECODE_TYPE_CUSTOM: { + int err; struct decode_custom *d = (struct decode_custom *)h; - return actions[d->decoder.action].decoder(insn, asi, h); + int action = d->decoder.action; + + err = run_checkers(checkers, action, origin_insn, asi, h); + if (err == INSN_REJECTED) + return INSN_REJECTED; + return actions[action].decoder(insn, asi, h); } case DECODE_TYPE_SIMULATE: { + int err; struct decode_simulate *d = (struct decode_simulate *)h; - asi->insn_handler = actions[d->handler.action].handler; + int action = d->handler.action; + + err = run_checkers(checkers, action, origin_insn, asi, h); + if (err == INSN_REJECTED) + return INSN_REJECTED; + asi->insn_handler = actions[action].handler; return INSN_GOOD_NO_SLOT; } case DECODE_TYPE_EMULATE: { + int err; struct decode_emulate *d = (struct decode_emulate *)h; + int action = d->handler.action; + + err = run_checkers(checkers, action, origin_insn, asi, h); + if (err == INSN_REJECTED) + return INSN_REJECTED; if (!emulate) - return actions[d->handler.action].decoder(insn, - asi, h); + return actions[action].decoder(insn, asi, h); - asi->insn_handler = actions[d->handler.action].handler; + asi->insn_handler = actions[action].handler; set_emulated_insn(insn, asi, thumb); return INSN_GOOD; } diff --git a/arch/arm/probes/decode.h b/arch/arm/probes/decode.h index 1d0b5316908032..f9b08ba7fe73ef 100644 --- a/arch/arm/probes/decode.h +++ b/arch/arm/probes/decode.h @@ -314,6 +314,14 @@ union decode_action { probes_custom_decode_t *decoder; }; +typedef enum probes_insn (probes_check_t)(probes_opcode_t, + struct arch_probes_insn *, + const struct decode_header *); + +struct decode_checker { + probes_check_t *checker; +}; + #define DECODE_END \ {.bits = DECODE_TYPE_END} @@ -402,6 +410,7 @@ probes_insn_handler_t probes_emulate_none; int __kprobes probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi, const union decode_item *table, bool thumb, bool emulate, - const union decode_action *actions); + const union decode_action *actions, + const struct decode_checker **checkers); #endif diff --git a/arch/arm/probes/kprobes/actions-arm.c b/arch/arm/probes/kprobes/actions-arm.c index 2206f2d80c7601..fbd93a9ada75cd 100644 --- a/arch/arm/probes/kprobes/actions-arm.c +++ b/arch/arm/probes/kprobes/actions-arm.c @@ -339,3 +339,5 @@ const union decode_action kprobes_arm_actions[NUM_PROBES_ARM_ACTIONS] = { [PROBES_BRANCH] = {.handler = simulate_bbl}, [PROBES_LDMSTM] = {.decoder = kprobe_decode_ldmstm} }; + +const struct decode_checker *kprobes_arm_checkers[] = {NULL}; diff --git a/arch/arm/probes/kprobes/actions-thumb.c b/arch/arm/probes/kprobes/actions-thumb.c index 6c4e60b6282618..2796121fe90e18 100644 --- a/arch/arm/probes/kprobes/actions-thumb.c +++ b/arch/arm/probes/kprobes/actions-thumb.c @@ -664,3 +664,6 @@ const union decode_action kprobes_t32_actions[NUM_PROBES_T32_ACTIONS] = { [PROBES_T32_MUL_ADD_LONG] = { .handler = t32_emulate_rdlo12rdhi8rn16rm0_noflags}, }; + +const struct decode_checker *kprobes_t32_checkers[] = {NULL}; +const struct decode_checker *kprobes_t16_checkers[] = {NULL}; diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 701f49d74c35d3..74f3dc3ac212b8 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -61,6 +61,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) kprobe_decode_insn_t *decode_insn; const union decode_action *actions; int is; + const struct decode_checker **checkers; if (in_exception_text(addr)) return -EINVAL; @@ -74,9 +75,11 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) insn = __opcode_thumb32_compose(insn, inst2); decode_insn = thumb32_probes_decode_insn; actions = kprobes_t32_actions; + checkers = kprobes_t32_checkers; } else { decode_insn = thumb16_probes_decode_insn; actions = kprobes_t16_actions; + checkers = kprobes_t16_checkers; } #else /* !CONFIG_THUMB2_KERNEL */ thumb = false; @@ -85,12 +88,13 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) insn = __mem_to_opcode_arm(*p->addr); decode_insn = arm_probes_decode_insn; actions = kprobes_arm_actions; + checkers = kprobes_arm_checkers; #endif p->opcode = insn; p->ainsn.insn = tmp_insn; - switch ((*decode_insn)(insn, &p->ainsn, true, actions)) { + switch ((*decode_insn)(insn, &p->ainsn, true, actions, checkers)) { case INSN_REJECTED: /* not supported */ return -EINVAL; diff --git a/arch/arm/probes/kprobes/core.h b/arch/arm/probes/kprobes/core.h index 2e1e5a3d915587..f88c79fe632a5f 100644 --- a/arch/arm/probes/kprobes/core.h +++ b/arch/arm/probes/kprobes/core.h @@ -37,16 +37,19 @@ kprobe_decode_ldmstm(kprobe_opcode_t insn, struct arch_probes_insn *asi, typedef enum probes_insn (kprobe_decode_insn_t)(probes_opcode_t, struct arch_probes_insn *, bool, - const union decode_action *); + const union decode_action *, + const struct decode_checker *[*]); #ifdef CONFIG_THUMB2_KERNEL extern const union decode_action kprobes_t32_actions[]; extern const union decode_action kprobes_t16_actions[]; - +extern const struct decode_checker *kprobes_t32_checkers[]; +extern const struct decode_checker *kprobes_t16_checkers[]; #else /* !CONFIG_THUMB2_KERNEL */ extern const union decode_action kprobes_arm_actions[]; +extern const struct decode_checker *kprobes_arm_checkers[]; #endif diff --git a/arch/arm/probes/uprobes/core.c b/arch/arm/probes/uprobes/core.c index b2954f6d3abef3..d1329f1ba4e4c3 100644 --- a/arch/arm/probes/uprobes/core.c +++ b/arch/arm/probes/uprobes/core.c @@ -88,7 +88,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, auprobe->ixol[1] = __opcode_to_mem_arm(UPROBE_SS_ARM_INSN); ret = arm_probes_decode_insn(insn, &auprobe->asi, false, - uprobes_probes_actions); + uprobes_probes_actions, NULL); switch (ret) { case INSN_REJECTED: return -EINVAL; From e1e1fddae74b72d0415965821ad00fe39aac6f13 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Oct 2014 14:02:15 +0200 Subject: [PATCH 030/601] arm64/mm: add explicit struct_mm argument to __create_mapping() Currently, swapper_pg_dir and idmap_pg_dir share the init_mm mm_struct instance. To allow the introduction of other pg_dir instances, for instance, for UEFI's mapping of Runtime Services, make the struct_mm instance an explicit argument that gets passed down to the pmd and pte instantiation functions. Note that the consumers (pmd_populate/pgd_populate) of the mm_struct argument don't actually inspect it, but let's fix it for correctness' sake. Acked-by: Steve Capper Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- arch/arm64/mm/mmu.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 6032f3e3056a6c..7d5dfe2d3de0c0 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -156,9 +156,9 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, - unsigned long end, phys_addr_t phys, - int map_io) +static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end, + phys_addr_t phys, int map_io) { pmd_t *pmd; unsigned long next; @@ -178,7 +178,7 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, */ if (pud_none(*pud) || pud_bad(*pud)) { pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t)); - pud_populate(&init_mm, pud, pmd); + pud_populate(mm, pud, pmd); } pmd = pmd_offset(pud, addr); @@ -202,16 +202,16 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, } while (pmd++, addr = next, addr != end); } -static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, - unsigned long end, phys_addr_t phys, - int map_io) +static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, + phys_addr_t phys, int map_io) { pud_t *pud; unsigned long next; if (pgd_none(*pgd)) { pud = early_alloc(PTRS_PER_PUD * sizeof(pud_t)); - pgd_populate(&init_mm, pgd, pud); + pgd_populate(mm, pgd, pud); } BUG_ON(pgd_bad(*pgd)); @@ -240,7 +240,7 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, flush_tlb_all(); } } else { - alloc_init_pmd(pud, addr, next, phys, map_io); + alloc_init_pmd(mm, pud, addr, next, phys, map_io); } phys += next - addr; } while (pud++, addr = next, addr != end); @@ -250,9 +250,9 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, * Create the page directory entries and any necessary page tables for the * mapping specified by 'md'. */ -static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys, - unsigned long virt, phys_addr_t size, - int map_io) +static void __init __create_mapping(struct mm_struct *mm, pgd_t *pgd, + phys_addr_t phys, unsigned long virt, + phys_addr_t size, int map_io) { unsigned long addr, length, end, next; @@ -262,7 +262,7 @@ static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(pgd, addr, next, phys, map_io); + alloc_init_pud(mm, pgd, addr, next, phys, map_io); phys += next - addr; } while (pgd++, addr = next, addr != end); } @@ -275,7 +275,8 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, &phys, virt); return; } - __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt, size, 0); + __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt, + size, 0); } void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io) @@ -284,7 +285,7 @@ void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io) pr_warn("BUG: not creating id mapping for %pa\n", &addr); return; } - __create_mapping(&idmap_pg_dir[pgd_index(addr)], + __create_mapping(&init_mm, &idmap_pg_dir[pgd_index(addr)], addr, addr, size, map_io); } From 8ce837cee8f51fb0eacb32c85461ea2f0fafc9f8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Oct 2014 15:42:07 +0200 Subject: [PATCH 031/601] arm64/mm: add create_pgd_mapping() to create private page tables For UEFI, we need to install the memory mappings used for Runtime Services in a dedicated set of page tables. Add create_pgd_mapping(), which allows us to allocate and install those page table entries early. Reviewed-by: Will Deacon Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- arch/arm64/include/asm/mmu.h | 3 +++ arch/arm64/include/asm/pgtable.h | 5 ++++ arch/arm64/mm/mmu.c | 43 ++++++++++++++++---------------- 3 files changed, 30 insertions(+), 21 deletions(-) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index c2f006c48bdb2f..5fd40c43be8050 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -33,5 +33,8 @@ extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void init_mem_pgprot(void); /* create an identity mapping for memory (or io if map_io is true) */ extern void create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io); +extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot); #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 210d632aa5ad38..59079248529d20 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -264,6 +264,11 @@ static inline pmd_t pte_pmd(pte_t pte) return __pmd(pte_val(pte)); } +static inline pgprot_t mk_sect_prot(pgprot_t prot) +{ + return __pgprot(pgprot_val(prot) & ~PTE_TABLE_BIT); +} + /* * THP definitions. */ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 7d5dfe2d3de0c0..3f3d5aa4a8b11d 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -158,20 +158,10 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, - phys_addr_t phys, int map_io) + phys_addr_t phys, pgprot_t prot) { pmd_t *pmd; unsigned long next; - pmdval_t prot_sect; - pgprot_t prot_pte; - - if (map_io) { - prot_sect = PROT_SECT_DEVICE_nGnRE; - prot_pte = __pgprot(PROT_DEVICE_nGnRE); - } else { - prot_sect = PROT_SECT_NORMAL_EXEC; - prot_pte = PAGE_KERNEL_EXEC; - } /* * Check for initial section mappings in the pgd/pud and remove them. @@ -187,7 +177,8 @@ static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, /* try section mapping first */ if (((addr | next | phys) & ~SECTION_MASK) == 0) { pmd_t old_pmd =*pmd; - set_pmd(pmd, __pmd(phys | prot_sect)); + set_pmd(pmd, __pmd(phys | + pgprot_val(mk_sect_prot(prot)))); /* * Check for previous table entries created during * boot (__create_page_tables) and flush them. @@ -196,7 +187,7 @@ static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, flush_tlb_all(); } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot_pte); + prot); } phys += next - addr; } while (pmd++, addr = next, addr != end); @@ -204,7 +195,7 @@ static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, - phys_addr_t phys, int map_io) + phys_addr_t phys, pgprot_t prot) { pud_t *pud; unsigned long next; @@ -222,10 +213,11 @@ static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, /* * For 4K granule only, attempt to put down a 1GB block */ - if (!map_io && (PAGE_SHIFT == 12) && + if ((PAGE_SHIFT == 12) && ((addr | next | phys) & ~PUD_MASK) == 0) { pud_t old_pud = *pud; - set_pud(pud, __pud(phys | PROT_SECT_NORMAL_EXEC)); + set_pud(pud, __pud(phys | + pgprot_val(mk_sect_prot(prot)))); /* * If we have an old value for a pud, it will @@ -240,7 +232,7 @@ static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, flush_tlb_all(); } } else { - alloc_init_pmd(mm, pud, addr, next, phys, map_io); + alloc_init_pmd(mm, pud, addr, next, phys, prot); } phys += next - addr; } while (pud++, addr = next, addr != end); @@ -252,7 +244,7 @@ static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, */ static void __init __create_mapping(struct mm_struct *mm, pgd_t *pgd, phys_addr_t phys, unsigned long virt, - phys_addr_t size, int map_io) + phys_addr_t size, pgprot_t prot) { unsigned long addr, length, end, next; @@ -262,7 +254,7 @@ static void __init __create_mapping(struct mm_struct *mm, pgd_t *pgd, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(mm, pgd, addr, next, phys, map_io); + alloc_init_pud(mm, pgd, addr, next, phys, prot); phys += next - addr; } while (pgd++, addr = next, addr != end); } @@ -276,7 +268,7 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, return; } __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt, - size, 0); + size, PAGE_KERNEL_EXEC); } void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io) @@ -286,7 +278,16 @@ void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io) return; } __create_mapping(&init_mm, &idmap_pg_dir[pgd_index(addr)], - addr, addr, size, map_io); + addr, addr, size, + map_io ? __pgprot(PROT_DEVICE_nGnRE) + : PAGE_KERNEL_EXEC); +} + +void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot) +{ + __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot); } static void __init map_mem(void) From 7bb68410ef22067b08fd52887875b8f337f89dcc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 18 Oct 2014 15:04:15 +0200 Subject: [PATCH 032/601] efi: split off remapping code from efi_config_init() Split of the remapping code from efi_config_init() so that the caller can perform its own remapping. This is necessary to correctly handle virtually remapped UEFI memory regions under kexec, as efi.systab will have been updated to a virtual address. Acked-by: Matt Fleming Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/efi.c | 56 ++++++++++++++++++++++---------------- include/linux/efi.h | 2 ++ 2 files changed, 34 insertions(+), 24 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 9035c1b74d5839..b7ba9d8ec4b0b5 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -293,29 +293,15 @@ static __init int match_config_table(efi_guid_t *guid, return 0; } -int __init efi_config_init(efi_config_table_type_t *arch_tables) +int __init efi_config_parse_tables(void *config_tables, int count, int sz, + efi_config_table_type_t *arch_tables) { - void *config_tables, *tablep; - int i, sz; - - if (efi_enabled(EFI_64BIT)) - sz = sizeof(efi_config_table_64_t); - else - sz = sizeof(efi_config_table_32_t); - - /* - * Let's see what config tables the firmware passed to us. - */ - config_tables = early_memremap(efi.systab->tables, - efi.systab->nr_tables * sz); - if (config_tables == NULL) { - pr_err("Could not map Configuration table!\n"); - return -ENOMEM; - } + void *tablep; + int i; tablep = config_tables; pr_info(""); - for (i = 0; i < efi.systab->nr_tables; i++) { + for (i = 0; i < count; i++) { efi_guid_t guid; unsigned long table; @@ -328,8 +314,6 @@ int __init efi_config_init(efi_config_table_type_t *arch_tables) if (table64 >> 32) { pr_cont("\n"); pr_err("Table located above 4GB, disabling EFI.\n"); - early_memunmap(config_tables, - efi.systab->nr_tables * sz); return -EINVAL; } #endif @@ -344,13 +328,37 @@ int __init efi_config_init(efi_config_table_type_t *arch_tables) tablep += sz; } pr_cont("\n"); - early_memunmap(config_tables, efi.systab->nr_tables * sz); - set_bit(EFI_CONFIG_TABLES, &efi.flags); - return 0; } +int __init efi_config_init(efi_config_table_type_t *arch_tables) +{ + void *config_tables; + int sz, ret; + + if (efi_enabled(EFI_64BIT)) + sz = sizeof(efi_config_table_64_t); + else + sz = sizeof(efi_config_table_32_t); + + /* + * Let's see what config tables the firmware passed to us. + */ + config_tables = early_memremap(efi.systab->tables, + efi.systab->nr_tables * sz); + if (config_tables == NULL) { + pr_err("Could not map Configuration table!\n"); + return -ENOMEM; + } + + ret = efi_config_parse_tables(config_tables, efi.systab->nr_tables, sz, + arch_tables); + + early_memunmap(config_tables, efi.systab->nr_tables * sz); + return ret; +} + #ifdef CONFIG_EFI_VARS_MODULE static int __init efi_load_efivars(void) { diff --git a/include/linux/efi.h b/include/linux/efi.h index 0238d612750e9f..5ffe5115951fff 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -875,6 +875,8 @@ static inline efi_status_t efi_query_variable_store(u32 attributes, unsigned lon #endif extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr); extern int efi_config_init(efi_config_table_type_t *arch_tables); +extern int efi_config_parse_tables(void *config_tables, int count, int sz, + efi_config_table_type_t *arch_tables); extern u64 efi_get_iobase (void); extern u32 efi_mem_type (unsigned long phys_addr); extern u64 efi_mem_attributes (unsigned long phys_addr); From cf2b0f102cdf912eedb87b10271fa0ad582cf2c1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 17 Nov 2014 13:46:44 +0100 Subject: [PATCH 033/601] efi: efistub: allow allocation alignment larger than EFI_PAGE_SIZE On systems with 64 KB pages, it is preferable for UEFI memory map entries to be 64 KB aligned multiples of 64 KB, because it relieves us of having to deal with the residues. So, if EFI_ALLOC_ALIGN is #define'd by the platform, use it to round up all memory allocations made. Acked-by: Matt Fleming Acked-by: Borislav Petkov Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- .../firmware/efi/libstub/efi-stub-helper.c | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index a920fec8fe8856..e766df60fbfb6d 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -32,6 +32,15 @@ static unsigned long __chunk_size = EFI_READ_CHUNK_SIZE; +/* + * Allow the platform to override the allocation granularity: this allows + * systems that have the capability to run with a larger page size to deal + * with the allocations for initrd and fdt more efficiently. + */ +#ifndef EFI_ALLOC_ALIGN +#define EFI_ALLOC_ALIGN EFI_PAGE_SIZE +#endif + struct file_info { efi_file_handle_t *handle; u64 size; @@ -150,10 +159,10 @@ efi_status_t efi_high_alloc(efi_system_table_t *sys_table_arg, * a specific address. We are doing page-based allocations, * so we must be aligned to a page. */ - if (align < EFI_PAGE_SIZE) - align = EFI_PAGE_SIZE; + if (align < EFI_ALLOC_ALIGN) + align = EFI_ALLOC_ALIGN; - nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; + nr_pages = round_up(size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE; again: for (i = 0; i < map_size / desc_size; i++) { efi_memory_desc_t *desc; @@ -235,10 +244,10 @@ efi_status_t efi_low_alloc(efi_system_table_t *sys_table_arg, * a specific address. We are doing page-based allocations, * so we must be aligned to a page. */ - if (align < EFI_PAGE_SIZE) - align = EFI_PAGE_SIZE; + if (align < EFI_ALLOC_ALIGN) + align = EFI_ALLOC_ALIGN; - nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; + nr_pages = round_up(size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE; for (i = 0; i < map_size / desc_size; i++) { efi_memory_desc_t *desc; unsigned long m = (unsigned long)map; @@ -292,7 +301,7 @@ void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, if (!size) return; - nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; + nr_pages = round_up(size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE; efi_call_early(free_pages, addr, nr_pages); } @@ -561,7 +570,7 @@ efi_status_t efi_relocate_kernel(efi_system_table_t *sys_table_arg, * to the preferred address. If that fails, allocate as low * as possible while respecting the required alignment. */ - nr_pages = round_up(alloc_size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE; + nr_pages = round_up(alloc_size, EFI_ALLOC_ALIGN) / EFI_PAGE_SIZE; status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS, EFI_LOADER_DATA, nr_pages, &efi_addr); From 1bd0abb0c924a8b28c6466cdd6bb34ea053541dc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 17 Nov 2014 13:50:21 +0100 Subject: [PATCH 034/601] arm64/efi: set EFI_ALLOC_ALIGN to 64 KB Set EFI_ALLOC_ALIGN to 64 KB so that all allocations done by the stub are naturally compatible with a 64 KB granule kernel. Acked-by: Leif Lindholm Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- arch/arm64/include/asm/efi.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index a34fd3b12e2b96..71291253114f93 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -44,4 +44,6 @@ extern void efi_idmap_init(void); #define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) +#define EFI_ALLOC_ALIGN SZ_64K + #endif /* _ASM_EFI_H */ From f3cdfd239da56a4cea75a2920dc326f0f45f67e3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Oct 2014 16:27:26 +0200 Subject: [PATCH 035/601] arm64/efi: move SetVirtualAddressMap() to UEFI stub In order to support kexec, the kernel needs to be able to deal with the state of the UEFI firmware after SetVirtualAddressMap() has been called. To avoid having separate code paths for non-kexec and kexec, let's move the call to SetVirtualAddressMap() to the stub: this will guarantee us that it will only be called once (since the stub is not executed during kexec), and ensures that the UEFI state is identical between kexec and normal boot. This implies that the layout of the virtual mapping needs to be created by the stub as well. All regions are rounded up to a naturally aligned multiple of 64 KB (for compatibility with 64k pages kernels) and recorded in the UEFI memory map. The kernel proper reads those values and installs the mappings in a dedicated set of page tables that are swapped in during UEFI Runtime Services calls. Acked-by: Leif Lindholm Acked-by: Matt Fleming Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- arch/arm64/include/asm/efi.h | 34 +++- arch/arm64/kernel/efi.c | 230 +++++++++++++----------- arch/arm64/kernel/setup.c | 1 + drivers/firmware/efi/libstub/arm-stub.c | 59 ++++++ drivers/firmware/efi/libstub/efistub.h | 4 + drivers/firmware/efi/libstub/fdt.c | 62 ++++++- 6 files changed, 282 insertions(+), 108 deletions(-) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 71291253114f93..effef3713c5a89 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -7,28 +7,36 @@ #ifdef CONFIG_EFI extern void efi_init(void); extern void efi_idmap_init(void); +extern void efi_virtmap_init(void); #else #define efi_init() #define efi_idmap_init() +#define efi_virtmap_init() #endif #define efi_call_virt(f, ...) \ ({ \ - efi_##f##_t *__f = efi.systab->runtime->f; \ + efi_##f##_t *__f; \ efi_status_t __s; \ \ kernel_neon_begin(); \ + efi_virtmap_load(); \ + __f = efi.systab->runtime->f; \ __s = __f(__VA_ARGS__); \ + efi_virtmap_unload(); \ kernel_neon_end(); \ __s; \ }) #define __efi_call_virt(f, ...) \ ({ \ - efi_##f##_t *__f = efi.systab->runtime->f; \ + efi_##f##_t *__f; \ \ kernel_neon_begin(); \ + efi_virtmap_load(); \ + __f = efi.systab->runtime->f; \ __f(__VA_ARGS__); \ + efi_virtmap_unload(); \ kernel_neon_end(); \ }) @@ -46,4 +54,26 @@ extern void efi_idmap_init(void); #define EFI_ALLOC_ALIGN SZ_64K +/* + * On ARM systems, virtually remapped UEFI runtime services are set up in three + * distinct stages: + * - The stub retrieves the final version of the memory map from UEFI, populates + * the virt_addr fields and calls the SetVirtualAddressMap() [SVAM] runtime + * service to communicate the new mapping to the firmware (Note that the new + * mapping is not live at this time) + * - During early boot, the page tables are allocated and populated based on the + * virt_addr fields in the memory map, but only if all descriptors with the + * EFI_MEMORY_RUNTIME attribute have a non-zero value for virt_addr. If this + * succeeds, the EFI_VIRTMAP flag is set to indicate that the virtual mappings + * have been installed successfully. + * - During an early initcall(), the UEFI Runtime Services are enabled and the + * EFI_RUNTIME_SERVICES bit set if some conditions are met, i.e., we need a + * non-early mapping of the UEFI system table, and we need to have the virtmap + * installed. + */ +#define EFI_VIRTMAP EFI_ARCH_1 + +void efi_virtmap_load(void); +void efi_virtmap_unload(void); + #endif /* _ASM_EFI_H */ diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 2bb4347d0edfd9..755e545144ea90 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -11,25 +11,31 @@ * */ +#include #include #include #include #include +#include #include #include #include +#include +#include +#include #include #include +#include #include #include #include #include +#include +#include struct efi_memory_map memmap; -static efi_runtime_services_t *runtime; - static u64 efi_system_table; static int uefi_debug __initdata; @@ -69,9 +75,33 @@ static void __init efi_setup_idmap(void) } } +/* + * Translate a EFI virtual address into a physical address: this is necessary, + * as some data members of the EFI system table are virtually remapped after + * SetVirtualAddressMap() has been called. + */ +static phys_addr_t efi_to_phys(unsigned long addr) +{ + efi_memory_desc_t *md; + + for_each_efi_memory_desc(&memmap, md) { + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + if (md->virt_addr == 0) + /* no virtual mapping has been installed by the stub */ + break; + if (md->virt_addr <= addr && + (addr - md->virt_addr) < (md->num_pages << EFI_PAGE_SHIFT)) + return md->phys_addr + addr - md->virt_addr; + } + return addr; +} + static int __init uefi_init(void) { efi_char16_t *c16; + void *config_tables; + u64 table_size; char vendor[100] = "unknown"; int i, retval; @@ -99,7 +129,7 @@ static int __init uefi_init(void) efi.systab->hdr.revision & 0xffff); /* Show what we know for posterity */ - c16 = early_memremap(efi.systab->fw_vendor, + c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor), sizeof(vendor)); if (c16) { for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) @@ -112,8 +142,14 @@ static int __init uefi_init(void) efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); - retval = efi_config_init(NULL); + table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables; + config_tables = early_memremap(efi_to_phys(efi.systab->tables), + table_size); + + retval = efi_config_parse_tables(config_tables, efi.systab->nr_tables, + sizeof(efi_config_table_64_t), NULL); + early_memunmap(config_tables, table_size); out: early_memunmap(efi.systab, sizeof(efi_system_table_t)); return retval; @@ -329,51 +365,14 @@ void __init efi_idmap_init(void) early_memunmap(memmap.map, memmap.map_end - memmap.map); } -static int __init remap_region(efi_memory_desc_t *md, void **new) -{ - u64 paddr, vaddr, npages, size; - - paddr = md->phys_addr; - npages = md->num_pages; - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - - if (is_normal_ram(md)) - vaddr = (__force u64)ioremap_cache(paddr, size); - else - vaddr = (__force u64)ioremap(paddr, size); - - if (!vaddr) { - pr_err("Unable to remap 0x%llx pages @ %p\n", - npages, (void *)paddr); - return 0; - } - - /* adjust for any rounding when EFI and system pagesize differs */ - md->virt_addr = vaddr + (md->phys_addr - paddr); - - if (uefi_debug) - pr_info(" EFI remap 0x%012llx => %p\n", - md->phys_addr, (void *)md->virt_addr); - - memcpy(*new, md, memmap.desc_size); - *new += memmap.desc_size; - - return 1; -} - /* - * Switch UEFI from an identity map to a kernel virtual map + * Enable the UEFI Runtime Services if all prerequisites are in place, i.e., + * non-early mapping of the UEFI system table and virtual mappings for all + * EFI_MEMORY_RUNTIME regions. */ -static int __init arm64_enter_virtual_mode(void) +static int __init arm64_enable_runtime_services(void) { - efi_memory_desc_t *md; - phys_addr_t virtmap_phys; - void *virtmap, *virt_md; - efi_status_t status; u64 mapsize; - int count = 0; - unsigned long flags; if (!efi_enabled(EFI_BOOT)) { pr_info("EFI services will not be available.\n"); @@ -395,81 +394,30 @@ static int __init arm64_enter_virtual_mode(void) efi.memmap = &memmap; - /* Map the runtime regions */ - virtmap = kmalloc(mapsize, GFP_KERNEL); - if (!virtmap) { - pr_err("Failed to allocate EFI virtual memmap\n"); - return -1; - } - virtmap_phys = virt_to_phys(virtmap); - virt_md = virtmap; - - for_each_efi_memory_desc(&memmap, md) { - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - if (!remap_region(md, &virt_md)) - goto err_unmap; - ++count; - } - - efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table); + efi.systab = (__force void *)ioremap_cache(efi_system_table, + sizeof(efi_system_table_t)); if (!efi.systab) { - /* - * If we have no virtual mapping for the System Table at this - * point, the memory map doesn't cover the physical offset where - * it resides. This means the System Table will be inaccessible - * to Runtime Services themselves once the virtual mapping is - * installed. - */ - pr_err("Failed to remap EFI System Table -- buggy firmware?\n"); - goto err_unmap; + pr_err("Failed to remap EFI System Table\n"); + return -1; } set_bit(EFI_SYSTEM_TABLES, &efi.flags); - local_irq_save(flags); - cpu_switch_mm(idmap_pg_dir, &init_mm); - - /* Call SetVirtualAddressMap with the physical address of the map */ - runtime = efi.systab->runtime; - efi.set_virtual_address_map = runtime->set_virtual_address_map; - - status = efi.set_virtual_address_map(count * memmap.desc_size, - memmap.desc_size, - memmap.desc_version, - (efi_memory_desc_t *)virtmap_phys); - cpu_set_reserved_ttbr0(); - flush_tlb_all(); - local_irq_restore(flags); - - kfree(virtmap); - free_boot_services(); - if (status != EFI_SUCCESS) { - pr_err("Failed to set EFI virtual address map! [%lx]\n", - status); + if (!efi_enabled(EFI_VIRTMAP)) { + pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n"); return -1; } /* Set up runtime services function pointers */ - runtime = efi.systab->runtime; efi_native_runtime_setup(); set_bit(EFI_RUNTIME_SERVICES, &efi.flags); efi.runtime_version = efi.systab->hdr.revision; return 0; - -err_unmap: - /* unmap all mappings that succeeded: there are 'count' of those */ - for (virt_md = virtmap; count--; virt_md += memmap.desc_size) { - md = virt_md; - iounmap((__force void __iomem *)md->virt_addr); - } - kfree(virtmap); - return -1; } -early_initcall(arm64_enter_virtual_mode); +early_initcall(arm64_enable_runtime_services); static int __init arm64_dmi_init(void) { @@ -484,3 +432,79 @@ static int __init arm64_dmi_init(void) return 0; } core_initcall(arm64_dmi_init); + +static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss; + +static struct mm_struct efi_mm = { + .mm_rb = RB_ROOT, + .pgd = efi_pgd, + .mm_users = ATOMIC_INIT(2), + .mm_count = ATOMIC_INIT(1), + .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), + .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), + .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), + INIT_MM_CONTEXT(efi_mm) +}; + +static void efi_set_pgd(struct mm_struct *mm) +{ + cpu_switch_mm(mm->pgd, mm); + flush_tlb_all(); + if (icache_is_aivivt()) + __flush_icache_all(); +} + +void efi_virtmap_load(void) +{ + preempt_disable(); + efi_set_pgd(&efi_mm); +} + +void efi_virtmap_unload(void) +{ + efi_set_pgd(current->active_mm); + preempt_enable(); +} + +void __init efi_virtmap_init(void) +{ + efi_memory_desc_t *md; + + if (!efi_enabled(EFI_BOOT)) + return; + + for_each_efi_memory_desc(&memmap, md) { + u64 paddr, npages, size; + pgprot_t prot; + + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + if (WARN(md->virt_addr == 0, + "UEFI virtual mapping incomplete or missing -- no entry found for 0x%llx\n", + md->phys_addr)) + return; + + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + pr_info(" EFI remap 0x%016llx => %p\n", + md->phys_addr, (void *)md->virt_addr); + + /* + * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be + * executable, everything else can be mapped with the XN bits + * set. + */ + if (!is_normal_ram(md)) + prot = __pgprot(PROT_DEVICE_nGnRE); + else if (md->type == EFI_RUNTIME_SERVICES_CODE) + prot = PAGE_KERNEL_EXEC; + else + prot = PAGE_KERNEL; + + create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot); + } + set_bit(EFI_VIRTMAP, &efi.flags); +} diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 20fe2932ad0c47..beac8188fdbd25 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -401,6 +401,7 @@ void __init setup_arch(char **cmdline_p) paging_init(); request_standard_resources(); + efi_virtmap_init(); efi_idmap_init(); early_ioremap_reset(); diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index eb48a1a1a576aa..e2432b39b6df79 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -295,3 +295,62 @@ unsigned long __init efi_entry(void *handle, efi_system_table_t *sys_table, fail: return EFI_ERROR; } + +/* + * This is the base address at which to start allocating virtual memory ranges + * for UEFI Runtime Services. This is in the low TTBR0 range so that we can use + * any allocation we choose, and eliminate the risk of a conflict after kexec. + * The value chosen is the largest non-zero power of 2 suitable for this purpose + * both on 32-bit and 64-bit ARM CPUs, to maximize the likelihood that it can + * be mapped efficiently. + */ +#define EFI_RT_VIRTUAL_BASE 0x40000000 + +/* + * efi_get_virtmap() - create a virtual mapping for the EFI memory map + * + * This function populates the virt_addr fields of all memory region descriptors + * in @memory_map whose EFI_MEMORY_RUNTIME attribute is set. Those descriptors + * are also copied to @runtime_map, and their total count is returned in @count. + */ +void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, + unsigned long desc_size, efi_memory_desc_t *runtime_map, + int *count) +{ + u64 efi_virt_base = EFI_RT_VIRTUAL_BASE; + efi_memory_desc_t *out = runtime_map; + int l; + + for (l = 0; l < map_size; l += desc_size) { + efi_memory_desc_t *in = (void *)memory_map + l; + u64 paddr, size; + + if (!(in->attribute & EFI_MEMORY_RUNTIME)) + continue; + + /* + * Make the mapping compatible with 64k pages: this allows + * a 4k page size kernel to kexec a 64k page size kernel and + * vice versa. + */ + paddr = round_down(in->phys_addr, SZ_64K); + size = round_up(in->num_pages * EFI_PAGE_SIZE + + in->phys_addr - paddr, SZ_64K); + + /* + * Avoid wasting memory on PTEs by choosing a virtual base that + * is compatible with section mappings if this region has the + * appropriate size and physical alignment. (Sections are 2 MB + * on 4k granule kernels) + */ + if (IS_ALIGNED(in->phys_addr, SZ_2M) && size >= SZ_2M) + efi_virt_base = round_up(efi_virt_base, SZ_2M); + + in->virt_addr = efi_virt_base + in->phys_addr - paddr; + efi_virt_base += size; + + memcpy(out, in, desc_size); + out = (void *)out + desc_size; + ++*count; + } +} diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 304ab295ca1a8b..2be10984a67aaf 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -39,4 +39,8 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, void *get_fdt(efi_system_table_t *sys_table); +void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size, + unsigned long desc_size, efi_memory_desc_t *runtime_map, + int *count); + #endif diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c index c846a9608cbd7b..91da56c4fd540c 100644 --- a/drivers/firmware/efi/libstub/fdt.c +++ b/drivers/firmware/efi/libstub/fdt.c @@ -14,6 +14,8 @@ #include #include +#include "efistub.h" + efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt, unsigned long orig_fdt_size, void *fdt, int new_fdt_size, char *cmdline_ptr, @@ -193,9 +195,26 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, unsigned long map_size, desc_size; u32 desc_ver; unsigned long mmap_key; - efi_memory_desc_t *memory_map; + efi_memory_desc_t *memory_map, *runtime_map; unsigned long new_fdt_size; efi_status_t status; + int runtime_entry_count = 0; + + /* + * Get a copy of the current memory map that we will use to prepare + * the input for SetVirtualAddressMap(). We don't have to worry about + * subsequent allocations adding entries, since they could not affect + * the number of EFI_MEMORY_RUNTIME regions. + */ + status = efi_get_memory_map(sys_table, &runtime_map, &map_size, + &desc_size, &desc_ver, &mmap_key); + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table, "Unable to retrieve UEFI memory map.\n"); + return status; + } + + pr_efi(sys_table, + "Exiting boot services and installing virtual address map...\n"); /* * Estimate size of new FDT, and allocate memory for it. We @@ -248,12 +267,48 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, } } + /* + * Update the memory map with virtual addresses. The function will also + * populate @runtime_map with copies of just the EFI_MEMORY_RUNTIME + * entries so that we can pass it straight into SetVirtualAddressMap() + */ + efi_get_virtmap(memory_map, map_size, desc_size, runtime_map, + &runtime_entry_count); + /* Now we are ready to exit_boot_services.*/ status = sys_table->boottime->exit_boot_services(handle, mmap_key); + if (status == EFI_SUCCESS) { + efi_set_virtual_address_map_t *svam; - if (status == EFI_SUCCESS) - return status; + /* Install the new virtual address map */ + svam = sys_table->runtime->set_virtual_address_map; + status = svam(runtime_entry_count * desc_size, desc_size, + desc_ver, runtime_map); + + /* + * We are beyond the point of no return here, so if the call to + * SetVirtualAddressMap() failed, we need to signal that to the + * incoming kernel but proceed normally otherwise. + */ + if (status != EFI_SUCCESS) { + int l; + + /* + * Set the virtual address field of all + * EFI_MEMORY_RUNTIME entries to 0. This will signal + * the incoming kernel that no virtual translation has + * been installed. + */ + for (l = 0; l < map_size; l += desc_size) { + efi_memory_desc_t *p = (void *)memory_map + l; + + if (p->attribute & EFI_MEMORY_RUNTIME) + p->virt_addr = 0; + } + } + return EFI_SUCCESS; + } pr_efi_err(sys_table, "Exit boot services failed.\n"); @@ -264,6 +319,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, efi_free(sys_table, new_fdt_size, *new_fdt_addr); fail: + sys_table->boottime->free_pool(runtime_map); return EFI_LOAD_ERROR; } From 3033b84596eaec0093b68c5711c265738eb0745d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Oct 2014 16:35:21 +0200 Subject: [PATCH 036/601] arm64/efi: remove free_boot_services() and friends Now that we are calling SetVirtualAddressMap() from the stub, there is no need to reserve boot-only memory regions, which implies that there is also no reason to free them again later. Acked-by: Leif Lindholm Acked-by: Will Deacon Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- arch/arm64/kernel/efi.c | 123 +--------------------------------------- 1 file changed, 1 insertion(+), 122 deletions(-) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 755e545144ea90..4a5d7343dddd01 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -199,9 +199,7 @@ static __init void reserve_regions(void) if (is_normal_ram(md)) early_init_dt_add_memory_arch(paddr, size); - if (is_reserve_region(md) || - md->type == EFI_BOOT_SERVICES_CODE || - md->type == EFI_BOOT_SERVICES_DATA) { + if (is_reserve_region(md)) { memblock_reserve(paddr, size); if (uefi_debug) pr_cont("*"); @@ -214,123 +212,6 @@ static __init void reserve_regions(void) set_bit(EFI_MEMMAP, &efi.flags); } - -static u64 __init free_one_region(u64 start, u64 end) -{ - u64 size = end - start; - - if (uefi_debug) - pr_info(" EFI freeing: 0x%012llx-0x%012llx\n", start, end - 1); - - free_bootmem_late(start, size); - return size; -} - -static u64 __init free_region(u64 start, u64 end) -{ - u64 map_start, map_end, total = 0; - - if (end <= start) - return total; - - map_start = (u64)memmap.phys_map; - map_end = PAGE_ALIGN(map_start + (memmap.map_end - memmap.map)); - map_start &= PAGE_MASK; - - if (start < map_end && end > map_start) { - /* region overlaps UEFI memmap */ - if (start < map_start) - total += free_one_region(start, map_start); - - if (map_end < end) - total += free_one_region(map_end, end); - } else - total += free_one_region(start, end); - - return total; -} - -static void __init free_boot_services(void) -{ - u64 total_freed = 0; - u64 keep_end, free_start, free_end; - efi_memory_desc_t *md; - - /* - * If kernel uses larger pages than UEFI, we have to be careful - * not to inadvertantly free memory we want to keep if there is - * overlap at the kernel page size alignment. We do not want to - * free is_reserve_region() memory nor the UEFI memmap itself. - * - * The memory map is sorted, so we keep track of the end of - * any previous region we want to keep, remember any region - * we want to free and defer freeing it until we encounter - * the next region we want to keep. This way, before freeing - * it, we can clip it as needed to avoid freeing memory we - * want to keep for UEFI. - */ - - keep_end = 0; - free_start = 0; - - for_each_efi_memory_desc(&memmap, md) { - u64 paddr, npages, size; - - if (is_reserve_region(md)) { - /* - * We don't want to free any memory from this region. - */ - if (free_start) { - /* adjust free_end then free region */ - if (free_end > md->phys_addr) - free_end -= PAGE_SIZE; - total_freed += free_region(free_start, free_end); - free_start = 0; - } - keep_end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); - continue; - } - - if (md->type != EFI_BOOT_SERVICES_CODE && - md->type != EFI_BOOT_SERVICES_DATA) { - /* no need to free this region */ - continue; - } - - /* - * We want to free memory from this region. - */ - paddr = md->phys_addr; - npages = md->num_pages; - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - - if (free_start) { - if (paddr <= free_end) - free_end = paddr + size; - else { - total_freed += free_region(free_start, free_end); - free_start = paddr; - free_end = paddr + size; - } - } else { - free_start = paddr; - free_end = paddr + size; - } - if (free_start < keep_end) { - free_start += PAGE_SIZE; - if (free_start >= free_end) - free_start = 0; - } - } - if (free_start) - total_freed += free_region(free_start, free_end); - - if (total_freed) - pr_info("Freed 0x%llx bytes of EFI boot services memory", - total_freed); -} - void __init efi_init(void) { struct efi_fdt_params params; @@ -402,8 +283,6 @@ static int __init arm64_enable_runtime_services(void) } set_bit(EFI_SYSTEM_TABLES, &efi.flags); - free_boot_services(); - if (!efi_enabled(EFI_VIRTMAP)) { pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n"); return -1; From 9679be103108926cfe9e6fd2f6829cefa77e47b0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 20 Oct 2014 16:41:38 +0200 Subject: [PATCH 037/601] arm64/efi: remove idmap manipulations from UEFI code Now that we have moved the call to SetVirtualAddressMap() to the stub, UEFI has no use for the ID map, so we can drop the code that installs ID mappings for UEFI memory regions. Acked-by: Leif Lindholm Acked-by: Will Deacon Tested-by: Leif Lindholm Signed-off-by: Ard Biesheuvel --- arch/arm64/include/asm/efi.h | 2 -- arch/arm64/include/asm/mmu.h | 2 -- arch/arm64/kernel/efi.c | 32 +------------------------------- arch/arm64/kernel/setup.c | 1 - arch/arm64/mm/mmu.c | 12 ------------ 5 files changed, 1 insertion(+), 48 deletions(-) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index effef3713c5a89..7baf2cc04e1e0c 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -6,11 +6,9 @@ #ifdef CONFIG_EFI extern void efi_init(void); -extern void efi_idmap_init(void); extern void efi_virtmap_init(void); #else #define efi_init() -#define efi_idmap_init() #define efi_virtmap_init() #endif diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 5fd40c43be8050..3d311761e3c274 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -31,8 +31,6 @@ extern void paging_init(void); extern void setup_mm_for_reboot(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void init_mem_pgprot(void); -/* create an identity mapping for memory (or io if map_io is true) */ -extern void create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot); diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index 4a5d7343dddd01..a98415b5979c2c 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -54,27 +54,6 @@ static int __init is_normal_ram(efi_memory_desc_t *md) return 0; } -static void __init efi_setup_idmap(void) -{ - struct memblock_region *r; - efi_memory_desc_t *md; - u64 paddr, npages, size; - - for_each_memblock(memory, r) - create_id_mapping(r->base, r->size, 0); - - /* map runtime io spaces */ - for_each_efi_memory_desc(&memmap, md) { - if (!(md->attribute & EFI_MEMORY_RUNTIME) || is_normal_ram(md)) - continue; - paddr = md->phys_addr; - npages = md->num_pages; - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - create_id_mapping(paddr, size, 1); - } -} - /* * Translate a EFI virtual address into a physical address: this is necessary, * as some data members of the EFI system table are virtually remapped after @@ -236,16 +215,6 @@ void __init efi_init(void) reserve_regions(); } -void __init efi_idmap_init(void) -{ - if (!efi_enabled(EFI_BOOT)) - return; - - /* boot time idmap_pg_dir is incomplete, so fill in missing parts */ - efi_setup_idmap(); - early_memunmap(memmap.map, memmap.map_end - memmap.map); -} - /* * Enable the UEFI Runtime Services if all prerequisites are in place, i.e., * non-early mapping of the UEFI system table and virtual mappings for all @@ -386,4 +355,5 @@ void __init efi_virtmap_init(void) create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot); } set_bit(EFI_VIRTMAP, &efi.flags); + early_memunmap(memmap.map, memmap.map_end - memmap.map); } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index beac8188fdbd25..199d1b7809d7fa 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -402,7 +402,6 @@ void __init setup_arch(char **cmdline_p) request_standard_resources(); efi_virtmap_init(); - efi_idmap_init(); early_ioremap_reset(); unflatten_device_tree(); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 3f3d5aa4a8b11d..3286385488714c 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -271,18 +271,6 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, size, PAGE_KERNEL_EXEC); } -void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io) -{ - if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) { - pr_warn("BUG: not creating id mapping for %pa\n", &addr); - return; - } - __create_mapping(&init_mm, &idmap_pg_dir[pgd_index(addr)], - addr, addr, size, - map_io ? __pgprot(PROT_DEVICE_nGnRE) - : PAGE_KERNEL_EXEC); -} - void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) From 6624cf651f1a14363d0385f36dc255d304ac7ebb Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 5 Jan 2015 19:29:21 +0800 Subject: [PATCH 038/601] ARM: kprobes: collects stack consumption for store instructions This patch uses the previously introduced checker functionality on store instructions to record their stack consumption information to arch_probes_insn. Signed-off-by: Wang Nan Reviewed-by: Jon Medhurst Signed-off-by: Jon Medhurst --- arch/arm/include/asm/probes.h | 1 + arch/arm/probes/decode.c | 10 ++ arch/arm/probes/kprobes/Makefile | 6 +- arch/arm/probes/kprobes/actions-arm.c | 3 +- arch/arm/probes/kprobes/actions-thumb.c | 5 +- arch/arm/probes/kprobes/checkers-arm.c | 99 +++++++++++++++++++ arch/arm/probes/kprobes/checkers-common.c | 101 ++++++++++++++++++++ arch/arm/probes/kprobes/checkers-thumb.c | 110 ++++++++++++++++++++++ arch/arm/probes/kprobes/checkers.h | 54 +++++++++++ 9 files changed, 383 insertions(+), 6 deletions(-) create mode 100644 arch/arm/probes/kprobes/checkers-arm.c create mode 100644 arch/arm/probes/kprobes/checkers-common.c create mode 100644 arch/arm/probes/kprobes/checkers-thumb.c create mode 100644 arch/arm/probes/kprobes/checkers.h diff --git a/arch/arm/include/asm/probes.h b/arch/arm/include/asm/probes.h index 806cfe622a9e31..6026deb28794bf 100644 --- a/arch/arm/include/asm/probes.h +++ b/arch/arm/include/asm/probes.h @@ -38,6 +38,7 @@ struct arch_probes_insn { probes_check_cc *insn_check_cc; probes_insn_singlestep_t *insn_singlestep; probes_insn_fn_t *insn_fn; + int stack_space; }; #endif diff --git a/arch/arm/probes/decode.c b/arch/arm/probes/decode.c index c7d442018902ac..f9d7c423f2cc0a 100644 --- a/arch/arm/probes/decode.c +++ b/arch/arm/probes/decode.c @@ -425,6 +425,16 @@ probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi, */ probes_opcode_t origin_insn = insn; + /* + * stack_space is initialized to 0 here. Checker functions + * should update is value if they find this is a stack store + * instruction: positive value means bytes of stack usage, + * negitive value means unable to determine stack usage + * statically. For instruction doesn't store to stack, checker + * do nothing with it. + */ + asi->stack_space = 0; + if (emulate) insn = prepare_emulated_insn(insn, asi, thumb); diff --git a/arch/arm/probes/kprobes/Makefile b/arch/arm/probes/kprobes/Makefile index eb38a428ecd617..bc8d504c3d7824 100644 --- a/arch/arm/probes/kprobes/Makefile +++ b/arch/arm/probes/kprobes/Makefile @@ -1,11 +1,11 @@ -obj-$(CONFIG_KPROBES) += core.o actions-common.o +obj-$(CONFIG_KPROBES) += core.o actions-common.o checkers-common.o obj-$(CONFIG_ARM_KPROBES_TEST) += test-kprobes.o test-kprobes-objs := test-core.o ifdef CONFIG_THUMB2_KERNEL -obj-$(CONFIG_KPROBES) += actions-thumb.o +obj-$(CONFIG_KPROBES) += actions-thumb.o checkers-thumb.o test-kprobes-objs += test-thumb.o else -obj-$(CONFIG_KPROBES) += actions-arm.o +obj-$(CONFIG_KPROBES) += actions-arm.o checkers-arm.o test-kprobes-objs += test-arm.o endif diff --git a/arch/arm/probes/kprobes/actions-arm.c b/arch/arm/probes/kprobes/actions-arm.c index fbd93a9ada75cd..06988ef7eeb775 100644 --- a/arch/arm/probes/kprobes/actions-arm.c +++ b/arch/arm/probes/kprobes/actions-arm.c @@ -64,6 +64,7 @@ #include "../decode-arm.h" #include "core.h" +#include "checkers.h" #if __LINUX_ARM_ARCH__ >= 6 #define BLX(reg) "blx "reg" \n\t" @@ -340,4 +341,4 @@ const union decode_action kprobes_arm_actions[NUM_PROBES_ARM_ACTIONS] = { [PROBES_LDMSTM] = {.decoder = kprobe_decode_ldmstm} }; -const struct decode_checker *kprobes_arm_checkers[] = {NULL}; +const struct decode_checker *kprobes_arm_checkers[] = {arm_stack_checker, NULL}; diff --git a/arch/arm/probes/kprobes/actions-thumb.c b/arch/arm/probes/kprobes/actions-thumb.c index 2796121fe90e18..07cfd9bef34099 100644 --- a/arch/arm/probes/kprobes/actions-thumb.c +++ b/arch/arm/probes/kprobes/actions-thumb.c @@ -15,6 +15,7 @@ #include "../decode-thumb.h" #include "core.h" +#include "checkers.h" /* These emulation encodings are functionally equivalent... */ #define t32_emulate_rd8rn16rm0ra12_noflags \ @@ -665,5 +666,5 @@ const union decode_action kprobes_t32_actions[NUM_PROBES_T32_ACTIONS] = { .handler = t32_emulate_rdlo12rdhi8rn16rm0_noflags}, }; -const struct decode_checker *kprobes_t32_checkers[] = {NULL}; -const struct decode_checker *kprobes_t16_checkers[] = {NULL}; +const struct decode_checker *kprobes_t32_checkers[] = {t32_stack_checker, NULL}; +const struct decode_checker *kprobes_t16_checkers[] = {t16_stack_checker, NULL}; diff --git a/arch/arm/probes/kprobes/checkers-arm.c b/arch/arm/probes/kprobes/checkers-arm.c new file mode 100644 index 00000000000000..f8176631d2a546 --- /dev/null +++ b/arch/arm/probes/kprobes/checkers-arm.c @@ -0,0 +1,99 @@ +/* + * arch/arm/probes/kprobes/checkers-arm.c + * + * Copyright (C) 2014 Huawei Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include +#include "../decode.h" +#include "../decode-arm.h" +#include "checkers.h" + +static enum probes_insn __kprobes arm_check_stack(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *h) +{ + /* + * PROBES_LDRSTRD, PROBES_LDMSTM, PROBES_STORE, + * PROBES_STORE_EXTRA may get here. Simply mark all normal + * insns as STACK_USE_NONE. + */ + static const union decode_item table[] = { + /* + * 'STR{,D,B,H}, Rt, [Rn, Rm]' should be marked as UNKNOWN + * if Rn or Rm is SP. + * x + * STR (register) cccc 011x x0x0 xxxx xxxx xxxx xxxx xxxx + * STRB (register) cccc 011x x1x0 xxxx xxxx xxxx xxxx xxxx + */ + DECODE_OR (0x0e10000f, 0x0600000d), + DECODE_OR (0x0e1f0000, 0x060d0000), + + /* + * x + * STRD (register) cccc 000x x0x0 xxxx xxxx xxxx 1111 xxxx + * STRH (register) cccc 000x x0x0 xxxx xxxx xxxx 1011 xxxx + */ + DECODE_OR (0x0e5000bf, 0x000000bd), + DECODE_CUSTOM (0x0e5f00b0, 0x000d00b0, STACK_USE_UNKNOWN), + + /* + * For PROBES_LDMSTM, only stmdx sp, [...] need to examine + * + * Bit B/A (bit 24) encodes arithmetic operation order. 1 means + * before, 0 means after. + * Bit I/D (bit 23) encodes arithmetic operation. 1 means + * increment, 0 means decrement. + * + * So: + * B I + * / / + * A D | Rn | + * STMDX SP, [...] cccc 100x 00x0 xxxx xxxx xxxx xxxx xxxx + */ + DECODE_CUSTOM (0x0edf0000, 0x080d0000, STACK_USE_STMDX), + + /* P U W | Rn | Rt | imm12 |*/ + /* STR (immediate) cccc 010x x0x0 1101 xxxx xxxx xxxx xxxx */ + /* STRB (immediate) cccc 010x x1x0 1101 xxxx xxxx xxxx xxxx */ + /* P U W | Rn | Rt |imm4| |imm4|*/ + /* STRD (immediate) cccc 000x x1x0 1101 xxxx xxxx 1111 xxxx */ + /* STRH (immediate) cccc 000x x1x0 1101 xxxx xxxx 1011 xxxx */ + /* + * index = (P == '1'); add = (U == '1'). + * Above insns with: + * index == 0 (str{,d,h} rx, [sp], #+/-imm) or + * add == 1 (str{,d,h} rx, [sp, #+]) + * should be STACK_USE_NONE. + * Only str{,b,d,h} rx,[sp,#-n] (P == 1 and U == 0) are + * required to be examined. + */ + /* STR{,B} Rt,[SP,#-n] cccc 0101 0xx0 1101 xxxx xxxx xxxx xxxx */ + DECODE_CUSTOM (0x0f9f0000, 0x050d0000, STACK_USE_FIXED_XXX), + + /* STR{D,H} Rt,[SP,#-n] cccc 0001 01x0 1101 xxxx xxxx 1x11 xxxx */ + DECODE_CUSTOM (0x0fdf00b0, 0x014d00b0, STACK_USE_FIXED_X0X), + + /* fall through */ + DECODE_CUSTOM (0, 0, STACK_USE_NONE), + DECODE_END + }; + + return probes_decode_insn(insn, asi, table, false, false, stack_check_actions, NULL); +} + +const struct decode_checker arm_stack_checker[NUM_PROBES_ARM_ACTIONS] = { + [PROBES_LDRSTRD] = {.checker = arm_check_stack}, + [PROBES_STORE_EXTRA] = {.checker = arm_check_stack}, + [PROBES_STORE] = {.checker = arm_check_stack}, + [PROBES_LDMSTM] = {.checker = arm_check_stack}, +}; diff --git a/arch/arm/probes/kprobes/checkers-common.c b/arch/arm/probes/kprobes/checkers-common.c new file mode 100644 index 00000000000000..971119c2947413 --- /dev/null +++ b/arch/arm/probes/kprobes/checkers-common.c @@ -0,0 +1,101 @@ +/* + * arch/arm/probes/kprobes/checkers-common.c + * + * Copyright (C) 2014 Huawei Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include +#include "../decode.h" +#include "../decode-arm.h" +#include "checkers.h" + +enum probes_insn checker_stack_use_none(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *h) +{ + asi->stack_space = 0; + return INSN_GOOD_NO_SLOT; +} + +enum probes_insn checker_stack_use_unknown(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *h) +{ + asi->stack_space = -1; + return INSN_GOOD_NO_SLOT; +} + +#ifdef CONFIG_THUMB2_KERNEL +enum probes_insn checker_stack_use_imm_0xx(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *h) +{ + int imm = insn & 0xff; + asi->stack_space = imm; + return INSN_GOOD_NO_SLOT; +} + +/* + * Different from other insn uses imm8, the real addressing offset of + * STRD in T32 encoding should be imm8 * 4. See ARMARM description. + */ +enum probes_insn checker_stack_use_t32strd(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *h) +{ + int imm = insn & 0xff; + asi->stack_space = imm << 2; + return INSN_GOOD_NO_SLOT; +} +#else +enum probes_insn checker_stack_use_imm_x0x(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *h) +{ + int imm = ((insn & 0xf00) >> 4) + (insn & 0xf); + asi->stack_space = imm; + return INSN_GOOD_NO_SLOT; +} +#endif + +enum probes_insn checker_stack_use_imm_xxx(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *h) +{ + int imm = insn & 0xfff; + asi->stack_space = imm; + return INSN_GOOD_NO_SLOT; +} + +enum probes_insn checker_stack_use_stmdx(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *h) +{ + unsigned int reglist = insn & 0xffff; + int pbit = insn & (1 << 24); + asi->stack_space = (hweight32(reglist) - (!pbit ? 1 : 0)) * 4; + + return INSN_GOOD_NO_SLOT; +} + +const union decode_action stack_check_actions[] = { + [STACK_USE_NONE] = {.decoder = checker_stack_use_none}, + [STACK_USE_UNKNOWN] = {.decoder = checker_stack_use_unknown}, +#ifdef CONFIG_THUMB2_KERNEL + [STACK_USE_FIXED_0XX] = {.decoder = checker_stack_use_imm_0xx}, + [STACK_USE_T32STRD] = {.decoder = checker_stack_use_t32strd}, +#else + [STACK_USE_FIXED_X0X] = {.decoder = checker_stack_use_imm_x0x}, +#endif + [STACK_USE_FIXED_XXX] = {.decoder = checker_stack_use_imm_xxx}, + [STACK_USE_STMDX] = {.decoder = checker_stack_use_stmdx}, +}; diff --git a/arch/arm/probes/kprobes/checkers-thumb.c b/arch/arm/probes/kprobes/checkers-thumb.c new file mode 100644 index 00000000000000..d608e3b9017a88 --- /dev/null +++ b/arch/arm/probes/kprobes/checkers-thumb.c @@ -0,0 +1,110 @@ +/* + * arch/arm/probes/kprobes/checkers-thumb.c + * + * Copyright (C) 2014 Huawei Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include +#include "../decode.h" +#include "../decode-thumb.h" +#include "checkers.h" + +static enum probes_insn __kprobes t32_check_stack(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *h) +{ + /* + * PROBES_T32_LDMSTM, PROBES_T32_LDRDSTRD and PROBES_T32_LDRSTR + * may get here. Simply mark all normal insns as STACK_USE_NONE. + */ + static const union decode_item table[] = { + + /* + * First, filter out all ldr insns to make our life easier. + * Following load insns may come here: + * LDM, LDRD, LDR. + * In T32 encoding, bit 20 is enough for distinguishing + * load and store. All load insns have this bit set, when + * all store insns have this bit clear. + */ + DECODE_CUSTOM (0x00100000, 0x00100000, STACK_USE_NONE), + + /* + * Mark all 'STR{,B,H}, Rt, [Rn, Rm]' as STACK_USE_UNKNOWN + * if Rn or Rm is SP. T32 doesn't encode STRD. + */ + /* xx | Rn | Rt | | Rm |*/ + /* STR (register) 1111 1000 0100 xxxx xxxx 0000 00xx xxxx */ + /* STRB (register) 1111 1000 0000 xxxx xxxx 0000 00xx xxxx */ + /* STRH (register) 1111 1000 0010 xxxx xxxx 0000 00xx xxxx */ + /* INVALID INSN 1111 1000 0110 xxxx xxxx 0000 00xx xxxx */ + /* By Introducing INVALID INSN, bit 21 and 22 can be ignored. */ + DECODE_OR (0xff9f0fc0, 0xf80d0000), + DECODE_CUSTOM (0xff900fcf, 0xf800000d, STACK_USE_UNKNOWN), + + + /* xx | Rn | Rt | PUW| imm8 |*/ + /* STR (imm 8) 1111 1000 0100 1101 xxxx 110x xxxx xxxx */ + /* STRB (imm 8) 1111 1000 0000 1101 xxxx 110x xxxx xxxx */ + /* STRH (imm 8) 1111 1000 0010 1101 xxxx 110x xxxx xxxx */ + /* INVALID INSN 1111 1000 0110 1101 xxxx 110x xxxx xxxx */ + /* Only consider U == 0 and P == 1: strx rx, [sp, #-] */ + DECODE_CUSTOM (0xff9f0e00, 0xf80d0c00, STACK_USE_FIXED_0XX), + + /* For STR{,B,H} (imm 12), offset is always positive, so ignore them. */ + + /* P U W | Rn | Rt | Rt2| imm8 |*/ + /* STRD (immediate) 1110 1001 01x0 1101 xxxx xxxx xxxx xxxx */ + /* + * Only consider U == 0 and P == 1. + * Also note that STRD in T32 encoding is special: + * imm = ZeroExtend(imm8:'00', 32) + */ + DECODE_CUSTOM (0xffdf0000, 0xe94d0000, STACK_USE_T32STRD), + + /* | Rn | */ + /* STMDB 1110 1001 00x0 1101 xxxx xxxx xxxx xxxx */ + DECODE_CUSTOM (0xffdf0000, 0xe90d0000, STACK_USE_STMDX), + + /* fall through */ + DECODE_CUSTOM (0, 0, STACK_USE_NONE), + DECODE_END + }; + + return probes_decode_insn(insn, asi, table, false, false, stack_check_actions, NULL); +} + +const struct decode_checker t32_stack_checker[NUM_PROBES_T32_ACTIONS] = { + [PROBES_T32_LDMSTM] = {.checker = t32_check_stack}, + [PROBES_T32_LDRDSTRD] = {.checker = t32_check_stack}, + [PROBES_T32_LDRSTR] = {.checker = t32_check_stack}, +}; + +/* + * See following comments. This insn must be 'push'. + */ +static enum probes_insn __kprobes t16_check_stack(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *h) +{ + unsigned int reglist = insn & 0x1ff; + asi->stack_space = hweight32(reglist) * 4; + return INSN_GOOD; +} + +/* + * T16 encoding is simple: only the 'push' insn can need extra stack space. + * Other insns, like str, can only use r0-r7 as Rn. + */ +const struct decode_checker t16_stack_checker[NUM_PROBES_T16_ACTIONS] = { + [PROBES_T16_PUSH] = {.checker = t16_check_stack}, +}; diff --git a/arch/arm/probes/kprobes/checkers.h b/arch/arm/probes/kprobes/checkers.h new file mode 100644 index 00000000000000..bddfa0e82389c6 --- /dev/null +++ b/arch/arm/probes/kprobes/checkers.h @@ -0,0 +1,54 @@ +/* + * arch/arm/probes/kprobes/checkers.h + * + * Copyright (C) 2014 Huawei Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef _ARM_KERNEL_PROBES_CHECKERS_H +#define _ARM_KERNEL_PROBES_CHECKERS_H + +#include +#include +#include "../decode.h" + +extern probes_check_t checker_stack_use_none; +extern probes_check_t checker_stack_use_unknown; +#ifdef CONFIG_THUMB2_KERNEL +extern probes_check_t checker_stack_use_imm_0xx; +#else +extern probes_check_t checker_stack_use_imm_x0x; +#endif +extern probes_check_t checker_stack_use_imm_xxx; +extern probes_check_t checker_stack_use_stmdx; + +enum { + STACK_USE_NONE, + STACK_USE_UNKNOWN, +#ifdef CONFIG_THUMB2_KERNEL + STACK_USE_FIXED_0XX, + STACK_USE_T32STRD, +#else + STACK_USE_FIXED_X0X, +#endif + STACK_USE_FIXED_XXX, + STACK_USE_STMDX, + NUM_STACK_USE_TYPES +}; + +extern const union decode_action stack_check_actions[]; + +#ifndef CONFIG_THUMB2_KERNEL +extern const struct decode_checker arm_stack_checker[]; +#else +#endif +extern const struct decode_checker t32_stack_checker[]; +extern const struct decode_checker t16_stack_checker[]; +#endif From a0266c214fab21371a499e6ab1c9385cc6589189 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 5 Jan 2015 19:29:25 +0800 Subject: [PATCH 039/601] ARM: kprobes: disallow probing stack consuming instructions This patch prohibits probing instructions for which the stack requirements are unable to be determined statically. Some test cases are found not work again after the modification, this patch also removes them. Signed-off-by: Wang Nan Reviewed-by: Jon Medhurst Signed-off-by: Jon Medhurst --- arch/arm/include/asm/kprobes.h | 1 - arch/arm/include/asm/probes.h | 12 ++++++++++++ arch/arm/kernel/entry-armv.S | 3 ++- arch/arm/probes/kprobes/core.c | 9 +++++++++ arch/arm/probes/kprobes/test-arm.c | 16 ++++++++++------ 5 files changed, 33 insertions(+), 8 deletions(-) diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h index 49fa0dfaad3365..56f9ac68fbd180 100644 --- a/arch/arm/include/asm/kprobes.h +++ b/arch/arm/include/asm/kprobes.h @@ -22,7 +22,6 @@ #define __ARCH_WANT_KPROBES_INSN_SLOT #define MAX_INSN_SIZE 2 -#define MAX_STACK_SIZE 64 /* 32 would probably be OK */ #define flush_insn_slot(p) do { } while (0) #define kretprobe_blacklist_size 0 diff --git a/arch/arm/include/asm/probes.h b/arch/arm/include/asm/probes.h index 6026deb28794bf..cd9e81588d830b 100644 --- a/arch/arm/include/asm/probes.h +++ b/arch/arm/include/asm/probes.h @@ -19,6 +19,8 @@ #ifndef _ASM_PROBES_H #define _ASM_PROBES_H +#ifndef __ASSEMBLY__ + typedef u32 probes_opcode_t; struct arch_probes_insn; @@ -41,4 +43,14 @@ struct arch_probes_insn { int stack_space; }; +#endif /* __ASSEMBLY__ */ + +/* + * We assume one instruction can consume at most 64 bytes stack, which is + * 'push {r0-r15}'. Instructions consume more or unknown stack space like + * 'str r0, [sp, #-80]' and 'str r0, [sp, r1]' should be prohibit to probe. + * Both kprobe and jprobe use this macro. + */ +#define MAX_STACK_SIZE 64 + #endif diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 2f5555d307b345..672b21942fff77 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -31,6 +31,7 @@ #include "entry-header.S" #include +#include /* * Interrupt handling. @@ -249,7 +250,7 @@ __und_svc: @ If a kprobe is about to simulate a "stmdb sp..." instruction, @ it obviously needs free stack space which then will belong to @ the saved context. - svc_entry 64 + svc_entry MAX_STACK_SIZE #else svc_entry #endif diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 74f3dc3ac212b8..3a58db4cc1c6b2 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -115,6 +115,15 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) break; } + /* + * Never instrument insn like 'str r0, [sp, +/-r1]'. Also, insn likes + * 'str r0, [sp, #-68]' should also be prohibited. + * See __und_svc. + */ + if ((p->ainsn.stack_space < 0) || + (p->ainsn.stack_space > MAX_STACK_SIZE)) + return -EINVAL; + return 0; } diff --git a/arch/arm/probes/kprobes/test-arm.c b/arch/arm/probes/kprobes/test-arm.c index d9a1255f3043ae..fdeb300b0fc80c 100644 --- a/arch/arm/probes/kprobes/test-arm.c +++ b/arch/arm/probes/kprobes/test-arm.c @@ -476,7 +476,8 @@ void kprobe_arm_test_cases(void) TEST_GROUP("Extra load/store instructions") TEST_RPR( "strh r",0, VAL1,", [r",1, 48,", -r",2, 24,"]") - TEST_RPR( "streqh r",14,VAL2,", [r",13,0, ", r",12, 48,"]") + TEST_RPR( "streqh r",14,VAL2,", [r",11,0, ", r",12, 48,"]") + TEST_UNSUPPORTED( "streqh r14, [r13, r12]") TEST_RPR( "strh r",1, VAL1,", [r",2, 24,", r",3, 48,"]!") TEST_RPR( "strneh r",12,VAL2,", [r",11,48,", -r",10,24,"]!") TEST_RPR( "strh r",2, VAL1,", [r",3, 24,"], r",4, 48,"") @@ -565,7 +566,8 @@ void kprobe_arm_test_cases(void) #if __LINUX_ARM_ARCH__ >= 5 TEST_RPR( "strd r",0, VAL1,", [r",1, 48,", -r",2,24,"]") - TEST_RPR( "strccd r",8, VAL2,", [r",13,0, ", r",12,48,"]") + TEST_RPR( "strccd r",8, VAL2,", [r",11,0, ", r",12,48,"]") + TEST_UNSUPPORTED( "strccd r8, [r13, r12]") TEST_RPR( "strd r",4, VAL1,", [r",2, 24,", r",3, 48,"]!") TEST_RPR( "strcsd r",12,VAL2,", [r",11,48,", -r",10,24,"]!") TEST_RPR( "strd r",2, VAL1,", [r",5, 24,"], r",4,48,"") @@ -638,13 +640,15 @@ void kprobe_arm_test_cases(void) TEST_RP( "str"byte" r",2, VAL1,", [r",3, 24,"], #48") \ TEST_RP( "str"byte" r",10,VAL2,", [r",9, 64,"], #-48") \ TEST_RPR("str"byte" r",0, VAL1,", [r",1, 48,", -r",2, 24,"]") \ - TEST_RPR("str"byte" r",14,VAL2,", [r",13,0, ", r",12, 48,"]") \ + TEST_RPR("str"byte" r",14,VAL2,", [r",11,0, ", r",12, 48,"]") \ + TEST_UNSUPPORTED("str"byte" r14, [r13, r12]") \ TEST_RPR("str"byte" r",1, VAL1,", [r",2, 24,", r",3, 48,"]!") \ TEST_RPR("str"byte" r",12,VAL2,", [r",11,48,", -r",10,24,"]!") \ TEST_RPR("str"byte" r",2, VAL1,", [r",3, 24,"], r",4, 48,"") \ TEST_RPR("str"byte" r",10,VAL2,", [r",9, 48,"], -r",11,24,"") \ TEST_RPR("str"byte" r",0, VAL1,", [r",1, 24,", r",2, 32,", asl #1]")\ - TEST_RPR("str"byte" r",14,VAL2,", [r",13,0, ", r",12, 32,", lsr #2]")\ + TEST_RPR("str"byte" r",14,VAL2,", [r",11,0, ", r",12, 32,", lsr #2]")\ + TEST_UNSUPPORTED("str"byte" r14, [r13, r12, lsr #2]")\ TEST_RPR("str"byte" r",1, VAL1,", [r",2, 24,", r",3, 32,", asr #3]!")\ TEST_RPR("str"byte" r",12,VAL2,", [r",11,24,", r",10, 4,", ror #31]!")\ TEST_P( "ldr"byte" r0, [r",0, 24,", #-2]") \ @@ -668,12 +672,12 @@ void kprobe_arm_test_cases(void) LOAD_STORE("") TEST_P( "str pc, [r",0,0,", #15*4]") - TEST_R( "str pc, [sp, r",2,15*4,"]") + TEST_UNSUPPORTED( "str pc, [sp, r2]") TEST_BF( "ldr pc, [sp, #15*4]") TEST_BF_R("ldr pc, [sp, r",2,15*4,"]") TEST_P( "str sp, [r",0,0,", #13*4]") - TEST_R( "str sp, [sp, r",2,13*4,"]") + TEST_UNSUPPORTED( "str sp, [sp, r2]") TEST_BF( "ldr sp, [sp, #13*4]") TEST_BF_R("ldr sp, [sp, r",2,13*4,"]") From 8d257e95a9e643518e72232bf852b054a3d06c95 Mon Sep 17 00:00:00 2001 From: "Jon Medhurst (Tixy)" Date: Mon, 5 Jan 2015 19:29:29 +0800 Subject: [PATCH 040/601] ARM: kprobes: Add test cases for stack consuming instructions These have extra 'checker' functions associated with them so lets make sure those get covered by testing. As they may create uninitialised space on the stack we also update the test code to ensure such space is consistent between test runs. This is done by disabling interrupts in setup_test_context(). Signed-off-by: Jon Medhurst --- arch/arm/probes/kprobes/test-arm.c | 17 +++++++++++++++-- arch/arm/probes/kprobes/test-core.c | 9 +++++++++ arch/arm/probes/kprobes/test-thumb.c | 12 ++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/arch/arm/probes/kprobes/test-arm.c b/arch/arm/probes/kprobes/test-arm.c index fdeb300b0fc80c..9b3b1b4a09391d 100644 --- a/arch/arm/probes/kprobes/test-arm.c +++ b/arch/arm/probes/kprobes/test-arm.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "test-core.h" @@ -478,6 +479,7 @@ void kprobe_arm_test_cases(void) TEST_RPR( "strh r",0, VAL1,", [r",1, 48,", -r",2, 24,"]") TEST_RPR( "streqh r",14,VAL2,", [r",11,0, ", r",12, 48,"]") TEST_UNSUPPORTED( "streqh r14, [r13, r12]") + TEST_UNSUPPORTED( "streqh r14, [r12, r13]") TEST_RPR( "strh r",1, VAL1,", [r",2, 24,", r",3, 48,"]!") TEST_RPR( "strneh r",12,VAL2,", [r",11,48,", -r",10,24,"]!") TEST_RPR( "strh r",2, VAL1,", [r",3, 24,"], r",4, 48,"") @@ -502,6 +504,9 @@ void kprobe_arm_test_cases(void) TEST_RP( "strplh r",12,VAL2,", [r",11,24,", #-4]!") TEST_RP( "strh r",2, VAL1,", [r",3, 24,"], #48") TEST_RP( "strh r",10,VAL2,", [r",9, 64,"], #-48") + TEST_RP( "strh r",3, VAL1,", [r",13,TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"]!") + TEST_UNSUPPORTED("strh r3, [r13, #-"__stringify(MAX_STACK_SIZE)"-8]!") + TEST_RP( "strh r",4, VAL1,", [r",14,TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"-8]!") TEST_UNSUPPORTED(__inst_arm(0xe1efc3b0) " @ strh r12, [pc, #48]!") TEST_UNSUPPORTED(__inst_arm(0xe0c9f3b0) " @ strh pc, [r9], #48") @@ -568,6 +573,7 @@ void kprobe_arm_test_cases(void) TEST_RPR( "strd r",0, VAL1,", [r",1, 48,", -r",2,24,"]") TEST_RPR( "strccd r",8, VAL2,", [r",11,0, ", r",12,48,"]") TEST_UNSUPPORTED( "strccd r8, [r13, r12]") + TEST_UNSUPPORTED( "strccd r8, [r12, r13]") TEST_RPR( "strd r",4, VAL1,", [r",2, 24,", r",3, 48,"]!") TEST_RPR( "strcsd r",12,VAL2,", [r",11,48,", -r",10,24,"]!") TEST_RPR( "strd r",2, VAL1,", [r",5, 24,"], r",4,48,"") @@ -591,6 +597,9 @@ void kprobe_arm_test_cases(void) TEST_RP( "strvcd r",12,VAL2,", [r",11,24,", #-16]!") TEST_RP( "strd r",2, VAL1,", [r",4, 24,"], #48") TEST_RP( "strd r",10,VAL2,", [r",9, 64,"], #-48") + TEST_RP( "strd r",6, VAL1,", [r",13,TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"]!") + TEST_UNSUPPORTED("strd r6, [r13, #-"__stringify(MAX_STACK_SIZE)"-8]!") + TEST_RP( "strd r",4, VAL1,", [r",12,TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"-8]!") TEST_UNSUPPORTED(__inst_arm(0xe1efc3f0) " @ strd r12, [pc, #48]!") TEST_P( "ldrd r0, [r",0, 24,", #-8]") @@ -639,16 +648,20 @@ void kprobe_arm_test_cases(void) TEST_RP( "str"byte" r",12,VAL2,", [r",11,24,", #-4]!") \ TEST_RP( "str"byte" r",2, VAL1,", [r",3, 24,"], #48") \ TEST_RP( "str"byte" r",10,VAL2,", [r",9, 64,"], #-48") \ + TEST_RP( "str"byte" r",3, VAL1,", [r",13,TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"]!") \ + TEST_UNSUPPORTED("str"byte" r3, [r13, #-"__stringify(MAX_STACK_SIZE)"-8]!") \ + TEST_RP( "str"byte" r",4, VAL1,", [r",10,TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"-8]!") \ TEST_RPR("str"byte" r",0, VAL1,", [r",1, 48,", -r",2, 24,"]") \ TEST_RPR("str"byte" r",14,VAL2,", [r",11,0, ", r",12, 48,"]") \ - TEST_UNSUPPORTED("str"byte" r14, [r13, r12]") \ + TEST_UNSUPPORTED("str"byte" r14, [r13, r12]") \ + TEST_UNSUPPORTED("str"byte" r14, [r12, r13]") \ TEST_RPR("str"byte" r",1, VAL1,", [r",2, 24,", r",3, 48,"]!") \ TEST_RPR("str"byte" r",12,VAL2,", [r",11,48,", -r",10,24,"]!") \ TEST_RPR("str"byte" r",2, VAL1,", [r",3, 24,"], r",4, 48,"") \ TEST_RPR("str"byte" r",10,VAL2,", [r",9, 48,"], -r",11,24,"") \ TEST_RPR("str"byte" r",0, VAL1,", [r",1, 24,", r",2, 32,", asl #1]")\ TEST_RPR("str"byte" r",14,VAL2,", [r",11,0, ", r",12, 32,", lsr #2]")\ - TEST_UNSUPPORTED("str"byte" r14, [r13, r12, lsr #2]")\ + TEST_UNSUPPORTED("str"byte" r14, [r13, r12, lsr #2]") \ TEST_RPR("str"byte" r",1, VAL1,", [r",2, 24,", r",3, 32,", asr #3]!")\ TEST_RPR("str"byte" r",12,VAL2,", [r",11,24,", r",10, 4,", ror #31]!")\ TEST_P( "ldr"byte" r0, [r",0, 24,", #-2]") \ diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index 7ab633d51954ab..7c5ddd5a6afd48 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -1196,6 +1196,13 @@ static void setup_test_context(struct pt_regs *regs) regs->uregs[arg->reg] = (unsigned long)current_stack + arg->val; memory_needs_checking = true; + /* + * Test memory at an address below SP is in danger of + * being altered by an interrupt occurring and pushing + * data onto the stack. Disable interrupts to stop this. + */ + if (arg->reg == 13) + regs->ARM_cpsr |= PSR_I_BIT; break; } case ARG_TYPE_MEM: { @@ -1272,6 +1279,8 @@ test_after_pre_handler(struct kprobe *p, struct pt_regs *regs) /* Undo any changes done to SP by the test case */ regs->ARM_sp = (unsigned long)current_stack; + /* Enable interrupts in case setup_test_context disabled them */ + regs->ARM_cpsr &= ~PSR_I_BIT; container_of(p, struct test_probe, kprobe)->hit = test_instance; return 0; diff --git a/arch/arm/probes/kprobes/test-thumb.c b/arch/arm/probes/kprobes/test-thumb.c index 6c6e9a9bb6757d..e8cf193db1ea30 100644 --- a/arch/arm/probes/kprobes/test-thumb.c +++ b/arch/arm/probes/kprobes/test-thumb.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "test-core.h" @@ -416,6 +417,9 @@ void kprobe_thumb32_test_cases(void) TEST_RR( "strd r",14,VAL2,", r",12,VAL1,", [sp, #16]!") TEST_RRP("strd r",1, VAL1,", r",0, VAL2,", [r",7, 24,"], #16") TEST_RR( "strd r",7, VAL2,", r",8, VAL1,", [sp], #-16") + TEST_RRP("strd r",6, VAL1,", r",7, VAL2,", [r",13, TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"]!") + TEST_UNSUPPORTED("strd r6, r7, [r13, #-"__stringify(MAX_STACK_SIZE)"-8]!") + TEST_RRP("strd r",4, VAL1,", r",5, VAL2,", [r",14, TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"-8]!") TEST_UNSUPPORTED(__inst_thumb32(0xe9efec04) " @ strd r14, r12, [pc, #16]!") TEST_UNSUPPORTED(__inst_thumb32(0xe8efec04) " @ strd r14, r12, [pc], #16") @@ -821,14 +825,22 @@ CONDITION_INSTRUCTIONS(22, TEST_RP( "str"size" r",14,VAL2,", [r",1, 256, ", #-128]!") \ TEST_RPR("str"size".w r",0, VAL1,", [r",1, 0,", r",2, 4,"]") \ TEST_RPR("str"size" r",14,VAL2,", [r",10,0,", r",11,4,", lsl #1]") \ + TEST_UNSUPPORTED("str"size" r0, [r13, r1]") \ TEST_R( "str"size".w r",7, VAL1,", [sp, #24]") \ TEST_RP( "str"size".w r",0, VAL2,", [r",0,0, "]") \ + TEST_RP( "str"size" r",6, VAL1,", [r",13, TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"]!") \ + TEST_UNSUPPORTED("str"size" r6, [r13, #-"__stringify(MAX_STACK_SIZE)"-8]!") \ + TEST_RP( "str"size" r",4, VAL2,", [r",12, TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"-8]!") \ TEST_UNSUPPORTED("str"size"t r0, [r1, #4]") SINGLE_STORE("b") SINGLE_STORE("h") SINGLE_STORE("") + TEST_UNSUPPORTED(__inst_thumb32(0xf801000d) " @ strb r0, [r1, r13]") + TEST_UNSUPPORTED(__inst_thumb32(0xf821000d) " @ strh r0, [r1, r13]") + TEST_UNSUPPORTED(__inst_thumb32(0xf841000d) " @ str r0, [r1, r13]") + TEST("str sp, [sp]") TEST_UNSUPPORTED(__inst_thumb32(0xf8cfe000) " @ str r14, [pc]") TEST_UNSUPPORTED(__inst_thumb32(0xf8cef000) " @ str pc, [r14]") From cbf6ab52add20b845f903decc973afbd5463c527 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 5 Jan 2015 19:29:32 +0800 Subject: [PATCH 041/601] kprobes: Pass the original kprobe for preparing optimized kprobe Pass the original kprobe for preparing an optimized kprobe arch-dep part, since for some architecture (e.g. ARM32) requires the information in original kprobe. Signed-off-by: Masami Hiramatsu Signed-off-by: Wang Nan Signed-off-by: Jon Medhurst --- arch/x86/kernel/kprobes/opt.c | 3 ++- include/linux/kprobes.h | 3 ++- kernel/kprobes.c | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 7c523bbf3dc8fc..0dd8d089c315e0 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -322,7 +322,8 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) * Target instructions MUST be relocatable (checked inside) * This is called when new aggr(opt)probe is allocated or reused. */ -int arch_prepare_optimized_kprobe(struct optimized_kprobe *op) +int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, + struct kprobe *__unused) { u8 *buf; int ret; diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 5297f9fa0ef26f..1ab54754a86d21 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -308,7 +308,8 @@ struct optimized_kprobe { /* Architecture dependent functions for direct jump optimization */ extern int arch_prepared_optinsn(struct arch_optimized_insn *optinsn); extern int arch_check_optimized_kprobe(struct optimized_kprobe *op); -extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op); +extern int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, + struct kprobe *orig); extern void arch_remove_optimized_kprobe(struct optimized_kprobe *op); extern void arch_optimize_kprobes(struct list_head *oplist); extern void arch_unoptimize_kprobes(struct list_head *oplist, diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 06f58309fed2d0..bad4e959f2f7b3 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -717,7 +717,7 @@ static void prepare_optimized_kprobe(struct kprobe *p) struct optimized_kprobe *op; op = container_of(p, struct optimized_kprobe, kp); - arch_prepare_optimized_kprobe(op); + arch_prepare_optimized_kprobe(op, p); } /* Allocate new optimized_kprobe and try to prepare optimized instructions */ @@ -731,7 +731,7 @@ static struct kprobe *alloc_aggr_kprobe(struct kprobe *p) INIT_LIST_HEAD(&op->list); op->kp.addr = p->addr; - arch_prepare_optimized_kprobe(op); + arch_prepare_optimized_kprobe(op, p); return &op->kp; } From 0dc016dbd820260b8ea74337980735b8c88d4ef2 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Fri, 9 Jan 2015 14:37:36 +0800 Subject: [PATCH 042/601] ARM: kprobes: enable OPTPROBES for ARM 32 This patch introduce kprobeopt for ARM 32. Limitations: - Currently only kernel compiled with ARM ISA is supported. - Offset between probe point and optinsn slot must not larger than 32MiB. Masami Hiramatsu suggests replacing 2 words, it will make things complex. Futher patch can make such optimization. Kprobe opt on ARM is relatively simpler than kprobe opt on x86 because ARM instruction is always 4 bytes aligned and 4 bytes long. This patch replace probed instruction by a 'b', branch to trampoline code and then calls optimized_callback(). optimized_callback() calls opt_pre_handler() to execute kprobe handler. It also emulate/simulate replaced instruction. When unregistering kprobe, the deferred manner of unoptimizer may leave branch instruction before optimizer is called. Different from x86_64, which only copy the probed insn after optprobe_template_end and reexecute them, this patch call singlestep to emulate/simulate the insn directly. Futher patch can optimize this behavior. Signed-off-by: Wang Nan Acked-by: Masami Hiramatsu Cc: Will Deacon Reviewed-by: Jon Medhurst (Tixy) Signed-off-by: Jon Medhurst --- arch/arm/Kconfig | 1 + arch/arm/{kernel => include/asm}/insn.h | 0 arch/arm/include/asm/kprobes.h | 29 +++ arch/arm/kernel/Makefile | 2 +- arch/arm/kernel/ftrace.c | 3 +- arch/arm/kernel/jump_label.c | 3 +- arch/arm/probes/kprobes/Makefile | 1 + arch/arm/probes/kprobes/core.c | 26 +- arch/arm/probes/kprobes/core.h | 2 + arch/arm/probes/kprobes/opt-arm.c | 322 ++++++++++++++++++++++++ 10 files changed, 377 insertions(+), 12 deletions(-) rename arch/arm/{kernel => include/asm}/insn.h (100%) create mode 100644 arch/arm/probes/kprobes/opt-arm.c diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 97d07ed60a0b7b..3d5dc2df835bf9 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -60,6 +60,7 @@ config ARM select HAVE_MEMBLOCK select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND select HAVE_OPROFILE if (HAVE_PERF_EVENTS) + select HAVE_OPTPROBES if !THUMB2_KERNEL select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP diff --git a/arch/arm/kernel/insn.h b/arch/arm/include/asm/insn.h similarity index 100% rename from arch/arm/kernel/insn.h rename to arch/arm/include/asm/insn.h diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h index 56f9ac68fbd180..50ff3bc7928e91 100644 --- a/arch/arm/include/asm/kprobes.h +++ b/arch/arm/include/asm/kprobes.h @@ -50,5 +50,34 @@ int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); +/* optinsn template addresses */ +extern __visible kprobe_opcode_t optprobe_template_entry; +extern __visible kprobe_opcode_t optprobe_template_val; +extern __visible kprobe_opcode_t optprobe_template_call; +extern __visible kprobe_opcode_t optprobe_template_end; +extern __visible kprobe_opcode_t optprobe_template_sub_sp; +extern __visible kprobe_opcode_t optprobe_template_add_sp; + +#define MAX_OPTIMIZED_LENGTH 4 +#define MAX_OPTINSN_SIZE \ + ((unsigned long)&optprobe_template_end - \ + (unsigned long)&optprobe_template_entry) +#define RELATIVEJUMP_SIZE 4 + +struct arch_optimized_insn { + /* + * copy of the original instructions. + * Different from x86, ARM kprobe_opcode_t is u32. + */ +#define MAX_COPIED_INSN DIV_ROUND_UP(RELATIVEJUMP_SIZE, sizeof(kprobe_opcode_t)) + kprobe_opcode_t copied_insn[MAX_COPIED_INSN]; + /* detour code buffer */ + kprobe_opcode_t *insn; + /* + * We always copy one instruction on ARM, + * so size will always be 4, and unlike x86, there is no + * need for a size field. + */ +}; #endif /* _ARM_KPROBES_H */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 9c51a433e02552..902397dd100070 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -52,7 +52,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o insn.o patch.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o # Main staffs in KPROBES are in arch/arm/probes/ . -obj-$(CONFIG_KPROBES) += patch.o +obj-$(CONFIG_KPROBES) += patch.o insn.o obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o obj-$(CONFIG_ARM_THUMBEE) += thumbee.o obj-$(CONFIG_KGDB) += kgdb.o patch.o diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index b8c75e45a950af..709ee1d6d4df48 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -20,8 +20,7 @@ #include #include #include - -#include "insn.h" +#include #ifdef CONFIG_THUMB2_KERNEL #define NOP 0xf85deb04 /* pop.w {lr} */ diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c index d8da075959bf38..e39cbf488cfebc 100644 --- a/arch/arm/kernel/jump_label.c +++ b/arch/arm/kernel/jump_label.c @@ -1,8 +1,7 @@ #include #include #include - -#include "insn.h" +#include #ifdef HAVE_JUMP_LABEL diff --git a/arch/arm/probes/kprobes/Makefile b/arch/arm/probes/kprobes/Makefile index bc8d504c3d7824..76a36bf102b7fc 100644 --- a/arch/arm/probes/kprobes/Makefile +++ b/arch/arm/probes/kprobes/Makefile @@ -7,5 +7,6 @@ obj-$(CONFIG_KPROBES) += actions-thumb.o checkers-thumb.o test-kprobes-objs += test-thumb.o else obj-$(CONFIG_KPROBES) += actions-arm.o checkers-arm.o +obj-$(CONFIG_OPTPROBES) += opt-arm.o test-kprobes-objs += test-arm.o endif diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 3a58db4cc1c6b2..a4ec240ee7ba38 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -163,19 +163,31 @@ void __kprobes arch_arm_kprobe(struct kprobe *p) * memory. It is also needed to atomically set the two half-words of a 32-bit * Thumb breakpoint. */ -int __kprobes __arch_disarm_kprobe(void *p) -{ - struct kprobe *kp = p; - void *addr = (void *)((uintptr_t)kp->addr & ~1); - - __patch_text(addr, kp->opcode); +struct patch { + void *addr; + unsigned int insn; +}; +static int __kprobes_remove_breakpoint(void *data) +{ + struct patch *p = data; + __patch_text(p->addr, p->insn); return 0; } +void __kprobes kprobes_remove_breakpoint(void *addr, unsigned int insn) +{ + struct patch p = { + .addr = addr, + .insn = insn, + }; + stop_machine(__kprobes_remove_breakpoint, &p, cpu_online_mask); +} + void __kprobes arch_disarm_kprobe(struct kprobe *p) { - stop_machine(__arch_disarm_kprobe, p, cpu_online_mask); + kprobes_remove_breakpoint((void *)((uintptr_t)p->addr & ~1), + p->opcode); } void __kprobes arch_remove_kprobe(struct kprobe *p) diff --git a/arch/arm/probes/kprobes/core.h b/arch/arm/probes/kprobes/core.h index f88c79fe632a5f..b3036c587a764d 100644 --- a/arch/arm/probes/kprobes/core.h +++ b/arch/arm/probes/kprobes/core.h @@ -30,6 +30,8 @@ #define KPROBE_THUMB16_BREAKPOINT_INSTRUCTION 0xde18 #define KPROBE_THUMB32_BREAKPOINT_INSTRUCTION 0xf7f0a018 +extern void kprobes_remove_breakpoint(void *addr, unsigned int insn); + enum probes_insn __kprobes kprobe_decode_ldmstm(kprobe_opcode_t insn, struct arch_probes_insn *asi, const struct decode_header *h); diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c new file mode 100644 index 00000000000000..13d5232118df16 --- /dev/null +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -0,0 +1,322 @@ +/* + * Kernel Probes Jump Optimization (Optprobes) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004 + * Copyright (C) Hitachi Ltd., 2012 + * Copyright (C) Huawei Inc., 2014 + */ + +#include +#include +#include +#include +/* for arm_gen_branch */ +#include +/* for patch_text */ +#include + +#include "core.h" + +/* + * NOTE: the first sub and add instruction will be modified according + * to the stack cost of the instruction. + */ +asm ( + ".global optprobe_template_entry\n" + "optprobe_template_entry:\n" + ".global optprobe_template_sub_sp\n" + "optprobe_template_sub_sp:" + " sub sp, sp, #0xff\n" + " stmia sp, {r0 - r14} \n" + ".global optprobe_template_add_sp\n" + "optprobe_template_add_sp:" + " add r3, sp, #0xff\n" + " str r3, [sp, #52]\n" + " mrs r4, cpsr\n" + " str r4, [sp, #64]\n" + " mov r1, sp\n" + " ldr r0, 1f\n" + " ldr r2, 2f\n" + /* + * AEABI requires an 8-bytes alignment stack. If + * SP % 8 != 0 (SP % 4 == 0 should be ensured), + * alloc more bytes here. + */ + " and r4, sp, #4\n" + " sub sp, sp, r4\n" +#if __LINUX_ARM_ARCH__ >= 5 + " blx r2\n" +#else + " mov lr, pc\n" + " mov pc, r2\n" +#endif + " add sp, sp, r4\n" + " ldr r1, [sp, #64]\n" + " tst r1, #"__stringify(PSR_T_BIT)"\n" + " ldrne r2, [sp, #60]\n" + " orrne r2, #1\n" + " strne r2, [sp, #60] @ set bit0 of PC for thumb\n" + " msr cpsr_cxsf, r1\n" + " ldmia sp, {r0 - r15}\n" + ".global optprobe_template_val\n" + "optprobe_template_val:\n" + "1: .long 0\n" + ".global optprobe_template_call\n" + "optprobe_template_call:\n" + "2: .long 0\n" + ".global optprobe_template_end\n" + "optprobe_template_end:\n"); + +#define TMPL_VAL_IDX \ + ((unsigned long *)&optprobe_template_val - (unsigned long *)&optprobe_template_entry) +#define TMPL_CALL_IDX \ + ((unsigned long *)&optprobe_template_call - (unsigned long *)&optprobe_template_entry) +#define TMPL_END_IDX \ + ((unsigned long *)&optprobe_template_end - (unsigned long *)&optprobe_template_entry) +#define TMPL_ADD_SP \ + ((unsigned long *)&optprobe_template_add_sp - (unsigned long *)&optprobe_template_entry) +#define TMPL_SUB_SP \ + ((unsigned long *)&optprobe_template_sub_sp - (unsigned long *)&optprobe_template_entry) + +/* + * ARM can always optimize an instruction when using ARM ISA, except + * instructions like 'str r0, [sp, r1]' which store to stack and unable + * to determine stack space consumption statically. + */ +int arch_prepared_optinsn(struct arch_optimized_insn *optinsn) +{ + return optinsn->insn != NULL; +} + +/* + * In ARM ISA, kprobe opt always replace one instruction (4 bytes + * aligned and 4 bytes long). It is impossible to encounter another + * kprobe in the address range. So always return 0. + */ +int arch_check_optimized_kprobe(struct optimized_kprobe *op) +{ + return 0; +} + +/* Caller must ensure addr & 3 == 0 */ +static int can_optimize(struct kprobe *kp) +{ + if (kp->ainsn.stack_space < 0) + return 0; + /* + * 255 is the biggest imm can be used in 'sub r0, r0, #'. + * Number larger than 255 needs special encoding. + */ + if (kp->ainsn.stack_space > 255 - sizeof(struct pt_regs)) + return 0; + return 1; +} + +/* Free optimized instruction slot */ +static void +__arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) +{ + if (op->optinsn.insn) { + free_optinsn_slot(op->optinsn.insn, dirty); + op->optinsn.insn = NULL; + } +} + +extern void kprobe_handler(struct pt_regs *regs); + +static void +optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) +{ + unsigned long flags; + struct kprobe *p = &op->kp; + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + /* Save skipped registers */ + regs->ARM_pc = (unsigned long)op->kp.addr; + regs->ARM_ORIG_r0 = ~0UL; + + local_irq_save(flags); + + if (kprobe_running()) { + kprobes_inc_nmissed_count(&op->kp); + } else { + __this_cpu_write(current_kprobe, &op->kp); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + opt_pre_handler(&op->kp, regs); + __this_cpu_write(current_kprobe, NULL); + } + + /* In each case, we must singlestep the replaced instruction. */ + op->kp.ainsn.insn_singlestep(p->opcode, &p->ainsn, regs); + + local_irq_restore(flags); +} + +int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig) +{ + kprobe_opcode_t *code; + unsigned long rel_chk; + unsigned long val; + unsigned long stack_protect = sizeof(struct pt_regs); + + if (!can_optimize(orig)) + return -EILSEQ; + + code = get_optinsn_slot(); + if (!code) + return -ENOMEM; + + /* + * Verify if the address gap is in 32MiB range, because this uses + * a relative jump. + * + * kprobe opt use a 'b' instruction to branch to optinsn.insn. + * According to ARM manual, branch instruction is: + * + * 31 28 27 24 23 0 + * +------+---+---+---+---+----------------+ + * | cond | 1 | 0 | 1 | 0 | imm24 | + * +------+---+---+---+---+----------------+ + * + * imm24 is a signed 24 bits integer. The real branch offset is computed + * by: imm32 = SignExtend(imm24:'00', 32); + * + * So the maximum forward branch should be: + * (0x007fffff << 2) = 0x01fffffc = 0x1fffffc + * The maximum backword branch should be: + * (0xff800000 << 2) = 0xfe000000 = -0x2000000 + * + * We can simply check (rel & 0xfe000003): + * if rel is positive, (rel & 0xfe000000) shoule be 0 + * if rel is negitive, (rel & 0xfe000000) should be 0xfe000000 + * the last '3' is used for alignment checking. + */ + rel_chk = (unsigned long)((long)code - + (long)orig->addr + 8) & 0xfe000003; + + if ((rel_chk != 0) && (rel_chk != 0xfe000000)) { + /* + * Different from x86, we free code buf directly instead of + * calling __arch_remove_optimized_kprobe() because + * we have not fill any field in op. + */ + free_optinsn_slot(code, 0); + return -ERANGE; + } + + /* Copy arch-dep-instance from template. */ + memcpy(code, &optprobe_template_entry, + TMPL_END_IDX * sizeof(kprobe_opcode_t)); + + /* Adjust buffer according to instruction. */ + BUG_ON(orig->ainsn.stack_space < 0); + + stack_protect += orig->ainsn.stack_space; + + /* Should have been filtered by can_optimize(). */ + BUG_ON(stack_protect > 255); + + /* Create a 'sub sp, sp, #' */ + code[TMPL_SUB_SP] = __opcode_to_mem_arm(0xe24dd000 | stack_protect); + /* Create a 'add r3, sp, #' */ + code[TMPL_ADD_SP] = __opcode_to_mem_arm(0xe28d3000 | stack_protect); + + /* Set probe information */ + val = (unsigned long)op; + code[TMPL_VAL_IDX] = val; + + /* Set probe function call */ + val = (unsigned long)optimized_callback; + code[TMPL_CALL_IDX] = val; + + flush_icache_range((unsigned long)code, + (unsigned long)(&code[TMPL_END_IDX])); + + /* Set op->optinsn.insn means prepared. */ + op->optinsn.insn = code; + return 0; +} + +void __kprobes arch_optimize_kprobes(struct list_head *oplist) +{ + struct optimized_kprobe *op, *tmp; + + list_for_each_entry_safe(op, tmp, oplist, list) { + unsigned long insn; + WARN_ON(kprobe_disabled(&op->kp)); + + /* + * Backup instructions which will be replaced + * by jump address + */ + memcpy(op->optinsn.copied_insn, op->kp.addr, + RELATIVEJUMP_SIZE); + + insn = arm_gen_branch((unsigned long)op->kp.addr, + (unsigned long)op->optinsn.insn); + BUG_ON(insn == 0); + + /* + * Make it a conditional branch if replaced insn + * is consitional + */ + insn = (__mem_to_opcode_arm( + op->optinsn.copied_insn[0]) & 0xf0000000) | + (insn & 0x0fffffff); + + /* + * Similar to __arch_disarm_kprobe, operations which + * removing breakpoints must be wrapped by stop_machine + * to avoid racing. + */ + kprobes_remove_breakpoint(op->kp.addr, insn); + + list_del_init(&op->list); + } +} + +void arch_unoptimize_kprobe(struct optimized_kprobe *op) +{ + arch_arm_kprobe(&op->kp); +} + +/* + * Recover original instructions and breakpoints from relative jumps. + * Caller must call with locking kprobe_mutex. + */ +void arch_unoptimize_kprobes(struct list_head *oplist, + struct list_head *done_list) +{ + struct optimized_kprobe *op, *tmp; + + list_for_each_entry_safe(op, tmp, oplist, list) { + arch_unoptimize_kprobe(op); + list_move(&op->list, done_list); + } +} + +int arch_within_optimized_kprobe(struct optimized_kprobe *op, + unsigned long addr) +{ + return ((unsigned long)op->kp.addr <= addr && + (unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr); +} + +void arch_remove_optimized_kprobe(struct optimized_kprobe *op) +{ + __arch_remove_optimized_kprobe(op, 1); +} From 4cd872d973c7e1ce6a41e36db9d9352152da32d4 Mon Sep 17 00:00:00 2001 From: "Jon Medhurst (Tixy)" Date: Mon, 5 Jan 2015 19:29:40 +0800 Subject: [PATCH 043/601] ARM: kprobes: Fix unreliable MRS instruction tests For the instruction 'mrs Rn, cpsr' the resulting value of Rn can vary due to external factors we can't control. So get the test code to mask out these indeterminate bits. Signed-off-by: Jon Medhurst --- arch/arm/probes/kprobes/test-arm.c | 6 ++--- arch/arm/probes/kprobes/test-core.c | 19 ++++++++-------- arch/arm/probes/kprobes/test-core.h | 33 +++++++++++++++++++++++----- arch/arm/probes/kprobes/test-thumb.c | 4 ++-- 4 files changed, 43 insertions(+), 19 deletions(-) diff --git a/arch/arm/probes/kprobes/test-arm.c b/arch/arm/probes/kprobes/test-arm.c index 9b3b1b4a09391d..e72b07e8cd9acc 100644 --- a/arch/arm/probes/kprobes/test-arm.c +++ b/arch/arm/probes/kprobes/test-arm.c @@ -204,9 +204,9 @@ void kprobe_arm_test_cases(void) #endif TEST_GROUP("Miscellaneous instructions") - TEST("mrs r0, cpsr") - TEST("mrspl r7, cpsr") - TEST("mrs r14, cpsr") + TEST_RMASKED("mrs r",0,~PSR_IGNORE_BITS,", cpsr") + TEST_RMASKED("mrspl r",7,~PSR_IGNORE_BITS,", cpsr") + TEST_RMASKED("mrs r",14,~PSR_IGNORE_BITS,", cpsr") TEST_UNSUPPORTED(__inst_arm(0xe10ff000) " @ mrs r15, cpsr") TEST_UNSUPPORTED("mrs r0, spsr") TEST_UNSUPPORTED("mrs lr, spsr") diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index 7c5ddd5a6afd48..e495127d757105 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -1056,15 +1056,6 @@ static int test_case_run_count; static bool test_case_is_thumb; static int test_instance; -/* - * We ignore the state of the imprecise abort disable flag (CPSR.A) because this - * can change randomly as the kernel doesn't take care to preserve or initialise - * this across context switches. Also, with Security Extentions, the flag may - * not be under control of the kernel; for this reason we ignore the state of - * the FIQ disable flag CPSR.F as well. - */ -#define PSR_IGNORE_BITS (PSR_A_BIT | PSR_F_BIT) - static unsigned long test_check_cc(int cc, unsigned long cpsr) { int ret = arm_check_condition(cc << 28, cpsr); @@ -1271,11 +1262,21 @@ test_case_pre_handler(struct kprobe *p, struct pt_regs *regs) static int __kprobes test_after_pre_handler(struct kprobe *p, struct pt_regs *regs) { + struct test_arg *args; + if (container_of(p, struct test_probe, kprobe)->hit == test_instance) return 0; /* Already run for this test instance */ result_regs = *regs; + + /* Mask out results which are indeterminate */ result_regs.ARM_cpsr &= ~PSR_IGNORE_BITS; + for (args = current_args; args[0].type != ARG_TYPE_END; ++args) + if (args[0].type == ARG_TYPE_REG_MASKED) { + struct test_arg_regptr *arg = + (struct test_arg_regptr *)args; + result_regs.uregs[arg->reg] &= arg->val; + } /* Undo any changes done to SP by the test case */ regs->ARM_sp = (unsigned long)current_stack; diff --git a/arch/arm/probes/kprobes/test-core.h b/arch/arm/probes/kprobes/test-core.h index 9991754947bc1f..94285203e9f744 100644 --- a/arch/arm/probes/kprobes/test-core.h +++ b/arch/arm/probes/kprobes/test-core.h @@ -45,10 +45,11 @@ extern int kprobe_test_cc_position; * */ -#define ARG_TYPE_END 0 -#define ARG_TYPE_REG 1 -#define ARG_TYPE_PTR 2 -#define ARG_TYPE_MEM 3 +#define ARG_TYPE_END 0 +#define ARG_TYPE_REG 1 +#define ARG_TYPE_PTR 2 +#define ARG_TYPE_MEM 3 +#define ARG_TYPE_REG_MASKED 4 #define ARG_FLAG_UNSUPPORTED 0x01 #define ARG_FLAG_SUPPORTED 0x02 @@ -61,7 +62,7 @@ struct test_arg { }; struct test_arg_regptr { - u8 type; /* ARG_TYPE_REG or ARG_TYPE_PTR */ + u8 type; /* ARG_TYPE_REG or ARG_TYPE_PTR or ARG_TYPE_REG_MASKED */ u8 reg; u8 _padding[2]; u32 val; @@ -138,6 +139,12 @@ struct test_arg_end { ".short 0 \n\t" \ ".word "#val" \n\t" +#define TEST_ARG_REG_MASKED(reg, val) \ + ".byte "__stringify(ARG_TYPE_REG_MASKED)" \n\t" \ + ".byte "#reg" \n\t" \ + ".short 0 \n\t" \ + ".word "#val" \n\t" + #define TEST_ARG_END(flags) \ ".byte "__stringify(ARG_TYPE_END)" \n\t" \ ".byte "TEST_ISA flags" \n\t" \ @@ -395,6 +402,22 @@ struct test_arg_end { " "codex" \n\t" \ TESTCASE_END +#define TEST_RMASKED(code1, reg, mask, code2) \ + TESTCASE_START(code1 #reg code2) \ + TEST_ARG_REG_MASKED(reg, mask) \ + TEST_ARG_END("") \ + TEST_INSTRUCTION(code1 #reg code2) \ + TESTCASE_END + +/* + * We ignore the state of the imprecise abort disable flag (CPSR.A) because this + * can change randomly as the kernel doesn't take care to preserve or initialise + * this across context switches. Also, with Security Extensions, the flag may + * not be under control of the kernel; for this reason we ignore the state of + * the FIQ disable flag CPSR.F as well. + */ +#define PSR_IGNORE_BITS (PSR_A_BIT | PSR_F_BIT) + /* * Macros for defining space directives spread over multiple lines. diff --git a/arch/arm/probes/kprobes/test-thumb.c b/arch/arm/probes/kprobes/test-thumb.c index e8cf193db1ea30..b683b4517458ce 100644 --- a/arch/arm/probes/kprobes/test-thumb.c +++ b/arch/arm/probes/kprobes/test-thumb.c @@ -778,8 +778,8 @@ CONDITION_INSTRUCTIONS(22, TEST_UNSUPPORTED("subs pc, lr, #4") - TEST("mrs r0, cpsr") - TEST("mrs r14, cpsr") + TEST_RMASKED("mrs r",0,~PSR_IGNORE_BITS,", cpsr") + TEST_RMASKED("mrs r",14,~PSR_IGNORE_BITS,", cpsr") TEST_UNSUPPORTED(__inst_thumb32(0xf3ef8d00) " @ mrs sp, spsr") TEST_UNSUPPORTED(__inst_thumb32(0xf3ef8f00) " @ mrs pc, spsr") TEST_UNSUPPORTED("mrs r0, spsr") From 28a1899db30a9325498aef114055506286dc8010 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 5 Jan 2015 19:29:44 +0800 Subject: [PATCH 044/601] ARM: kprobes: check register usage for probed instruction. This patch utilizes the previously introduced checker to check register usage for probed ARM instruction and saves it in a mask. A further patch will use such information to avoid simulation or emulation. Signed-off-by: Wang Nan Reviewed-by: Jon Medhurst Signed-off-by: Jon Medhurst --- arch/arm/include/asm/probes.h | 1 + arch/arm/probes/decode.c | 7 ++ arch/arm/probes/kprobes/actions-arm.c | 2 +- arch/arm/probes/kprobes/checkers-arm.c | 93 ++++++++++++++++++++++++++ arch/arm/probes/kprobes/checkers.h | 1 + 5 files changed, 103 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/probes.h b/arch/arm/include/asm/probes.h index cd9e81588d830b..b668e60f759cc5 100644 --- a/arch/arm/include/asm/probes.h +++ b/arch/arm/include/asm/probes.h @@ -41,6 +41,7 @@ struct arch_probes_insn { probes_insn_singlestep_t *insn_singlestep; probes_insn_fn_t *insn_fn; int stack_space; + unsigned long register_usage_flags; }; #endif /* __ASSEMBLY__ */ diff --git a/arch/arm/probes/decode.c b/arch/arm/probes/decode.c index f9d7c423f2cc0a..880ebe0cdf1908 100644 --- a/arch/arm/probes/decode.c +++ b/arch/arm/probes/decode.c @@ -435,6 +435,13 @@ probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi, */ asi->stack_space = 0; + /* + * Similarly to stack_space, register_usage_flags is filled by + * checkers. Its default value is set to ~0, which is 'all + * registers are used', to prevent any potential optimization. + */ + asi->register_usage_flags = ~0UL; + if (emulate) insn = prepare_emulated_insn(insn, asi, thumb); diff --git a/arch/arm/probes/kprobes/actions-arm.c b/arch/arm/probes/kprobes/actions-arm.c index 06988ef7eeb775..b9056d649607f3 100644 --- a/arch/arm/probes/kprobes/actions-arm.c +++ b/arch/arm/probes/kprobes/actions-arm.c @@ -341,4 +341,4 @@ const union decode_action kprobes_arm_actions[NUM_PROBES_ARM_ACTIONS] = { [PROBES_LDMSTM] = {.decoder = kprobe_decode_ldmstm} }; -const struct decode_checker *kprobes_arm_checkers[] = {arm_stack_checker, NULL}; +const struct decode_checker *kprobes_arm_checkers[] = {arm_stack_checker, arm_regs_checker, NULL}; diff --git a/arch/arm/probes/kprobes/checkers-arm.c b/arch/arm/probes/kprobes/checkers-arm.c index f8176631d2a546..7b9817333b6897 100644 --- a/arch/arm/probes/kprobes/checkers-arm.c +++ b/arch/arm/probes/kprobes/checkers-arm.c @@ -97,3 +97,96 @@ const struct decode_checker arm_stack_checker[NUM_PROBES_ARM_ACTIONS] = { [PROBES_STORE] = {.checker = arm_check_stack}, [PROBES_LDMSTM] = {.checker = arm_check_stack}, }; + +static enum probes_insn __kprobes arm_check_regs_nouse(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *h) +{ + asi->register_usage_flags = 0; + return INSN_GOOD; +} + +static enum probes_insn arm_check_regs_normal(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *h) +{ + u32 regs = h->type_regs.bits >> DECODE_TYPE_BITS; + int i; + + asi->register_usage_flags = 0; + for (i = 0; i < 5; regs >>= 4, insn >>= 4, i++) + if (regs & 0xf) + asi->register_usage_flags |= 1 << (insn & 0xf); + + return INSN_GOOD; +} + + +static enum probes_insn arm_check_regs_ldmstm(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *h) +{ + unsigned int reglist = insn & 0xffff; + unsigned int rn = (insn >> 16) & 0xf; + asi->register_usage_flags = reglist | (1 << rn); + return INSN_GOOD; +} + +static enum probes_insn arm_check_regs_mov_ip_sp(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *h) +{ + /* Instruction is 'mov ip, sp' i.e. 'mov r12, r13' */ + asi->register_usage_flags = (1 << 12) | (1<< 13); + return INSN_GOOD; +} + +/* + * | Rn |Rt/d| | Rm | + * LDRD (register) cccc 000x x0x0 xxxx xxxx xxxx 1101 xxxx + * STRD (register) cccc 000x x0x0 xxxx xxxx xxxx 1111 xxxx + * | Rn |Rt/d| |imm4L| + * LDRD (immediate) cccc 000x x1x0 xxxx xxxx xxxx 1101 xxxx + * STRD (immediate) cccc 000x x1x0 xxxx xxxx xxxx 1111 xxxx + * + * Such instructions access Rt/d and its next register, so different + * from others, a specific checker is required to handle this extra + * implicit register usage. + */ +static enum probes_insn arm_check_regs_ldrdstrd(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *h) +{ + int rdt = (insn >> 12) & 0xf; + arm_check_regs_normal(insn, asi, h); + asi->register_usage_flags |= 1 << (rdt + 1); + return INSN_GOOD; +} + + +const struct decode_checker arm_regs_checker[NUM_PROBES_ARM_ACTIONS] = { + [PROBES_MRS] = {.checker = arm_check_regs_normal}, + [PROBES_SATURATING_ARITHMETIC] = {.checker = arm_check_regs_normal}, + [PROBES_MUL1] = {.checker = arm_check_regs_normal}, + [PROBES_MUL2] = {.checker = arm_check_regs_normal}, + [PROBES_MUL_ADD_LONG] = {.checker = arm_check_regs_normal}, + [PROBES_MUL_ADD] = {.checker = arm_check_regs_normal}, + [PROBES_LOAD] = {.checker = arm_check_regs_normal}, + [PROBES_LOAD_EXTRA] = {.checker = arm_check_regs_normal}, + [PROBES_STORE] = {.checker = arm_check_regs_normal}, + [PROBES_STORE_EXTRA] = {.checker = arm_check_regs_normal}, + [PROBES_DATA_PROCESSING_REG] = {.checker = arm_check_regs_normal}, + [PROBES_DATA_PROCESSING_IMM] = {.checker = arm_check_regs_normal}, + [PROBES_SEV] = {.checker = arm_check_regs_nouse}, + [PROBES_WFE] = {.checker = arm_check_regs_nouse}, + [PROBES_SATURATE] = {.checker = arm_check_regs_normal}, + [PROBES_REV] = {.checker = arm_check_regs_normal}, + [PROBES_MMI] = {.checker = arm_check_regs_normal}, + [PROBES_PACK] = {.checker = arm_check_regs_normal}, + [PROBES_EXTEND] = {.checker = arm_check_regs_normal}, + [PROBES_EXTEND_ADD] = {.checker = arm_check_regs_normal}, + [PROBES_BITFIELD] = {.checker = arm_check_regs_normal}, + [PROBES_LDMSTM] = {.checker = arm_check_regs_ldmstm}, + [PROBES_MOV_IP_SP] = {.checker = arm_check_regs_mov_ip_sp}, + [PROBES_LDRSTRD] = {.checker = arm_check_regs_ldrdstrd}, +}; diff --git a/arch/arm/probes/kprobes/checkers.h b/arch/arm/probes/kprobes/checkers.h index bddfa0e82389c6..cf6c9e74d66632 100644 --- a/arch/arm/probes/kprobes/checkers.h +++ b/arch/arm/probes/kprobes/checkers.h @@ -47,6 +47,7 @@ extern const union decode_action stack_check_actions[]; #ifndef CONFIG_THUMB2_KERNEL extern const struct decode_checker arm_stack_checker[]; +extern const struct decode_checker arm_regs_checker[]; #else #endif extern const struct decode_checker t32_stack_checker[]; From 615fde79feb854bcda56abf9e5cfcd6638b3d310 Mon Sep 17 00:00:00 2001 From: "George G. Davis" Date: Fri, 9 Jan 2015 09:29:05 -0500 Subject: [PATCH 045/601] drivers: of: Export of_reserved_mem_device_{init,release} Export of_reserved_mem_device_{init,release} so that modules can initialize and release their assigned per-device cma_area. Signed-off-by: George G. Davis [robh: s/EXPORT_SYMBOL/EXPORT_SYMBOL_GPL/] Signed-off-by: Rob Herring --- drivers/of/of_reserved_mem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index dc566b38645f6f..726ebe79281301 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -265,6 +265,7 @@ int of_reserved_mem_device_init(struct device *dev) return ret; } +EXPORT_SYMBOL_GPL(of_reserved_mem_device_init); /** * of_reserved_mem_device_release() - release reserved memory device structures @@ -289,3 +290,4 @@ void of_reserved_mem_device_release(struct device *dev) rmem->ops->device_release(rmem, dev); } +EXPORT_SYMBOL_GPL(of_reserved_mem_device_release); From 1b4e119b7aacce72c117efa8de776bf8e860551a Mon Sep 17 00:00:00 2001 From: Alan Tull Date: Fri, 9 Jan 2015 13:00:53 -0600 Subject: [PATCH 046/601] doc: add bindings document for altera fpga manager New bindings document for Altera fpga manager. Signed-off-by: Alan Tull Acked-by: Rob Herring Signed-off-by: Rob Herring --- .../bindings/fpga/altera-socfpga-fpga-mgr.txt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 Documentation/devicetree/bindings/fpga/altera-socfpga-fpga-mgr.txt diff --git a/Documentation/devicetree/bindings/fpga/altera-socfpga-fpga-mgr.txt b/Documentation/devicetree/bindings/fpga/altera-socfpga-fpga-mgr.txt new file mode 100644 index 00000000000000..9b027a615486a9 --- /dev/null +++ b/Documentation/devicetree/bindings/fpga/altera-socfpga-fpga-mgr.txt @@ -0,0 +1,17 @@ +Altera SOCFPGA FPGA Manager + +Required properties: +- compatible : should contain "altr,socfpga-fpga-mgr" +- reg : base address and size for memory mapped io. + - The first index is for FPGA manager register access. + - The second index is for writing FPGA configuration data. +- interrupts : interrupt for the FPGA Manager device. + +Example: + + hps_0_fpgamgr: fpgamgr@0xff706000 { + compatible = "altr,socfpga-fpga-mgr"; + reg = <0xFF706000 0x1000 + 0xFFB90000 0x1000>; + interrupts = <0 175 4>; + }; From 96225fdf69e229d190a39a49685e30fb2348fbc1 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Fri, 9 Jan 2015 23:55:20 +0100 Subject: [PATCH 047/601] DT: Vendors: Add Everspin Everspin is a vendor of MRAM devices. Add them to the list of Vendors. Signed-off-by: Andrew Lunn Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index d443279c95dca7..6b750fd299cdc8 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -56,6 +56,7 @@ epson Seiko Epson Corp. est ESTeem Wireless Modems eukrea Eukréa Electromatique everest Everest Semiconductor Co. Ltd. +everspin Everspin Technologies, Inc. excito Excito fcs Fairchild Semiconductor fsl Freescale Semiconductor From 9a4305bde41e87a2c56b560ebd1783cf3fbbcea3 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 10 Jan 2015 22:14:17 +0100 Subject: [PATCH 048/601] pinctrl: fix up device tree bindings After the Nomadik pin controller was force migrated to generic pin control bindings, some leftovers in the documentation need to be cleaned up. The code and device trees are already migrated. Signed-off-by: Linus Walleij Signed-off-by: Rob Herring --- .../bindings/pinctrl/ste,nomadik.txt | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/ste,nomadik.txt b/Documentation/devicetree/bindings/pinctrl/ste,nomadik.txt index 6b33b9f18e8834..f63fcb3ed35288 100644 --- a/Documentation/devicetree/bindings/pinctrl/ste,nomadik.txt +++ b/Documentation/devicetree/bindings/pinctrl/ste,nomadik.txt @@ -16,17 +16,22 @@ mux function to select on those pin(s)/group(s), and various pin configuration parameters, such as input, output, pull up, pull down... The name of each subnode is not important; all subnodes should be enumerated -and processed purely based on their content. +and processed purely based on their content. The subnodes use the generic +pin multiplexing node layout from the standard pin control bindings +(see pinctrl-bindings.txt): -Required subnode-properties: -- ste,pins : An array of strings. Each string contains the name of a pin or - group. - -Optional subnode-properties: -- ste,function: A string containing the name of the function to mux to the +Required pin multiplexing subnode properties: +- function: A string containing the name of the function to mux to the pin or group. +- groups : An array of strings. Each string contains the name of a pin + group that will be combined with the function to form a multiplexing + set-up. -- ste,config: Handle of pin configuration node (e.g. ste,config = <&slpm_in_wkup_pdis>) +Required pin configuration subnode properties: +- pins: A string array describing the pins affected by the configuration + in the node. +- ste,config: Handle of pin configuration node + (e.g. ste,config = <&slpm_in_wkup_pdis>) - ste,input : <0/1/2> 0: input with no pull @@ -97,32 +102,32 @@ Example board file extract: uart0 { uart0_default_mux: uart0_mux { u0_default_mux { - ste,function = "u0"; - ste,pins = "u0_a_1"; + function = "u0"; + pins = "u0_a_1"; }; }; uart0_default_mode: uart0_default { uart0_default_cfg1 { - ste,pins = "GPIO0", "GPIO2"; + pins = "GPIO0", "GPIO2"; ste,input = <1>; }; uart0_default_cfg2 { - ste,pins = "GPIO1", "GPIO3"; + pins = "GPIO1", "GPIO3"; ste,output = <1>; }; }; uart0_sleep_mode: uart0_sleep { uart0_sleep_cfg1 { - ste,pins = "GPIO0", "GPIO2"; + pins = "GPIO0", "GPIO2"; ste,config = <&slpm_in_wkup_pdis>; }; uart0_sleep_cfg2 { - ste,pins = "GPIO1"; + pins = "GPIO1"; ste,config = <&slpm_out_hi_wkup_pdis>; }; uart0_sleep_cfg3 { - ste,pins = "GPIO3"; + pins = "GPIO3"; ste,config = <&slpm_out_wkup_pdis>; }; }; From 3ce04b4a9fdc30b6ec651e477dd08fee4e48f9aa Mon Sep 17 00:00:00 2001 From: Gaurav Minocha Date: Sat, 10 Jan 2015 23:19:51 -0800 Subject: [PATCH 049/601] Removes OF_UNITTEST dependency on OF_DYNAMIC config symbol This patch intends to remove the unittests dependency on the functions defined in dynamic.c. So, rather than calling of_attach_node defined in dynamic.c, minimal functionality required to attach a new node is re-defined in unittest.c. Also, now after executing the tests the test data is not removed from the device tree so there is no need to call of_detach_node. Tested with and without OF_DYNAMIC enabled on ppc, arm and x86 Signed-off-by: Gaurav Minocha Signed-off-by: Rob Herring --- drivers/of/Kconfig | 1 - drivers/of/unittest.c | 70 ++++++++----------------------------------- 2 files changed, 12 insertions(+), 59 deletions(-) diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index b5e0c873d4e140..38d1c51f58b108 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -10,7 +10,6 @@ menu "Device Tree and Open Firmware support" config OF_UNITTEST bool "Device Tree runtime unit tests" depends on OF_IRQ && OF_EARLY_FLATTREE - select OF_DYNAMIC select OF_RESOLVE help This option builds in test cases for the device tree infrastructure diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 844838e11ef1a1..139363af5c887f 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -822,6 +822,7 @@ static void update_node_properties(struct device_node *np, static int attach_node_and_children(struct device_node *np) { struct device_node *next, *dup, *child; + unsigned long flags; dup = of_find_node_by_path(np->full_name); if (dup) { @@ -838,8 +839,17 @@ static int attach_node_and_children(struct device_node *np) child = np->child; np->child = NULL; - np->sibling = NULL; - of_attach_node(np); + + mutex_lock(&of_mutex); + raw_spin_lock_irqsave(&devtree_lock, flags); + np->sibling = np->parent->child; + np->parent->child = np; + of_node_clear_flag(np, OF_DETACHED); + raw_spin_unlock_irqrestore(&devtree_lock, flags); + + __of_attach_node_sysfs(np); + mutex_unlock(&of_mutex); + while (child) { next = child->sibling; attach_node_and_children(child); @@ -911,59 +921,6 @@ static int __init selftest_data_add(void) return 0; } -/** - * detach_node_and_children - detaches node - * and its children from live tree - * - * @np: Node to detach from live tree - */ -static void detach_node_and_children(struct device_node *np) -{ - while (np->child) - detach_node_and_children(np->child); - of_detach_node(np); -} - -/** - * selftest_data_remove - removes the selftest data - * nodes from the live tree - */ -static void selftest_data_remove(void) -{ - struct device_node *np; - struct property *prop; - - if (selftest_live_tree) { - of_node_put(of_aliases); - of_node_put(of_chosen); - of_aliases = NULL; - of_chosen = NULL; - for_each_child_of_node(of_root, np) - detach_node_and_children(np); - __of_detach_node_sysfs(of_root); - of_root = NULL; - return; - } - - while (last_node_index-- > 0) { - if (nodes[last_node_index]) { - np = of_find_node_by_path(nodes[last_node_index]->full_name); - if (np == nodes[last_node_index]) { - if (of_aliases == np) { - of_node_put(of_aliases); - of_aliases = NULL; - } - detach_node_and_children(np); - } else { - for_each_property_of_node(np, prop) { - if (strcmp(prop->name, "testcase-alias") == 0) - of_remove_property(np, prop); - } - } - } - } -} - #ifdef CONFIG_OF_OVERLAY static int selftest_probe(struct platform_device *pdev) @@ -1475,9 +1432,6 @@ static int __init of_selftest(void) of_selftest_platform_populate(); of_selftest_overlay(); - /* removing selftest data from live tree */ - selftest_data_remove(); - /* Double check linkage after removing testcase data */ of_selftest_check_tree_linkage(); From aa45f1c700fd90319745b45f89d19d6de26093be Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 12 Jan 2015 15:54:11 +0100 Subject: [PATCH 050/601] dt-bindings: Add Silicon Mitus vendor prefix Add vendor prefix for Silicon Mitus. Currently there is binding for sm5502 extcon driver ("siliconmitus,sm5502-muic"). Signed-off-by: Krzysztof Kozlowski Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 6b750fd299cdc8..d297de1c2b4763 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -145,6 +145,7 @@ schindler Schindler seagate Seagate Technology PLC sil Silicon Image silabs Silicon Laboratories +siliconmitus Silicon Mitus, Inc. simtek sii Seiko Instruments, Inc. silergy Silergy Corp. From c11261f22f653962829efcca76e99231320096a7 Mon Sep 17 00:00:00 2001 From: Ian Molton Date: Tue, 13 Jan 2015 14:18:09 +0000 Subject: [PATCH 051/601] Add AD Holdings Plc. to vendor-prefixes. AD Holdings design and manufacture IP Camera technology in the UK. http://www.ad-group.co.uk/ Signed-off-by: Ian Molton Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index d297de1c2b4763..9fff11c03f2e45 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -7,6 +7,7 @@ abilis Abilis Systems active-semi Active-Semi International Inc ad Avionic Design GmbH adapteva Adapteva, Inc. +adh AD Holdings Plc. adi Analog Devices, Inc. aeroflexgaisler Aeroflex Gaisler AB allwinner Allwinner Technology Co., Ltd. From e94fbe607f8c6bf87358ca84e9631a2fe5ebdc21 Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard Date: Tue, 16 Dec 2014 22:18:38 +0100 Subject: [PATCH 052/601] dt-bindings: use isil prefix for Intersil in vendor-prefixes.txt This patch fixes DT vendor-prefixes.txt documentation to reference isil (NASDAQ symbol and the most used prefix inside the kernel) for Intersil. It reverts 7c75c1d5e72b ("dt-bindings: Document deprecated device vendor name to fix related warning"). Signed-off-by: Arnaud Ebalard Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 9fff11c03f2e45..860b18df0548e0 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -84,8 +84,7 @@ innolux Innolux Corporation intel Intel Corporation intercontrol Inter Control Group isee ISEE 2007 S.L. -isil Intersil (deprecated, use isl) -isl Intersil +isil Intersil karo Ka-Ro electronics GmbH keymile Keymile GmbH lacie LaCie From 26a945caf381225c9a1e68f14826a884c08ea9cb Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 Jan 2015 17:07:47 +0000 Subject: [PATCH 053/601] arm64: remove broken cachepolicy code The cachepolicy kernel parameter was intended to aid in the debugging of coherency issues, but it is fundamentally broken for several reasons: * On SMP platforms, only the boot CPU's tcr_el1 is altered. Secondary CPUs may therefore use differ w.r.t. the attributes they apply to MT_NORMAL memory, resulting in a loss of coherency. * The cache maintenance using flush_dcache_all (based on Set/Way operations) is not guaranteed to empty a given CPU's cache hierarchy while said CPU has caches enabled, it cannot empty the caches of other coherent PEs, nor is it guaranteed to flush data to the PoC even when caches are disabled. * The TLBs are not invalidated around the modification of MAIR_EL1 and TCR_EL1, as required by the architecture (as both are permitted to be cached in a TLB). This may result in CPUs using attributes other than those expected for some memory accesses, resulting in a loss of coherency. * Exclusive accesses are not architecturally guaranteed to function as expected on memory marked as Write-Through or Non-Cacheable. Thus changing the attributes of MT_NORMAL away from the (architecurally safe) defaults may cause uses of these instructions (e.g. atomics) to behave erratically. Given this, the cachepolicy code cannot be used for debugging purposes as it alone is likely to cause coherency issues. This patch removes the broken cachepolicy code. Signed-off-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 74 --------------------------------------------- 1 file changed, 74 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 3286385488714c..e57c170a91f35f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -45,80 +45,6 @@ struct page *empty_zero_page; EXPORT_SYMBOL(empty_zero_page); -struct cachepolicy { - const char policy[16]; - u64 mair; - u64 tcr; -}; - -static struct cachepolicy cache_policies[] __initdata = { - { - .policy = "uncached", - .mair = 0x44, /* inner, outer non-cacheable */ - .tcr = TCR_IRGN_NC | TCR_ORGN_NC, - }, { - .policy = "writethrough", - .mair = 0xaa, /* inner, outer write-through, read-allocate */ - .tcr = TCR_IRGN_WT | TCR_ORGN_WT, - }, { - .policy = "writeback", - .mair = 0xee, /* inner, outer write-back, read-allocate */ - .tcr = TCR_IRGN_WBnWA | TCR_ORGN_WBnWA, - } -}; - -/* - * These are useful for identifying cache coherency problems by allowing the - * cache or the cache and writebuffer to be turned off. It changes the Normal - * memory caching attributes in the MAIR_EL1 register. - */ -static int __init early_cachepolicy(char *p) -{ - int i; - u64 tmp; - - for (i = 0; i < ARRAY_SIZE(cache_policies); i++) { - int len = strlen(cache_policies[i].policy); - - if (memcmp(p, cache_policies[i].policy, len) == 0) - break; - } - if (i == ARRAY_SIZE(cache_policies)) { - pr_err("ERROR: unknown or unsupported cache policy: %s\n", p); - return 0; - } - - flush_cache_all(); - - /* - * Modify MT_NORMAL attributes in MAIR_EL1. - */ - asm volatile( - " mrs %0, mair_el1\n" - " bfi %0, %1, %2, #8\n" - " msr mair_el1, %0\n" - " isb\n" - : "=&r" (tmp) - : "r" (cache_policies[i].mair), "i" (MT_NORMAL * 8)); - - /* - * Modify TCR PTW cacheability attributes. - */ - asm volatile( - " mrs %0, tcr_el1\n" - " bic %0, %0, %2\n" - " orr %0, %0, %1\n" - " msr tcr_el1, %0\n" - " isb\n" - : "=&r" (tmp) - : "r" (cache_policies[i].tcr), "r" (TCR_IRGN_MASK | TCR_ORGN_MASK)); - - flush_cache_all(); - - return 0; -} -early_param("cachepolicy", early_cachepolicy); - pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { From ee3a4020f7c91ffc3c1a680c88652371b9711809 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Sun, 21 Dec 2014 16:07:43 +0100 Subject: [PATCH 054/601] ARM: 8250/1: sa1100: provide OSTIMER0 clock for pxa_timer Pxa_timer clocksource requires OSTIMER0 clock to be provided. Add dummy clock returning proper rate. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Russell King --- arch/arm/mach-sa1100/clock.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm/mach-sa1100/clock.c b/arch/arm/mach-sa1100/clock.c index 03c75a811cb025..cbf53bb9c814dc 100644 --- a/arch/arm/mach-sa1100/clock.c +++ b/arch/arm/mach-sa1100/clock.c @@ -119,6 +119,17 @@ static DEFINE_CLK(gpio27, &clk_gpio27_ops); static DEFINE_CLK(cpu, &clk_cpu_ops); +static unsigned long clk_36864_get_rate(struct clk *clk) +{ + return 3686400; +} + +static struct clkops clk_36864_ops = { + .get_rate = clk_36864_get_rate, +}; + +static DEFINE_CLK(36864, &clk_36864_ops); + static struct clk_lookup sa11xx_clkregs[] = { CLKDEV_INIT("sa1111.0", NULL, &clk_gpio27), CLKDEV_INIT("sa1100-rtc", NULL, NULL), @@ -126,6 +137,7 @@ static struct clk_lookup sa11xx_clkregs[] = { CLKDEV_INIT("sa11x0-pcmcia", NULL, &clk_cpu), /* sa1111 names devices using internal offsets, PCMCIA is at 0x1800 */ CLKDEV_INIT("1800", NULL, &clk_cpu), + CLKDEV_INIT(NULL, "OSTIMER0", &clk_36864), }; static int __init sa11xx_clk_init(void) From e074ff86e8b4f4f7983ff9b752bc904c8a729900 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Sun, 21 Dec 2014 16:07:09 +0100 Subject: [PATCH 055/601] ARM: 8251/1: clocksource: enable pxa_timer for SA-1100 platform SA-11x0 platform used the same IP block as was used on PXA. Consequently it makes sense to have only one driver. Enable pxa_timer clocksource for StrongARM platform. Signed-off-by: Dmitry Eremin-Solenikov Acked-by: Thomas Gleixner Signed-off-by: Russell King --- drivers/clocksource/Kconfig | 7 +++++++ drivers/clocksource/Makefile | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index fc01ec27d3c842..8a1479fc64793f 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -229,4 +229,11 @@ config CLKSRC_MIPS_GIC depends on MIPS_GIC select CLKSRC_OF +config CLKSRC_PXA + def_bool y if ARCH_PXA || ARCH_SA1100 + select CLKSRC_OF if USE_OF + help + This enables OST0 support available on PXA and SA-11x0 + platforms. + endmenu diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 94d90b24b56bf4..aa526f4bd3cfd9 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -21,7 +21,7 @@ obj-$(CONFIG_ARCH_CLPS711X) += clps711x-timer.o obj-$(CONFIG_ARCH_MARCO) += timer-marco.o obj-$(CONFIG_ARCH_MOXART) += moxart_timer.o obj-$(CONFIG_ARCH_MXS) += mxs_timer.o -obj-$(CONFIG_ARCH_PXA) += pxa_timer.o +obj-$(CONFIG_CLKSRC_PXA) += pxa_timer.o obj-$(CONFIG_ARCH_PRIMA2) += timer-prima2.o obj-$(CONFIG_ARCH_U300) += timer-u300.o obj-$(CONFIG_SUN4I_TIMER) += sun4i_timer.o From 7a8ca0a0c480fedf91bdbadf8b90edd5374ce18b Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Sun, 21 Dec 2014 16:08:13 +0100 Subject: [PATCH 056/601] ARM: 8252/1: sa1100: use pxa_timer clocksource driver Use pxa_timer clocksource driver. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Russell King --- arch/arm/mach-sa1100/Makefile | 2 +- arch/arm/mach-sa1100/generic.c | 6 ++ arch/arm/mach-sa1100/time.c | 139 --------------------------------- 3 files changed, 7 insertions(+), 140 deletions(-) delete mode 100644 arch/arm/mach-sa1100/time.c diff --git a/arch/arm/mach-sa1100/Makefile b/arch/arm/mach-sa1100/Makefile index f1114d11fe13eb..61ff91e76e0a40 100644 --- a/arch/arm/mach-sa1100/Makefile +++ b/arch/arm/mach-sa1100/Makefile @@ -3,7 +3,7 @@ # # Common support -obj-y := clock.o generic.o irq.o time.o #nmi-oopser.o +obj-y := clock.o generic.o irq.o #nmi-oopser.o # Specific board support obj-$(CONFIG_SA1100_ASSABET) += assabet.o diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index d4ea142c4eddb7..40e0d8619a2d97 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -33,6 +33,7 @@ #include #include "generic.h" +#include unsigned int reset_status; EXPORT_SYMBOL(reset_status); @@ -369,6 +370,11 @@ void __init sa1100_map_io(void) iotable_init(standard_io_desc, ARRAY_SIZE(standard_io_desc)); } +void __init sa1100_timer_init(void) +{ + pxa_timer_nodt_init(IRQ_OST0, io_p2v(0x90000000), 3686400); +} + /* * Disable the memory bus request/grant signals on the SA1110 to * ensure that we don't receive spurious memory requests. We set diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c deleted file mode 100644 index 1dea6cfafb3101..00000000000000 --- a/arch/arm/mach-sa1100/time.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - * linux/arch/arm/mach-sa1100/time.c - * - * Copyright (C) 1998 Deborah Wallach. - * Twiddles (C) 1999 Hugo Fiennes - * - * 2000/03/29 (C) Nicolas Pitre - * Rewritten: big cleanup, much simpler, better HZ accuracy. - * - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#define SA1100_CLOCK_FREQ 3686400 -#define SA1100_LATCH DIV_ROUND_CLOSEST(SA1100_CLOCK_FREQ, HZ) - -static u64 notrace sa1100_read_sched_clock(void) -{ - return readl_relaxed(OSCR); -} - -#define MIN_OSCR_DELTA 2 - -static irqreturn_t sa1100_ost0_interrupt(int irq, void *dev_id) -{ - struct clock_event_device *c = dev_id; - - /* Disarm the compare/match, signal the event. */ - writel_relaxed(readl_relaxed(OIER) & ~OIER_E0, OIER); - writel_relaxed(OSSR_M0, OSSR); - c->event_handler(c); - - return IRQ_HANDLED; -} - -static int -sa1100_osmr0_set_next_event(unsigned long delta, struct clock_event_device *c) -{ - unsigned long next, oscr; - - writel_relaxed(readl_relaxed(OIER) | OIER_E0, OIER); - next = readl_relaxed(OSCR) + delta; - writel_relaxed(next, OSMR0); - oscr = readl_relaxed(OSCR); - - return (signed)(next - oscr) <= MIN_OSCR_DELTA ? -ETIME : 0; -} - -static void -sa1100_osmr0_set_mode(enum clock_event_mode mode, struct clock_event_device *c) -{ - switch (mode) { - case CLOCK_EVT_MODE_ONESHOT: - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - writel_relaxed(readl_relaxed(OIER) & ~OIER_E0, OIER); - writel_relaxed(OSSR_M0, OSSR); - break; - - case CLOCK_EVT_MODE_RESUME: - case CLOCK_EVT_MODE_PERIODIC: - break; - } -} - -#ifdef CONFIG_PM -unsigned long osmr[4], oier; - -static void sa1100_timer_suspend(struct clock_event_device *cedev) -{ - osmr[0] = readl_relaxed(OSMR0); - osmr[1] = readl_relaxed(OSMR1); - osmr[2] = readl_relaxed(OSMR2); - osmr[3] = readl_relaxed(OSMR3); - oier = readl_relaxed(OIER); -} - -static void sa1100_timer_resume(struct clock_event_device *cedev) -{ - writel_relaxed(0x0f, OSSR); - writel_relaxed(osmr[0], OSMR0); - writel_relaxed(osmr[1], OSMR1); - writel_relaxed(osmr[2], OSMR2); - writel_relaxed(osmr[3], OSMR3); - writel_relaxed(oier, OIER); - - /* - * OSMR0 is the system timer: make sure OSCR is sufficiently behind - */ - writel_relaxed(OSMR0 - SA1100_LATCH, OSCR); -} -#else -#define sa1100_timer_suspend NULL -#define sa1100_timer_resume NULL -#endif - -static struct clock_event_device ckevt_sa1100_osmr0 = { - .name = "osmr0", - .features = CLOCK_EVT_FEAT_ONESHOT, - .rating = 200, - .set_next_event = sa1100_osmr0_set_next_event, - .set_mode = sa1100_osmr0_set_mode, - .suspend = sa1100_timer_suspend, - .resume = sa1100_timer_resume, -}; - -static struct irqaction sa1100_timer_irq = { - .name = "ost0", - .flags = IRQF_TIMER | IRQF_IRQPOLL, - .handler = sa1100_ost0_interrupt, - .dev_id = &ckevt_sa1100_osmr0, -}; - -void __init sa1100_timer_init(void) -{ - writel_relaxed(0, OIER); - writel_relaxed(OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3, OSSR); - - sched_clock_register(sa1100_read_sched_clock, 32, 3686400); - - ckevt_sa1100_osmr0.cpumask = cpumask_of(0); - - setup_irq(IRQ_OST0, &sa1100_timer_irq); - - clocksource_mmio_init(OSCR, "oscr", SA1100_CLOCK_FREQ, 200, 32, - clocksource_mmio_readl_up); - clockevents_config_and_register(&ckevt_sa1100_osmr0, 3686400, - MIN_OSCR_DELTA * 2, 0x7fffffff); -} From bfc9657d752c47d59dc0bab85ebdc19cf60100dd Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 5 Jan 2015 19:34:47 +0800 Subject: [PATCH 057/601] ARM: optprobes: execute instruction during restoring if possible. This patch removes software emulation or simulation for most of probed instructions. If the instruction doesn't use PC relative addressing, it will be translated into following instructions in the restore code in code template: ldmia {r0 - r14} // restore all instruction except PC // direct execute the probed instruction b next_insn // branch to next instruction. Signed-off-by: Wang Nan Reviewed-by: Masami Hiramatsu Signed-off-by: Jon Medhurst --- arch/arm/include/asm/kprobes.h | 3 ++ arch/arm/include/asm/probes.h | 1 + arch/arm/probes/kprobes/opt-arm.c | 52 +++++++++++++++++++++++++++++-- 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h index 50ff3bc7928e91..3ea9be559726ec 100644 --- a/arch/arm/include/asm/kprobes.h +++ b/arch/arm/include/asm/kprobes.h @@ -57,6 +57,9 @@ extern __visible kprobe_opcode_t optprobe_template_call; extern __visible kprobe_opcode_t optprobe_template_end; extern __visible kprobe_opcode_t optprobe_template_sub_sp; extern __visible kprobe_opcode_t optprobe_template_add_sp; +extern __visible kprobe_opcode_t optprobe_template_restore_begin; +extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn; +extern __visible kprobe_opcode_t optprobe_template_restore_end; #define MAX_OPTIMIZED_LENGTH 4 #define MAX_OPTINSN_SIZE \ diff --git a/arch/arm/include/asm/probes.h b/arch/arm/include/asm/probes.h index b668e60f759cc5..1e5b9bb9227066 100644 --- a/arch/arm/include/asm/probes.h +++ b/arch/arm/include/asm/probes.h @@ -42,6 +42,7 @@ struct arch_probes_insn { probes_insn_fn_t *insn_fn; int stack_space; unsigned long register_usage_flags; + bool kprobe_direct_exec; }; #endif /* __ASSEMBLY__ */ diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index 13d5232118df16..bcdecc25461bca 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -31,6 +31,14 @@ #include "core.h" +/* + * See register_usage_flags. If the probed instruction doesn't use PC, + * we can copy it into template and have it executed directly without + * simulation or emulation. + */ +#define ARM_REG_PC 15 +#define can_kprobe_direct_exec(m) (!test_bit(ARM_REG_PC, &(m))) + /* * NOTE: the first sub and add instruction will be modified according * to the stack cost of the instruction. @@ -71,7 +79,15 @@ asm ( " orrne r2, #1\n" " strne r2, [sp, #60] @ set bit0 of PC for thumb\n" " msr cpsr_cxsf, r1\n" + ".global optprobe_template_restore_begin\n" + "optprobe_template_restore_begin:\n" " ldmia sp, {r0 - r15}\n" + ".global optprobe_template_restore_orig_insn\n" + "optprobe_template_restore_orig_insn:\n" + " nop\n" + ".global optprobe_template_restore_end\n" + "optprobe_template_restore_end:\n" + " nop\n" ".global optprobe_template_val\n" "optprobe_template_val:\n" "1: .long 0\n" @@ -91,6 +107,12 @@ asm ( ((unsigned long *)&optprobe_template_add_sp - (unsigned long *)&optprobe_template_entry) #define TMPL_SUB_SP \ ((unsigned long *)&optprobe_template_sub_sp - (unsigned long *)&optprobe_template_entry) +#define TMPL_RESTORE_BEGIN \ + ((unsigned long *)&optprobe_template_restore_begin - (unsigned long *)&optprobe_template_entry) +#define TMPL_RESTORE_ORIGN_INSN \ + ((unsigned long *)&optprobe_template_restore_orig_insn - (unsigned long *)&optprobe_template_entry) +#define TMPL_RESTORE_END \ + ((unsigned long *)&optprobe_template_restore_end - (unsigned long *)&optprobe_template_entry) /* * ARM can always optimize an instruction when using ARM ISA, except @@ -160,8 +182,12 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) __this_cpu_write(current_kprobe, NULL); } - /* In each case, we must singlestep the replaced instruction. */ - op->kp.ainsn.insn_singlestep(p->opcode, &p->ainsn, regs); + /* + * We singlestep the replaced instruction only when it can't be + * executed directly during restore. + */ + if (!p->ainsn.kprobe_direct_exec) + op->kp.ainsn.insn_singlestep(p->opcode, &p->ainsn, regs); local_irq_restore(flags); } @@ -243,6 +269,28 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or val = (unsigned long)optimized_callback; code[TMPL_CALL_IDX] = val; + /* If possible, copy insn and have it executed during restore */ + orig->ainsn.kprobe_direct_exec = false; + if (can_kprobe_direct_exec(orig->ainsn.register_usage_flags)) { + kprobe_opcode_t final_branch = arm_gen_branch( + (unsigned long)(&code[TMPL_RESTORE_END]), + (unsigned long)(op->kp.addr) + 4); + if (final_branch != 0) { + /* + * Replace original 'ldmia sp, {r0 - r15}' with + * 'ldmia {r0 - r14}', restore all registers except pc. + */ + code[TMPL_RESTORE_BEGIN] = __opcode_to_mem_arm(0xe89d7fff); + + /* The original probed instruction */ + code[TMPL_RESTORE_ORIGN_INSN] = __opcode_to_mem_arm(orig->opcode); + + /* Jump back to next instruction */ + code[TMPL_RESTORE_END] = __opcode_to_mem_arm(final_branch); + orig->ainsn.kprobe_direct_exec = true; + } + } + flush_icache_range((unsigned long)code, (unsigned long)(&code[TMPL_END_IDX])); From 7fe21291bad93458100fbbf0078d40cdb529a646 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 14 Jan 2015 15:15:57 +0000 Subject: [PATCH 058/601] MPILIB: Deobfuscate mpi_cmp The condition preceding 'return 1;' makes my head hurt. At this point, we know that u and v have the same sign; if they are negative, they compare opposite to how their absolute values compare (which mpihelp_cmp found for us), otherwise cmp itself is the answer. Negating cmp is ok since mpihelp_cmp returns {-1,0,1}; -INT_MIN==INT_MIN won't bite us. Signed-off-by: Rasmus Villemoes Signed-off-by: David Howells Acked-by: Dmitry Kasatkin --- lib/mpi/mpi-cmp.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/lib/mpi/mpi-cmp.c b/lib/mpi/mpi-cmp.c index 1871e7b61ca00d..8ed36b8cbe02b3 100644 --- a/lib/mpi/mpi-cmp.c +++ b/lib/mpi/mpi-cmp.c @@ -61,10 +61,8 @@ int mpi_cmp(MPI u, MPI v) if (!usize) return 0; cmp = mpihelp_cmp(u->d, v->d, usize); - if (!cmp) - return 0; - if ((cmp < 0 ? 1 : 0) == (u->sign ? 1 : 0)) - return 1; - return -1; + if (u->sign) + return -cmp; + return cmp; } EXPORT_SYMBOL_GPL(mpi_cmp); From 98dbbcba1b0c1874b9faf9c77b83f9729360aa2c Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 14 Jan 2015 15:16:00 +0000 Subject: [PATCH 059/601] MPILIB: Fix obvious but harmless typo The macro MPN_COPY_INCR this occurs in isn't used anywhere. Signed-off-by: Rasmus Villemoes Signed-off-by: David Howells --- lib/mpi/mpi-internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h index 60cf765628e90d..c65dd1bff45a98 100644 --- a/lib/mpi/mpi-internal.h +++ b/lib/mpi/mpi-internal.h @@ -84,7 +84,7 @@ static inline int RESIZE_IF_NEEDED(MPI a, unsigned b) do { \ mpi_size_t _i; \ for (_i = 0; _i < (n); _i++) \ - (d)[_i] = (d)[_i]; \ + (d)[_i] = (s)[_i]; \ } while (0) #define MPN_COPY_DECR(d, s, n) \ From b9f918a31d629e99c9ca3d6ac2a2d7a905715c69 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Wed, 14 Jan 2015 16:10:12 +0000 Subject: [PATCH 060/601] MPILIB: Fix comparison of negative MPIs If u and v both represent negative integers and their limb counts happen to differ, mpi_cmp will always return a positive value - this is obviously bogus. u is smaller than v if and only if it is larger in absolute value. Signed-off-by: Rasmus Villemoes Signed-off-by: David Howells Acked-by: Dmitry Kasatkin --- lib/mpi/mpi-cmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/mpi/mpi-cmp.c b/lib/mpi/mpi-cmp.c index 8ed36b8cbe02b3..d25e9e96c310fc 100644 --- a/lib/mpi/mpi-cmp.c +++ b/lib/mpi/mpi-cmp.c @@ -57,7 +57,7 @@ int mpi_cmp(MPI u, MPI v) if (usize != vsize && !u->sign && !v->sign) return usize - vsize; if (usize != vsize && u->sign && v->sign) - return vsize + usize; + return vsize - usize; if (!usize) return 0; cmp = mpihelp_cmp(u->d, v->d, usize); From bae2a3cc4f5e2cd5b1902a040e6d3ff1f21f488a Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 9 Jan 2015 09:49:20 +0100 Subject: [PATCH 061/601] s390/docs: Break long lines in Debugging390.txt There are a lot of lines that are longer than 80 columns in this file, rendering it hard to read in a terminal window. This patch fixes most of these long lines, and while we're at it, also makes some sentences more readable, e.g. by replacing "&" with "and", adding proper punctuation, removing superfluous clauses, etc. Signed-off-by: Thomas Huth Signed-off-by: Martin Schwidefsky --- Documentation/s390/Debugging390.txt | 464 ++++++++++++++-------------- 1 file changed, 237 insertions(+), 227 deletions(-) diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt index 08911b5c6b0e43..ae75366cc3d084 100644 --- a/Documentation/s390/Debugging390.txt +++ b/Documentation/s390/Debugging390.txt @@ -1,14 +1,14 @@ - - Debugging on Linux for s/390 & z/Architecture - by - Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) - Copyright (C) 2000-2001 IBM Deutschland Entwicklung GmbH, IBM Corporation - Best viewed with fixed width fonts + + Debugging on Linux for s/390 & z/Architecture + by + Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com) + Copyright (C) 2000-2001 IBM Deutschland Entwicklung GmbH, IBM Corporation + Best viewed with fixed width fonts Overview of Document: ===================== -This document is intended to give a good overview of how to debug -Linux for s/390 & z/Architecture. It isn't intended as a complete reference & not a +This document is intended to give a good overview of how to debug Linux for +s/390 and z/Architecture. It is not intended as a complete reference and not a tutorial on the fundamentals of C & assembly. It doesn't go into 390 IO in any detail. It is intended to complement the documents in the reference section below & any other worthwhile references you get. @@ -44,18 +44,20 @@ Register Set ============ The current architectures have the following registers. -16 General propose registers, 32 bit on s/390 64 bit on z/Architecture, r0-r15 or gpr0-gpr15 used for arithmetic & addressing. - -16 Control registers, 32 bit on s/390 64 bit on z/Architecture, ( cr0-cr15 kernel usage only ) used for memory management, -interrupt control,debugging control etc. - -16 Access registers ( ar0-ar15 ) 32 bit on s/390 & z/Architecture -not used by normal programs but potentially could -be used as temporary storage. Their main purpose is their 1 to 1 -association with general purpose registers and are used in -the kernel for copying data between kernel & user address spaces. -Access register 0 ( & access register 1 on z/Architecture ( needs 64 bit -pointer ) ) is currently used by the pthread library as a pointer to +16 General propose registers, 32 bit on s/390 and 64 bit on z/Architecture, +r0-r15 (or gpr0-gpr15), used for arithmetic and addressing. + +16 Control registers, 32 bit on s/390 and 64 bit on z/Architecture, cr0-cr15, +kernel usage only, used for memory management, interrupt control, debugging +control etc. + +16 Access registers (ar0-ar15), 32 bit on both s/390 and z/Architecture, +normally not used by normal programs but potentially could be used as +temporary storage. These registers have a 1:1 association with general +purpose registers and are designed to be used in the so-called access +register mode to select different address spaces. +Access register 0 (and access register 1 on z/Architecture, which needs a +64 bit pointer) is currently used by the pthread library as a pointer to the current running threads private area. 16 64 bit floating point registers (fp0-fp15 ) IEEE & HFP floating @@ -90,18 +92,19 @@ s/390 z/Architecture 6 6 Input/Output interrupt Mask -7 7 External interrupt Mask used primarily for interprocessor signalling & - clock interrupts. +7 7 External interrupt Mask used primarily for interprocessor + signalling and clock interrupts. -8-11 8-11 PSW Key used for complex memory protection mechanism not used under linux +8-11 8-11 PSW Key used for complex memory protection mechanism + (not used under linux) 12 12 1 on s/390 0 on z/Architecture 13 13 Machine Check Mask 1=enable machine check interrupts -14 14 Wait State set this to 1 to stop the processor except for interrupts & give - time to other LPARS used in CPU idle in the kernel to increase overall - usage of processor resources. +14 14 Wait State. Set this to 1 to stop the processor except for + interrupts and give time to other LPARS. Used in CPU idle in + the kernel to increase overall usage of processor resources. 15 15 Problem state ( if set to 1 certain instructions are disabled ) all linux user programs run with this bit 1 @@ -165,21 +168,23 @@ s/390 z/Architecture when loading the address with LPSWE otherwise a specification exception occurs, LPSW is fully backward compatible. - - + + Prefix Page(s) --------------- +-------------- This per cpu memory area is too intimately tied to the processor not to mention. -It exists between the real addresses 0-4096 on s/390 & 0-8192 z/Architecture & is exchanged -with a 1 page on s/390 or 2 pages on z/Architecture in absolute storage by the set -prefix instruction in linux'es startup. -This page is mapped to a different prefix for each processor in an SMP configuration -( assuming the os designer is sane of course :-) ). -Bytes 0-512 ( 200 hex ) on s/390 & 0-512,4096-4544,4604-5119 currently on z/Architecture -are used by the processor itself for holding such information as exception indications & -entry points for exceptions. -Bytes after 0xc00 hex are used by linux for per processor globals on s/390 & z/Architecture -( there is a gap on z/Architecture too currently between 0xc00 & 1000 which linux uses ). +It exists between the real addresses 0-4096 on s/390 and between 0-8192 on +z/Architecture and is exchanged with one page on s/390 or two pages on +z/Architecture in absolute storage by the set prefix instruction during Linux +startup. +This page is mapped to a different prefix for each processor in an SMP +configuration (assuming the OS designer is sane of course). +Bytes 0-512 (200 hex) on s/390 and 0-512, 4096-4544, 4604-5119 currently on +z/Architecture are used by the processor itself for holding such information +as exception indications and entry points for exceptions. +Bytes after 0xc00 hex are used by linux for per processor globals on s/390 and +z/Architecture (there is a gap on z/Architecture currently between 0xc00 and +0x1000, too, which is used by Linux). The closest thing to this on traditional architectures is the interrupt vector table. This is a good thing & does simplify some of the kernel coding however it means that we now cannot catch stray NULL pointers in the @@ -192,26 +197,26 @@ Address Spaces on Intel Linux The traditional Intel Linux is approximately mapped as follows forgive the ascii art. -0xFFFFFFFF 4GB Himem ***************** - * * - * Kernel Space * - * * - ***************** **************** -User Space Himem (typically 0xC0000000 3GB )* User Stack * * * - ***************** * * - * Shared Libs * * Next Process * - ***************** * to * - * * <== * Run * <== - * User Program * * * - * Data BSS * * * - * Text * * * - * Sections * * * -0x00000000 ***************** **************** - -Now it is easy to see that on Intel it is quite easy to recognise a kernel address -as being one greater than user space himem ( in this case 0xC0000000). -& addresses of less than this are the ones in the current running program on this -processor ( if an smp box ). +0xFFFFFFFF 4GB Himem ***************** + * * + * Kernel Space * + * * + ***************** **************** +User Space Himem * User Stack * * * +(typically 0xC0000000 3GB ) ***************** * * + * Shared Libs * * Next Process * + ***************** * to * + * * <== * Run * <== + * User Program * * * + * Data BSS * * * + * Text * * * + * Sections * * * +0x00000000 ***************** **************** + +Now it is easy to see that on Intel it is quite easy to recognise a kernel +address as being one greater than user space himem (in this case 0xC0000000), +and addresses of less than this are the ones in the current running program on +this processor (if an smp box). If using the virtual machine ( VM ) as a debugger it is quite difficult to know which user process is running as the address space you are looking at could be from any process in the run queue. @@ -247,8 +252,8 @@ Our addressing scheme is basically as follows: Himem 0x7fffffff 2GB on s/390 ***************** **************** currently 0x3ffffffffff (2^42)-1 * User Stack * * * on z/Architecture. ***************** * * - * Shared Libs * * * - ***************** * * + * Shared Libs * * * + ***************** * * * * * Kernel * * User Program * * * * Data BSS * * * @@ -301,10 +306,10 @@ Virtual Addresses on s/390 & z/Architecture =========================================== A virtual address on s/390 is made up of 3 parts -The SX ( segment index, roughly corresponding to the PGD & PMD in linux terminology ) -being bits 1-11. -The PX ( page index, corresponding to the page table entry (pte) in linux terminology ) -being bits 12-19. +The SX (segment index, roughly corresponding to the PGD & PMD in Linux +terminology) being bits 1-11. +The PX (page index, corresponding to the page table entry (pte) in Linux +terminology) being bits 12-19. The remaining bits BX (the byte index are the offset in the page ) i.e. bits 20 to 31. @@ -368,9 +373,9 @@ each processor as follows. * ( 8K ) * 16K aligned ************************ -What this means is that we don't need to dedicate any register or global variable -to point to the current running process & can retrieve it with the following -very simple construct for s/390 & one very similar for z/Architecture. +What this means is that we don't need to dedicate any register or global +variable to point to the current running process & can retrieve it with the +following very simple construct for s/390 & one very similar for z/Architecture. static inline struct task_struct * get_current(void) { @@ -403,8 +408,8 @@ Note: To follow stackframes requires a knowledge of C or Pascal & limited knowledge of one assembly language. It should be noted that there are some differences between the -s/390 & z/Architecture stack layouts as the z/Architecture stack layout didn't have -to maintain compatibility with older linkage formats. +s/390 and z/Architecture stack layouts as the z/Architecture stack layout +didn't have to maintain compatibility with older linkage formats. Glossary: --------- @@ -440,7 +445,7 @@ The code generated by the compiler to return to the caller. frameless-function A frameless function in Linux for s390 & z/Architecture is one which doesn't -need more than the register save area ( 96 bytes on s/390, 160 on z/Architecture ) +need more than the register save area (96 bytes on s/390, 160 on z/Architecture) given to it by the caller. A frameless function never: 1) Sets up a back chain. @@ -588,8 +593,8 @@ A sample program with comments. Comments on the function test ----------------------------- -1) It didn't need to set up a pointer to the constant pool gpr13 as it isn't used -( :-( ). +1) It didn't need to set up a pointer to the constant pool gpr13 as it is not +used ( :-( ). 2) This is a frameless function & no stack is bought. 3) The compiler was clever enough to recognise that it could return the value in r2 as well as use it for the passed in parameter ( :-) ). @@ -743,35 +748,34 @@ Debugging under VM Notes ----- Addresses & values in the VM debugger are always hex never decimal -Address ranges are of the format - or . -e.g. The address range 0x2000 to 0x3000 can be described as 2000-3000 or 2000.1000 +Address ranges are of the format - or +. +For example, the address range 0x2000 to 0x3000 can be described as 2000-3000 +or 2000.1000 The VM Debugger is case insensitive. -VM's strengths are usually other debuggers weaknesses you can get at any resource -no matter how sensitive e.g. memory management resources,change address translation -in the PSW. For kernel hacking you will reap dividends if you get good at it. - -The VM Debugger displays operators but not operands, probably because some -of it was written when memory was expensive & the programmer was probably proud that -it fitted into 2k of memory & the programmers & didn't want to shock hardcore VM'ers by -changing the interface :-), also the debugger displays useful information on the same line & -the author of the code probably felt that it was a good idea not to go over -the 80 columns on the screen. - -As some of you are probably in a panic now this isn't as unintuitive as it may seem -as the 390 instructions are easy to decode mentally & you can make a good guess at a lot -of them as all the operands are nibble ( half byte aligned ) & if you have an objdump listing -also it is quite easy to follow, if you don't have an objdump listing keep a copy of -the s/390 Reference Summary & look at between pages 2 & 7 or alternatively the -s/390 principles of operation. +VM's strengths are usually other debuggers weaknesses you can get at any +resource no matter how sensitive e.g. memory management resources, change +address translation in the PSW. For kernel hacking you will reap dividends if +you get good at it. + +The VM Debugger displays operators but not operands, and also the debugger +displays useful information on the same line as the author of the code probably +felt that it was a good idea not to go over the 80 columns on the screen. +This isn't as unintuitive as it may seem as the s/390 instructions are easy to +decode mentally and you can make a good guess at a lot of them as all the +operands are nibble (half byte aligned). +So if you have an objdump listing by hand, it is quite easy to follow, and if +you don't have an objdump listing keep a copy of the s/390 Reference Summary +or alternatively the s/390 principles of operation next to you. e.g. even I can guess that 0001AFF8' LR 180F CC 0 is a ( load register ) lr r0,r15 -Also it is very easy to tell the length of a 390 instruction from the 2 most significant -bits in the instruction ( not that this info is really useful except if you are trying to -make sense of a hexdump of code ). +Also it is very easy to tell the length of a 390 instruction from the 2 most +significant bits in the instruction (not that this info is really useful except +if you are trying to make sense of a hexdump of code). Here is a table Bits Instruction Length ------------------------------------------ @@ -780,9 +784,6 @@ Bits Instruction Length 10 4 Bytes 11 6 Bytes - - - The debugger also displays other useful info on the same line such as the addresses being operated on destination addresses of branches & condition codes. e.g. @@ -853,8 +854,8 @@ Displaying & modifying Registers -------------------------------- D G will display all the gprs Adding a extra G to all the commands is necessary to access the full 64 bit -content in VM on z/Architecture obviously this isn't required for access registers -as these are still 32 bit. +content in VM on z/Architecture. Obviously this isn't required for access +registers as these are still 32 bit. e.g. DGG instead of DG D X will display all the control registers D AR will display all the access registers @@ -870,10 +871,11 @@ Displaying Memory ----------------- To display memory mapped using the current PSW's mapping try D -To make VM display a message each time it hits a particular address & continue try +To make VM display a message each time it hits a particular address and +continue try D I will disassemble/display a range of instructions. ST addr 32 bit word will store a 32 bit aligned address -D T will display the EBCDIC in an address ( if you are that way inclined ) +D T will display the EBCDIC in an address (if you are that way inclined) D R will display real addresses ( without DAT ) but with prefixing. There are other complex options to display if you need to get at say home space but are in primary space the easiest thing to do is to temporarily @@ -884,8 +886,8 @@ restore it. Hints ----- -If you want to issue a debugger command without halting your virtual machine with the -PA1 key try prefixing the command with #CP e.g. +If you want to issue a debugger command without halting your virtual machine +with the PA1 key try prefixing the command with #CP e.g. #cp tr i pswa 2000 also suffixing most debugger commands with RUN will cause them not to stop just display the mnemonic at the current instruction on the console. @@ -903,9 +905,10 @@ This sends a message to your own console each time do_signal is entered. script with breakpoints on every kernel procedure, this isn't a good idea because there are thousands of these routines & VM can only set 255 breakpoints at a time so you nearly had to spend as long pruning the file down as you would -entering the msg's by hand ),however, the trick might be useful for a single object file. -On linux'es 3270 emulator x3270 there is a very useful option under the file ment -Save Screens In File this is very good of keeping a copy of traces. +entering the msgs by hand), however, the trick might be useful for a single +object file. In the 3270 terminal emulator x3270 there is a very useful option +in the file menu called "Save Screen In File" - this is very good for keeping a +copy of traces. From CMS help will give you online help on a particular command. e.g. @@ -920,7 +923,8 @@ SET PF9 IMM B This does a single step in VM on pressing F8. SET PF10 ^ This sets up the ^ key. -which can be used for ^c (ctrl-c),^z (ctrl-z) which can't be typed directly into some 3270 consoles. +which can be used for ^c (ctrl-c),^z (ctrl-z) which can't be typed directly +into some 3270 consoles. SET PF11 ^- This types the starting keystrokes for a sysrq see SysRq below. SET PF12 RETRIEVE @@ -1014,8 +1018,8 @@ Tracing Program Exceptions -------------------------- If you get a crash which says something like illegal operation or specification exception followed by a register dump -You can restart linux & trace these using the tr prog trace option. - +You can restart linux & trace these using the tr prog trace +option. The most common ones you will normally be tracing for is @@ -1057,9 +1061,10 @@ TR GOTO INITIAL Tracing linux syscalls under VM ------------------------------- -Syscalls are implemented on Linux for S390 by the Supervisor call instruction (SVC) there 256 -possibilities of these as the instruction is made up of a 0xA opcode & the second byte being -the syscall number. They are traced using the simple command. +Syscalls are implemented on Linux for S390 by the Supervisor call instruction +(SVC). There 256 possibilities of these as the instruction is made up of a 0xA +opcode and the second byte being the syscall number. They are traced using the +simple command: TR SVC the syscalls are defined in linux/arch/s390/include/asm/unistd.h e.g. to trace all file opens just do @@ -1070,12 +1075,12 @@ SMP Specific commands --------------------- To find out how many cpus you have Q CPUS displays all the CPU's available to your virtual machine -To find the cpu that the current cpu VM debugger commands are being directed at do -Q CPU to change the current cpu VM debugger commands are being directed at do +To find the cpu that the current cpu VM debugger commands are being directed at +do Q CPU to change the current cpu VM debugger commands are being directed at do CPU -On a SMP guest issue a command to all CPUs try prefixing the command with cpu all. -To issue a command to a particular cpu try cpu e.g. +On a SMP guest issue a command to all CPUs try prefixing the command with cpu +all. To issue a command to a particular cpu try cpu e.g. CPU 01 TR I R 2000.3000 If you are running on a guest with several cpus & you have a IO related problem & cannot follow the flow of code but you know it isn't smp related. @@ -1101,10 +1106,10 @@ D TX0.100 Alternatively ============= -Under older VM debuggers ( I love EBDIC too ) you can use this little program I wrote which -will convert a command line of hex digits to ascii text which can be compiled under linux & -you can copy the hex digits from your x3270 terminal to your xterm if you are debugging -from a linuxbox. +Under older VM debuggers (I love EBDIC too) you can use following little +program which converts a command line of hex digits to ascii text. It can be +compiled under linux and you can copy the hex digits from your x3270 terminal +to your xterm if you are debugging from a linuxbox. This is quite useful when looking at a parameter passed in as a text string under VM ( unless you are good at decoding ASCII in your head ). @@ -1114,14 +1119,14 @@ TR SVC 5 We have stopped at a breakpoint 000151B0' SVC 0A05 -> 0001909A' CC 0 -D 20.8 to check the SVC old psw in the prefix area & see was it from userspace -( for the layout of the prefix area consult P18 of the s/390 390 Reference Summary -if you have it available ). +D 20.8 to check the SVC old psw in the prefix area and see was it from userspace +(for the layout of the prefix area consult the "Fixed Storage Locations" +chapter of the s/390 Reference Summary if you have it available). V00000020 070C2000 800151B2 The problem state bit wasn't set & it's also too early in the boot sequence for it to be a userspace SVC if it was we would have to temporarily switch the -psw to user space addressing so we could get at the first parameter of the open in -gpr2. +psw to user space addressing so we could get at the first parameter of the open +in gpr2. Next do a D G2 GPR 2 = 00014CB4 @@ -1208,9 +1213,9 @@ Here are the tricks I use 9 out of 10 times it works pretty well, When your backchain reaches a dead end -------------------------------------- -This can happen when an exception happens in the kernel & the kernel is entered twice -if you reach the NULL pointer at the end of the back chain you should be -able to sniff further back if you follow the following tricks. +This can happen when an exception happens in the kernel and the kernel is +entered twice. If you reach the NULL pointer at the end of the back chain you +should be able to sniff further back if you follow the following tricks. 1) A kernel address should be easy to recognise since it is in primary space & the problem state bit isn't set & also The Hi bit of the address is set. @@ -1260,8 +1265,8 @@ V000FFFD0 00010400 80010802 8001085A 000FFFA0 our 3rd return address is 8001085A -as the 04B52002 looks suspiciously like rubbish it is fair to assume that the kernel entry routines -for the sake of optimisation don't set up a backchain. +as the 04B52002 looks suspiciously like rubbish it is fair to assume that the +kernel entry routines for the sake of optimisation don't set up a backchain. now look at System.map to see if the addresses make any sense. @@ -1289,67 +1294,75 @@ Congrats you've done your first backchain. s/390 & z/Architecture IO Overview ================================== -I am not going to give a course in 390 IO architecture as this would take me quite a -while & I'm no expert. Instead I'll give a 390 IO architecture summary for Dummies if you have -the s/390 principles of operation available read this instead. If nothing else you may find a few -useful keywords in here & be able to use them on a web search engine like altavista to find -more useful information. +I am not going to give a course in 390 IO architecture as this would take me +quite a while and I'm no expert. Instead I'll give a 390 IO architecture +summary for Dummies. If you have the s/390 principles of operation available +read this instead. If nothing else you may find a few useful keywords in here +and be able to use them on a web search engine to find more useful information. Unlike other bus architectures modern 390 systems do their IO using mostly -fibre optics & devices such as tapes & disks can be shared between several mainframes, -also S390 can support up to 65536 devices while a high end PC based system might be choking -with around 64. Here is some of the common IO terminology +fibre optics and devices such as tapes and disks can be shared between several +mainframes. Also S390 can support up to 65536 devices while a high end PC based +system might be choking with around 64. -Subchannel: -This is the logical number most IO commands use to talk to an IO device there can be up to -0x10000 (65536) of these in a configuration typically there is a few hundred. Under VM -for simplicity they are allocated contiguously, however on the native hardware they are not -they typically stay consistent between boots provided no new hardware is inserted or removed. -Under Linux for 390 we use these as IRQ's & also when issuing an IO command (CLEAR SUBCHANNEL, -HALT SUBCHANNEL,MODIFY SUBCHANNEL,RESUME SUBCHANNEL,START SUBCHANNEL,STORE SUBCHANNEL & -TEST SUBCHANNEL ) we use this as the ID of the device we wish to talk to, the most -important of these instructions are START SUBCHANNEL ( to start IO ), TEST SUBCHANNEL ( to check -whether the IO completed successfully ), & HALT SUBCHANNEL ( to kill IO ), a subchannel -can have up to 8 channel paths to a device this offers redundancy if one is not available. +Here is some of the common IO terminology: +Subchannel: +This is the logical number most IO commands use to talk to an IO device. There +can be up to 0x10000 (65536) of these in a configuration, typically there are a +few hundred. Under VM for simplicity they are allocated contiguously, however +on the native hardware they are not. They typically stay consistent between +boots provided no new hardware is inserted or removed. +Under Linux for s390 we use these as IRQ's and also when issuing an IO command +(CLEAR SUBCHANNEL, HALT SUBCHANNEL, MODIFY SUBCHANNEL, RESUME SUBCHANNEL, +START SUBCHANNEL, STORE SUBCHANNEL and TEST SUBCHANNEL). We use this as the ID +of the device we wish to talk to. The most important of these instructions are +START SUBCHANNEL (to start IO), TEST SUBCHANNEL (to check whether the IO +completed successfully) and HALT SUBCHANNEL (to kill IO). A subchannel can have +up to 8 channel paths to a device, this offers redundancy if one is not +available. Device Number: -This number remains static & Is closely tied to the hardware, there are 65536 of these -also they are made up of a CHPID ( Channel Path ID, the most significant 8 bits ) -& another lsb 8 bits. These remain static even if more devices are inserted or removed -from the hardware, there is a 1 to 1 mapping between Subchannels & Device Numbers provided -devices aren't inserted or removed. +This number remains static and is closely tied to the hardware. There are 65536 +of these, made up of a CHPID (Channel Path ID, the most significant 8 bits) and +another lsb 8 bits. These remain static even if more devices are inserted or +removed from the hardware. There is a 1 to 1 mapping between subchannels and +device numbers, provided devices aren't inserted or removed. Channel Control Words: -CCWS are linked lists of instructions initially pointed to by an operation request block (ORB), -which is initially given to Start Subchannel (SSCH) command along with the subchannel number -for the IO subsystem to process while the CPU continues executing normal code. -These come in two flavours, Format 0 ( 24 bit for backward ) -compatibility & Format 1 ( 31 bit ). These are typically used to issue read & write -( & many other instructions ) they consist of a length field & an absolute address field. -For each IO typically get 1 or 2 interrupts one for channel end ( primary status ) when the -channel is idle & the second for device end ( secondary status ) sometimes you get both -concurrently, you check how the IO went on by issuing a TEST SUBCHANNEL at each interrupt, -from which you receive an Interruption response block (IRB). If you get channel & device end -status in the IRB without channel checks etc. your IO probably went okay. If you didn't you -probably need a doctor to examine the IRB & extended status word etc. +CCWs are linked lists of instructions initially pointed to by an operation +request block (ORB), which is initially given to Start Subchannel (SSCH) +command along with the subchannel number for the IO subsystem to process +while the CPU continues executing normal code. +CCWs come in two flavours, Format 0 (24 bit for backward compatibility) and +Format 1 (31 bit). These are typically used to issue read and write (and many +other) instructions. They consist of a length field and an absolute address +field. +Each IO typically gets 1 or 2 interrupts, one for channel end (primary status) +when the channel is idle, and the second for device end (secondary status). +Sometimes you get both concurrently. You check how the IO went on by issuing a +TEST SUBCHANNEL at each interrupt, from which you receive an Interruption +response block (IRB). If you get channel and device end status in the IRB +without channel checks etc. your IO probably went okay. If you didn't you +probably need to examine the IRB, extended status word etc. If an error occurs, more sophisticated control units have a facility known as -concurrent sense this means that if an error occurs Extended sense information will -be presented in the Extended status word in the IRB if not you have to issue a -subsequent SENSE CCW command after the test subchannel. +concurrent sense. This means that if an error occurs Extended sense information +will be presented in the Extended status word in the IRB. If not you have to +issue a subsequent SENSE CCW command after the test subchannel. -TPI( Test pending interrupt) can also be used for polled IO but in multitasking multiprocessor -systems it isn't recommended except for checking special cases ( i.e. non looping checks for -pending IO etc. ). +TPI (Test pending interrupt) can also be used for polled IO, but in +multitasking multiprocessor systems it isn't recommended except for +checking special cases (i.e. non looping checks for pending IO etc.). -Store Subchannel & Modify Subchannel can be used to examine & modify operating characteristics -of a subchannel ( e.g. channel paths ). +Store Subchannel and Modify Subchannel can be used to examine and modify +operating characteristics of a subchannel (e.g. channel paths). Other IO related Terms: Sysplex: S390's Clustering Technology -QDIO: S390's new high speed IO architecture to support devices such as gigabit ethernet, -this architecture is also designed to be forward compatible with up & coming 64 bit machines. +QDIO: S390's new high speed IO architecture to support devices such as gigabit +ethernet, this architecture is also designed to be forward compatible with +upcoming 64 bit machines. General Concepts @@ -1406,37 +1419,40 @@ sometimes called Bus-and Tag & sometimes Original Equipment Manufacturers Interface (OEMI). This byte wide Parallel channel path/bus has parity & data on the "Bus" cable -& control lines on the "Tag" cable. These can operate in byte multiplex mode for -sharing between several slow devices or burst mode & monopolize the channel for the -whole burst. Up to 256 devices can be addressed on one of these cables. These cables are -about one inch in diameter. The maximum unextended length supported by these cables is -125 Meters but this can be extended up to 2km with a fibre optic channel extended -such as a 3044. The maximum burst speed supported is 4.5 megabytes per second however -some really old processors support only transfer rates of 3.0, 2.0 & 1.0 MB/sec. +and control lines on the "Tag" cable. These can operate in byte multiplex mode +for sharing between several slow devices or burst mode and monopolize the +channel for the whole burst. Up to 256 devices can be addressed on one of these +cables. These cables are about one inch in diameter. The maximum unextended +length supported by these cables is 125 Meters but this can be extended up to +2km with a fibre optic channel extended such as a 3044. The maximum burst speed +supported is 4.5 megabytes per second. However, some really old processors +support only transfer rates of 3.0, 2.0 & 1.0 MB/sec. One of these paths can be daisy chained to up to 8 control units. ESCON if fibre optic it is also called FICON -Was introduced by IBM in 1990. Has 2 fibre optic cables & uses either leds or lasers -for communication at a signaling rate of up to 200 megabits/sec. As 10bits are transferred -for every 8 bits info this drops to 160 megabits/sec & to 18.6 Megabytes/sec once -control info & CRC are added. ESCON only operates in burst mode. +Was introduced by IBM in 1990. Has 2 fibre optic cables and uses either leds or +lasers for communication at a signaling rate of up to 200 megabits/sec. As +10bits are transferred for every 8 bits info this drops to 160 megabits/sec +and to 18.6 Megabytes/sec once control info and CRC are added. ESCON only +operates in burst mode. -ESCONs typical max cable length is 3km for the led version & 20km for the laser version -known as XDF ( extended distance facility ). This can be further extended by using an -ESCON director which triples the above mentioned ranges. Unlike Bus & Tag as ESCON is -serial it uses a packet switching architecture the standard Bus & Tag control protocol -is however present within the packets. Up to 256 devices can be attached to each control -unit that uses one of these interfaces. +ESCONs typical max cable length is 3km for the led version and 20km for the +laser version known as XDF (extended distance facility). This can be further +extended by using an ESCON director which triples the above mentioned ranges. +Unlike Bus & Tag as ESCON is serial it uses a packet switching architecture, +the standard Bus & Tag control protocol is however present within the packets. +Up to 256 devices can be attached to each control unit that uses one of these +interfaces. Common 390 Devices include: Network adapters typically OSA2,3172's,2116's & OSA-E gigabit ethernet adapters, -Consoles 3270 & 3215 ( a teletype emulated under linux for a line mode console ). +Consoles 3270 & 3215 (a teletype emulated under linux for a line mode console). DASD's direct access storage devices ( otherwise known as hard disks ). Tape Drives. CTC ( Channel to Channel Adapters ), ESCON or Parallel Cables used as a very high speed serial link -between 2 machines. We use 2 cables under linux to do a bi-directional serial link. +between 2 machines. Debugging IO on s/390 & z/Architecture under VM @@ -1475,9 +1491,9 @@ or the halt subchannels or TR HSCH 7C08-7C09 MSCH's ,STSCH's I think you can guess the rest -Ingo's favourite trick is tracing all the IO's & CCWS & spooling them into the reader of another -VM guest so he can ftp the logfile back to his own machine.I'll do a small bit of this & give you - a look at the output. +A good trick is tracing all the IO's and CCWS and spooling them into the reader +of another VM guest so he can ftp the logfile back to his own machine. I'll do +a small bit of this and give you a look at the output. 1) Spool stdout to VM reader SP PRT TO (another vm guest ) or * for the local vm guest @@ -1593,8 +1609,8 @@ undisplay : undo's display's info breakpoints: shows all current breakpoints -info stack: shows stack back trace ( if this doesn't work too well, I'll show you the -stacktrace by hand below ). +info stack: shows stack back trace (if this doesn't work too well, I'll show +you the stacktrace by hand below). info locals: displays local variables. @@ -1619,7 +1635,8 @@ next: like step except this will not step into subroutines stepi: steps a single machine code instruction. e.g. stepi 100 -nexti: steps a single machine code instruction but will not step into subroutines. +nexti: steps a single machine code instruction but will not step into +subroutines. finish: will run until exit of the current routine @@ -1721,7 +1738,8 @@ e.g. outputs: $1 = 11 -You might now be thinking that the line above didn't work, something extra had to be done. +You might now be thinking that the line above didn't work, something extra had +to be done. (gdb) call fflush(stdout) hello world$2 = 0 As an aside the debugger also calls malloc & free under the hood @@ -1804,26 +1822,17 @@ man gdb or info gdb. core dumps ---------- What a core dump ?, -A core dump is a file generated by the kernel ( if allowed ) which contains the registers, -& all active pages of the program which has crashed. -From this file gdb will allow you to look at the registers & stack trace & memory of the -program as if it just crashed on your system, it is usually called core & created in the -current working directory. -This is very useful in that a customer can mail a core dump to a technical support department -& the technical support department can reconstruct what happened. -Provided they have an identical copy of this program with debugging symbols compiled in & -the source base of this build is available. -In short it is far more useful than something like a crash log could ever hope to be. - -In theory all that is missing to restart a core dumped program is a kernel patch which -will do the following. -1) Make a new kernel task structure -2) Reload all the dumped pages back into the kernel's memory management structures. -3) Do the required clock fixups -4) Get all files & network connections for the process back into an identical state ( really difficult ). -5) A few more difficult things I haven't thought of. - - +A core dump is a file generated by the kernel (if allowed) which contains the +registers and all active pages of the program which has crashed. +From this file gdb will allow you to look at the registers, stack trace and +memory of the program as if it just crashed on your system. It is usually +called core and created in the current working directory. +This is very useful in that a customer can mail a core dump to a technical +support department and the technical support department can reconstruct what +happened. Provided they have an identical copy of this program with debugging +symbols compiled in and the source base of this build is available. +In short it is far more useful than something like a crash log could ever hope +to be. Why have I never seen one ?. Probably because you haven't used the command @@ -1868,7 +1877,7 @@ Breakpoint 2 at 0x4d87a4: file top.c, line 2609. #3 0x5167e6 in readline_internal_char () at readline.c:454 #4 0x5168ee in readline_internal_charloop () at readline.c:507 #5 0x51692c in readline_internal () at readline.c:521 -#6 0x5164fe in readline (prompt=0x7ffff810 "\177ÿøx\177ÿ÷Ø\177ÿøxÀ") +#6 0x5164fe in readline (prompt=0x7ffff810) at readline.c:349 #7 0x4d7a8a in command_line_input (prompt=0x564420 "(gdb) ", repeat=1, annotation_suffix=0x4d6b44 "prompt") at top.c:2091 @@ -1929,8 +1938,8 @@ cat /proc/sys/net/ipv4/ip_forward On my machine now outputs 1 IP forwarding is on. -There is a lot of useful info in here best found by going in & having a look around, -so I'll take you through some entries I consider important. +There is a lot of useful info in here best found by going in and having a look +around, so I'll take you through some entries I consider important. All the processes running on the machine have their own entry defined by /proc/ @@ -2060,7 +2069,8 @@ if the device doesn't say up try /etc/rc.d/init.d/network start ( this starts the network stack & hopefully calls ifconfig tr0 up ). -ifconfig looks at the output of /proc/net/dev & presents it in a more presentable form +ifconfig looks at the output of /proc/net/dev and presents it in a more +presentable form. Now ping the device from a machine in the same subnet. if the RX packets count & TX packets counts don't increment you probably have problems. From fcb6bc9e9e3cc0b0027ab91ed4a2e6b05f518049 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 9 Jan 2015 09:51:00 +0100 Subject: [PATCH 062/601] s390/docs: Remove section about script debugging from Debugging390.txt The section about debugging scripting languages has nothing to do with s390 (and the example is even apparently taken from a i586 host instead), so let's remove this chapter from the file. Signed-off-by: Thomas Huth Signed-off-by: Martin Schwidefsky --- Documentation/s390/Debugging390.txt | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt index ae75366cc3d084..3df8babcdc41ed 100644 --- a/Documentation/s390/Debugging390.txt +++ b/Documentation/s390/Debugging390.txt @@ -35,7 +35,6 @@ Examining core dumps ldd Debugging modules The proc file system -Starting points for debugging scripting languages etc. SysRq References Special Thanks @@ -2096,34 +2095,6 @@ of the device. See the manpage chandev.8 &type cat /proc/chandev for more info. - -Starting points for debugging scripting languages etc. -====================================================== - -bash/sh - -bash -x -e.g. bash -x /usr/bin/bashbug -displays the following lines as it executes them. -+ MACHINE=i586 -+ OS=linux-gnu -+ CC=gcc -+ CFLAGS= -DPROGRAM='bash' -DHOSTTYPE='i586' -DOSTYPE='linux-gnu' -DMACHTYPE='i586-pc-linux-gnu' -DSHELL -DHAVE_CONFIG_H -I. -I. -I./lib -O2 -pipe -+ RELEASE=2.01 -+ PATCHLEVEL=1 -+ RELSTATUS=release -+ MACHTYPE=i586-pc-linux-gnu - -perl -d runs the perlscript in a fully interactive debugger -. -Type 'h' in the debugger for help. - -for debugging java type -jdb another fully interactive gdb style debugger. -& type ? in the debugger for help. - - - SysRq ===== This is now supported by linux for s/390 & z/Architecture. From e982506730815f9e3bcd148d06b5b6d778d9c46c Mon Sep 17 00:00:00 2001 From: Syam Sidhardhan Date: Tue, 30 Dec 2014 16:47:01 +0530 Subject: [PATCH 063/601] s390/hmcdrv: remove unnecessary version.h inclusion version.h inclusion is not necessary as detected by versioncheck. Signed-off-by: Syam Sidhardhan Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/hmcdrv_mod.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/s390/char/hmcdrv_mod.c b/drivers/s390/char/hmcdrv_mod.c index 505c6a78ee1a63..251a318a9b7541 100644 --- a/drivers/s390/char/hmcdrv_mod.c +++ b/drivers/s390/char/hmcdrv_mod.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include "hmcdrv_ftp.h" From 5d425c18653731af62831d30a4fa023d532657a9 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 8 Jan 2015 10:42:34 +0000 Subject: [PATCH 064/601] arm64: kernel: add support for cpu cache information This patch adds support for cacheinfo on ARM64. On ARMv8, the cache hierarchy can be identified through Cache Level ID (CLIDR) register while the cache geometry is provided by Cache Size ID (CCSIDR) register. Since the architecture doesn't provide any way of detecting the cpus sharing particular cache, device tree is used for the same purpose. Signed-off-by: Sudeep Holla Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cachetype.h | 29 +++++-- arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/cacheinfo.c | 128 +++++++++++++++++++++++++++++ arch/arm64/kernel/cpuinfo.c | 12 --- 4 files changed, 152 insertions(+), 19 deletions(-) create mode 100644 arch/arm64/kernel/cacheinfo.c diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h index 4c631a0a3609af..da2fc9e3cedd89 100644 --- a/arch/arm64/include/asm/cachetype.h +++ b/arch/arm64/include/asm/cachetype.h @@ -39,24 +39,41 @@ extern unsigned long __icache_flags; +/* + * NumSets, bits[27:13] - (Number of sets in cache) - 1 + * Associativity, bits[12:3] - (Associativity of cache) - 1 + * LineSize, bits[2:0] - (Log2(Number of words in cache line)) - 2 + */ +#define CCSIDR_EL1_WRITE_THROUGH BIT(31) +#define CCSIDR_EL1_WRITE_BACK BIT(30) +#define CCSIDR_EL1_READ_ALLOCATE BIT(29) +#define CCSIDR_EL1_WRITE_ALLOCATE BIT(28) #define CCSIDR_EL1_LINESIZE_MASK 0x7 #define CCSIDR_EL1_LINESIZE(x) ((x) & CCSIDR_EL1_LINESIZE_MASK) - +#define CCSIDR_EL1_ASSOCIATIVITY_SHIFT 3 +#define CCSIDR_EL1_ASSOCIATIVITY_MASK 0x3ff +#define CCSIDR_EL1_ASSOCIATIVITY(x) \ + (((x) >> CCSIDR_EL1_ASSOCIATIVITY_SHIFT) & CCSIDR_EL1_ASSOCIATIVITY_MASK) #define CCSIDR_EL1_NUMSETS_SHIFT 13 -#define CCSIDR_EL1_NUMSETS_MASK (0x7fff << CCSIDR_EL1_NUMSETS_SHIFT) +#define CCSIDR_EL1_NUMSETS_MASK 0x7fff #define CCSIDR_EL1_NUMSETS(x) \ - (((x) & CCSIDR_EL1_NUMSETS_MASK) >> CCSIDR_EL1_NUMSETS_SHIFT) + (((x) >> CCSIDR_EL1_NUMSETS_SHIFT) & CCSIDR_EL1_NUMSETS_MASK) + +#define CACHE_LINESIZE(x) (16 << CCSIDR_EL1_LINESIZE(x)) +#define CACHE_NUMSETS(x) (CCSIDR_EL1_NUMSETS(x) + 1) +#define CACHE_ASSOCIATIVITY(x) (CCSIDR_EL1_ASSOCIATIVITY(x) + 1) -extern u64 __attribute_const__ icache_get_ccsidr(void); +extern u64 __attribute_const__ cache_get_ccsidr(u64 csselr); +/* Helpers for Level 1 Instruction cache csselr = 1L */ static inline int icache_get_linesize(void) { - return 16 << CCSIDR_EL1_LINESIZE(icache_get_ccsidr()); + return CACHE_LINESIZE(cache_get_ccsidr(1L)); } static inline int icache_get_numsets(void) { - return 1 + CCSIDR_EL1_NUMSETS(icache_get_ccsidr()); + return CACHE_NUMSETS(cache_get_ccsidr(1L)); } /* diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index eaa77ed7766a3b..77d3d95683335c 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -16,7 +16,7 @@ arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ hyp-stub.o psci.o cpu_ops.o insn.o return_address.o \ - cpuinfo.o cpu_errata.o alternative.o + cpuinfo.o cpu_errata.o alternative.o cacheinfo.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o \ diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c new file mode 100644 index 00000000000000..b8629d52fba940 --- /dev/null +++ b/arch/arm64/kernel/cacheinfo.c @@ -0,0 +1,128 @@ +/* + * ARM64 cacheinfo support + * + * Copyright (C) 2015 ARM Ltd. + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include + +#include +#include + +#define MAX_CACHE_LEVEL 7 /* Max 7 level supported */ +/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */ +#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1)) +#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level)) +#define CLIDR_CTYPE(clidr, level) \ + (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level)) + +static inline enum cache_type get_cache_type(int level) +{ + u64 clidr; + + if (level > MAX_CACHE_LEVEL) + return CACHE_TYPE_NOCACHE; + asm volatile ("mrs %x0, clidr_el1" : "=r" (clidr)); + return CLIDR_CTYPE(clidr, level); +} + +/* + * Cache Size Selection Register(CSSELR) selects which Cache Size ID + * Register(CCSIDR) is accessible by specifying the required cache + * level and the cache type. We need to ensure that no one else changes + * CSSELR by calling this in non-preemtible context + */ +u64 __attribute_const__ cache_get_ccsidr(u64 csselr) +{ + u64 ccsidr; + + WARN_ON(preemptible()); + + /* Put value into CSSELR */ + asm volatile("msr csselr_el1, %x0" : : "r" (csselr)); + isb(); + /* Read result out of CCSIDR */ + asm volatile("mrs %x0, ccsidr_el1" : "=r" (ccsidr)); + + return ccsidr; +} + +static void ci_leaf_init(struct cacheinfo *this_leaf, + enum cache_type type, unsigned int level) +{ + bool is_icache = type & CACHE_TYPE_INST; + u64 tmp = cache_get_ccsidr((level - 1) << 1 | is_icache); + + this_leaf->level = level; + this_leaf->type = type; + this_leaf->coherency_line_size = CACHE_LINESIZE(tmp); + this_leaf->number_of_sets = CACHE_NUMSETS(tmp); + this_leaf->ways_of_associativity = CACHE_ASSOCIATIVITY(tmp); + this_leaf->size = this_leaf->number_of_sets * + this_leaf->coherency_line_size * this_leaf->ways_of_associativity; + this_leaf->attributes = + ((tmp & CCSIDR_EL1_WRITE_THROUGH) ? CACHE_WRITE_THROUGH : 0) | + ((tmp & CCSIDR_EL1_WRITE_BACK) ? CACHE_WRITE_BACK : 0) | + ((tmp & CCSIDR_EL1_READ_ALLOCATE) ? CACHE_READ_ALLOCATE : 0) | + ((tmp & CCSIDR_EL1_WRITE_ALLOCATE) ? CACHE_WRITE_ALLOCATE : 0); +} + +static int __init_cache_level(unsigned int cpu) +{ + unsigned int ctype, level, leaves; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + + for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) { + ctype = get_cache_type(level); + if (ctype == CACHE_TYPE_NOCACHE) { + level--; + break; + } + /* Separate instruction and data caches */ + leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1; + } + + this_cpu_ci->num_levels = level; + this_cpu_ci->num_leaves = leaves; + return 0; +} + +static int __populate_cache_leaves(unsigned int cpu) +{ + unsigned int level, idx; + enum cache_type type; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cacheinfo *this_leaf = this_cpu_ci->info_list; + + for (idx = 0, level = 1; level <= this_cpu_ci->num_levels && + idx < this_cpu_ci->num_leaves; idx++, level++) { + type = get_cache_type(level); + if (type == CACHE_TYPE_SEPARATE) { + ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); + ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + } else { + ci_leaf_init(this_leaf++, type, level); + } + } + return 0; +} + +DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level) +DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves) diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 07d435cf2eea6e..49782282a0271a 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -231,15 +231,3 @@ void __init cpuinfo_store_boot_cpu(void) boot_cpu_data = *info; } - -u64 __attribute_const__ icache_get_ccsidr(void) -{ - u64 ccsidr; - - WARN_ON(preemptible()); - - /* Select L1 I-cache and read its size ID register */ - asm("msr csselr_el1, %1; isb; mrs %0, ccsidr_el1" - : "=r"(ccsidr) : "r"(1L)); - return ccsidr; -} From cf99a48dce66b126391bb33c7709892d3d8002d7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 24 Nov 2014 12:03:32 +0000 Subject: [PATCH 065/601] arm64: introduce common ESR_ELx_* definitions Currently we have separate ESR_EL{1,2}_* macros, despite the fact that the encodings are common. While encodings are architected to refer to the current EL or a lower EL, the macros refer to particular ELs (e.g. ESR_ELx_EC_DABT_EL0). Having these duplicate definitions is redundant, and their naming is misleading. This patch introduces common ESR_ELx_* macros that can be used in all cases, in preparation for later patches which will migrate existing users over. Some additional cleanups are made in the process: * Suffixes for particular exception levelts (e.g. _EL0, _EL1) are replaced with more general _LOW and _CUR suffixes, matching the architectural intent. * ESR_ELx_EC_WFx, rather than ESR_ELx_EC_WFI is introduced, as this EC encoding covers traps from both WFE and WFI. Similarly, ESR_ELx_WFx_ISS_WFE rather than ESR_ELx_EC_WFI_ISS_WFE is introduced. * Multi-bit fields are given consistently named _SHIFT and _MASK macros. * UL() is used for compatiblity with assembly files. * Comments are added for currently unallocated ESR_ELx.EC encodings. For fields other than ESR_ELx.EC, macros are only implemented for fields for which there is already an ESR_EL{1,2}_* macro. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Cc: Marc Zyngier Cc: Peter Maydell Cc: Will Deacon --- arch/arm64/include/asm/esr.h | 79 ++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 72674f4c387150..0fd1b0e15ea87e 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -54,4 +54,83 @@ #define ESR_EL1_EC_BKPT32 (0x38) #define ESR_EL1_EC_BRK64 (0x3C) +#define ESR_ELx_EC_UNKNOWN (0x00) +#define ESR_ELx_EC_WFx (0x01) +/* Unallocated EC: 0x02 */ +#define ESR_ELx_EC_CP15_32 (0x03) +#define ESR_ELx_EC_CP15_64 (0x04) +#define ESR_ELx_EC_CP14_MR (0x05) +#define ESR_ELx_EC_CP14_LS (0x06) +#define ESR_ELx_EC_FP_ASIMD (0x07) +#define ESR_ELx_EC_CP10_ID (0x08) +/* Unallocated EC: 0x09 - 0x0B */ +#define ESR_ELx_EC_CP14_64 (0x0C) +/* Unallocated EC: 0x0d */ +#define ESR_ELx_EC_ILL (0x0E) +/* Unallocated EC: 0x0F - 0x10 */ +#define ESR_ELx_EC_SVC32 (0x11) +#define ESR_ELx_EC_HVC32 (0x12) +#define ESR_ELx_EC_SMC32 (0x13) +/* Unallocated EC: 0x14 */ +#define ESR_ELx_EC_SVC64 (0x15) +#define ESR_ELx_EC_HVC64 (0x16) +#define ESR_ELx_EC_SMC64 (0x17) +#define ESR_ELx_EC_SYS64 (0x18) +/* Unallocated EC: 0x19 - 0x1E */ +#define ESR_ELx_EC_IMP_DEF (0x1f) +#define ESR_ELx_EC_IABT_LOW (0x20) +#define ESR_ELx_EC_IABT_CUR (0x21) +#define ESR_ELx_EC_PC_ALIGN (0x22) +/* Unallocated EC: 0x23 */ +#define ESR_ELx_EC_DABT_LOW (0x24) +#define ESR_ELx_EC_DABT_CUR (0x25) +#define ESR_ELx_EC_SP_ALIGN (0x26) +/* Unallocated EC: 0x27 */ +#define ESR_ELx_EC_FP_EXC32 (0x28) +/* Unallocated EC: 0x29 - 0x2B */ +#define ESR_ELx_EC_FP_EXC64 (0x2C) +/* Unallocated EC: 0x2D - 0x2E */ +#define ESR_ELx_EC_SERROR (0x2F) +#define ESR_ELx_EC_BREAKPT_LOW (0x30) +#define ESR_ELx_EC_BREAKPT_CUR (0x31) +#define ESR_ELx_EC_SOFTSTP_LOW (0x32) +#define ESR_ELx_EC_SOFTSTP_CUR (0x33) +#define ESR_ELx_EC_WATCHPT_LOW (0x34) +#define ESR_ELx_EC_WATCHPT_CUR (0x35) +/* Unallocated EC: 0x36 - 0x37 */ +#define ESR_ELx_EC_BKPT32 (0x38) +/* Unallocated EC: 0x39 */ +#define ESR_ELx_EC_VECTOR32 (0x3A) +/* Unallocted EC: 0x3B */ +#define ESR_ELx_EC_BRK64 (0x3C) +/* Unallocated EC: 0x3D - 0x3F */ +#define ESR_ELx_EC_MAX (0x3F) + +#define ESR_ELx_EC_SHIFT (26) +#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT) + +#define ESR_ELx_IL (UL(1) << 25) +#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1) +#define ESR_ELx_ISV (UL(1) << 24) +#define ESR_ELx_SAS_SHIFT (22) +#define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT) +#define ESR_ELx_SSE (UL(1) << 21) +#define ESR_ELx_SRT_SHIFT (16) +#define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT) +#define ESR_ELx_SF (UL(1) << 15) +#define ESR_ELx_AR (UL(1) << 14) +#define ESR_ELx_EA (UL(1) << 9) +#define ESR_ELx_CM (UL(1) << 8) +#define ESR_ELx_S1PTW (UL(1) << 7) +#define ESR_ELx_WNR (UL(1) << 6) +#define ESR_ELx_FSC (0x3F) +#define ESR_ELx_FSC_TYPE (0x3C) +#define ESR_ELx_FSC_EXTABT (0x10) +#define ESR_ELx_FSC_FAULT (0x04) +#define ESR_ELx_FSC_PERM (0x0C) +#define ESR_ELx_CV (UL(1) << 24) +#define ESR_ELx_COND_SHIFT (20) +#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) +#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) + #endif /* __ASM_ESR_H */ From aed40e0144bdbdadb45f2b623e07aa3157eb74c9 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 24 Nov 2014 12:31:40 +0000 Subject: [PATCH 066/601] arm64: move to ESR_ELx macros Now that we have common ESR_ELx_* macros, move the core arm64 code over to them. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Cc: Marc Zyngier Cc: Peter Maydell Cc: Will Deacon --- arch/arm64/kernel/entry.S | 64 ++++++++++++++++++------------------ arch/arm64/kernel/signal32.c | 2 +- arch/arm64/mm/fault.c | 2 +- 3 files changed, 34 insertions(+), 34 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index fd4fa374e5d2b2..02e6af11776269 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -269,18 +269,18 @@ ENDPROC(el1_error_invalid) el1_sync: kernel_entry 1 mrs x1, esr_el1 // read the syndrome register - lsr x24, x1, #ESR_EL1_EC_SHIFT // exception class - cmp x24, #ESR_EL1_EC_DABT_EL1 // data abort in EL1 + lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class + cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1 b.eq el1_da - cmp x24, #ESR_EL1_EC_SYS64 // configurable trap + cmp x24, #ESR_ELx_EC_SYS64 // configurable trap b.eq el1_undef - cmp x24, #ESR_EL1_EC_SP_ALIGN // stack alignment exception + cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception b.eq el1_sp_pc - cmp x24, #ESR_EL1_EC_PC_ALIGN // pc alignment exception + cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception b.eq el1_sp_pc - cmp x24, #ESR_EL1_EC_UNKNOWN // unknown exception in EL1 + cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL1 b.eq el1_undef - cmp x24, #ESR_EL1_EC_BREAKPT_EL1 // debug exception in EL1 + cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1 b.ge el1_dbg b el1_inv el1_da: @@ -318,7 +318,7 @@ el1_dbg: /* * Debug exception handling */ - cmp x24, #ESR_EL1_EC_BRK64 // if BRK64 + cmp x24, #ESR_ELx_EC_BRK64 // if BRK64 cinc x24, x24, eq // set bit '0' tbz x24, #0, el1_inv // EL1 only mrs x0, far_el1 @@ -375,26 +375,26 @@ el1_preempt: el0_sync: kernel_entry 0 mrs x25, esr_el1 // read the syndrome register - lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class - cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state + lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class + cmp x24, #ESR_ELx_EC_SVC64 // SVC in 64-bit state b.eq el0_svc - cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 + cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0 b.eq el0_da - cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 + cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0 b.eq el0_ia - cmp x24, #ESR_EL1_EC_FP_ASIMD // FP/ASIMD access + cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access b.eq el0_fpsimd_acc - cmp x24, #ESR_EL1_EC_FP_EXC64 // FP/ASIMD exception + cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception b.eq el0_fpsimd_exc - cmp x24, #ESR_EL1_EC_SYS64 // configurable trap + cmp x24, #ESR_ELx_EC_SYS64 // configurable trap b.eq el0_undef - cmp x24, #ESR_EL1_EC_SP_ALIGN // stack alignment exception + cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception b.eq el0_sp_pc - cmp x24, #ESR_EL1_EC_PC_ALIGN // pc alignment exception + cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception b.eq el0_sp_pc - cmp x24, #ESR_EL1_EC_UNKNOWN // unknown exception in EL0 + cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef - cmp x24, #ESR_EL1_EC_BREAKPT_EL0 // debug exception in EL0 + cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0 b.ge el0_dbg b el0_inv @@ -403,30 +403,30 @@ el0_sync: el0_sync_compat: kernel_entry 0, 32 mrs x25, esr_el1 // read the syndrome register - lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class - cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state + lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class + cmp x24, #ESR_ELx_EC_SVC32 // SVC in 32-bit state b.eq el0_svc_compat - cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 + cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0 b.eq el0_da - cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 + cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0 b.eq el0_ia - cmp x24, #ESR_EL1_EC_FP_ASIMD // FP/ASIMD access + cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access b.eq el0_fpsimd_acc - cmp x24, #ESR_EL1_EC_FP_EXC32 // FP/ASIMD exception + cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception b.eq el0_fpsimd_exc - cmp x24, #ESR_EL1_EC_UNKNOWN // unknown exception in EL0 + cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef - cmp x24, #ESR_EL1_EC_CP15_32 // CP15 MRC/MCR trap + cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap b.eq el0_undef - cmp x24, #ESR_EL1_EC_CP15_64 // CP15 MRRC/MCRR trap + cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap b.eq el0_undef - cmp x24, #ESR_EL1_EC_CP14_MR // CP14 MRC/MCR trap + cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap b.eq el0_undef - cmp x24, #ESR_EL1_EC_CP14_LS // CP14 LDC/STC trap + cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap b.eq el0_undef - cmp x24, #ESR_EL1_EC_CP14_64 // CP14 MRRC/MCRR trap + cmp x24, #ESR_ELx_EC_CP14_64 // CP14 MRRC/MCRR trap b.eq el0_undef - cmp x24, #ESR_EL1_EC_BREAKPT_EL0 // debug exception in EL0 + cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0 b.ge el0_dbg b el0_inv el0_svc_compat: diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 5a1ba6e80d4e20..192d900c058ff5 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -501,7 +501,7 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf, __put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.trap_no, err); /* set the compat FSR WnR */ - __put_user_error(!!(current->thread.fault_code & ESR_EL1_WRITE) << + __put_user_error(!!(current->thread.fault_code & ESR_ELx_WNR) << FSR_WRITE_SHIFT, &sf->uc.uc_mcontext.error_code, err); __put_user_error(current->thread.fault_address, &sf->uc.uc_mcontext.fault_address, err); __put_user_error(set->sig[0], &sf->uc.uc_mcontext.oldmask, err); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c11cd27ca8f580..96da13167d4a5c 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -219,7 +219,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (esr & ESR_LNX_EXEC) { vm_flags = VM_EXEC; - } else if ((esr & ESR_EL1_WRITE) && !(esr & ESR_EL1_CM)) { + } else if ((esr & ESR_ELx_WNR) && !(esr & ESR_ELx_CM)) { vm_flags = VM_WRITE; mm_flags |= FAULT_FLAG_WRITE; } From 60a1f02c9e91e0796b54e83b14fb8a07f7a568b6 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 18 Nov 2014 12:16:30 +0000 Subject: [PATCH 067/601] arm64: decode ESR_ELx.EC when reporting exceptions To aid the developer when something triggers an unexpected exception, decode the ESR_ELx.EC field when logging an ESR_ELx value. This doesn't tell the developer the specifics of the exception encoded in the remaining IL and ISS bits, but it can be helpful to distinguish between exception classes (e.g. SError and a data abort) without having to manually decode the field, which can be tiresome. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Cc: Marc Zyngier Cc: Peter Maydell Cc: Will Deacon --- arch/arm64/include/asm/esr.h | 6 +++++ arch/arm64/kernel/traps.c | 50 ++++++++++++++++++++++++++++++++++-- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 0fd1b0e15ea87e..c315543d50f914 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -133,4 +133,10 @@ #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) #define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) +#ifndef __ASSEMBLY__ +#include + +const char *esr_get_class_string(u32 esr); +#endif /* __ASSEMBLY */ + #endif /* __ASM_ESR_H */ diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 0a801e3743d5df..1ef2940df13c5e 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -33,6 +33,7 @@ #include #include +#include #include #include #include @@ -373,6 +374,51 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs) return sys_ni_syscall(); } +static const char *esr_class_str[] = { + [0 ... ESR_ELx_EC_MAX] = "UNRECOGNIZED EC", + [ESR_ELx_EC_UNKNOWN] = "Unknown/Uncategorized", + [ESR_ELx_EC_WFx] = "WFI/WFE", + [ESR_ELx_EC_CP15_32] = "CP15 MCR/MRC", + [ESR_ELx_EC_CP15_64] = "CP15 MCRR/MRRC", + [ESR_ELx_EC_CP14_MR] = "CP14 MCR/MRC", + [ESR_ELx_EC_CP14_LS] = "CP14 LDC/STC", + [ESR_ELx_EC_FP_ASIMD] = "ASIMD", + [ESR_ELx_EC_CP10_ID] = "CP10 MRC/VMRS", + [ESR_ELx_EC_CP14_64] = "CP14 MCRR/MRRC", + [ESR_ELx_EC_ILL] = "PSTATE.IL", + [ESR_ELx_EC_SVC32] = "SVC (AArch32)", + [ESR_ELx_EC_HVC32] = "HVC (AArch32)", + [ESR_ELx_EC_SMC32] = "SMC (AArch32)", + [ESR_ELx_EC_SVC64] = "SVC (AArch64)", + [ESR_ELx_EC_HVC64] = "HVC (AArch64)", + [ESR_ELx_EC_SMC64] = "SMC (AArch64)", + [ESR_ELx_EC_SYS64] = "MSR/MRS (AArch64)", + [ESR_ELx_EC_IMP_DEF] = "EL3 IMP DEF", + [ESR_ELx_EC_IABT_LOW] = "IABT (lower EL)", + [ESR_ELx_EC_IABT_CUR] = "IABT (current EL)", + [ESR_ELx_EC_PC_ALIGN] = "PC Alignment", + [ESR_ELx_EC_DABT_LOW] = "DABT (lower EL)", + [ESR_ELx_EC_DABT_CUR] = "DABT (current EL)", + [ESR_ELx_EC_SP_ALIGN] = "SP Alignment", + [ESR_ELx_EC_FP_EXC32] = "FP (AArch32)", + [ESR_ELx_EC_FP_EXC64] = "FP (AArch64)", + [ESR_ELx_EC_SERROR] = "SError", + [ESR_ELx_EC_BREAKPT_LOW] = "Breakpoint (lower EL)", + [ESR_ELx_EC_BREAKPT_CUR] = "Breakpoint (current EL)", + [ESR_ELx_EC_SOFTSTP_LOW] = "Software Step (lower EL)", + [ESR_ELx_EC_SOFTSTP_CUR] = "Software Step (current EL)", + [ESR_ELx_EC_WATCHPT_LOW] = "Watchpoint (lower EL)", + [ESR_ELx_EC_WATCHPT_CUR] = "Watchpoint (current EL)", + [ESR_ELx_EC_BKPT32] = "BKPT (AArch32)", + [ESR_ELx_EC_VECTOR32] = "Vector catch (AArch32)", + [ESR_ELx_EC_BRK64] = "BRK (AArch64)", +}; + +const char *esr_get_class_string(u32 esr) +{ + return esr_class_str[esr >> ESR_ELx_EC_SHIFT]; +} + /* * bad_mode handles the impossible case in the exception vector. */ @@ -382,8 +428,8 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) void __user *pc = (void __user *)instruction_pointer(regs); console_verbose(); - pr_crit("Bad mode in %s handler detected, code 0x%08x\n", - handler[reason], esr); + pr_crit("Bad mode in %s handler detected, code 0x%08x -- %s\n", + handler[reason], esr, esr_get_class_string(esr)); __show_regs(regs); info.si_signo = SIGILL; From c6d01a947a51193e839516165286bc8d14a0e409 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 24 Nov 2014 13:59:30 +0000 Subject: [PATCH 068/601] arm64: kvm: move to ESR_ELx macros Now that we have common ESR_ELx macros, make use of them in the arm64 KVM code. The addition of to the include path highlighted badly ordered (i.e. not alphabetical) include lists; these are changed to alphabetical order. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Christoffer Dall Cc: Catalin Marinas Cc: Marc Zyngier Cc: Peter Maydell Cc: Will Deacon --- arch/arm64/include/asm/kvm_emulate.h | 28 +++++++++++++----------- arch/arm64/kvm/emulate.c | 5 +++-- arch/arm64/kvm/handle_exit.c | 32 +++++++++++++++------------- arch/arm64/kvm/hyp.S | 17 ++++++++------- arch/arm64/kvm/inject_fault.c | 14 ++++++------ arch/arm64/kvm/sys_regs.c | 23 +++++++++++--------- 6 files changed, 64 insertions(+), 55 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 8127e45e263752..5c56c0d2cef186 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -23,8 +23,10 @@ #define __ARM64_KVM_EMULATE_H__ #include -#include + +#include #include +#include #include #include @@ -128,63 +130,63 @@ static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu) static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_ISV); + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV); } static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_WNR); + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR); } static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SSE); + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); } static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) { - return (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SRT_MASK) >> ESR_EL2_SRT_SHIFT; + return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; } static inline bool kvm_vcpu_dabt_isextabt(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EA); + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_EA); } static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_S1PTW); + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); } static inline int kvm_vcpu_dabt_get_as(const struct kvm_vcpu *vcpu) { - return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_EL2_SAS) >> ESR_EL2_SAS_SHIFT); + return 1 << ((kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); } /* This one is not specific to Data Abort */ static inline bool kvm_vcpu_trap_il_is32bit(const struct kvm_vcpu *vcpu) { - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_EL2_IL); + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_IL); } static inline u8 kvm_vcpu_trap_get_class(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) >> ESR_EL2_EC_SHIFT; + return kvm_vcpu_get_hsr(vcpu) >> ESR_ELx_EC_SHIFT; } static inline bool kvm_vcpu_trap_is_iabt(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_trap_get_class(vcpu) == ESR_EL2_EC_IABT; + return kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_IABT_LOW; } static inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC; + return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC; } static inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu) { - return kvm_vcpu_get_hsr(vcpu) & ESR_EL2_FSC_TYPE; + return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_FSC_TYPE; } static inline unsigned long kvm_vcpu_get_mpidr(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/emulate.c b/arch/arm64/kvm/emulate.c index 124418d17049f5..f87d8fbaa48ddb 100644 --- a/arch/arm64/kvm/emulate.c +++ b/arch/arm64/kvm/emulate.c @@ -22,6 +22,7 @@ */ #include +#include #include /* @@ -55,8 +56,8 @@ static int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) { u32 esr = kvm_vcpu_get_hsr(vcpu); - if (esr & ESR_EL2_CV) - return (esr & ESR_EL2_COND) >> ESR_EL2_COND_SHIFT; + if (esr & ESR_ELx_CV) + return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT; return -1; } diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 34b8bd0711e942..bcbc923d3060c9 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -21,8 +21,10 @@ #include #include -#include + +#include #include +#include #include #include @@ -61,7 +63,7 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) */ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) { - if (kvm_vcpu_get_hsr(vcpu) & ESR_EL2_EC_WFI_ISS_WFE) + if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) kvm_vcpu_on_spin(vcpu); else kvm_vcpu_block(vcpu); @@ -72,19 +74,19 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) } static exit_handle_fn arm_exit_handlers[] = { - [ESR_EL2_EC_WFI] = kvm_handle_wfx, - [ESR_EL2_EC_CP15_32] = kvm_handle_cp15_32, - [ESR_EL2_EC_CP15_64] = kvm_handle_cp15_64, - [ESR_EL2_EC_CP14_MR] = kvm_handle_cp14_32, - [ESR_EL2_EC_CP14_LS] = kvm_handle_cp14_load_store, - [ESR_EL2_EC_CP14_64] = kvm_handle_cp14_64, - [ESR_EL2_EC_HVC32] = handle_hvc, - [ESR_EL2_EC_SMC32] = handle_smc, - [ESR_EL2_EC_HVC64] = handle_hvc, - [ESR_EL2_EC_SMC64] = handle_smc, - [ESR_EL2_EC_SYS64] = kvm_handle_sys_reg, - [ESR_EL2_EC_IABT] = kvm_handle_guest_abort, - [ESR_EL2_EC_DABT] = kvm_handle_guest_abort, + [ESR_ELx_EC_WFx] = kvm_handle_wfx, + [ESR_ELx_EC_CP15_32] = kvm_handle_cp15_32, + [ESR_ELx_EC_CP15_64] = kvm_handle_cp15_64, + [ESR_ELx_EC_CP14_MR] = kvm_handle_cp14_32, + [ESR_ELx_EC_CP14_LS] = kvm_handle_cp14_load_store, + [ESR_ELx_EC_CP14_64] = kvm_handle_cp14_64, + [ESR_ELx_EC_HVC32] = handle_hvc, + [ESR_ELx_EC_SMC32] = handle_smc, + [ESR_ELx_EC_HVC64] = handle_hvc, + [ESR_ELx_EC_SMC64] = handle_smc, + [ESR_ELx_EC_SYS64] = kvm_handle_sys_reg, + [ESR_ELx_EC_IABT_LOW] = kvm_handle_guest_abort, + [ESR_ELx_EC_DABT_LOW] = kvm_handle_guest_abort, }; static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index fbe909fb0a1a8b..c0d820280a5eb5 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -17,15 +17,16 @@ #include -#include -#include #include +#include #include +#include #include #include -#include #include +#include #include +#include #define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) #define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) @@ -1140,9 +1141,9 @@ el1_sync: // Guest trapped into EL2 push x2, x3 mrs x1, esr_el2 - lsr x2, x1, #ESR_EL2_EC_SHIFT + lsr x2, x1, #ESR_ELx_EC_SHIFT - cmp x2, #ESR_EL2_EC_HVC64 + cmp x2, #ESR_ELx_EC_HVC64 b.ne el1_trap mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest @@ -1177,13 +1178,13 @@ el1_trap: * x1: ESR * x2: ESR_EC */ - cmp x2, #ESR_EL2_EC_DABT - mov x0, #ESR_EL2_EC_IABT + cmp x2, #ESR_ELx_EC_DABT_LOW + mov x0, #ESR_ELx_EC_IABT_LOW ccmp x2, x0, #4, ne b.ne 1f // Not an abort we care about /* This is an abort. Check for permission fault */ - and x2, x1, #ESR_EL2_FSC_TYPE + and x2, x1, #ESR_ELx_FSC_TYPE cmp x2, #FSC_PERM b.ne 1f // Not a permission fault diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 81a02a8762b054..f02530e726f693 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -118,27 +118,27 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr * instruction set. Report an external synchronous abort. */ if (kvm_vcpu_trap_il_is32bit(vcpu)) - esr |= ESR_EL1_IL; + esr |= ESR_ELx_IL; /* * Here, the guest runs in AArch64 mode when in EL1. If we get * an AArch32 fault, it means we managed to trap an EL0 fault. */ if (is_aarch32 || (cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t) - esr |= (ESR_EL1_EC_IABT_EL0 << ESR_EL1_EC_SHIFT); + esr |= (ESR_ELx_EC_IABT_LOW << ESR_ELx_EC_SHIFT); else - esr |= (ESR_EL1_EC_IABT_EL1 << ESR_EL1_EC_SHIFT); + esr |= (ESR_ELx_EC_IABT_CUR << ESR_ELx_EC_SHIFT); if (!is_iabt) - esr |= ESR_EL1_EC_DABT_EL0; + esr |= ESR_ELx_EC_DABT_LOW; - vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_EL2_EC_xABT_xFSR_EXTABT; + vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT; } static void inject_undef64(struct kvm_vcpu *vcpu) { unsigned long cpsr = *vcpu_cpsr(vcpu); - u32 esr = (ESR_EL1_EC_UNKNOWN << ESR_EL1_EC_SHIFT); + u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); *vcpu_spsr(vcpu) = cpsr; *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); @@ -151,7 +151,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) * set. */ if (kvm_vcpu_trap_il_is32bit(vcpu)) - esr |= ESR_EL1_IL; + esr |= ESR_ELx_IL; vcpu_sys_reg(vcpu, ESR_EL1) = esr; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 3d7c2df89946cc..6b859d7a48e77a 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -20,17 +20,20 @@ * along with this program. If not, see . */ -#include #include +#include #include -#include -#include -#include -#include -#include + #include #include #include +#include +#include +#include +#include +#include +#include + #include #include "sys_regs.h" @@ -815,12 +818,12 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu, int cp; switch(hsr_ec) { - case ESR_EL2_EC_CP15_32: - case ESR_EL2_EC_CP15_64: + case ESR_ELx_EC_CP15_32: + case ESR_ELx_EC_CP15_64: cp = 15; break; - case ESR_EL2_EC_CP14_MR: - case ESR_EL2_EC_CP14_64: + case ESR_ELx_EC_CP14_MR: + case ESR_ELx_EC_CP14_64: cp = 14; break; default: From 4a939087bd02ce38135a3924ad3099685abe4c5f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 24 Nov 2014 14:03:52 +0000 Subject: [PATCH 069/601] arm64: remove ESR_EL1_* macros Now that all users have been moved over to the common ESR_ELx_* macros, remove the redundant ESR_EL1 macros. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Reviewed-by: Christoffer Dall Cc: Marc Zyngier Cc: Peter Maydell Cc: Will Deacon --- arch/arm64/include/asm/esr.h | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index c315543d50f914..62167090937d8c 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -18,42 +18,6 @@ #ifndef __ASM_ESR_H #define __ASM_ESR_H -#define ESR_EL1_WRITE (1 << 6) -#define ESR_EL1_CM (1 << 8) -#define ESR_EL1_IL (1 << 25) - -#define ESR_EL1_EC_SHIFT (26) -#define ESR_EL1_EC_UNKNOWN (0x00) -#define ESR_EL1_EC_WFI (0x01) -#define ESR_EL1_EC_CP15_32 (0x03) -#define ESR_EL1_EC_CP15_64 (0x04) -#define ESR_EL1_EC_CP14_MR (0x05) -#define ESR_EL1_EC_CP14_LS (0x06) -#define ESR_EL1_EC_FP_ASIMD (0x07) -#define ESR_EL1_EC_CP10_ID (0x08) -#define ESR_EL1_EC_CP14_64 (0x0C) -#define ESR_EL1_EC_ILL_ISS (0x0E) -#define ESR_EL1_EC_SVC32 (0x11) -#define ESR_EL1_EC_SVC64 (0x15) -#define ESR_EL1_EC_SYS64 (0x18) -#define ESR_EL1_EC_IABT_EL0 (0x20) -#define ESR_EL1_EC_IABT_EL1 (0x21) -#define ESR_EL1_EC_PC_ALIGN (0x22) -#define ESR_EL1_EC_DABT_EL0 (0x24) -#define ESR_EL1_EC_DABT_EL1 (0x25) -#define ESR_EL1_EC_SP_ALIGN (0x26) -#define ESR_EL1_EC_FP_EXC32 (0x28) -#define ESR_EL1_EC_FP_EXC64 (0x2C) -#define ESR_EL1_EC_SERROR (0x2F) -#define ESR_EL1_EC_BREAKPT_EL0 (0x30) -#define ESR_EL1_EC_BREAKPT_EL1 (0x31) -#define ESR_EL1_EC_SOFTSTP_EL0 (0x32) -#define ESR_EL1_EC_SOFTSTP_EL1 (0x33) -#define ESR_EL1_EC_WATCHPT_EL0 (0x34) -#define ESR_EL1_EC_WATCHPT_EL1 (0x35) -#define ESR_EL1_EC_BKPT32 (0x38) -#define ESR_EL1_EC_BRK64 (0x3C) - #define ESR_ELx_EC_UNKNOWN (0x00) #define ESR_ELx_EC_WFx (0x01) /* Unallocated EC: 0x02 */ From 6e53031ed840fc6d6ad4d4e5778eed5a50183f23 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 24 Nov 2014 14:05:44 +0000 Subject: [PATCH 070/601] arm64: kvm: remove ESR_EL2_* macros Now that all users have been moved over to the common ESR_ELx_* macros, remove the redundant ESR_EL2 macros. To maintain compatibility with the fault handling code shared with 32-bit, the FSC_{FAULT,PERM} macros are retained as aliases for the common ESR_ELx_FSC_{FAULT,PERM} definitions. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Acked-by: Christoffer Dall Reviewed-by: Christoffer Dall Cc: Catalin Marinas Cc: Marc Zyngier Cc: Peter Maydell Cc: Will Deacon --- arch/arm64/include/asm/kvm_arm.h | 73 ++------------------------------ 1 file changed, 4 insertions(+), 69 deletions(-) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 8afb863f5a9e3a..94674eb7e7bb3c 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -18,6 +18,7 @@ #ifndef __ARM64_KVM_ARM_H__ #define __ARM64_KVM_ARM_H__ +#include #include #include @@ -184,77 +185,11 @@ #define MDCR_EL2_TPMCR (1 << 5) #define MDCR_EL2_HPMN_MASK (0x1F) -/* Exception Syndrome Register (ESR) bits */ -#define ESR_EL2_EC_SHIFT (26) -#define ESR_EL2_EC (UL(0x3f) << ESR_EL2_EC_SHIFT) -#define ESR_EL2_IL (UL(1) << 25) -#define ESR_EL2_ISS (ESR_EL2_IL - 1) -#define ESR_EL2_ISV_SHIFT (24) -#define ESR_EL2_ISV (UL(1) << ESR_EL2_ISV_SHIFT) -#define ESR_EL2_SAS_SHIFT (22) -#define ESR_EL2_SAS (UL(3) << ESR_EL2_SAS_SHIFT) -#define ESR_EL2_SSE (1 << 21) -#define ESR_EL2_SRT_SHIFT (16) -#define ESR_EL2_SRT_MASK (0x1f << ESR_EL2_SRT_SHIFT) -#define ESR_EL2_SF (1 << 15) -#define ESR_EL2_AR (1 << 14) -#define ESR_EL2_EA (1 << 9) -#define ESR_EL2_CM (1 << 8) -#define ESR_EL2_S1PTW (1 << 7) -#define ESR_EL2_WNR (1 << 6) -#define ESR_EL2_FSC (0x3f) -#define ESR_EL2_FSC_TYPE (0x3c) - -#define ESR_EL2_CV_SHIFT (24) -#define ESR_EL2_CV (UL(1) << ESR_EL2_CV_SHIFT) -#define ESR_EL2_COND_SHIFT (20) -#define ESR_EL2_COND (UL(0xf) << ESR_EL2_COND_SHIFT) - - -#define FSC_FAULT (0x04) -#define FSC_PERM (0x0c) +/* For compatibility with fault code shared with 32-bit */ +#define FSC_FAULT ESR_ELx_FSC_FAULT +#define FSC_PERM ESR_ELx_FSC_PERM /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~UL(0xf)) -#define ESR_EL2_EC_UNKNOWN (0x00) -#define ESR_EL2_EC_WFI (0x01) -#define ESR_EL2_EC_CP15_32 (0x03) -#define ESR_EL2_EC_CP15_64 (0x04) -#define ESR_EL2_EC_CP14_MR (0x05) -#define ESR_EL2_EC_CP14_LS (0x06) -#define ESR_EL2_EC_FP_ASIMD (0x07) -#define ESR_EL2_EC_CP10_ID (0x08) -#define ESR_EL2_EC_CP14_64 (0x0C) -#define ESR_EL2_EC_ILL_ISS (0x0E) -#define ESR_EL2_EC_SVC32 (0x11) -#define ESR_EL2_EC_HVC32 (0x12) -#define ESR_EL2_EC_SMC32 (0x13) -#define ESR_EL2_EC_SVC64 (0x15) -#define ESR_EL2_EC_HVC64 (0x16) -#define ESR_EL2_EC_SMC64 (0x17) -#define ESR_EL2_EC_SYS64 (0x18) -#define ESR_EL2_EC_IABT (0x20) -#define ESR_EL2_EC_IABT_HYP (0x21) -#define ESR_EL2_EC_PC_ALIGN (0x22) -#define ESR_EL2_EC_DABT (0x24) -#define ESR_EL2_EC_DABT_HYP (0x25) -#define ESR_EL2_EC_SP_ALIGN (0x26) -#define ESR_EL2_EC_FP_EXC32 (0x28) -#define ESR_EL2_EC_FP_EXC64 (0x2C) -#define ESR_EL2_EC_SERROR (0x2F) -#define ESR_EL2_EC_BREAKPT (0x30) -#define ESR_EL2_EC_BREAKPT_HYP (0x31) -#define ESR_EL2_EC_SOFTSTP (0x32) -#define ESR_EL2_EC_SOFTSTP_HYP (0x33) -#define ESR_EL2_EC_WATCHPT (0x34) -#define ESR_EL2_EC_WATCHPT_HYP (0x35) -#define ESR_EL2_EC_BKPT32 (0x38) -#define ESR_EL2_EC_VECTOR32 (0x3A) -#define ESR_EL2_EC_BRK64 (0x3C) - -#define ESR_EL2_EC_xABT_xFSR_EXTABT 0x10 - -#define ESR_EL2_EC_WFI_ISS_WFE (1 << 0) - #endif /* __ARM64_KVM_ARM_H__ */ From 056bb5f51c357ee00046fde4929a03468ff45e7a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 7 Jan 2015 11:26:18 +0000 Subject: [PATCH 071/601] arm64: kvm: decode ESR_ELx.EC when reporting exceptions To aid the developer when something triggers an unexpected exception, decode the ESR_ELx.EC field when logging an ESR_ELx value using the newly introduced esr_get_class_string. This doesn't tell the developer the specifics of the exception encoded in the remaining IL and ISS bits, but it can be helpful to distinguish between exception classes (e.g. SError and a data abort) without having to manually decode the field, which can be tiresome. Signed-off-by: Mark Rutland Acked-by: Christoffer Dall Reviewed-by: Christoffer Dall Cc: Catalin Marinas Cc: Marc Zyngier Cc: Peter Maydell Cc: Will Deacon --- arch/arm64/kvm/handle_exit.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index bcbc923d3060c9..29b184a8f3f861 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -91,12 +91,13 @@ static exit_handle_fn arm_exit_handlers[] = { static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) { - u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); + u32 hsr = kvm_vcpu_get_hsr(vcpu); + u8 hsr_ec = hsr >> ESR_ELx_EC_SHIFT; if (hsr_ec >= ARRAY_SIZE(arm_exit_handlers) || !arm_exit_handlers[hsr_ec]) { - kvm_err("Unknown exception class: hsr: %#08x\n", - (unsigned int)kvm_vcpu_get_hsr(vcpu)); + kvm_err("Unknown exception class: hsr: %#08x -- %s\n", + hsr, esr_get_class_string(hsr)); BUG(); } From d67703a8a69eecc8367bb9f848640b9efd430337 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Thu, 15 Jan 2015 12:07:33 +0000 Subject: [PATCH 072/601] arm64: kill off the libgcc dependency The arm64 kernel builds fine without the libgcc. Actually it should not be used at all in the kernel. The following are the reasons indicated by Russell King: Although libgcc is part of the compiler, libgcc is built with the expectation that it will be running in userland - it expects to link to a libc. That's why you can't build libgcc without having the glibc headers around. [...] Meanwhile, having the kernel build the compiler support functions that it needs ensures that (a) we know what compiler support functions are being used, (b) we know the implementation of those support functions are sane for use in the kernel, (c) we can build them with appropriate compiler flags for best performance, and (d) we remove an unnecessary dependency on the build toolchain. Signed-off-by: Kevin Hao Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 1c43cec971b5cd..a20c28348be438 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -15,8 +15,6 @@ CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) OBJCOPYFLAGS :=-O binary -R .note -R .note.gnu.build-id -R .comment -S GZFLAGS :=-9 -LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) - KBUILD_DEFCONFIG := defconfig KBUILD_CFLAGS += -mgeneral-regs-only @@ -50,7 +48,6 @@ core-$(CONFIG_KVM) += arch/arm64/kvm/ core-$(CONFIG_XEN) += arch/arm64/xen/ core-$(CONFIG_CRYPTO) += arch/arm64/crypto/ libs-y := arch/arm64/lib/ $(libs-y) -libs-y += $(LIBGCC) libs-$(CONFIG_EFI_STUB) += drivers/firmware/efi/libstub/ # Default target when executing plain make From 0b7857dbeb256d1a9c0c606ec4b5f417e159040b Mon Sep 17 00:00:00 2001 From: Yalin Wang Date: Fri, 16 Jan 2015 02:45:55 +0100 Subject: [PATCH 073/601] ARM: 8287/1: add bitrev.h file to support rbit instruction This patch add bitrev.h file to support rbit instruction, so that we can do bitrev operation by hardware. Signed-off-by: Yalin Wang Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + arch/arm/include/asm/bitrev.h | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 arch/arm/include/asm/bitrev.h diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 97d07ed60a0b7b..c3a986d4512532 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -29,6 +29,7 @@ config ARM select HANDLE_DOMAIN_IRQ select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL if (AEABI && !OABI_COMPAT) + select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_KGDB select HAVE_ARCH_SECCOMP_FILTER if (AEABI && !OABI_COMPAT) diff --git a/arch/arm/include/asm/bitrev.h b/arch/arm/include/asm/bitrev.h new file mode 100644 index 00000000000000..ec291c350ea381 --- /dev/null +++ b/arch/arm/include/asm/bitrev.h @@ -0,0 +1,20 @@ +#ifndef __ASM_BITREV_H +#define __ASM_BITREV_H + +static __always_inline __attribute_const__ u32 __arch_bitrev32(u32 x) +{ + __asm__ ("rbit %0, %1" : "=r" (x) : "r" (x)); + return x; +} + +static __always_inline __attribute_const__ u16 __arch_bitrev16(u16 x) +{ + return __arch_bitrev32((u32)x) >> 16; +} + +static __always_inline __attribute_const__ u8 __arch_bitrev8(u8 x) +{ + return __arch_bitrev32((u32)x) >> 24; +} + +#endif From 944e9df1d4f71f946aa044abc00726346e3c597c Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 8 Jan 2015 07:48:58 +0100 Subject: [PATCH 074/601] ARM: 8257/1: OMAP2+: use common l2cache initialization code This patch implements generic DT L2C initialisation (the one from init_IRQ in arch/arm/kernel/irq.c) for Omap4 and AM43 platforms and kills the SoC specific stuff in arch/arm/mach-omap2/omap4-common.c. Signed-off-by: Marek Szyprowski Tested-by: Nishanth Menon Acked-by: Nishanth Menon Acked-by: Tony Lindgren Signed-off-by: Russell King --- arch/arm/mach-omap2/board-generic.c | 6 ++++++ arch/arm/mach-omap2/common.h | 8 ++++++++ arch/arm/mach-omap2/omap4-common.c | 16 +--------------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index 608079a1aba677..c5c480b76da57d 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -171,6 +171,9 @@ static const char *const omap4_boards_compat[] __initconst = { }; DT_MACHINE_START(OMAP4_DT, "Generic OMAP4 (Flattened Device Tree)") + .l2c_aux_val = OMAP_L2C_AUX_CTRL, + .l2c_aux_mask = 0xcf9fffff, + .l2c_write_sec = omap4_l2c310_write_sec, .reserve = omap_reserve, .smp = smp_ops(omap4_smp_ops), .map_io = omap4_map_io, @@ -214,6 +217,9 @@ static const char *const am43_boards_compat[] __initconst = { }; DT_MACHINE_START(AM43_DT, "Generic AM43 (Flattened Device Tree)") + .l2c_aux_val = OMAP_L2C_AUX_CTRL, + .l2c_aux_mask = 0xcf9fffff, + .l2c_write_sec = omap4_l2c310_write_sec, .map_io = am33xx_map_io, .init_early = am43xx_init_early, .init_late = am43xx_init_late, diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h index 377eea849e7bcd..2610c9f8d29fd7 100644 --- a/arch/arm/mach-omap2/common.h +++ b/arch/arm/mach-omap2/common.h @@ -35,6 +35,7 @@ #include #include +#include #include "i2c.h" #include "serial.h" @@ -94,11 +95,18 @@ extern void omap3_gptimer_timer_init(void); extern void omap4_local_timer_init(void); #ifdef CONFIG_CACHE_L2X0 int omap_l2_cache_init(void); +#define OMAP_L2C_AUX_CTRL (L2C_AUX_CTRL_SHARED_OVERRIDE | \ + L310_AUX_CTRL_DATA_PREFETCH | \ + L310_AUX_CTRL_INSTR_PREFETCH) +void omap4_l2c310_write_sec(unsigned long val, unsigned reg); #else static inline int omap_l2_cache_init(void) { return 0; } + +#define OMAP_L2C_AUX_CTRL 0 +#define omap4_l2c310_write_sec NULL #endif extern void omap5_realtime_timer_init(void); diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index b7cb44abe49b35..fe99ceff2e2d63 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -166,7 +166,7 @@ void __iomem *omap4_get_l2cache_base(void) return l2cache_base; } -static void omap4_l2c310_write_sec(unsigned long val, unsigned reg) +void omap4_l2c310_write_sec(unsigned long val, unsigned reg) { unsigned smc_op; @@ -201,24 +201,10 @@ static void omap4_l2c310_write_sec(unsigned long val, unsigned reg) int __init omap_l2_cache_init(void) { - u32 aux_ctrl; - /* Static mapping, never released */ l2cache_base = ioremap(OMAP44XX_L2CACHE_BASE, SZ_4K); if (WARN_ON(!l2cache_base)) return -ENOMEM; - - /* 16-way associativity, parity disabled, way size - 64KB (es2.0 +) */ - aux_ctrl = L2C_AUX_CTRL_SHARED_OVERRIDE | - L310_AUX_CTRL_DATA_PREFETCH | - L310_AUX_CTRL_INSTR_PREFETCH; - - outer_cache.write_sec = omap4_l2c310_write_sec; - if (of_have_populated_dt()) - l2x0_of_init(aux_ctrl, 0xcf9fffff); - else - l2x0_init(l2cache_base, aux_ctrl, 0xcf9fffff); - return 0; } #endif From 00218241aa0846e75d31b1dbadb5f8a76be1cc97 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 8 Jan 2015 07:49:41 +0100 Subject: [PATCH 075/601] ARM: 8258/1: l2c: use l2c_write_sec() for restoring latency and filter regs All four register for latency and filter settings cannot be written in non-secure mode and they should go through l2c_write_sec(). More on this can be found in CoreLink Level 2 Cache Controller L2C-310 Technical Reference Manual, 3.2. Register summary, table 3.1. This have been checked the TRM for r3p3, but it should be uniform for all revisions. Reported-by: Nishanth Menon Suggested-by: Tomasz Figa Signed-off-by: Marek Szyprowski Tested-by: Nishanth Menon Acked-by: Nishanth Menon Acked-by: Tony Lindgren Signed-off-by: Russell King --- arch/arm/mm/cache-l2x0.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 5e65ca8dea62cf..b83c401ca50c4a 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -623,14 +623,14 @@ static void l2c310_resume(void) unsigned revision; /* restore pl310 setup */ - writel_relaxed(l2x0_saved_regs.tag_latency, - base + L310_TAG_LATENCY_CTRL); - writel_relaxed(l2x0_saved_regs.data_latency, - base + L310_DATA_LATENCY_CTRL); - writel_relaxed(l2x0_saved_regs.filter_end, - base + L310_ADDR_FILTER_END); - writel_relaxed(l2x0_saved_regs.filter_start, - base + L310_ADDR_FILTER_START); + l2c_write_sec(l2x0_saved_regs.tag_latency, base, + L310_TAG_LATENCY_CTRL); + l2c_write_sec(l2x0_saved_regs.data_latency, base, + L310_DATA_LATENCY_CTRL); + l2c_write_sec(l2x0_saved_regs.filter_end, base, + L310_ADDR_FILTER_END); + l2c_write_sec(l2x0_saved_regs.filter_start, base, + L310_ADDR_FILTER_START); revision = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK; @@ -1135,28 +1135,28 @@ static void __init l2c310_of_parse(const struct device_node *np, of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag)); if (tag[0] && tag[1] && tag[2]) - writel_relaxed( + l2c_write_sec( L310_LATENCY_CTRL_RD(tag[0] - 1) | L310_LATENCY_CTRL_WR(tag[1] - 1) | L310_LATENCY_CTRL_SETUP(tag[2] - 1), - l2x0_base + L310_TAG_LATENCY_CTRL); + l2x0_base, L310_TAG_LATENCY_CTRL); of_property_read_u32_array(np, "arm,data-latency", data, ARRAY_SIZE(data)); if (data[0] && data[1] && data[2]) - writel_relaxed( + l2c_write_sec( L310_LATENCY_CTRL_RD(data[0] - 1) | L310_LATENCY_CTRL_WR(data[1] - 1) | L310_LATENCY_CTRL_SETUP(data[2] - 1), - l2x0_base + L310_DATA_LATENCY_CTRL); + l2x0_base, L310_DATA_LATENCY_CTRL); of_property_read_u32_array(np, "arm,filter-ranges", filter, ARRAY_SIZE(filter)); if (filter[1]) { - writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M), - l2x0_base + L310_ADDR_FILTER_END); - writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN, - l2x0_base + L310_ADDR_FILTER_START); + l2c_write_sec(ALIGN(filter[0] + filter[1], SZ_1M), + l2x0_base, L310_ADDR_FILTER_END); + l2c_write_sec((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN, + l2x0_base, L310_ADDR_FILTER_START); } ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K); From 6b49241ac2525818508ee2baff9a58541c65421c Mon Sep 17 00:00:00 2001 From: Tomasz Figa Date: Thu, 8 Jan 2015 07:50:29 +0100 Subject: [PATCH 076/601] ARM: 8259/1: l2c: Refactor the driver to use commit-like interface Certain implementations of secure hypervisors (namely the one found on Samsung Exynos-based boards) do not provide access to individual L2C registers. This makes the .write_sec()-based interface insufficient and provoking ugly hacks. This patch is first step to make the driver not rely on availability of writes to individual registers. This is achieved by refactoring the driver to use a commit-like operation scheme: all register values are prepared first and stored in an instance of l2x0_regs struct and then a single callback is responsible to flush those values to the hardware. [mszyprow: rebased onto 'ARM: l2c: use l2c_write_sec() for restoring latency and filter regs' patch] Signed-off-by: Tomasz Figa Signed-off-by: Marek Szyprowski Tested-by: Nishanth Menon Acked-by: Tony Lindgren Signed-off-by: Russell King --- arch/arm/mm/cache-l2x0.c | 212 +++++++++++++++++++++------------------ 1 file changed, 116 insertions(+), 96 deletions(-) diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index b83c401ca50c4a..dde0d54ac41e58 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -41,12 +41,14 @@ struct l2c_init_data { void (*enable)(void __iomem *, u32, unsigned); void (*fixup)(void __iomem *, u32, struct outer_cache_fns *); void (*save)(void __iomem *); + void (*configure)(void __iomem *); struct outer_cache_fns outer_cache; }; #define CACHE_LINE_SIZE 32 static void __iomem *l2x0_base; +static const struct l2c_init_data *l2x0_data; static DEFINE_RAW_SPINLOCK(l2x0_lock); static u32 l2x0_way_mask; /* Bitmask of active ways */ static u32 l2x0_size; @@ -106,6 +108,14 @@ static inline void l2c_unlock(void __iomem *base, unsigned num) } } +static void l2c_configure(void __iomem *base) +{ + if (l2x0_data->configure) + l2x0_data->configure(base); + + l2c_write_sec(l2x0_saved_regs.aux_ctrl, base, L2X0_AUX_CTRL); +} + /* * Enable the L2 cache controller. This function must only be * called when the cache controller is known to be disabled. @@ -114,7 +124,12 @@ static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock) { unsigned long flags; - l2c_write_sec(aux, base, L2X0_AUX_CTRL); + /* Do not touch the controller if already enabled. */ + if (readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN) + return; + + l2x0_saved_regs.aux_ctrl = aux; + l2c_configure(base); l2c_unlock(base, num_lock); @@ -208,6 +223,11 @@ static void l2c_save(void __iomem *base) l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); } +static void l2c_resume(void) +{ + l2c_enable(l2x0_base, l2x0_saved_regs.aux_ctrl, l2x0_data->num_lock); +} + /* * L2C-210 specific code. * @@ -288,14 +308,6 @@ static void l2c210_sync(void) __l2c210_cache_sync(l2x0_base); } -static void l2c210_resume(void) -{ - void __iomem *base = l2x0_base; - - if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) - l2c_enable(base, l2x0_saved_regs.aux_ctrl, 1); -} - static const struct l2c_init_data l2c210_data __initconst = { .type = "L2C-210", .way_size_0 = SZ_8K, @@ -309,7 +321,7 @@ static const struct l2c_init_data l2c210_data __initconst = { .flush_all = l2c210_flush_all, .disable = l2c_disable, .sync = l2c210_sync, - .resume = l2c210_resume, + .resume = l2c_resume, }, }; @@ -466,7 +478,7 @@ static const struct l2c_init_data l2c220_data = { .flush_all = l2c220_flush_all, .disable = l2c_disable, .sync = l2c220_sync, - .resume = l2c210_resume, + .resume = l2c_resume, }, }; @@ -615,39 +627,29 @@ static void __init l2c310_save(void __iomem *base) L310_POWER_CTRL); } -static void l2c310_resume(void) +static void l2c310_configure(void __iomem *base) { - void __iomem *base = l2x0_base; + unsigned revision; - if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) { - unsigned revision; - - /* restore pl310 setup */ - l2c_write_sec(l2x0_saved_regs.tag_latency, base, - L310_TAG_LATENCY_CTRL); - l2c_write_sec(l2x0_saved_regs.data_latency, base, - L310_DATA_LATENCY_CTRL); - l2c_write_sec(l2x0_saved_regs.filter_end, base, - L310_ADDR_FILTER_END); - l2c_write_sec(l2x0_saved_regs.filter_start, base, - L310_ADDR_FILTER_START); - - revision = readl_relaxed(base + L2X0_CACHE_ID) & - L2X0_CACHE_ID_RTL_MASK; - - if (revision >= L310_CACHE_ID_RTL_R2P0) - l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base, - L310_PREFETCH_CTRL); - if (revision >= L310_CACHE_ID_RTL_R3P0) - l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base, - L310_POWER_CTRL); - - l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8); - - /* Re-enable full-line-of-zeros for Cortex-A9 */ - if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO) - set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); - } + /* restore pl310 setup */ + l2c_write_sec(l2x0_saved_regs.tag_latency, base, + L310_TAG_LATENCY_CTRL); + l2c_write_sec(l2x0_saved_regs.data_latency, base, + L310_DATA_LATENCY_CTRL); + l2c_write_sec(l2x0_saved_regs.filter_end, base, + L310_ADDR_FILTER_END); + l2c_write_sec(l2x0_saved_regs.filter_start, base, + L310_ADDR_FILTER_START); + + revision = readl_relaxed(base + L2X0_CACHE_ID) & + L2X0_CACHE_ID_RTL_MASK; + + if (revision >= L310_CACHE_ID_RTL_R2P0) + l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base, + L310_PREFETCH_CTRL); + if (revision >= L310_CACHE_ID_RTL_R3P0) + l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base, + L310_POWER_CTRL); } static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, void *data) @@ -699,6 +701,23 @@ static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock) aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP); } + /* r3p0 or later has power control register */ + if (rev >= L310_CACHE_ID_RTL_R3P0) + l2x0_saved_regs.pwr_ctrl = L310_DYNAMIC_CLK_GATING_EN | + L310_STNDBY_MODE_EN; + + /* + * Always enable non-secure access to the lockdown registers - + * we write to them as part of the L2C enable sequence so they + * need to be accessible. + */ + aux |= L310_AUX_CTRL_NS_LOCKDOWN; + + l2c_enable(base, aux, num_lock); + + /* Read back resulting AUX_CTRL value as it could have been altered. */ + aux = readl_relaxed(base + L2X0_AUX_CTRL); + if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) { u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL); @@ -712,23 +731,12 @@ static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock) if (rev >= L310_CACHE_ID_RTL_R3P0) { u32 power_ctrl; - l2c_write_sec(L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN, - base, L310_POWER_CTRL); power_ctrl = readl_relaxed(base + L310_POWER_CTRL); pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n", power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis", power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis"); } - /* - * Always enable non-secure access to the lockdown registers - - * we write to them as part of the L2C enable sequence so they - * need to be accessible. - */ - aux |= L310_AUX_CTRL_NS_LOCKDOWN; - - l2c_enable(base, aux, num_lock); - if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) { set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); cpu_notifier(l2c310_cpu_enable_flz, 0); @@ -760,11 +768,11 @@ static void __init l2c310_fixup(void __iomem *base, u32 cache_id, if (revision >= L310_CACHE_ID_RTL_R3P0 && revision < L310_CACHE_ID_RTL_R3P2) { - u32 val = readl_relaxed(base + L310_PREFETCH_CTRL); + u32 val = l2x0_saved_regs.prefetch_ctrl; /* I don't think bit23 is required here... but iMX6 does so */ if (val & (BIT(30) | BIT(23))) { val &= ~(BIT(30) | BIT(23)); - l2c_write_sec(val, base, L310_PREFETCH_CTRL); + l2x0_saved_regs.prefetch_ctrl = val; errata[n++] = "752271"; } } @@ -800,6 +808,15 @@ static void l2c310_disable(void) l2c_disable(); } +static void l2c310_resume(void) +{ + l2c_resume(); + + /* Re-enable full-line-of-zeros for Cortex-A9 */ + if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO) + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); +} + static const struct l2c_init_data l2c310_init_fns __initconst = { .type = "L2C-310", .way_size_0 = SZ_8K, @@ -807,6 +824,7 @@ static const struct l2c_init_data l2c310_init_fns __initconst = { .enable = l2c310_enable, .fixup = l2c310_fixup, .save = l2c310_save, + .configure = l2c310_configure, .outer_cache = { .inv_range = l2c210_inv_range, .clean_range = l2c210_clean_range, @@ -818,13 +836,21 @@ static const struct l2c_init_data l2c310_init_fns __initconst = { }, }; -static void __init __l2c_init(const struct l2c_init_data *data, - u32 aux_val, u32 aux_mask, u32 cache_id) +static int __init __l2c_init(const struct l2c_init_data *data, + u32 aux_val, u32 aux_mask, u32 cache_id) { struct outer_cache_fns fns; unsigned way_size_bits, ways; u32 aux, old_aux; + /* + * Save the pointer globally so that callbacks which do not receive + * context from callers can access the structure. + */ + l2x0_data = kmemdup(data, sizeof(*data), GFP_KERNEL); + if (!l2x0_data) + return -ENOMEM; + /* * Sanity check the aux values. aux_mask is the bits we preserve * from reading the hardware register, and aux_val is the bits we @@ -910,6 +936,8 @@ static void __init __l2c_init(const struct l2c_init_data *data, data->type, ways, l2x0_size >> 10); pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n", data->type, cache_id, aux); + + return 0; } void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) @@ -936,6 +964,10 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) break; } + /* Read back current (default) hardware configuration */ + if (data->save) + data->save(l2x0_base); + __l2c_init(data, aux_val, aux_mask, cache_id); } @@ -1102,7 +1134,7 @@ static const struct l2c_init_data of_l2c210_data __initconst = { .flush_all = l2c210_flush_all, .disable = l2c_disable, .sync = l2c210_sync, - .resume = l2c210_resume, + .resume = l2c_resume, }, }; @@ -1120,7 +1152,7 @@ static const struct l2c_init_data of_l2c220_data __initconst = { .flush_all = l2c220_flush_all, .disable = l2c_disable, .sync = l2c220_sync, - .resume = l2c210_resume, + .resume = l2c_resume, }, }; @@ -1135,28 +1167,26 @@ static void __init l2c310_of_parse(const struct device_node *np, of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag)); if (tag[0] && tag[1] && tag[2]) - l2c_write_sec( + l2x0_saved_regs.tag_latency = L310_LATENCY_CTRL_RD(tag[0] - 1) | L310_LATENCY_CTRL_WR(tag[1] - 1) | - L310_LATENCY_CTRL_SETUP(tag[2] - 1), - l2x0_base, L310_TAG_LATENCY_CTRL); + L310_LATENCY_CTRL_SETUP(tag[2] - 1); of_property_read_u32_array(np, "arm,data-latency", data, ARRAY_SIZE(data)); if (data[0] && data[1] && data[2]) - l2c_write_sec( + l2x0_saved_regs.data_latency = L310_LATENCY_CTRL_RD(data[0] - 1) | L310_LATENCY_CTRL_WR(data[1] - 1) | - L310_LATENCY_CTRL_SETUP(data[2] - 1), - l2x0_base, L310_DATA_LATENCY_CTRL); + L310_LATENCY_CTRL_SETUP(data[2] - 1); of_property_read_u32_array(np, "arm,filter-ranges", filter, ARRAY_SIZE(filter)); if (filter[1]) { - l2c_write_sec(ALIGN(filter[0] + filter[1], SZ_1M), - l2x0_base, L310_ADDR_FILTER_END); - l2c_write_sec((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN, - l2x0_base, L310_ADDR_FILTER_START); + l2x0_saved_regs.filter_end = + ALIGN(filter[0] + filter[1], SZ_1M); + l2x0_saved_regs.filter_start = (filter[0] & ~(SZ_1M - 1)) + | L310_ADDR_FILTER_EN; } ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K); @@ -1188,6 +1218,7 @@ static const struct l2c_init_data of_l2c310_data __initconst = { .enable = l2c310_enable, .fixup = l2c310_fixup, .save = l2c310_save, + .configure = l2c310_configure, .outer_cache = { .inv_range = l2c210_inv_range, .clean_range = l2c210_clean_range, @@ -1216,6 +1247,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = { .enable = l2c310_enable, .fixup = l2c310_fixup, .save = l2c310_save, + .configure = l2c310_configure, .outer_cache = { .inv_range = l2c210_inv_range, .clean_range = l2c210_clean_range, @@ -1330,16 +1362,6 @@ static void aurora_save(void __iomem *base) l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL); } -static void aurora_resume(void) -{ - void __iomem *base = l2x0_base; - - if (!(readl(base + L2X0_CTRL) & L2X0_CTRL_EN)) { - writel_relaxed(l2x0_saved_regs.aux_ctrl, base + L2X0_AUX_CTRL); - writel_relaxed(l2x0_saved_regs.ctrl, base + L2X0_CTRL); - } -} - /* * For Aurora cache in no outer mode, enable via the CP15 coprocessor * broadcasting of cache commands to L2. @@ -1401,7 +1423,7 @@ static const struct l2c_init_data of_aurora_with_outer_data __initconst = { .flush_all = l2x0_flush_all, .disable = l2x0_disable, .sync = l2x0_cache_sync, - .resume = aurora_resume, + .resume = l2c_resume, }, }; @@ -1414,7 +1436,7 @@ static const struct l2c_init_data of_aurora_no_outer_data __initconst = { .fixup = aurora_fixup, .save = aurora_save, .outer_cache = { - .resume = aurora_resume, + .resume = l2c_resume, }, }; @@ -1562,6 +1584,7 @@ static const struct l2c_init_data of_bcm_l2x0_data __initconst = { .of_parse = l2c310_of_parse, .enable = l2c310_enable, .save = l2c310_save, + .configure = l2c310_configure, .outer_cache = { .inv_range = bcm_inv_range, .clean_range = bcm_clean_range, @@ -1583,18 +1606,12 @@ static void __init tauros3_save(void __iomem *base) readl_relaxed(base + L310_PREFETCH_CTRL); } -static void tauros3_resume(void) +static void tauros3_configure(void __iomem *base) { - void __iomem *base = l2x0_base; - - if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) { - writel_relaxed(l2x0_saved_regs.aux2_ctrl, - base + TAUROS3_AUX2_CTRL); - writel_relaxed(l2x0_saved_regs.prefetch_ctrl, - base + L310_PREFETCH_CTRL); - - l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8); - } + writel_relaxed(l2x0_saved_regs.aux2_ctrl, + base + TAUROS3_AUX2_CTRL); + writel_relaxed(l2x0_saved_regs.prefetch_ctrl, + base + L310_PREFETCH_CTRL); } static const struct l2c_init_data of_tauros3_data __initconst = { @@ -1603,9 +1620,10 @@ static const struct l2c_init_data of_tauros3_data __initconst = { .num_lock = 8, .enable = l2c_enable, .save = tauros3_save, + .configure = tauros3_configure, /* Tauros3 broadcasts L1 cache operations to L2 */ .outer_cache = { - .resume = tauros3_resume, + .resume = l2c_resume, }, }; @@ -1661,6 +1679,10 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) if (!of_property_read_bool(np, "cache-unified")) pr_err("L2C: device tree omits to specify unified cache\n"); + /* Read back current (default) hardware configuration */ + if (data->save) + data->save(l2x0_base); + /* L2 configuration can only be changed if the cache is disabled */ if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) if (data->of_parse) @@ -1671,8 +1693,6 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) else cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); - __l2c_init(data, aux_val, aux_mask, cache_id); - - return 0; + return __l2c_init(data, aux_val, aux_mask, cache_id); } #endif From c6d1a2d0078a30eb6290428a858c4e790a0e8691 Mon Sep 17 00:00:00 2001 From: Tomasz Figa Date: Thu, 8 Jan 2015 07:51:07 +0100 Subject: [PATCH 077/601] ARM: 8260/1: l2c: Add interface to ask hypervisor to configure L2C Because certain secure hypervisor do not allow writes to individual L2C registers, but rather expect set of parameters to be passed as argument to secure monitor calls, there is a need to provide an interface for the L2C driver to ask the firmware to configure the hardware according to specified parameters. This patch adds such. Signed-off-by: Tomasz Figa Signed-off-by: Marek Szyprowski Tested-by: Nishanth Menon Acked-by: Nishanth Menon Acked-by: Tony Lindgren Signed-off-by: Russell King --- arch/arm/include/asm/outercache.h | 3 +++ arch/arm/mm/cache-l2x0.c | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h index 891a56b35bcf0c..563b92fc2f41c3 100644 --- a/arch/arm/include/asm/outercache.h +++ b/arch/arm/include/asm/outercache.h @@ -23,6 +23,8 @@ #include +struct l2x0_regs; + struct outer_cache_fns { void (*inv_range)(unsigned long, unsigned long); void (*clean_range)(unsigned long, unsigned long); @@ -36,6 +38,7 @@ struct outer_cache_fns { /* This is an ARM L2C thing */ void (*write_sec)(unsigned long, unsigned); + void (*configure)(const struct l2x0_regs *); }; extern struct outer_cache_fns outer_cache; diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index dde0d54ac41e58..5288153f28b889 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -110,6 +110,11 @@ static inline void l2c_unlock(void __iomem *base, unsigned num) static void l2c_configure(void __iomem *base) { + if (outer_cache.configure) { + outer_cache.configure(&l2x0_saved_regs); + return; + } + if (l2x0_data->configure) l2x0_data->configure(base); @@ -910,6 +915,7 @@ static int __init __l2c_init(const struct l2c_init_data *data, fns = data->outer_cache; fns.write_sec = outer_cache.write_sec; + fns.configure = outer_cache.configure; if (data->fixup) data->fixup(l2x0_base, cache_id, &fns); From 0c4c2edcaeee323fcc4f41a862456358eda50b57 Mon Sep 17 00:00:00 2001 From: Tomasz Figa Date: Thu, 8 Jan 2015 07:51:45 +0100 Subject: [PATCH 078/601] ARM: 8261/1: l2c: Get outer cache .write_sec callback from mach_desc only if not NULL Certain platforms (i.e. Exynos) might need to set .write_sec callback from firmware initialization which is happenning in .init_early callback of machine descriptor. However current code will overwrite the pointer with whatever is present in machine descriptor, even though it can be already set earlier. This patch fixes this by making the assignment conditional, depending on whether current .write_sec callback is NULL. Signed-off-by: Tomasz Figa Signed-off-by: Marek Szyprowski Tested-by: Nishanth Menon Acked-by: Nishanth Menon Acked-by: Tony Lindgren Signed-off-by: Russell King --- arch/arm/kernel/irq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index ad857bada96ce7..350f188c92d294 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -109,7 +109,8 @@ void __init init_IRQ(void) if (IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_CACHE_L2X0) && (machine_desc->l2c_aux_mask || machine_desc->l2c_aux_val)) { - outer_cache.write_sec = machine_desc->l2c_write_sec; + if (!outer_cache.write_sec) + outer_cache.write_sec = machine_desc->l2c_write_sec; ret = l2x0_of_init(machine_desc->l2c_aux_val, machine_desc->l2c_aux_mask); if (ret) From cf0681ca4cc5c9faa85a2df4c063b926fc09c977 Mon Sep 17 00:00:00 2001 From: Tomasz Figa Date: Thu, 8 Jan 2015 07:52:38 +0100 Subject: [PATCH 079/601] ARM: 8262/1: l2c: Add support for overriding prefetch settings Firmware on certain boards (e.g. ODROID-U3) can leave incorrect L2C prefetch settings configured in registers leading to crashes if L2C is enabled without overriding them. This patch introduces bindings to enable prefetch settings to be specified from DT and necessary support in the driver. [mszyprow: rebased onto v3.18-rc1, added error message when prefetch related dt property has been provided without any value] Signed-off-by: Tomasz Figa Signed-off-by: Marek Szyprowski Tested-by: Nishanth Menon Acked-by: Nishanth Menon Acked-by: Tony Lindgren Signed-off-by: Russell King --- .../devicetree/bindings/arm/l2cc.txt | 10 ++++ arch/arm/mm/cache-l2x0.c | 54 +++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/l2cc.txt b/Documentation/devicetree/bindings/arm/l2cc.txt index 292ef7ca305853..0dbabe9a6b0abb 100644 --- a/Documentation/devicetree/bindings/arm/l2cc.txt +++ b/Documentation/devicetree/bindings/arm/l2cc.txt @@ -57,6 +57,16 @@ Optional properties: - cache-id-part: cache id part number to be used if it is not present on hardware - wt-override: If present then L2 is forced to Write through mode +- arm,double-linefill : Override double linefill enable setting. Enable if + non-zero, disable if zero. +- arm,double-linefill-incr : Override double linefill on INCR read. Enable + if non-zero, disable if zero. +- arm,double-linefill-wrap : Override double linefill on WRAP read. Enable + if non-zero, disable if zero. +- arm,prefetch-drop : Override prefetch drop enable setting. Enable if non-zero, + disable if zero. +- arm,prefetch-offset : Override prefetch offset value. Valid values are + 0-7, 15, 23, and 31. Example: diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 5288153f28b889..01de138094548b 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -1169,6 +1169,8 @@ static void __init l2c310_of_parse(const struct device_node *np, u32 tag[3] = { 0, 0, 0 }; u32 filter[2] = { 0, 0 }; u32 assoc; + u32 prefetch; + u32 val; int ret; of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag)); @@ -1214,6 +1216,58 @@ static void __init l2c310_of_parse(const struct device_node *np, assoc); break; } + + prefetch = l2x0_saved_regs.prefetch_ctrl; + + ret = of_property_read_u32(np, "arm,double-linefill", &val); + if (ret == 0) { + if (val) + prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL; + else + prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF arm,double-linefill property value is missing\n"); + } + + ret = of_property_read_u32(np, "arm,double-linefill-incr", &val); + if (ret == 0) { + if (val) + prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL_INCR; + else + prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL_INCR; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF arm,double-linefill-incr property value is missing\n"); + } + + ret = of_property_read_u32(np, "arm,double-linefill-wrap", &val); + if (ret == 0) { + if (!val) + prefetch |= L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP; + else + prefetch &= ~L310_PREFETCH_CTRL_DBL_LINEFILL_WRAP; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF arm,double-linefill-wrap property value is missing\n"); + } + + ret = of_property_read_u32(np, "arm,prefetch-drop", &val); + if (ret == 0) { + if (val) + prefetch |= L310_PREFETCH_CTRL_PREFETCH_DROP; + else + prefetch &= ~L310_PREFETCH_CTRL_PREFETCH_DROP; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF arm,prefetch-drop property value is missing\n"); + } + + ret = of_property_read_u32(np, "arm,prefetch-offset", &val); + if (ret == 0) { + prefetch &= ~L310_PREFETCH_CTRL_OFFSET_MASK; + prefetch |= val & L310_PREFETCH_CTRL_OFFSET_MASK; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF arm,prefetch-offset property value is missing\n"); + } + + l2x0_saved_regs.prefetch_ctrl = prefetch; } static const struct l2c_init_data of_l2c310_data __initconst = { From 5445b640f38efa9c571584d44df5673804194397 Mon Sep 17 00:00:00 2001 From: Tomasz Figa Date: Thu, 8 Jan 2015 07:53:20 +0100 Subject: [PATCH 080/601] ARM: 8263/1: EXYNOS: Add .write_sec outer cache callback for L2C-310 Exynos4 SoCs equipped with an L2C-310 cache controller and running under secure firmware require certain registers of aforementioned IP to be accessed only from secure mode. This means that SMC calls are required for certain register writes. To handle this, an implementation of .write_sec and .configure callbacks is provided by this patch. [added comment and reworked unconditional call to SMC_CMD_L2X0INVALL] Signed-off-by: Tomasz Figa Signed-off-by: Marek Szyprowski Acked-by: Arnd Bergmann Acked-by: Kukjin Kim Signed-off-by: Russell King --- arch/arm/mach-exynos/firmware.c | 50 +++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/arch/arm/mach-exynos/firmware.c b/arch/arm/mach-exynos/firmware.c index 766f57d2f029ac..4791a3cc00f9d4 100644 --- a/arch/arm/mach-exynos/firmware.c +++ b/arch/arm/mach-exynos/firmware.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -136,6 +137,43 @@ static const struct firmware_ops exynos_firmware_ops = { .resume = IS_ENABLED(CONFIG_EXYNOS_CPU_SUSPEND) ? exynos_resume : NULL, }; +static void exynos_l2_write_sec(unsigned long val, unsigned reg) +{ + static int l2cache_enabled; + + switch (reg) { + case L2X0_CTRL: + if (val & L2X0_CTRL_EN) { + /* + * Before the cache can be enabled, due to firmware + * design, SMC_CMD_L2X0INVALL must be called. + */ + if (!l2cache_enabled) { + exynos_smc(SMC_CMD_L2X0INVALL, 0, 0, 0); + l2cache_enabled = 1; + } + } else { + l2cache_enabled = 0; + } + exynos_smc(SMC_CMD_L2X0CTRL, val, 0, 0); + break; + + case L2X0_DEBUG_CTRL: + exynos_smc(SMC_CMD_L2X0DEBUG, val, 0, 0); + break; + + default: + WARN_ONCE(1, "%s: ignoring write to reg 0x%x\n", __func__, reg); + } +} + +static void exynos_l2_configure(const struct l2x0_regs *regs) +{ + exynos_smc(SMC_CMD_L2X0SETUP1, regs->tag_latency, regs->data_latency, + regs->prefetch_ctrl); + exynos_smc(SMC_CMD_L2X0SETUP2, regs->pwr_ctrl, regs->aux_ctrl, 0); +} + void __init exynos_firmware_init(void) { struct device_node *nd; @@ -155,4 +193,16 @@ void __init exynos_firmware_init(void) pr_info("Running under secure firmware.\n"); register_firmware_ops(&exynos_firmware_ops); + + /* + * Exynos 4 SoCs (based on Cortex A9 and equipped with L2C-310), + * running under secure firmware, require certain registers of L2 + * cache controller to be written in secure mode. Here .write_sec + * callback is provided to perform necessary SMC calls. + */ + if (IS_ENABLED(CONFIG_CACHE_L2X0) && + read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) { + outer_cache.write_sec = exynos_l2_write_sec; + outer_cache.configure = exynos_l2_configure; + } } From 30ad527a6499a7dc037baea84e8a19be7f3bf509 Mon Sep 17 00:00:00 2001 From: Tomasz Figa Date: Thu, 8 Jan 2015 07:53:56 +0100 Subject: [PATCH 081/601] ARM: 8264/1: EXYNOS: Add support for non-secure L2X0 resume On Exynos SoCs it is necessary to resume operation of L2C early in assembly code, because otherwise certain systems will crash. This patch adds necessary code to non-secure resume handler. [rewrote the code accessing l2x0_saved_regs] Signed-off-by: Tomasz Figa Signed-off-by: Marek Szyprowski Acked-by: Arnd Bergmann Acked-by: Kukjin Kim Signed-off-by: Russell King --- arch/arm/mach-exynos/sleep.S | 46 ++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/arch/arm/mach-exynos/sleep.S b/arch/arm/mach-exynos/sleep.S index e3c373082bbee3..31d25834b9c4b0 100644 --- a/arch/arm/mach-exynos/sleep.S +++ b/arch/arm/mach-exynos/sleep.S @@ -16,6 +16,8 @@ */ #include +#include +#include #include "smc.h" #define CPU_MASK 0xff0ffff0 @@ -74,6 +76,45 @@ ENTRY(exynos_cpu_resume_ns) mov r0, #SMC_CMD_C15RESUME dsb smc #0 +#ifdef CONFIG_CACHE_L2X0 + adr r0, 1f + ldr r2, [r0] + add r0, r2, r0 + + /* Check that the address has been initialised. */ + ldr r1, [r0, #L2X0_R_PHY_BASE] + teq r1, #0 + beq skip_l2x0 + + /* Check if controller has been enabled. */ + ldr r2, [r1, #L2X0_CTRL] + tst r2, #0x1 + bne skip_l2x0 + + ldr r1, [r0, #L2X0_R_TAG_LATENCY] + ldr r2, [r0, #L2X0_R_DATA_LATENCY] + ldr r3, [r0, #L2X0_R_PREFETCH_CTRL] + mov r0, #SMC_CMD_L2X0SETUP1 + smc #0 + + /* Reload saved regs pointer because smc corrupts registers. */ + adr r0, 1f + ldr r2, [r0] + add r0, r2, r0 + + ldr r1, [r0, #L2X0_R_PWR_CTRL] + ldr r2, [r0, #L2X0_R_AUX_CTRL] + mov r0, #SMC_CMD_L2X0SETUP2 + smc #0 + + mov r0, #SMC_CMD_L2X0INVALL + smc #0 + + mov r1, #1 + mov r0, #SMC_CMD_L2X0CTRL + smc #0 +skip_l2x0: +#endif /* CONFIG_CACHE_L2X0 */ skip_cp15: b cpu_resume ENDPROC(exynos_cpu_resume_ns) @@ -83,3 +124,8 @@ cp15_save_diag: .globl cp15_save_power cp15_save_power: .long 0 @ cp15 power control + +#ifdef CONFIG_CACHE_L2X0 + .align +1: .long l2x0_saved_regs - . +#endif /* CONFIG_CACHE_L2X0 */ From 56b60b8bce4ae87470da4ed95435433b0ba8795c Mon Sep 17 00:00:00 2001 From: Tomasz Figa Date: Thu, 8 Jan 2015 07:54:34 +0100 Subject: [PATCH 082/601] ARM: 8265/1: dts: exynos4: Add nodes for L2 cache controller This patch adds device tree nodes for L2 cache controller present on Exynos4 SoCs. Signed-off-by: Tomasz Figa Signed-off-by: Marek Szyprowski Acked-by: Arnd Bergmann Acked-by: Kukjin Kim Signed-off-by: Russell King --- arch/arm/boot/dts/exynos4210.dtsi | 9 +++++++++ arch/arm/boot/dts/exynos4x12.dtsi | 14 ++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi index bcc9e63c807019..8e45ea44317e59 100644 --- a/arch/arm/boot/dts/exynos4210.dtsi +++ b/arch/arm/boot/dts/exynos4210.dtsi @@ -81,6 +81,15 @@ reg = <0x10023CA0 0x20>; }; + l2c: l2-cache-controller@10502000 { + compatible = "arm,pl310-cache"; + reg = <0x10502000 0x1000>; + cache-unified; + cache-level = <2>; + arm,tag-latency = <2 2 1>; + arm,data-latency = <2 2 1>; + }; + gic: interrupt-controller@10490000 { cpu-offset = <0x8000>; }; diff --git a/arch/arm/boot/dts/exynos4x12.dtsi b/arch/arm/boot/dts/exynos4x12.dtsi index 93b70402e9439a..8bc97c415c9a6f 100644 --- a/arch/arm/boot/dts/exynos4x12.dtsi +++ b/arch/arm/boot/dts/exynos4x12.dtsi @@ -54,6 +54,20 @@ reg = <0x10023CA0 0x20>; }; + l2c: l2-cache-controller@10502000 { + compatible = "arm,pl310-cache"; + reg = <0x10502000 0x1000>; + cache-unified; + cache-level = <2>; + arm,tag-latency = <2 2 1>; + arm,data-latency = <3 2 1>; + arm,double-linefill = <1>; + arm,double-linefill-incr = <0>; + arm,double-linefill-wrap = <1>; + arm,prefetch-drop = <1>; + arm,prefetch-offset = <7>; + }; + clock: clock-controller@10030000 { compatible = "samsung,exynos4412-clock"; reg = <0x10030000 0x20000>; From c25630381c6e093819d86d9618798db932cc2d90 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Thu, 15 Jan 2015 23:41:54 +0100 Subject: [PATCH 083/601] ARM: 8285/1: remove ARMv3 user access code again This code was restored with commit 080fc66fb5 ("ARM: Bring back ARMv3 IO and user access code") because the RiscPC memory bus does not understand half-word load/stores. However only the IO code needed restoring since the alternative user access code contains no half-word accesses, is already used when CONFIG_PREEMPT is set and runs faster on a StrongARM. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/lib/Makefile | 15 +- arch/arm/lib/uaccess.S | 564 ----------------------------------------- 2 files changed, 2 insertions(+), 577 deletions(-) delete mode 100644 arch/arm/lib/uaccess.S diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 0573faab96ad2c..d8a780799506b5 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -15,19 +15,8 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ io-readsb.o io-writesb.o io-readsl.o io-writesl.o \ call_with_stack.o bswapsdi2.o -mmu-y := clear_user.o copy_page.o getuser.o putuser.o - -# the code in uaccess.S is not preemption safe and -# probably faster on ARMv3 only -ifeq ($(CONFIG_PREEMPT),y) - mmu-y += copy_from_user.o copy_to_user.o -else -ifneq ($(CONFIG_CPU_32v3),y) - mmu-y += copy_from_user.o copy_to_user.o -else - mmu-y += uaccess.o -endif -endif +mmu-y := clear_user.o copy_page.o getuser.o putuser.o \ + copy_from_user.o copy_to_user.o # using lib_ here won't override already available weak symbols obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o diff --git a/arch/arm/lib/uaccess.S b/arch/arm/lib/uaccess.S deleted file mode 100644 index e50520904b7641..00000000000000 --- a/arch/arm/lib/uaccess.S +++ /dev/null @@ -1,564 +0,0 @@ -/* - * linux/arch/arm/lib/uaccess.S - * - * Copyright (C) 1995, 1996,1997,1998 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Routines to block copy data to/from user memory - * These are highly optimised both for the 4k page size - * and for various alignments. - */ -#include -#include -#include -#include - - .text - -#define PAGE_SHIFT 12 - -/* Prototype: int __copy_to_user(void *to, const char *from, size_t n) - * Purpose : copy a block to user memory from kernel memory - * Params : to - user memory - * : from - kernel memory - * : n - number of bytes to copy - * Returns : Number of bytes NOT copied. - */ - -.Lc2u_dest_not_aligned: - rsb ip, ip, #4 - cmp ip, #2 - ldrb r3, [r1], #1 -USER( TUSER( strb) r3, [r0], #1) @ May fault - ldrgeb r3, [r1], #1 -USER( TUSER( strgeb) r3, [r0], #1) @ May fault - ldrgtb r3, [r1], #1 -USER( TUSER( strgtb) r3, [r0], #1) @ May fault - sub r2, r2, ip - b .Lc2u_dest_aligned - -ENTRY(__copy_to_user) - stmfd sp!, {r2, r4 - r7, lr} - cmp r2, #4 - blt .Lc2u_not_enough - ands ip, r0, #3 - bne .Lc2u_dest_not_aligned -.Lc2u_dest_aligned: - - ands ip, r1, #3 - bne .Lc2u_src_not_aligned -/* - * Seeing as there has to be at least 8 bytes to copy, we can - * copy one word, and force a user-mode page fault... - */ - -.Lc2u_0fupi: subs r2, r2, #4 - addmi ip, r2, #4 - bmi .Lc2u_0nowords - ldr r3, [r1], #4 -USER( TUSER( str) r3, [r0], #4) @ May fault - mov ip, r0, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction - rsb ip, ip, #0 - movs ip, ip, lsr #32 - PAGE_SHIFT - beq .Lc2u_0fupi -/* - * ip = max no. of bytes to copy before needing another "strt" insn - */ - cmp r2, ip - movlt ip, r2 - sub r2, r2, ip - subs ip, ip, #32 - blt .Lc2u_0rem8lp - -.Lc2u_0cpy8lp: ldmia r1!, {r3 - r6} - stmia r0!, {r3 - r6} @ Shouldnt fault - ldmia r1!, {r3 - r6} - subs ip, ip, #32 - stmia r0!, {r3 - r6} @ Shouldnt fault - bpl .Lc2u_0cpy8lp - -.Lc2u_0rem8lp: cmn ip, #16 - ldmgeia r1!, {r3 - r6} - stmgeia r0!, {r3 - r6} @ Shouldnt fault - tst ip, #8 - ldmneia r1!, {r3 - r4} - stmneia r0!, {r3 - r4} @ Shouldnt fault - tst ip, #4 - ldrne r3, [r1], #4 - TUSER( strne) r3, [r0], #4 @ Shouldnt fault - ands ip, ip, #3 - beq .Lc2u_0fupi -.Lc2u_0nowords: teq ip, #0 - beq .Lc2u_finished -.Lc2u_nowords: cmp ip, #2 - ldrb r3, [r1], #1 -USER( TUSER( strb) r3, [r0], #1) @ May fault - ldrgeb r3, [r1], #1 -USER( TUSER( strgeb) r3, [r0], #1) @ May fault - ldrgtb r3, [r1], #1 -USER( TUSER( strgtb) r3, [r0], #1) @ May fault - b .Lc2u_finished - -.Lc2u_not_enough: - movs ip, r2 - bne .Lc2u_nowords -.Lc2u_finished: mov r0, #0 - ldmfd sp!, {r2, r4 - r7, pc} - -.Lc2u_src_not_aligned: - bic r1, r1, #3 - ldr r7, [r1], #4 - cmp ip, #2 - bgt .Lc2u_3fupi - beq .Lc2u_2fupi -.Lc2u_1fupi: subs r2, r2, #4 - addmi ip, r2, #4 - bmi .Lc2u_1nowords - mov r3, r7, lspull #8 - ldr r7, [r1], #4 - orr r3, r3, r7, lspush #24 -USER( TUSER( str) r3, [r0], #4) @ May fault - mov ip, r0, lsl #32 - PAGE_SHIFT - rsb ip, ip, #0 - movs ip, ip, lsr #32 - PAGE_SHIFT - beq .Lc2u_1fupi - cmp r2, ip - movlt ip, r2 - sub r2, r2, ip - subs ip, ip, #16 - blt .Lc2u_1rem8lp - -.Lc2u_1cpy8lp: mov r3, r7, lspull #8 - ldmia r1!, {r4 - r7} - subs ip, ip, #16 - orr r3, r3, r4, lspush #24 - mov r4, r4, lspull #8 - orr r4, r4, r5, lspush #24 - mov r5, r5, lspull #8 - orr r5, r5, r6, lspush #24 - mov r6, r6, lspull #8 - orr r6, r6, r7, lspush #24 - stmia r0!, {r3 - r6} @ Shouldnt fault - bpl .Lc2u_1cpy8lp - -.Lc2u_1rem8lp: tst ip, #8 - movne r3, r7, lspull #8 - ldmneia r1!, {r4, r7} - orrne r3, r3, r4, lspush #24 - movne r4, r4, lspull #8 - orrne r4, r4, r7, lspush #24 - stmneia r0!, {r3 - r4} @ Shouldnt fault - tst ip, #4 - movne r3, r7, lspull #8 - ldrne r7, [r1], #4 - orrne r3, r3, r7, lspush #24 - TUSER( strne) r3, [r0], #4 @ Shouldnt fault - ands ip, ip, #3 - beq .Lc2u_1fupi -.Lc2u_1nowords: mov r3, r7, get_byte_1 - teq ip, #0 - beq .Lc2u_finished - cmp ip, #2 -USER( TUSER( strb) r3, [r0], #1) @ May fault - movge r3, r7, get_byte_2 -USER( TUSER( strgeb) r3, [r0], #1) @ May fault - movgt r3, r7, get_byte_3 -USER( TUSER( strgtb) r3, [r0], #1) @ May fault - b .Lc2u_finished - -.Lc2u_2fupi: subs r2, r2, #4 - addmi ip, r2, #4 - bmi .Lc2u_2nowords - mov r3, r7, lspull #16 - ldr r7, [r1], #4 - orr r3, r3, r7, lspush #16 -USER( TUSER( str) r3, [r0], #4) @ May fault - mov ip, r0, lsl #32 - PAGE_SHIFT - rsb ip, ip, #0 - movs ip, ip, lsr #32 - PAGE_SHIFT - beq .Lc2u_2fupi - cmp r2, ip - movlt ip, r2 - sub r2, r2, ip - subs ip, ip, #16 - blt .Lc2u_2rem8lp - -.Lc2u_2cpy8lp: mov r3, r7, lspull #16 - ldmia r1!, {r4 - r7} - subs ip, ip, #16 - orr r3, r3, r4, lspush #16 - mov r4, r4, lspull #16 - orr r4, r4, r5, lspush #16 - mov r5, r5, lspull #16 - orr r5, r5, r6, lspush #16 - mov r6, r6, lspull #16 - orr r6, r6, r7, lspush #16 - stmia r0!, {r3 - r6} @ Shouldnt fault - bpl .Lc2u_2cpy8lp - -.Lc2u_2rem8lp: tst ip, #8 - movne r3, r7, lspull #16 - ldmneia r1!, {r4, r7} - orrne r3, r3, r4, lspush #16 - movne r4, r4, lspull #16 - orrne r4, r4, r7, lspush #16 - stmneia r0!, {r3 - r4} @ Shouldnt fault - tst ip, #4 - movne r3, r7, lspull #16 - ldrne r7, [r1], #4 - orrne r3, r3, r7, lspush #16 - TUSER( strne) r3, [r0], #4 @ Shouldnt fault - ands ip, ip, #3 - beq .Lc2u_2fupi -.Lc2u_2nowords: mov r3, r7, get_byte_2 - teq ip, #0 - beq .Lc2u_finished - cmp ip, #2 -USER( TUSER( strb) r3, [r0], #1) @ May fault - movge r3, r7, get_byte_3 -USER( TUSER( strgeb) r3, [r0], #1) @ May fault - ldrgtb r3, [r1], #0 -USER( TUSER( strgtb) r3, [r0], #1) @ May fault - b .Lc2u_finished - -.Lc2u_3fupi: subs r2, r2, #4 - addmi ip, r2, #4 - bmi .Lc2u_3nowords - mov r3, r7, lspull #24 - ldr r7, [r1], #4 - orr r3, r3, r7, lspush #8 -USER( TUSER( str) r3, [r0], #4) @ May fault - mov ip, r0, lsl #32 - PAGE_SHIFT - rsb ip, ip, #0 - movs ip, ip, lsr #32 - PAGE_SHIFT - beq .Lc2u_3fupi - cmp r2, ip - movlt ip, r2 - sub r2, r2, ip - subs ip, ip, #16 - blt .Lc2u_3rem8lp - -.Lc2u_3cpy8lp: mov r3, r7, lspull #24 - ldmia r1!, {r4 - r7} - subs ip, ip, #16 - orr r3, r3, r4, lspush #8 - mov r4, r4, lspull #24 - orr r4, r4, r5, lspush #8 - mov r5, r5, lspull #24 - orr r5, r5, r6, lspush #8 - mov r6, r6, lspull #24 - orr r6, r6, r7, lspush #8 - stmia r0!, {r3 - r6} @ Shouldnt fault - bpl .Lc2u_3cpy8lp - -.Lc2u_3rem8lp: tst ip, #8 - movne r3, r7, lspull #24 - ldmneia r1!, {r4, r7} - orrne r3, r3, r4, lspush #8 - movne r4, r4, lspull #24 - orrne r4, r4, r7, lspush #8 - stmneia r0!, {r3 - r4} @ Shouldnt fault - tst ip, #4 - movne r3, r7, lspull #24 - ldrne r7, [r1], #4 - orrne r3, r3, r7, lspush #8 - TUSER( strne) r3, [r0], #4 @ Shouldnt fault - ands ip, ip, #3 - beq .Lc2u_3fupi -.Lc2u_3nowords: mov r3, r7, get_byte_3 - teq ip, #0 - beq .Lc2u_finished - cmp ip, #2 -USER( TUSER( strb) r3, [r0], #1) @ May fault - ldrgeb r3, [r1], #1 -USER( TUSER( strgeb) r3, [r0], #1) @ May fault - ldrgtb r3, [r1], #0 -USER( TUSER( strgtb) r3, [r0], #1) @ May fault - b .Lc2u_finished -ENDPROC(__copy_to_user) - - .pushsection .fixup,"ax" - .align 0 -9001: ldmfd sp!, {r0, r4 - r7, pc} - .popsection - -/* Prototype: unsigned long __copy_from_user(void *to,const void *from,unsigned long n); - * Purpose : copy a block from user memory to kernel memory - * Params : to - kernel memory - * : from - user memory - * : n - number of bytes to copy - * Returns : Number of bytes NOT copied. - */ -.Lcfu_dest_not_aligned: - rsb ip, ip, #4 - cmp ip, #2 -USER( TUSER( ldrb) r3, [r1], #1) @ May fault - strb r3, [r0], #1 -USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault - strgeb r3, [r0], #1 -USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault - strgtb r3, [r0], #1 - sub r2, r2, ip - b .Lcfu_dest_aligned - -ENTRY(__copy_from_user) - stmfd sp!, {r0, r2, r4 - r7, lr} - cmp r2, #4 - blt .Lcfu_not_enough - ands ip, r0, #3 - bne .Lcfu_dest_not_aligned -.Lcfu_dest_aligned: - ands ip, r1, #3 - bne .Lcfu_src_not_aligned - -/* - * Seeing as there has to be at least 8 bytes to copy, we can - * copy one word, and force a user-mode page fault... - */ - -.Lcfu_0fupi: subs r2, r2, #4 - addmi ip, r2, #4 - bmi .Lcfu_0nowords -USER( TUSER( ldr) r3, [r1], #4) - str r3, [r0], #4 - mov ip, r1, lsl #32 - PAGE_SHIFT @ On each page, use a ld/st??t instruction - rsb ip, ip, #0 - movs ip, ip, lsr #32 - PAGE_SHIFT - beq .Lcfu_0fupi -/* - * ip = max no. of bytes to copy before needing another "strt" insn - */ - cmp r2, ip - movlt ip, r2 - sub r2, r2, ip - subs ip, ip, #32 - blt .Lcfu_0rem8lp - -.Lcfu_0cpy8lp: ldmia r1!, {r3 - r6} @ Shouldnt fault - stmia r0!, {r3 - r6} - ldmia r1!, {r3 - r6} @ Shouldnt fault - subs ip, ip, #32 - stmia r0!, {r3 - r6} - bpl .Lcfu_0cpy8lp - -.Lcfu_0rem8lp: cmn ip, #16 - ldmgeia r1!, {r3 - r6} @ Shouldnt fault - stmgeia r0!, {r3 - r6} - tst ip, #8 - ldmneia r1!, {r3 - r4} @ Shouldnt fault - stmneia r0!, {r3 - r4} - tst ip, #4 - TUSER( ldrne) r3, [r1], #4 @ Shouldnt fault - strne r3, [r0], #4 - ands ip, ip, #3 - beq .Lcfu_0fupi -.Lcfu_0nowords: teq ip, #0 - beq .Lcfu_finished -.Lcfu_nowords: cmp ip, #2 -USER( TUSER( ldrb) r3, [r1], #1) @ May fault - strb r3, [r0], #1 -USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault - strgeb r3, [r0], #1 -USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault - strgtb r3, [r0], #1 - b .Lcfu_finished - -.Lcfu_not_enough: - movs ip, r2 - bne .Lcfu_nowords -.Lcfu_finished: mov r0, #0 - add sp, sp, #8 - ldmfd sp!, {r4 - r7, pc} - -.Lcfu_src_not_aligned: - bic r1, r1, #3 -USER( TUSER( ldr) r7, [r1], #4) @ May fault - cmp ip, #2 - bgt .Lcfu_3fupi - beq .Lcfu_2fupi -.Lcfu_1fupi: subs r2, r2, #4 - addmi ip, r2, #4 - bmi .Lcfu_1nowords - mov r3, r7, lspull #8 -USER( TUSER( ldr) r7, [r1], #4) @ May fault - orr r3, r3, r7, lspush #24 - str r3, [r0], #4 - mov ip, r1, lsl #32 - PAGE_SHIFT - rsb ip, ip, #0 - movs ip, ip, lsr #32 - PAGE_SHIFT - beq .Lcfu_1fupi - cmp r2, ip - movlt ip, r2 - sub r2, r2, ip - subs ip, ip, #16 - blt .Lcfu_1rem8lp - -.Lcfu_1cpy8lp: mov r3, r7, lspull #8 - ldmia r1!, {r4 - r7} @ Shouldnt fault - subs ip, ip, #16 - orr r3, r3, r4, lspush #24 - mov r4, r4, lspull #8 - orr r4, r4, r5, lspush #24 - mov r5, r5, lspull #8 - orr r5, r5, r6, lspush #24 - mov r6, r6, lspull #8 - orr r6, r6, r7, lspush #24 - stmia r0!, {r3 - r6} - bpl .Lcfu_1cpy8lp - -.Lcfu_1rem8lp: tst ip, #8 - movne r3, r7, lspull #8 - ldmneia r1!, {r4, r7} @ Shouldnt fault - orrne r3, r3, r4, lspush #24 - movne r4, r4, lspull #8 - orrne r4, r4, r7, lspush #24 - stmneia r0!, {r3 - r4} - tst ip, #4 - movne r3, r7, lspull #8 -USER( TUSER( ldrne) r7, [r1], #4) @ May fault - orrne r3, r3, r7, lspush #24 - strne r3, [r0], #4 - ands ip, ip, #3 - beq .Lcfu_1fupi -.Lcfu_1nowords: mov r3, r7, get_byte_1 - teq ip, #0 - beq .Lcfu_finished - cmp ip, #2 - strb r3, [r0], #1 - movge r3, r7, get_byte_2 - strgeb r3, [r0], #1 - movgt r3, r7, get_byte_3 - strgtb r3, [r0], #1 - b .Lcfu_finished - -.Lcfu_2fupi: subs r2, r2, #4 - addmi ip, r2, #4 - bmi .Lcfu_2nowords - mov r3, r7, lspull #16 -USER( TUSER( ldr) r7, [r1], #4) @ May fault - orr r3, r3, r7, lspush #16 - str r3, [r0], #4 - mov ip, r1, lsl #32 - PAGE_SHIFT - rsb ip, ip, #0 - movs ip, ip, lsr #32 - PAGE_SHIFT - beq .Lcfu_2fupi - cmp r2, ip - movlt ip, r2 - sub r2, r2, ip - subs ip, ip, #16 - blt .Lcfu_2rem8lp - - -.Lcfu_2cpy8lp: mov r3, r7, lspull #16 - ldmia r1!, {r4 - r7} @ Shouldnt fault - subs ip, ip, #16 - orr r3, r3, r4, lspush #16 - mov r4, r4, lspull #16 - orr r4, r4, r5, lspush #16 - mov r5, r5, lspull #16 - orr r5, r5, r6, lspush #16 - mov r6, r6, lspull #16 - orr r6, r6, r7, lspush #16 - stmia r0!, {r3 - r6} - bpl .Lcfu_2cpy8lp - -.Lcfu_2rem8lp: tst ip, #8 - movne r3, r7, lspull #16 - ldmneia r1!, {r4, r7} @ Shouldnt fault - orrne r3, r3, r4, lspush #16 - movne r4, r4, lspull #16 - orrne r4, r4, r7, lspush #16 - stmneia r0!, {r3 - r4} - tst ip, #4 - movne r3, r7, lspull #16 -USER( TUSER( ldrne) r7, [r1], #4) @ May fault - orrne r3, r3, r7, lspush #16 - strne r3, [r0], #4 - ands ip, ip, #3 - beq .Lcfu_2fupi -.Lcfu_2nowords: mov r3, r7, get_byte_2 - teq ip, #0 - beq .Lcfu_finished - cmp ip, #2 - strb r3, [r0], #1 - movge r3, r7, get_byte_3 - strgeb r3, [r0], #1 -USER( TUSER( ldrgtb) r3, [r1], #0) @ May fault - strgtb r3, [r0], #1 - b .Lcfu_finished - -.Lcfu_3fupi: subs r2, r2, #4 - addmi ip, r2, #4 - bmi .Lcfu_3nowords - mov r3, r7, lspull #24 -USER( TUSER( ldr) r7, [r1], #4) @ May fault - orr r3, r3, r7, lspush #8 - str r3, [r0], #4 - mov ip, r1, lsl #32 - PAGE_SHIFT - rsb ip, ip, #0 - movs ip, ip, lsr #32 - PAGE_SHIFT - beq .Lcfu_3fupi - cmp r2, ip - movlt ip, r2 - sub r2, r2, ip - subs ip, ip, #16 - blt .Lcfu_3rem8lp - -.Lcfu_3cpy8lp: mov r3, r7, lspull #24 - ldmia r1!, {r4 - r7} @ Shouldnt fault - orr r3, r3, r4, lspush #8 - mov r4, r4, lspull #24 - orr r4, r4, r5, lspush #8 - mov r5, r5, lspull #24 - orr r5, r5, r6, lspush #8 - mov r6, r6, lspull #24 - orr r6, r6, r7, lspush #8 - stmia r0!, {r3 - r6} - subs ip, ip, #16 - bpl .Lcfu_3cpy8lp - -.Lcfu_3rem8lp: tst ip, #8 - movne r3, r7, lspull #24 - ldmneia r1!, {r4, r7} @ Shouldnt fault - orrne r3, r3, r4, lspush #8 - movne r4, r4, lspull #24 - orrne r4, r4, r7, lspush #8 - stmneia r0!, {r3 - r4} - tst ip, #4 - movne r3, r7, lspull #24 -USER( TUSER( ldrne) r7, [r1], #4) @ May fault - orrne r3, r3, r7, lspush #8 - strne r3, [r0], #4 - ands ip, ip, #3 - beq .Lcfu_3fupi -.Lcfu_3nowords: mov r3, r7, get_byte_3 - teq ip, #0 - beq .Lcfu_finished - cmp ip, #2 - strb r3, [r0], #1 -USER( TUSER( ldrgeb) r3, [r1], #1) @ May fault - strgeb r3, [r0], #1 -USER( TUSER( ldrgtb) r3, [r1], #1) @ May fault - strgtb r3, [r0], #1 - b .Lcfu_finished -ENDPROC(__copy_from_user) - - .pushsection .fixup,"ax" - .align 0 - /* - * We took an exception. r0 contains a pointer to - * the byte not copied. - */ -9001: ldr r2, [sp], #4 @ void *to - sub r2, r0, r2 @ bytes copied - ldr r1, [sp], #4 @ unsigned long count - subs r4, r1, r2 @ bytes left to copy - movne r1, r4 - blne __memzero - mov r0, r4 - ldmfd sp!, {r4 - r7, pc} - .popsection - From 67c2b9cb30f561325b010e046b7bbe2a327e69a0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 18 Nov 2014 12:18:18 +0100 Subject: [PATCH 084/601] ARM: 8207/1: amba: Use inlines instead of macros for amba_pclk_enable/disable Replace the amba_pclk_enable and amba_pclk_disable macros with static inline functions and remove checks for IS_ERR. The amba bus clock won't be ERR because probe would fail before the use of these functions. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Russell King --- include/linux/amba/bus.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 2afc618b15ce92..0ab5f8e0dea264 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -92,11 +92,15 @@ struct amba_device *amba_find_device(const char *, struct device *, unsigned int int amba_request_regions(struct amba_device *, const char *); void amba_release_regions(struct amba_device *); -#define amba_pclk_enable(d) \ - (IS_ERR((d)->pclk) ? 0 : clk_enable((d)->pclk)) +static inline int amba_pclk_enable(struct amba_device *dev) +{ + return clk_enable(dev->pclk); +} -#define amba_pclk_disable(d) \ - do { if (!IS_ERR((d)->pclk)) clk_disable((d)->pclk); } while (0) +static inline void amba_pclk_disable(struct amba_device *dev) +{ + clk_disable(dev->pclk); +} static inline int amba_pclk_prepare(struct amba_device *dev) { From b8f80bffd51f4ef029051d6898d9c2e3e5637dc3 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 14 Mar 2014 14:00:56 +0100 Subject: [PATCH 085/601] iommu/ipmmu-vmsa: Cleanup failures of ARM mapping creation or attachment The ARM IOMMU mapping needs to be released when attaching the device fails. Add arm_iommu_release_mapping() to the error code path. This is safe to call with a NULL mapping, so no specific check is needed. Cleanup is also missing when failing to create a mapping. Jump to the error code path in that case instead of returning immediately. Signed-off-by: Laurent Pinchart --- drivers/iommu/ipmmu-vmsa.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 748693192c20a0..034293ca4b9a4e 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -1090,7 +1090,8 @@ static int ipmmu_add_device(struct device *dev) SZ_1G, SZ_2G); if (IS_ERR(mapping)) { dev_err(mmu->dev, "failed to create ARM IOMMU mapping\n"); - return PTR_ERR(mapping); + ret = PTR_ERR(mapping); + goto error; } mmu->mapping = mapping; @@ -1106,6 +1107,7 @@ static int ipmmu_add_device(struct device *dev) return 0; error: + arm_iommu_release_mapping(mmu->mapping); kfree(dev->archdata.iommu); dev->archdata.iommu = NULL; iommu_group_remove_device(dev); From 22463cab3f96baf6b568200a35c7648438eea7ff Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 24 Jul 2014 15:34:54 +0200 Subject: [PATCH 086/601] iommu/ipmmu-vmsa: Flush P[UM]D entry before freeing the child page table When clearing PUD or PMD entries the child page table (if any) is freed and the PUD or PMD entry is then cleared. This result in a small race condition window during which a free page table could be accessed by the IPMMU. Fix it by clearing and flushing the PUD or PMD entry before freeing the child page table. Signed-off-by: Laurent Pinchart --- drivers/iommu/ipmmu-vmsa.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 034293ca4b9a4e..a32d237c0f2272 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -678,30 +678,33 @@ static int ipmmu_create_mapping(struct ipmmu_vmsa_domain *domain, static void ipmmu_clear_pud(struct ipmmu_vmsa_device *mmu, pud_t *pud) { - /* Free the page table. */ pgtable_t table = pud_pgtable(*pud); - __free_page(table); /* Clear the PUD. */ *pud = __pud(0); ipmmu_flush_pgtable(mmu, pud, sizeof(*pud)); + + /* Free the page table. */ + __free_page(table); } static void ipmmu_clear_pmd(struct ipmmu_vmsa_device *mmu, pud_t *pud, pmd_t *pmd) { + pmd_t pmdval = *pmd; unsigned int i; - /* Free the page table. */ - if (pmd_table(*pmd)) { - pgtable_t table = pmd_pgtable(*pmd); - __free_page(table); - } - /* Clear the PMD. */ *pmd = __pmd(0); ipmmu_flush_pgtable(mmu, pmd, sizeof(*pmd)); + /* Free the page table. */ + if (pmd_table(pmdval)) { + pgtable_t table = pmd_pgtable(pmdval); + + __free_page(table); + } + /* Check whether the PUD is still needed. */ pmd = pmd_offset(pud, 0); for (i = 0; i < IPMMU_PTRS_PER_PMD; ++i) { From 9eca0a5875403027e10d68dd162df9790d42839e Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 24 Jul 2014 15:34:54 +0200 Subject: [PATCH 087/601] iommu/ipmmu-vmsa: Invalidate TLB after unmapping The TLB must be invalidated after unmapping memory to remove stale TLB entries. this was supposed to be performed already, but a bug in the driver prevented the TLB invalidate function from being called. Fix it. Signed-off-by: Laurent Pinchart --- drivers/iommu/ipmmu-vmsa.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index a32d237c0f2272..3a61103ef10823 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -785,7 +785,6 @@ static int ipmmu_clear_mapping(struct ipmmu_vmsa_domain *domain, pud_t *pud; pmd_t *pmd; pte_t *pte; - int ret = 0; if (!pgd) return -EINVAL; @@ -847,8 +846,7 @@ static int ipmmu_clear_mapping(struct ipmmu_vmsa_domain *domain, done: spin_unlock_irqrestore(&domain->lock, flags); - if (ret) - ipmmu_tlb_invalidate(domain); + ipmmu_tlb_invalidate(domain); return 0; } From 4a93f21d87e769477c0cb2b98d7e7911b8d37c03 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 17 Mar 2014 01:02:46 +0100 Subject: [PATCH 088/601] iommu/ipmmu-vmsa: Add device tree bindings documentation Signed-off-by: Laurent Pinchart Acked-by: Geert Uytterhoeven --- .../bindings/iommu/renesas,ipmmu-vmsa.txt | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt new file mode 100644 index 00000000000000..cd29083e16ec76 --- /dev/null +++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt @@ -0,0 +1,41 @@ +* Renesas VMSA-Compatible IOMMU + +The IPMMU is an IOMMU implementation compatible with the ARM VMSA page tables. +It provides address translation for bus masters outside of the CPU, each +connected to the IPMMU through a port called micro-TLB. + + +Required Properties: + + - compatible: Must contain "renesas,ipmmu-vmsa". + - reg: Base address and size of the IPMMU registers. + - interrupts: Specifiers for the MMU fault interrupts. For instances that + support secure mode two interrupts must be specified, for non-secure and + secure mode, in that order. For instances that don't support secure mode a + single interrupt must be specified. + + - #iommu-cells: Must be 1. + +Each bus master connected to an IPMMU must reference the IPMMU in its device +node with the following property: + + - iommus: A reference to the IPMMU in two cells. The first cell is a phandle + to the IPMMU and the second cell the number of the micro-TLB that the + device is connected to. + + +Example: R8A7791 IPMMU-MX and VSP1-D0 bus master + + ipmmu_mx: mmu@fe951000 { + compatible = "renasas,ipmmu-vmsa"; + reg = <0 0xfe951000 0 0x1000>; + interrupts = <0 222 IRQ_TYPE_LEVEL_HIGH>, + <0 221 IRQ_TYPE_LEVEL_HIGH>; + #iommu-cells = <1>; + }; + + vsp1@fe928000 { + ... + iommus = <&ipmmu_mx 13>; + ... + }; From 275f5053c7786285d2f20d2dd12908f834c47ad8 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 17 Mar 2014 01:02:46 +0100 Subject: [PATCH 089/601] iommu/ipmmu-vmsa: Add device tree support Make platform data optional when the device is instantiated from DT and look up the micro-TLB number in the bus master DT node. Signed-off-by: Laurent Pinchart --- drivers/iommu/ipmmu-vmsa.c | 55 ++++++++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 3a61103ef10823..368c852d9ee7a7 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +59,8 @@ static LIST_HEAD(ipmmu_devices); * Registers Definition */ +#define IM_NS_ALIAS_OFFSET 0x800 + #define IM_CTX_SIZE 0x40 #define IMCTR 0x0000 @@ -1002,16 +1005,33 @@ static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, static int ipmmu_find_utlb(struct ipmmu_vmsa_device *mmu, struct device *dev) { - const struct ipmmu_vmsa_master *master = mmu->pdata->masters; - const char *devname = dev_name(dev); - unsigned int i; + struct of_phandle_args args; + int ret; + + if (mmu->pdata) { + const struct ipmmu_vmsa_master *master = mmu->pdata->masters; + const char *devname = dev_name(dev); + unsigned int i; - for (i = 0; i < mmu->pdata->num_masters; ++i, ++master) { - if (strcmp(master->name, devname) == 0) - return master->utlb; + for (i = 0; i < mmu->pdata->num_masters; ++i, ++master) { + if (strcmp(master->name, devname) == 0) + return master->utlb; + } + + return -1; } - return -1; + ret = of_parse_phandle_with_args(dev->of_node, "iommus", + "#iommu-cells", 0, &args); + if (ret < 0) + return -1; + + of_node_put(args.np); + + if (args.np != mmu->dev->of_node || args.args_count != 1) + return -1; + + return args.args[0]; } static int ipmmu_add_device(struct device *dev) @@ -1157,7 +1177,7 @@ static int ipmmu_probe(struct platform_device *pdev) int irq; int ret; - if (!pdev->dev.platform_data) { + if (!IS_ENABLED(CONFIG_OF) && !pdev->dev.platform_data) { dev_err(&pdev->dev, "missing platform data\n"); return -EINVAL; } @@ -1178,6 +1198,20 @@ static int ipmmu_probe(struct platform_device *pdev) if (IS_ERR(mmu->base)) return PTR_ERR(mmu->base); + /* + * The IPMMU has two register banks, for secure and non-secure modes. + * The bank mapped at the beginning of the IPMMU address space + * corresponds to the running mode of the CPU. When running in secure + * mode the non-secure register bank is also available at an offset. + * + * Secure mode operation isn't clearly documented and is thus currently + * not implemented in the driver. Furthermore, preliminary tests of + * non-secure operation with the main register bank were not successful. + * Offset the registers base unconditionally to point to the non-secure + * alias space for now. + */ + mmu->base += IM_NS_ALIAS_OFFSET; + irq = platform_get_irq(pdev, 0); if (irq < 0) { dev_err(&pdev->dev, "no IRQ found\n"); @@ -1223,9 +1257,14 @@ static int ipmmu_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id ipmmu_of_ids[] = { + { .compatible = "renesas,ipmmu-vmsa", }, +}; + static struct platform_driver ipmmu_driver = { .driver = { .name = "ipmmu-vmsa", + .of_match_table = of_match_ptr(ipmmu_of_ids), }, .probe = ipmmu_probe, .remove = ipmmu_remove, From a166d31ee56e8fc56ce2497d8de9da5359f4ee41 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 24 Jul 2014 01:36:43 +0200 Subject: [PATCH 090/601] iommu/ipmmu-vmsa: Support multiple micro TLBs per device Devices such as the system DMA controller are connected to multiple micro TLBs of the same IOMMU. Support this. Selective enabling of micro TLBs based on runtime device usage isn't possible at the moment due to lack of support in the IOMMU and DMA mapping APIs. Support for devices connected to different IOMMUs is also unsupported for the same reason. Signed-off-by: Laurent Pinchart --- drivers/iommu/ipmmu-vmsa.c | 115 +++++++++++++++++++++++++++---------- 1 file changed, 86 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 368c852d9ee7a7..5d080cf11ba580 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -47,7 +47,8 @@ struct ipmmu_vmsa_domain { struct ipmmu_vmsa_archdata { struct ipmmu_vmsa_device *mmu; - unsigned int utlb; + unsigned int *utlbs; + unsigned int num_utlbs; }; static DEFINE_SPINLOCK(ipmmu_devices_lock); @@ -900,6 +901,7 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, struct ipmmu_vmsa_device *mmu = archdata->mmu; struct ipmmu_vmsa_domain *domain = io_domain->priv; unsigned long flags; + unsigned int i; int ret = 0; if (!mmu) { @@ -928,7 +930,8 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, if (ret < 0) return ret; - ipmmu_utlb_enable(domain, archdata->utlb); + for (i = 0; i < archdata->num_utlbs; ++i) + ipmmu_utlb_enable(domain, archdata->utlbs[i]); return 0; } @@ -938,8 +941,10 @@ static void ipmmu_detach_device(struct iommu_domain *io_domain, { struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; struct ipmmu_vmsa_domain *domain = io_domain->priv; + unsigned int i; - ipmmu_utlb_disable(domain, archdata->utlb); + for (i = 0; i < archdata->num_utlbs; ++i) + ipmmu_utlb_disable(domain, archdata->utlbs[i]); /* * TODO: Optimize by disabling the context when no device is attached. @@ -1003,10 +1008,12 @@ static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK); } -static int ipmmu_find_utlb(struct ipmmu_vmsa_device *mmu, struct device *dev) +static int ipmmu_find_utlbs(struct ipmmu_vmsa_device *mmu, struct device *dev, + unsigned int **_utlbs) { - struct of_phandle_args args; - int ret; + unsigned int *utlbs; + unsigned int i; + int count; if (mmu->pdata) { const struct ipmmu_vmsa_master *master = mmu->pdata->masters; @@ -1014,32 +1021,64 @@ static int ipmmu_find_utlb(struct ipmmu_vmsa_device *mmu, struct device *dev) unsigned int i; for (i = 0; i < mmu->pdata->num_masters; ++i, ++master) { - if (strcmp(master->name, devname) == 0) - return master->utlb; + if (strcmp(master->name, devname) == 0) { + utlbs = kmalloc(sizeof(*utlbs), GFP_KERNEL); + if (!utlbs) + return -ENOMEM; + + utlbs[0] = master->utlb; + + *_utlbs = utlbs; + return 1; + } } - return -1; + return -EINVAL; } - ret = of_parse_phandle_with_args(dev->of_node, "iommus", - "#iommu-cells", 0, &args); - if (ret < 0) - return -1; + count = of_count_phandle_with_args(dev->of_node, "iommus", + "#iommu-cells"); + if (count < 0) + return -EINVAL; + + utlbs = kcalloc(count, sizeof(*utlbs), GFP_KERNEL); + if (!utlbs) + return -ENOMEM; + + for (i = 0; i < count; ++i) { + struct of_phandle_args args; + int ret; + + ret = of_parse_phandle_with_args(dev->of_node, "iommus", + "#iommu-cells", i, &args); + if (ret < 0) + goto error; + + of_node_put(args.np); + + if (args.np != mmu->dev->of_node || args.args_count != 1) + goto error; + + utlbs[i] = args.args[0]; + } - of_node_put(args.np); + *_utlbs = utlbs; - if (args.np != mmu->dev->of_node || args.args_count != 1) - return -1; + return count; - return args.args[0]; +error: + kfree(utlbs); + return -EINVAL; } static int ipmmu_add_device(struct device *dev) { struct ipmmu_vmsa_archdata *archdata; struct ipmmu_vmsa_device *mmu; - struct iommu_group *group; - int utlb = -1; + struct iommu_group *group = NULL; + unsigned int *utlbs = NULL; + unsigned int i; + int num_utlbs = 0; int ret; if (dev->archdata.iommu) { @@ -1052,8 +1091,8 @@ static int ipmmu_add_device(struct device *dev) spin_lock(&ipmmu_devices_lock); list_for_each_entry(mmu, &ipmmu_devices, list) { - utlb = ipmmu_find_utlb(mmu, dev); - if (utlb >= 0) { + num_utlbs = ipmmu_find_utlbs(mmu, dev, &utlbs); + if (num_utlbs) { /* * TODO Take a reference to the MMU to protect * against device removal. @@ -1064,17 +1103,22 @@ static int ipmmu_add_device(struct device *dev) spin_unlock(&ipmmu_devices_lock); - if (utlb < 0) + if (num_utlbs <= 0) return -ENODEV; - if (utlb >= mmu->num_utlbs) - return -EINVAL; + for (i = 0; i < num_utlbs; ++i) { + if (utlbs[i] >= mmu->num_utlbs) { + ret = -EINVAL; + goto error; + } + } /* Create a device group and add the device to it. */ group = iommu_group_alloc(); if (IS_ERR(group)) { dev_err(dev, "Failed to allocate IOMMU group\n"); - return PTR_ERR(group); + ret = PTR_ERR(group); + goto error; } ret = iommu_group_add_device(group, dev); @@ -1082,7 +1126,8 @@ static int ipmmu_add_device(struct device *dev) if (ret < 0) { dev_err(dev, "Failed to add device to IPMMU group\n"); - return ret; + group = NULL; + goto error; } archdata = kzalloc(sizeof(*archdata), GFP_KERNEL); @@ -1092,7 +1137,8 @@ static int ipmmu_add_device(struct device *dev) } archdata->mmu = mmu; - archdata->utlb = utlb; + archdata->utlbs = utlbs; + archdata->num_utlbs = num_utlbs; dev->archdata.iommu = archdata; /* @@ -1129,17 +1175,28 @@ static int ipmmu_add_device(struct device *dev) error: arm_iommu_release_mapping(mmu->mapping); + kfree(dev->archdata.iommu); + kfree(utlbs); + dev->archdata.iommu = NULL; - iommu_group_remove_device(dev); + + if (!IS_ERR_OR_NULL(group)) + iommu_group_remove_device(dev); + return ret; } static void ipmmu_remove_device(struct device *dev) { + struct ipmmu_vmsa_archdata *archdata = dev->archdata.iommu; + arm_iommu_detach_device(dev); iommu_group_remove_device(dev); - kfree(dev->archdata.iommu); + + kfree(archdata->utlbs); + kfree(archdata); + dev->archdata.iommu = NULL; } From 78e1f974dd351ac82d978e3aac2d27422013f914 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 14 Dec 2014 02:37:09 +0200 Subject: [PATCH 091/601] iommu/ipmmu-vmsa: Remove platform data support No board file instantiates the IPMMU using platform data. Now that we have DT support, get rid of platform data. Signed-off-by: Laurent Pinchart --- drivers/iommu/ipmmu-vmsa.c | 24 ------------------------ include/linux/platform_data/ipmmu-vmsa.h | 24 ------------------------ 2 files changed, 48 deletions(-) delete mode 100644 include/linux/platform_data/ipmmu-vmsa.h diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 5d080cf11ba580..791c3daec7c0c8 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -30,7 +29,6 @@ struct ipmmu_vmsa_device { void __iomem *base; struct list_head list; - const struct ipmmu_vmsa_platform_data *pdata; unsigned int num_utlbs; struct dma_iommu_mapping *mapping; @@ -1015,27 +1013,6 @@ static int ipmmu_find_utlbs(struct ipmmu_vmsa_device *mmu, struct device *dev, unsigned int i; int count; - if (mmu->pdata) { - const struct ipmmu_vmsa_master *master = mmu->pdata->masters; - const char *devname = dev_name(dev); - unsigned int i; - - for (i = 0; i < mmu->pdata->num_masters; ++i, ++master) { - if (strcmp(master->name, devname) == 0) { - utlbs = kmalloc(sizeof(*utlbs), GFP_KERNEL); - if (!utlbs) - return -ENOMEM; - - utlbs[0] = master->utlb; - - *_utlbs = utlbs; - return 1; - } - } - - return -EINVAL; - } - count = of_count_phandle_with_args(dev->of_node, "iommus", "#iommu-cells"); if (count < 0) @@ -1246,7 +1223,6 @@ static int ipmmu_probe(struct platform_device *pdev) } mmu->dev = &pdev->dev; - mmu->pdata = pdev->dev.platform_data; mmu->num_utlbs = 32; /* Map I/O memory and request IRQ. */ diff --git a/include/linux/platform_data/ipmmu-vmsa.h b/include/linux/platform_data/ipmmu-vmsa.h deleted file mode 100644 index 5275b3ac6d3712..00000000000000 --- a/include/linux/platform_data/ipmmu-vmsa.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * IPMMU VMSA Platform Data - * - * Copyright (C) 2014 Renesas Electronics Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - */ - -#ifndef __IPMMU_VMSA_H__ -#define __IPMMU_VMSA_H__ - -struct ipmmu_vmsa_master { - const char *name; - unsigned int utlb; -}; - -struct ipmmu_vmsa_platform_data { - const struct ipmmu_vmsa_master *masters; - unsigned int num_masters; -}; - -#endif /* __IPMMU_VMSA_H__ */ From 7fe5d2b1daf6fd82857a8d0ee47547d7852ebe7b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 15 Jan 2015 12:01:06 +0000 Subject: [PATCH 092/601] arm64/efi: handle potential failure to remap memory map When remapping the UEFI memory map using ioremap_cache(), we have to deal with potential failure. Note that, even if the common case is for ioremap_cache() to return the existing linear mapping of the memory map, we cannot rely on that to be always the case, e.g., in the presence of a mem= kernel parameter. At the same time, remove a stale comment and move the memmap code together. Signed-off-by: Ard Biesheuvel Acked-by: Mark Rutland Acked-by: Mark Salter Signed-off-by: Catalin Marinas --- arch/arm64/kernel/efi.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index a98415b5979c2c..c9cb0fbe7aa45a 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -229,19 +229,21 @@ static int __init arm64_enable_runtime_services(void) return -1; } - mapsize = memmap.map_end - memmap.map; - if (efi_runtime_disabled()) { pr_info("EFI runtime services will be disabled.\n"); return -1; } pr_info("Remapping and enabling EFI services.\n"); - /* replace early memmap mapping with permanent mapping */ + + mapsize = memmap.map_end - memmap.map; memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map, mapsize); + if (!memmap.map) { + pr_err("Failed to remap EFI memory map\n"); + return -1; + } memmap.map_end = memmap.map + mapsize; - efi.memmap = &memmap; efi.systab = (__force void *)ioremap_cache(efi_system_table, From 6083fe74b7bfffc2c7be8c711596608bda0cda6e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 15 Jan 2015 16:42:14 +0000 Subject: [PATCH 093/601] arm64: respect mem= for EFI When booting with EFI, we acquire the EFI memory map after parsing the early params. This unfortuantely renders the option useless as we call memblock_enforce_memory_limit (which uses memblock_remove_range behind the scenes) before we've added any memblocks. We end up removing nothing, then adding all of memory later when efi_init calls reserve_regions. Instead, we can log the limit and apply this later when we do the rest of the memblock work in memblock_init, which should work regardless of the presence of EFI. At the same time we may as well move the early parameter into arm64's mm/init.c, close to arm64_memblock_init. Any memory which must be mapped (e.g. for use by EFI runtime services) must be mapped explicitly reather than relying on the linear mapping, which may be truncated as a result of a mem= option passed on the kernel command line. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Acked-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/setup.c | 19 ------------------- arch/arm64/mm/init.c | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 199d1b7809d7fa..207413fe08a02c 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -322,25 +322,6 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) dump_stack_set_arch_desc("%s (DT)", of_flat_dt_get_machine_name()); } -/* - * Limit the memory size that was specified via FDT. - */ -static int __init early_mem(char *p) -{ - phys_addr_t limit; - - if (!p) - return 1; - - limit = memparse(p, &p) & PAGE_MASK; - pr_notice("Memory limited to %lldMB\n", limit >> 20); - - memblock_enforce_memory_limit(limit); - - return 0; -} -early_param("mem", early_mem); - static void __init request_standard_resources(void) { struct memblock_region *region; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index bac492c12fcc4b..11c7b701b6813b 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -136,10 +136,29 @@ static void arm64_memory_present(void) } #endif +static phys_addr_t memory_limit = (phys_addr_t)ULLONG_MAX; + +/* + * Limit the memory size that was specified via FDT. + */ +static int __init early_mem(char *p) +{ + if (!p) + return 1; + + memory_limit = memparse(p, &p) & PAGE_MASK; + pr_notice("Memory limited to %lldMB\n", memory_limit >> 20); + + return 0; +} +early_param("mem", early_mem); + void __init arm64_memblock_init(void) { phys_addr_t dma_phys_limit = 0; + memblock_enforce_memory_limit(memory_limit); + /* * Register the kernel text, kernel data, initrd, and initial * pagetables with memblock. From dbaffde76405012778b8815b7721554b1302038e Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Tue, 6 Jan 2015 11:18:24 -0800 Subject: [PATCH 094/601] pstore: Use scnprintf() in pstore_mkfile() No guarantees that the names will not exceed the name buffer with future adjustments. Signed-off-by: Mark Salyzyn Acked-by: Kees Cook Signed-off-by: Tony Luck --- fs/pstore/inode.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 50416602774d89..d69586f09ffde0 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -338,32 +338,35 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, switch (type) { case PSTORE_TYPE_DMESG: - sprintf(name, "dmesg-%s-%lld%s", psname, id, - compressed ? ".enc.z" : ""); + scnprintf(name, sizeof(name), "dmesg-%s-%lld%s", + psname, id, compressed ? ".enc.z" : ""); break; case PSTORE_TYPE_CONSOLE: - sprintf(name, "console-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "console-%s-%lld", psname, id); break; case PSTORE_TYPE_FTRACE: - sprintf(name, "ftrace-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "ftrace-%s-%lld", psname, id); break; case PSTORE_TYPE_MCE: - sprintf(name, "mce-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "mce-%s-%lld", psname, id); break; case PSTORE_TYPE_PPC_RTAS: - sprintf(name, "rtas-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "rtas-%s-%lld", psname, id); break; case PSTORE_TYPE_PPC_OF: - sprintf(name, "powerpc-ofw-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "powerpc-ofw-%s-%lld", + psname, id); break; case PSTORE_TYPE_PPC_COMMON: - sprintf(name, "powerpc-common-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "powerpc-common-%s-%lld", + psname, id); break; case PSTORE_TYPE_UNKNOWN: - sprintf(name, "unknown-%s-%lld", psname, id); + scnprintf(name, sizeof(name), "unknown-%s-%lld", psname, id); break; default: - sprintf(name, "type%d-%s-%lld", type, psname, id); + scnprintf(name, sizeof(name), "type%d-%s-%lld", + type, psname, id); break; } From ff6bf6e8024f073ecea7dbf5c6afe6bd3872a569 Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Tue, 13 Jan 2015 14:32:35 -0800 Subject: [PATCH 095/601] pstore: Remove superfluous memory size check All previous checks will fail with error if memory size is not sufficient to register a zone, so this legacy check has become redundant. Signed-off-by: Mark Salyzyn Acked-by: Kees Cook Signed-off-by: Tony Luck --- fs/pstore/ram.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 8613e5b35c2242..34ed8f860e2321 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -474,14 +474,6 @@ static int ramoops_probe(struct platform_device *pdev) if (err) goto fail_init_fprz; - if (!cxt->przs && !cxt->cprz && !cxt->fprz) { - pr_err("memory size too small, minimum is %zu\n", - cxt->console_size + cxt->record_size + - cxt->ftrace_size); - err = -EINVAL; - goto fail_cnt; - } - cxt->pstore.data = cxt; /* * Console can handle any buffer size, so prefer LOG_LINE_MAX. If we @@ -525,7 +517,6 @@ static int ramoops_probe(struct platform_device *pdev) kfree(cxt->pstore.buf); fail_clear: cxt->pstore.bufsize = 0; -fail_cnt: kfree(cxt->fprz); fail_init_fprz: kfree(cxt->cprz); From f44f96528a8cd4134a4f2c1a9d8c785600aa4888 Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Tue, 13 Jan 2015 14:32:50 -0800 Subject: [PATCH 096/601] pstore: Handle zero-sized prz in series ramoops_pstore_read fails to return the next in a prz series after first zero-sized entry, not venturing to the next non-zero entry. Signed-off-by: Mark Salyzyn Acked-by: Kees Cook Signed-off-by: Tony Luck --- fs/pstore/ram.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 34ed8f860e2321..6150e54eed30bc 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -164,6 +164,12 @@ static int ramoops_read_kmsg_hdr(char *buffer, struct timespec *time, return header_length; } +static bool prz_ok(struct persistent_ram_zone *prz) +{ + return !!prz && !!(persistent_ram_old_size(prz) + + persistent_ram_ecc_string(prz, NULL, 0)); +} + static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, int *count, struct timespec *time, char **buf, bool *compressed, @@ -178,13 +184,13 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, prz = ramoops_get_next_prz(cxt->przs, &cxt->dump_read_cnt, cxt->max_dump_cnt, id, type, PSTORE_TYPE_DMESG, 1); - if (!prz) + if (!prz_ok(prz)) prz = ramoops_get_next_prz(&cxt->cprz, &cxt->console_read_cnt, 1, id, type, PSTORE_TYPE_CONSOLE, 0); - if (!prz) + if (!prz_ok(prz)) prz = ramoops_get_next_prz(&cxt->fprz, &cxt->ftrace_read_cnt, 1, id, type, PSTORE_TYPE_FTRACE, 0); - if (!prz) + if (!prz_ok(prz)) return 0; if (!persistent_ram_old(prz)) From 9d5438f462abd6398cdb7b3211bdcec271873a3b Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Fri, 16 Jan 2015 16:01:10 -0800 Subject: [PATCH 097/601] pstore: Add pmsg - user-space accessible pstore object A secured user-space accessible pstore object. Writes to /dev/pmsg0 are appended to the buffer, on reboot the persistent contents are available in /sys/fs/pstore/pmsg-ramoops-[ID]. One possible use is syslogd, or other daemon, can write messages, then on reboot provides a means to triage user-space activities leading up to a panic as a companion to the pstore dmesg or console logs. Signed-off-by: Mark Salyzyn Acked-by: Kees Cook Signed-off-by: Tony Luck --- fs/pstore/Kconfig | 10 ++++ fs/pstore/Makefile | 2 + fs/pstore/inode.c | 3 + fs/pstore/internal.h | 6 ++ fs/pstore/platform.c | 1 + fs/pstore/pmsg.c | 114 +++++++++++++++++++++++++++++++++++++ fs/pstore/ram.c | 34 ++++++++++- include/linux/pstore.h | 1 + include/linux/pstore_ram.h | 1 + 9 files changed, 170 insertions(+), 2 deletions(-) create mode 100644 fs/pstore/pmsg.c diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 983d9510becca6..916b8e23d9684b 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -21,6 +21,16 @@ config PSTORE_CONSOLE When the option is enabled, pstore will log all kernel messages, even if no oops or panic happened. +config PSTORE_PMSG + bool "Log user space messages" + depends on PSTORE + help + When the option is enabled, pstore will export a character + interface /dev/pmsg0 to log user space messages. On reboot + data can be retrieved from /sys/fs/pstore/pmsg-ramoops-[ID]. + + If unsure, say N. + config PSTORE_FTRACE bool "Persistent function tracer" depends on PSTORE diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile index 4c9095c2781e17..e647d8e81712f7 100644 --- a/fs/pstore/Makefile +++ b/fs/pstore/Makefile @@ -7,5 +7,7 @@ obj-y += pstore.o pstore-objs += inode.o platform.o obj-$(CONFIG_PSTORE_FTRACE) += ftrace.o +obj-$(CONFIG_PSTORE_PMSG) += pmsg.o + ramoops-objs += ram.o ram_core.o obj-$(CONFIG_PSTORE_RAM) += ramoops.o diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index d69586f09ffde0..b32ce53d24ee5d 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -361,6 +361,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, scnprintf(name, sizeof(name), "powerpc-common-%s-%lld", psname, id); break; + case PSTORE_TYPE_PMSG: + scnprintf(name, sizeof(name), "pmsg-%s-%lld", psname, id); + break; case PSTORE_TYPE_UNKNOWN: scnprintf(name, sizeof(name), "unknown-%s-%lld", psname, id); break; diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h index 3b3d305277c44e..c36ba2cd0b5de2 100644 --- a/fs/pstore/internal.h +++ b/fs/pstore/internal.h @@ -45,6 +45,12 @@ extern void pstore_register_ftrace(void); static inline void pstore_register_ftrace(void) {} #endif +#ifdef CONFIG_PSTORE_PMSG +extern void pstore_register_pmsg(void); +#else +static inline void pstore_register_pmsg(void) {} +#endif + extern struct pstore_info *psinfo; extern void pstore_set_kmsg_bytes(int); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 0a9b72cdfecaba..15ee78c5020b24 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -447,6 +447,7 @@ int pstore_register(struct pstore_info *psi) if ((psi->flags & PSTORE_FLAGS_FRAGILE) == 0) { pstore_register_console(); pstore_register_ftrace(); + pstore_register_pmsg(); } if (pstore_update_ms >= 0) { diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c new file mode 100644 index 00000000000000..feb5dd2948b4e4 --- /dev/null +++ b/fs/pstore/pmsg.c @@ -0,0 +1,114 @@ +/* + * Copyright 2014 Google, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include "internal.h" + +static DEFINE_MUTEX(pmsg_lock); +#define PMSG_MAX_BOUNCE_BUFFER_SIZE (2*PAGE_SIZE) + +static ssize_t write_pmsg(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + size_t i, buffer_size; + char *buffer; + + if (!count) + return 0; + + if (!access_ok(VERIFY_READ, buf, count)) + return -EFAULT; + + buffer_size = count; + if (buffer_size > PMSG_MAX_BOUNCE_BUFFER_SIZE) + buffer_size = PMSG_MAX_BOUNCE_BUFFER_SIZE; + buffer = vmalloc(buffer_size); + + mutex_lock(&pmsg_lock); + for (i = 0; i < count; ) { + size_t c = min(count - i, buffer_size); + u64 id; + long ret; + + ret = __copy_from_user(buffer, buf + i, c); + if (unlikely(ret != 0)) { + mutex_unlock(&pmsg_lock); + vfree(buffer); + return -EFAULT; + } + psinfo->write_buf(PSTORE_TYPE_PMSG, 0, &id, 0, buffer, 0, c, + psinfo); + + i += c; + } + + mutex_unlock(&pmsg_lock); + vfree(buffer); + return count; +} + +static const struct file_operations pmsg_fops = { + .owner = THIS_MODULE, + .llseek = noop_llseek, + .write = write_pmsg, +}; + +static struct class *pmsg_class; +static int pmsg_major; +#define PMSG_NAME "pmsg" +#undef pr_fmt +#define pr_fmt(fmt) PMSG_NAME ": " fmt + +static char *pmsg_devnode(struct device *dev, umode_t *mode) +{ + if (mode) + *mode = 0220; + return NULL; +} + +void pstore_register_pmsg(void) +{ + struct device *pmsg_device; + + pmsg_major = register_chrdev(0, PMSG_NAME, &pmsg_fops); + if (pmsg_major < 0) { + pr_err("register_chrdev failed\n"); + goto err; + } + + pmsg_class = class_create(THIS_MODULE, PMSG_NAME); + if (IS_ERR(pmsg_class)) { + pr_err("device class file already in use\n"); + goto err_class; + } + pmsg_class->devnode = pmsg_devnode; + + pmsg_device = device_create(pmsg_class, NULL, MKDEV(pmsg_major, 0), + NULL, "%s%d", PMSG_NAME, 0); + if (IS_ERR(pmsg_device)) { + pr_err("failed to create device\n"); + goto err_device; + } + return; + +err_device: + class_destroy(pmsg_class); +err_class: + unregister_chrdev(pmsg_major, PMSG_NAME); +err: + return; +} diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 6150e54eed30bc..39d1373128e94a 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -51,6 +51,10 @@ static ulong ramoops_ftrace_size = MIN_MEM_SIZE; module_param_named(ftrace_size, ramoops_ftrace_size, ulong, 0400); MODULE_PARM_DESC(ftrace_size, "size of ftrace log"); +static ulong ramoops_pmsg_size = MIN_MEM_SIZE; +module_param_named(pmsg_size, ramoops_pmsg_size, ulong, 0400); +MODULE_PARM_DESC(pmsg_size, "size of user space message log"); + static ulong mem_address; module_param(mem_address, ulong, 0400); MODULE_PARM_DESC(mem_address, @@ -82,12 +86,14 @@ struct ramoops_context { struct persistent_ram_zone **przs; struct persistent_ram_zone *cprz; struct persistent_ram_zone *fprz; + struct persistent_ram_zone *mprz; phys_addr_t phys_addr; unsigned long size; unsigned int memtype; size_t record_size; size_t console_size; size_t ftrace_size; + size_t pmsg_size; int dump_oops; struct persistent_ram_ecc_info ecc_info; unsigned int max_dump_cnt; @@ -96,6 +102,7 @@ struct ramoops_context { unsigned int dump_read_cnt; unsigned int console_read_cnt; unsigned int ftrace_read_cnt; + unsigned int pmsg_read_cnt; struct pstore_info pstore; }; @@ -109,6 +116,7 @@ static int ramoops_pstore_open(struct pstore_info *psi) cxt->dump_read_cnt = 0; cxt->console_read_cnt = 0; cxt->ftrace_read_cnt = 0; + cxt->pmsg_read_cnt = 0; return 0; } @@ -190,6 +198,9 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, if (!prz_ok(prz)) prz = ramoops_get_next_prz(&cxt->fprz, &cxt->ftrace_read_cnt, 1, id, type, PSTORE_TYPE_FTRACE, 0); + if (!prz_ok(prz)) + prz = ramoops_get_next_prz(&cxt->mprz, &cxt->pmsg_read_cnt, + 1, id, type, PSTORE_TYPE_PMSG, 0); if (!prz_ok(prz)) return 0; @@ -258,6 +269,11 @@ static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, return -ENOMEM; persistent_ram_write(cxt->fprz, buf, size); return 0; + } else if (type == PSTORE_TYPE_PMSG) { + if (!cxt->mprz) + return -ENOMEM; + persistent_ram_write(cxt->mprz, buf, size); + return 0; } if (type != PSTORE_TYPE_DMESG) @@ -315,6 +331,9 @@ static int ramoops_pstore_erase(enum pstore_type_id type, u64 id, int count, case PSTORE_TYPE_FTRACE: prz = cxt->fprz; break; + case PSTORE_TYPE_PMSG: + prz = cxt->mprz; + break; default: return -EINVAL; } @@ -441,7 +460,7 @@ static int ramoops_probe(struct platform_device *pdev) goto fail_out; if (!pdata->mem_size || (!pdata->record_size && !pdata->console_size && - !pdata->ftrace_size)) { + !pdata->ftrace_size && !pdata->pmsg_size)) { pr_err("The memory size and the record/console size must be " "non-zero\n"); goto fail_out; @@ -453,6 +472,8 @@ static int ramoops_probe(struct platform_device *pdev) pdata->console_size = rounddown_pow_of_two(pdata->console_size); if (pdata->ftrace_size && !is_power_of_2(pdata->ftrace_size)) pdata->ftrace_size = rounddown_pow_of_two(pdata->ftrace_size); + if (pdata->pmsg_size && !is_power_of_2(pdata->pmsg_size)) + pdata->pmsg_size = rounddown_pow_of_two(pdata->pmsg_size); cxt->size = pdata->mem_size; cxt->phys_addr = pdata->mem_address; @@ -460,12 +481,14 @@ static int ramoops_probe(struct platform_device *pdev) cxt->record_size = pdata->record_size; cxt->console_size = pdata->console_size; cxt->ftrace_size = pdata->ftrace_size; + cxt->pmsg_size = pdata->pmsg_size; cxt->dump_oops = pdata->dump_oops; cxt->ecc_info = pdata->ecc_info; paddr = cxt->phys_addr; - dump_mem_sz = cxt->size - cxt->console_size - cxt->ftrace_size; + dump_mem_sz = cxt->size - cxt->console_size - cxt->ftrace_size + - cxt->pmsg_size; err = ramoops_init_przs(dev, cxt, &paddr, dump_mem_sz); if (err) goto fail_out; @@ -480,6 +503,10 @@ static int ramoops_probe(struct platform_device *pdev) if (err) goto fail_init_fprz; + err = ramoops_init_prz(dev, cxt, &cxt->mprz, &paddr, cxt->pmsg_size, 0); + if (err) + goto fail_init_mprz; + cxt->pstore.data = cxt; /* * Console can handle any buffer size, so prefer LOG_LINE_MAX. If we @@ -523,6 +550,8 @@ static int ramoops_probe(struct platform_device *pdev) kfree(cxt->pstore.buf); fail_clear: cxt->pstore.bufsize = 0; + kfree(cxt->mprz); +fail_init_mprz: kfree(cxt->fprz); fail_init_fprz: kfree(cxt->cprz); @@ -580,6 +609,7 @@ static void ramoops_register_dummy(void) dummy_data->record_size = record_size; dummy_data->console_size = ramoops_console_size; dummy_data->ftrace_size = ramoops_ftrace_size; + dummy_data->pmsg_size = ramoops_pmsg_size; dummy_data->dump_oops = dump_oops; /* * For backwards compatibility ramoops.ecc=1 means 16 bytes ECC diff --git a/include/linux/pstore.h b/include/linux/pstore.h index ece0c6bbfcc561..8884f6e507f7c7 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -39,6 +39,7 @@ enum pstore_type_id { PSTORE_TYPE_PPC_RTAS = 4, PSTORE_TYPE_PPC_OF = 5, PSTORE_TYPE_PPC_COMMON = 6, + PSTORE_TYPE_PMSG = 7, PSTORE_TYPE_UNKNOWN = 255 }; diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 4af3fdc85b01cb..9c9d6c154c8e92 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -81,6 +81,7 @@ struct ramoops_platform_data { unsigned long record_size; unsigned long console_size; unsigned long ftrace_size; + unsigned long pmsg_size; int dump_oops; struct persistent_ram_ecc_info ecc_info; }; From a6b8978c54b771308f6f1692b9735ac0bb087cc2 Mon Sep 17 00:00:00 2001 From: alex chen Date: Fri, 16 Jan 2015 15:52:03 -0800 Subject: [PATCH 098/601] pstore: Fix sprintf format specifier in pstore_dump() We should use sprintf format specifier "%u" instead of "%d" for argument of type 'unsigned int' in pstore_dump(). Signed-off-by: Alex Chen Reviewed-by: Joseph Qi Acked-by: Kees Cook Signed-off-by: Tony Luck --- fs/pstore/platform.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 15ee78c5020b24..c4c9a10c5760e0 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -301,7 +301,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, if (big_oops_buf) { dst = big_oops_buf; - hsize = sprintf(dst, "%s#%d Part%d\n", why, + hsize = sprintf(dst, "%s#%d Part%u\n", why, oopscount, part); size = big_oops_buf_sz - hsize; @@ -321,7 +321,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, } } else { dst = psinfo->buf; - hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, + hsize = sprintf(dst, "%s#%d Part%u\n", why, oopscount, part); size = psinfo->bufsize - hsize; dst += hsize; From 398a1e71dc827b994b7f2f56c7c2186fea7f8d75 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 29 Aug 2014 10:33:02 +0100 Subject: [PATCH 099/601] TPM: Add new TPMs to the tail of the list to prevent inadvertent change of dev Add newly registered TPMs to the tail of the list, not the beginning, so that things that are specifying TPM_ANY_NUM don't find that the device they're using has inadvertently changed. Adding a second device would break IMA, for instance. Cc: stable@vger.kernel.org Signed-off-by: David Howells Reviewed-by: Jason Gunthorpe Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm-interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 6af17002a11533..cfb9089887bd86 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -1122,7 +1122,7 @@ struct tpm_chip *tpm_register_hardware(struct device *dev, /* Make chip available */ spin_lock(&driver_lock); - list_add_rcu(&chip->list, &tpm_chip_list); + list_add_tail_rcu(&chip->list, &tpm_chip_list); spin_unlock(&driver_lock); return chip; From bb95cd34ba4c9467114acc78eeddd53ab1c10085 Mon Sep 17 00:00:00 2001 From: Kiran Padwal Date: Fri, 19 Sep 2014 12:44:39 +0530 Subject: [PATCH 100/601] char: tpm: Add missing error check for devm_kzalloc Currently these driver are missing a check on the return value of devm_kzalloc, which would cause a NULL pointer dereference in a OOM situation. This patch adds a missing check for tpm_i2c_atmel.c and tpm_i2c_nuvoton.c Cc: stable@vger.kernel.org Signed-off-by: Kiran Padwal Reviewed-By: Jason Gunthorpe Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_atmel.c | 4 ++++ drivers/char/tpm/tpm_i2c_nuvoton.c | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c index 77272925dee6dd..503a85ae176cdc 100644 --- a/drivers/char/tpm/tpm_i2c_atmel.c +++ b/drivers/char/tpm/tpm_i2c_atmel.c @@ -168,6 +168,10 @@ static int i2c_atmel_probe(struct i2c_client *client, chip->vendor.priv = devm_kzalloc(dev, sizeof(struct priv_data), GFP_KERNEL); + if (!chip->vendor.priv) { + rc = -ENOMEM; + goto out_err; + } /* Default timeouts */ chip->vendor.timeout_a = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT); diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c index 7b158efd49f7ad..23c7b137a7fdb4 100644 --- a/drivers/char/tpm/tpm_i2c_nuvoton.c +++ b/drivers/char/tpm/tpm_i2c_nuvoton.c @@ -538,6 +538,11 @@ static int i2c_nuvoton_probe(struct i2c_client *client, chip->vendor.priv = devm_kzalloc(dev, sizeof(struct priv_data), GFP_KERNEL); + if (!chip->vendor.priv) { + rc = -ENOMEM; + goto out_err; + } + init_waitqueue_head(&chip->vendor.read_queue); init_waitqueue_head(&chip->vendor.int_queue); From 448e9c55c12d6bd4fa90a7e31d802e045666d7c8 Mon Sep 17 00:00:00 2001 From: Scot Doyle Date: Wed, 24 Sep 2014 22:41:10 +0000 Subject: [PATCH 101/601] tpm_tis: verify interrupt during init Some machines, such as the Acer C720 and Toshiba CB35, have TPMs that do not send IRQs while also having an ACPI TPM entry indicating that they will be sent. These machines freeze on resume while the tpm_tis module waits for an IRQ, eventually timing out. When in interrupt mode, the tpm_tis module should receive an IRQ during module init. Fall back to polling mode if none is received when expected. Cc: Signed-off-by: Scot Doyle Tested-by: Michael Mullin Reviewed-by: Jason Gunthorpe [phuewe: minor checkpatch fixed] Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_tis.c | 76 +++++++++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 14 deletions(-) diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 6f198549611222..ccb140d605327d 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -75,6 +75,10 @@ enum tis_defaults { #define TPM_DID_VID(l) (0x0F00 | ((l) << 12)) #define TPM_RID(l) (0x0F04 | ((l) << 12)) +struct priv_data { + bool irq_tested; +}; + static LIST_HEAD(tis_chips); static DEFINE_MUTEX(tis_lock); @@ -338,12 +342,27 @@ static int tpm_tis_send_data(struct tpm_chip *chip, u8 *buf, size_t len) return rc; } +static void disable_interrupts(struct tpm_chip *chip) +{ + u32 intmask; + + intmask = + ioread32(chip->vendor.iobase + + TPM_INT_ENABLE(chip->vendor.locality)); + intmask &= ~TPM_GLOBAL_INT_ENABLE; + iowrite32(intmask, + chip->vendor.iobase + + TPM_INT_ENABLE(chip->vendor.locality)); + free_irq(chip->vendor.irq, chip); + chip->vendor.irq = 0; +} + /* * If interrupts are used (signaled by an irq set in the vendor structure) * tpm.c can skip polling for the data to be available as the interrupt is * waited for here */ -static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) +static int tpm_tis_send_main(struct tpm_chip *chip, u8 *buf, size_t len) { int rc; u32 ordinal; @@ -373,6 +392,30 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) return rc; } +static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) +{ + int rc, irq; + struct priv_data *priv = chip->vendor.priv; + + if (!chip->vendor.irq || priv->irq_tested) + return tpm_tis_send_main(chip, buf, len); + + /* Verify receipt of the expected IRQ */ + irq = chip->vendor.irq; + chip->vendor.irq = 0; + rc = tpm_tis_send_main(chip, buf, len); + chip->vendor.irq = irq; + if (!priv->irq_tested) + msleep(1); + if (!priv->irq_tested) { + disable_interrupts(chip); + dev_err(chip->dev, + FW_BUG "TPM interrupt not working, polling instead\n"); + } + priv->irq_tested = true; + return rc; +} + struct tis_vendor_timeout_override { u32 did_vid; unsigned long timeout_us[4]; @@ -505,6 +548,7 @@ static irqreturn_t tis_int_handler(int dummy, void *dev_id) if (interrupt == 0) return IRQ_NONE; + ((struct priv_data *)chip->vendor.priv)->irq_tested = true; if (interrupt & TPM_INTF_DATA_AVAIL_INT) wake_up_interruptible(&chip->vendor.read_queue); if (interrupt & TPM_INTF_LOCALITY_CHANGE_INT) @@ -534,9 +578,14 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, u32 vendor, intfcaps, intmask; int rc, i, irq_s, irq_e, probe; struct tpm_chip *chip; + struct priv_data *priv; + priv = devm_kzalloc(dev, sizeof(struct priv_data), GFP_KERNEL); + if (priv == NULL) + return -ENOMEM; if (!(chip = tpm_register_hardware(dev, &tpm_tis))) return -ENODEV; + chip->vendor.priv = priv; chip->vendor.iobase = ioremap(start, len); if (!chip->vendor.iobase) { @@ -605,19 +654,6 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, if (intfcaps & TPM_INTF_DATA_AVAIL_INT) dev_dbg(dev, "\tData Avail Int Support\n"); - /* get the timeouts before testing for irqs */ - if (tpm_get_timeouts(chip)) { - dev_err(dev, "Could not get TPM timeouts and durations\n"); - rc = -ENODEV; - goto out_err; - } - - if (tpm_do_selftest(chip)) { - dev_err(dev, "TPM self test failed\n"); - rc = -ENODEV; - goto out_err; - } - /* INTERRUPT Setup */ init_waitqueue_head(&chip->vendor.read_queue); init_waitqueue_head(&chip->vendor.int_queue); @@ -719,6 +755,18 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, } } + if (tpm_get_timeouts(chip)) { + dev_err(dev, "Could not get TPM timeouts and durations\n"); + rc = -ENODEV; + goto out_err; + } + + if (tpm_do_selftest(chip)) { + dev_err(dev, "TPM self test failed\n"); + rc = -ENODEV; + goto out_err; + } + INIT_LIST_HEAD(&chip->vendor.list); mutex_lock(&tis_lock); list_add(&chip->vendor.list, &tis_chips); From 84eb186bc37c0900b53077ca21cf6dd15823a232 Mon Sep 17 00:00:00 2001 From: "Hon Ching (Vicky) Lo" Date: Sun, 30 Nov 2014 15:01:28 +0100 Subject: [PATCH 102/601] tpm: Fix NULL return in tpm_ibmvtpm_get_desired_dma There was an oops in tpm_ibmvtpm_get_desired_dma, which caused kernel panic during boot when vTPM is enabled in Power partition configured in AMS mode. vio_bus_probe calls vio_cmo_bus_probe which calls tpm_ibmvtpm_get_desired_dma to get the size needed for DMA allocation. The problem is, vio_cmo_bus_probe is called before calling probe, which for vtpm is tpm_ibmvtpm_probe and it's this function that initializes and sets up vtpm's CRQ and gets required data values. Therefore, since this has not yet been done, NULL is returned in attempt to get the size for DMA allocation. We added a NULL check. In addition, a default buffer size will be set when NULL is returned. Cc: Signed-off-by: Hon Ching (Vicky) Lo Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_ibmvtpm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index af74c57e509094..4109222f2878c1 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -307,6 +307,14 @@ static int tpm_ibmvtpm_remove(struct vio_dev *vdev) static unsigned long tpm_ibmvtpm_get_desired_dma(struct vio_dev *vdev) { struct ibmvtpm_dev *ibmvtpm = ibmvtpm_get_data(&vdev->dev); + + /* ibmvtpm initializes at probe time, so the data we are + * asking for may not be set yet. Estimate that 4K required + * for TCE-mapped buffer in addition to CRQ. + */ + if (!ibmvtpm) + return CRQ_RES_BUF_SIZE + PAGE_SIZE; + return CRQ_RES_BUF_SIZE + ibmvtpm->rtce_size; } From 9fd8e5a25ecb0febfad321c04478a9d8b8b247f7 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sun, 16 Nov 2014 11:03:24 +0100 Subject: [PATCH 103/601] tpm: remove unnecessary sizeof(u8) sizeof(u8) is always 1. Signed-off-by: Fabian Frederick Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index 4669e371342850..5271b75d76614c 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -635,13 +635,13 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) } platform_data->tpm_i2c_buffer[0] = - kmalloc(TPM_BUFSIZE * sizeof(u8), GFP_KERNEL); + kmalloc(TPM_BUFSIZE, GFP_KERNEL); if (platform_data->tpm_i2c_buffer[0] == NULL) { err = -ENOMEM; goto _tpm_clean_answer; } platform_data->tpm_i2c_buffer[1] = - kmalloc(TPM_BUFSIZE * sizeof(u8), GFP_KERNEL); + kmalloc(TPM_BUFSIZE, GFP_KERNEL); if (platform_data->tpm_i2c_buffer[1] == NULL) { err = -ENOMEM; goto _tpm_clean_response1; From 2dfc2deda2c9b1def7dc9399623479b69cd9f7ff Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Wed, 19 Nov 2014 14:44:15 +0100 Subject: [PATCH 104/601] char: tpm: Deletion of unnecessary checks before the function call "tpm_dev_vendor_release" The tpm_dev_vendor_release() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_atmel.c | 3 +-- drivers/char/tpm/tpm_i2c_nuvoton.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c index 503a85ae176cdc..5f448886417f50 100644 --- a/drivers/char/tpm/tpm_i2c_atmel.c +++ b/drivers/char/tpm/tpm_i2c_atmel.c @@ -206,8 +206,7 @@ static int i2c_atmel_remove(struct i2c_client *client) struct device *dev = &(client->dev); struct tpm_chip *chip = dev_get_drvdata(dev); - if (chip) - tpm_dev_vendor_release(chip); + tpm_dev_vendor_release(chip); tpm_remove_hardware(dev); kfree(chip); return 0; diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c index 23c7b137a7fdb4..bbb4997438c3b4 100644 --- a/drivers/char/tpm/tpm_i2c_nuvoton.c +++ b/drivers/char/tpm/tpm_i2c_nuvoton.c @@ -630,8 +630,7 @@ static int i2c_nuvoton_remove(struct i2c_client *client) struct device *dev = &(client->dev); struct tpm_chip *chip = dev_get_drvdata(dev); - if (chip) - tpm_dev_vendor_release(chip); + tpm_dev_vendor_release(chip); tpm_remove_hardware(dev); kfree(chip); return 0; From 1ba3b0b6f218072afe8372d12f1b6bf26a26008e Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Mon, 1 Dec 2014 19:32:46 +0100 Subject: [PATCH 105/601] tpm/tpm_i2c_stm_st33: Fix potential bug in tpm_stm_i2c_send When sending data in tpm_stm_i2c_send, each loop iteration send buf. Send buf + i instead as the goal of this for loop is to send a number of byte from buf that fit in burstcnt. Once those byte are sent, we are supposed to send the next ones. The driver was working because the burstcount value returns always the maximum size for a TPM command or response. (0x800 for a command and 0x400 for a response). Cc: stable@vger.kernel.org Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index 5271b75d76614c..d2031c43caa138 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -487,7 +487,7 @@ static int tpm_stm_i2c_send(struct tpm_chip *chip, unsigned char *buf, if (burstcnt < 0) return burstcnt; size = min_t(int, len - i - 1, burstcnt); - ret = I2C_WRITE_DATA(client, TPM_DATA_FIFO, buf, size); + ret = I2C_WRITE_DATA(client, TPM_DATA_FIFO, buf + i, size); if (ret < 0) goto out_err; From 578aa13eb796ce3f20523504424e973f351e4870 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Mon, 1 Dec 2014 19:32:47 +0100 Subject: [PATCH 106/601] tpm/tpm_i2c_stm_st33: Update Kconfig in order to be inline to other similar product STMicroelectronics i2c tpm is the only one to have a different tristate label. Rename it "TPM Interface Specification 1.2 Interface (I2C - STMicroelectronics)" Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard [phuewe: corrected module name in the helptext] Signed-off-by: Peter Huewe --- drivers/char/tpm/Kconfig | 6 +++--- drivers/char/tpm/Makefile | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig index c54cac3f8bc8d3..3d0873b1ab5b59 100644 --- a/drivers/char/tpm/Kconfig +++ b/drivers/char/tpm/Kconfig @@ -100,15 +100,15 @@ config TCG_IBMVTPM will be accessible from within Linux. To compile this driver as a module, choose M here; the module will be called tpm_ibmvtpm. -config TCG_ST33_I2C - tristate "STMicroelectronics ST33 I2C TPM" +config TCG_TIS_I2C_ST33 + tristate "TPM Interface Specification 1.2 Interface (I2C - STMicroelectronics)" depends on I2C depends on GPIOLIB ---help--- If you have a TPM security chip from STMicroelectronics working with an I2C bus say Yes and it will be accessible from within Linux. To compile this driver as a module, choose M here; the module will be - called tpm_stm_st33_i2c. + called tpm_i2c_stm_st33. config TCG_XEN tristate "XEN TPM Interface" diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile index 4d85dd681b81e0..7f54dae847e454 100644 --- a/drivers/char/tpm/Makefile +++ b/drivers/char/tpm/Makefile @@ -20,5 +20,5 @@ obj-$(CONFIG_TCG_NSC) += tpm_nsc.o obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o obj-$(CONFIG_TCG_IBMVTPM) += tpm_ibmvtpm.o -obj-$(CONFIG_TCG_ST33_I2C) += tpm_i2c_stm_st33.o +obj-$(CONFIG_TCG_TIS_I2C_ST33) += tpm_i2c_stm_st33.o obj-$(CONFIG_TCG_XEN) += xen-tpmfront.o From 642d2be22a746ba756e0273e19977f1794fe080c Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Mon, 1 Dec 2014 19:32:48 +0100 Subject: [PATCH 107/601] tpm/tpm_i2c_stm_st33: Change License header to have up to date address information The Free Software Foundation may have mail address change. In order to be sure to have up to date mail address give an url to the license which includes accurate informations. Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index d2031c43caa138..bf1e530938736d 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -12,9 +12,8 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . * * STMicroelectronics version 1.2.0, Copyright (C) 2010 * STMicroelectronics comes with ABSOLUTELY NO WARRANTY. From 7500c4b99bddf1b01440bb430d7e38e7652817bf Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Mon, 1 Dec 2014 19:32:49 +0100 Subject: [PATCH 108/601] tpm/tpm_i2c_stm_st33: Fix few coding style error reported by scripts/checkpatch.pl Fix: - WARNING: Missing a blank line after declarations - WARNING: braces {} are not necessary for any arm of this statement Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index bf1e530938736d..c4925a03d7ab30 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -156,6 +156,7 @@ static int read8_reg(struct i2c_client *client, u8 tpm_register, static void clear_interruption(struct i2c_client *client) { u8 interrupt; + I2C_READ_DATA(client, TPM_INT_STATUS, &interrupt, 1); I2C_WRITE_DATA(client, TPM_INT_STATUS, &interrupt, 1); I2C_READ_DATA(client, TPM_INT_STATUS, &interrupt, 1); @@ -232,6 +233,7 @@ static u8 tpm_stm_i2c_status(struct tpm_chip *chip) { struct i2c_client *client; u8 data; + client = (struct i2c_client *)TPM_VPRIV(chip); I2C_READ_DATA(client, TPM_STS, &data, 1); @@ -783,11 +785,11 @@ static int tpm_st33_i2c_pm_suspend(struct device *dev) struct st33zp24_platform_data *pin_infos = dev->platform_data; int ret = 0; - if (power_mgt) { + if (power_mgt) gpio_set_value(pin_infos->io_lpcpd, 0); - } else { + else ret = tpm_pm_suspend(dev); - } + return ret; } /* tpm_st33_i2c_suspend() */ From 2dbca7508f99618fc6cf964134403dccb63968bd Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Mon, 1 Dec 2014 19:32:50 +0100 Subject: [PATCH 109/601] tpm/tpm_i2c_stm_st33: Move tpm registers to tpm_i2c_stm_st33.c Move tpm registers to tpm_i2c_stm_st33.c. Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 17 +++++++++++++++++ drivers/char/tpm/tpm_i2c_stm_st33.h | 17 ----------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index c4925a03d7ab30..44a02103ad4996 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -49,6 +49,23 @@ #include #include "tpm.h" + +#define TPM_ACCESS 0x0 +#define TPM_STS 0x18 +#define TPM_HASH_END 0x20 +#define TPM_DATA_FIFO 0x24 +#define TPM_HASH_DATA 0x24 +#define TPM_HASH_START 0x28 +#define TPM_INTF_CAPABILITY 0x14 +#define TPM_INT_STATUS 0x10 +#define TPM_INT_ENABLE 0x08 + +#define TPM_DUMMY_BYTE 0xAA +#define TPM_WRITE_DIRECTION 0x80 +#define TPM_HEADER_SIZE 10 +#define TPM_BUFSIZE 2048 + +#define LOCALITY0 0 #include "tpm_i2c_stm_st33.h" enum stm33zp24_access { diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.h b/drivers/char/tpm/tpm_i2c_stm_st33.h index 439a43249aa650..3041271342eb78 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.h +++ b/drivers/char/tpm/tpm_i2c_stm_st33.h @@ -30,23 +30,6 @@ #ifndef __STM_ST33_TPM_I2C_MAIN_H__ #define __STM_ST33_TPM_I2C_MAIN_H__ -#define TPM_ACCESS (0x0) -#define TPM_STS (0x18) -#define TPM_HASH_END (0x20) -#define TPM_DATA_FIFO (0x24) -#define TPM_HASH_DATA (0x24) -#define TPM_HASH_START (0x28) -#define TPM_INTF_CAPABILITY (0x14) -#define TPM_INT_STATUS (0x10) -#define TPM_INT_ENABLE (0x08) - -#define TPM_DUMMY_BYTE 0xAA -#define TPM_WRITE_DIRECTION 0x80 -#define TPM_HEADER_SIZE 10 -#define TPM_BUFSIZE 2048 - -#define LOCALITY0 0 - #define TPM_ST33_I2C "st33zp24_i2c" struct st33zp24_platform_data { From b9626f32876debc356fadb6e19aebcfe9d70c5ff Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Mon, 1 Dec 2014 19:32:51 +0100 Subject: [PATCH 110/601] tpm/tpm_i2c_stm_st33: Add new tpm_stm_dev structure and remove tpm_i2c_buffer[0], [1] buffer. In order to clean big buffers in st33zp24_platform_data structure, replace with tpm_stm_dev for driver internal usage. As only one buffer is really necessary replace with buf field. In the mean time move tpm_i2c_stm_st33.h to include/linux/platform_data. Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 204 ++++++++---------- .../linux/platform_data}/tpm_i2c_stm_st33.h | 10 +- 2 files changed, 95 insertions(+), 119 deletions(-) rename {drivers/char/tpm => include/linux/platform_data}/tpm_i2c_stm_st33.h (81%) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index 44a02103ad4996..4f725386385fd5 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -48,6 +47,7 @@ #include #include +#include #include "tpm.h" #define TPM_ACCESS 0x0 @@ -66,7 +66,7 @@ #define TPM_BUFSIZE 2048 #define LOCALITY0 0 -#include "tpm_i2c_stm_st33.h" + enum stm33zp24_access { TPM_ACCESS_VALID = 0x80, @@ -98,6 +98,15 @@ enum tis_defaults { TIS_LONG_TIMEOUT = 2000, }; +struct tpm_stm_dev { + struct i2c_client *client; + struct completion irq_detection; + struct tpm_chip *chip; + u8 buf[TPM_BUFSIZE + 1]; + int io_serirq; + int io_lpcpd; +}; + /* * write8_reg * Send byte to the TIS register according to the ST33ZP24 I2C protocol. @@ -106,17 +115,12 @@ enum tis_defaults { * @param: tpm_size, The length of the data * @return: Returns negative errno, or else the number of bytes written. */ -static int write8_reg(struct i2c_client *client, u8 tpm_register, +static int write8_reg(struct tpm_stm_dev *tpm_dev, u8 tpm_register, u8 *tpm_data, u16 tpm_size) { - struct st33zp24_platform_data *pin_infos; - - pin_infos = client->dev.platform_data; - - pin_infos->tpm_i2c_buffer[0][0] = tpm_register; - memcpy(&pin_infos->tpm_i2c_buffer[0][1], tpm_data, tpm_size); - return i2c_master_send(client, pin_infos->tpm_i2c_buffer[0], - tpm_size + 1); + tpm_dev->buf[0] = tpm_register; + memcpy(tpm_dev->buf + 1, tpm_data, tpm_size); + return i2c_master_send(tpm_dev->client, tpm_dev->buf, tpm_size + 1); } /* write8_reg() */ /* @@ -127,56 +131,56 @@ static int write8_reg(struct i2c_client *client, u8 tpm_register, * @param: tpm_size, tpm TPM response size to read. * @return: number of byte read successfully: should be one if success. */ -static int read8_reg(struct i2c_client *client, u8 tpm_register, +static int read8_reg(struct tpm_stm_dev *tpm_dev, u8 tpm_register, u8 *tpm_data, int tpm_size) { u8 status = 0; u8 data; data = TPM_DUMMY_BYTE; - status = write8_reg(client, tpm_register, &data, 1); + status = write8_reg(tpm_dev, tpm_register, &data, 1); if (status == 2) - status = i2c_master_recv(client, tpm_data, tpm_size); + status = i2c_master_recv(tpm_dev->client, tpm_data, tpm_size); return status; } /* read8_reg() */ /* * I2C_WRITE_DATA * Send byte to the TIS register according to the ST33ZP24 I2C protocol. - * @param: client, the chip description + * @param: tpm_dev, the chip description * @param: tpm_register, the tpm tis register where the data should be written * @param: tpm_data, the tpm_data to write inside the tpm_register * @param: tpm_size, The length of the data * @return: number of byte written successfully: should be one if success. */ -#define I2C_WRITE_DATA(client, tpm_register, tpm_data, tpm_size) \ - (write8_reg(client, tpm_register | \ +#define I2C_WRITE_DATA(tpm_dev, tpm_register, tpm_data, tpm_size) \ + (write8_reg(tpm_dev, tpm_register | \ TPM_WRITE_DIRECTION, tpm_data, tpm_size)) /* * I2C_READ_DATA * Recv byte from the TIS register according to the ST33ZP24 I2C protocol. - * @param: tpm, the chip description + * @param: tpm_dev, the chip description * @param: tpm_register, the tpm tis register where the data should be read * @param: tpm_data, the TPM response * @param: tpm_size, tpm TPM response size to read. * @return: number of byte read successfully: should be one if success. */ -#define I2C_READ_DATA(client, tpm_register, tpm_data, tpm_size) \ - (read8_reg(client, tpm_register, tpm_data, tpm_size)) +#define I2C_READ_DATA(tpm_dev, tpm_register, tpm_data, tpm_size) \ + (read8_reg(tpm_dev, tpm_register, tpm_data, tpm_size)) /* * clear_interruption * clear the TPM interrupt register. * @param: tpm, the chip description */ -static void clear_interruption(struct i2c_client *client) +static void clear_interruption(struct tpm_stm_dev *tpm_dev) { u8 interrupt; - I2C_READ_DATA(client, TPM_INT_STATUS, &interrupt, 1); - I2C_WRITE_DATA(client, TPM_INT_STATUS, &interrupt, 1); - I2C_READ_DATA(client, TPM_INT_STATUS, &interrupt, 1); + I2C_READ_DATA(tpm_dev, TPM_INT_STATUS, &interrupt, 1); + I2C_WRITE_DATA(tpm_dev, TPM_INT_STATUS, &interrupt, 1); + I2C_READ_DATA(tpm_dev, TPM_INT_STATUS, &interrupt, 1); } /* clear_interruption() */ /* @@ -190,17 +194,16 @@ static long _wait_for_interrupt_serirq_timeout(struct tpm_chip *chip, { long status; struct i2c_client *client; - struct st33zp24_platform_data *pin_infos; + struct tpm_stm_dev *tpm_dev; - client = (struct i2c_client *)TPM_VPRIV(chip); - pin_infos = client->dev.platform_data; + tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); + client = tpm_dev->client; status = wait_for_completion_interruptible_timeout( - &pin_infos->irq_detection, - timeout); + &tpm_dev->irq_detection, + timeout); if (status > 0) - enable_irq(gpio_to_irq(pin_infos->io_serirq)); - gpio_direction_input(pin_infos->io_serirq); + enable_irq(client->irq); return status; } /* wait_for_interrupt_serirq_timeout() */ @@ -209,15 +212,15 @@ static int wait_for_serirq_timeout(struct tpm_chip *chip, bool condition, unsigned long timeout) { int status = 2; - struct i2c_client *client; + struct tpm_stm_dev *tpm_dev; - client = (struct i2c_client *)TPM_VPRIV(chip); + tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); status = _wait_for_interrupt_serirq_timeout(chip, timeout); if (!status) { status = -EBUSY; } else { - clear_interruption(client); + clear_interruption(tpm_dev); if (condition) status = 1; } @@ -230,13 +233,13 @@ static int wait_for_serirq_timeout(struct tpm_chip *chip, bool condition, */ static void tpm_stm_i2c_cancel(struct tpm_chip *chip) { - struct i2c_client *client; + struct tpm_stm_dev *tpm_dev; u8 data; - client = (struct i2c_client *)TPM_VPRIV(chip); + tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); data = TPM_STS_COMMAND_READY; - I2C_WRITE_DATA(client, TPM_STS, &data, 1); + I2C_WRITE_DATA(tpm_dev, TPM_STS, &data, 1); if (chip->vendor.irq) wait_for_serirq_timeout(chip, 1, chip->vendor.timeout_a); } /* tpm_stm_i2c_cancel() */ @@ -248,12 +251,12 @@ static void tpm_stm_i2c_cancel(struct tpm_chip *chip) */ static u8 tpm_stm_i2c_status(struct tpm_chip *chip) { - struct i2c_client *client; + struct tpm_stm_dev *tpm_dev; u8 data; - client = (struct i2c_client *)TPM_VPRIV(chip); + tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); - I2C_READ_DATA(client, TPM_STS, &data, 1); + I2C_READ_DATA(tpm_dev, TPM_STS, &data, 1); return data; } /* tpm_stm_i2c_status() */ @@ -265,13 +268,13 @@ static u8 tpm_stm_i2c_status(struct tpm_chip *chip) */ static int check_locality(struct tpm_chip *chip) { - struct i2c_client *client; + struct tpm_stm_dev *tpm_dev; u8 data; u8 status; - client = (struct i2c_client *)TPM_VPRIV(chip); + tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); - status = I2C_READ_DATA(client, TPM_ACCESS, &data, 1); + status = I2C_READ_DATA(tpm_dev, TPM_ACCESS, &data, 1); if (status && (data & (TPM_ACCESS_ACTIVE_LOCALITY | TPM_ACCESS_VALID)) == (TPM_ACCESS_ACTIVE_LOCALITY | TPM_ACCESS_VALID)) @@ -290,16 +293,16 @@ static int request_locality(struct tpm_chip *chip) { unsigned long stop; long rc; - struct i2c_client *client; + struct tpm_stm_dev *tpm_dev; u8 data; - client = (struct i2c_client *)TPM_VPRIV(chip); + tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); if (check_locality(chip) == chip->vendor.locality) return chip->vendor.locality; data = TPM_ACCESS_REQUEST_USE; - rc = I2C_WRITE_DATA(client, TPM_ACCESS, &data, 1); + rc = I2C_WRITE_DATA(tpm_dev, TPM_ACCESS, &data, 1); if (rc < 0) goto end; @@ -328,13 +331,13 @@ static int request_locality(struct tpm_chip *chip) */ static void release_locality(struct tpm_chip *chip) { - struct i2c_client *client; + struct tpm_stm_dev *tpm_dev; u8 data; - client = (struct i2c_client *)TPM_VPRIV(chip); + tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); data = TPM_ACCESS_ACTIVE_LOCALITY; - I2C_WRITE_DATA(client, TPM_ACCESS, &data, 1); + I2C_WRITE_DATA(tpm_dev, TPM_ACCESS, &data, 1); } /* @@ -347,19 +350,20 @@ static int get_burstcount(struct tpm_chip *chip) unsigned long stop; int burstcnt, status; u8 tpm_reg, temp; + struct tpm_stm_dev *tpm_dev; - struct i2c_client *client = (struct i2c_client *)TPM_VPRIV(chip); + tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); stop = jiffies + chip->vendor.timeout_d; do { tpm_reg = TPM_STS + 1; - status = I2C_READ_DATA(client, tpm_reg, &temp, 1); + status = I2C_READ_DATA(tpm_dev, tpm_reg, &temp, 1); if (status < 0) goto end; tpm_reg = tpm_reg + 1; burstcnt = temp; - status = I2C_READ_DATA(client, tpm_reg, &temp, 1); + status = I2C_READ_DATA(tpm_dev, tpm_reg, &temp, 1); if (status < 0) goto end; @@ -416,9 +420,9 @@ static int wait_for_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout, static int recv_data(struct tpm_chip *chip, u8 *buf, size_t count) { int size = 0, burstcnt, len; - struct i2c_client *client; + struct tpm_stm_dev *tpm_dev; - client = (struct i2c_client *)TPM_VPRIV(chip); + tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); while (size < count && wait_for_stat(chip, @@ -430,7 +434,7 @@ static int recv_data(struct tpm_chip *chip, u8 *buf, size_t count) if (burstcnt < 0) return burstcnt; len = min_t(int, burstcnt, count - size); - I2C_READ_DATA(client, TPM_DATA_FIFO, buf + size, len); + I2C_READ_DATA(tpm_dev, TPM_DATA_FIFO, buf + size, len); size += len; } return size; @@ -446,14 +450,14 @@ static irqreturn_t tpm_ioserirq_handler(int irq, void *dev_id) { struct tpm_chip *chip = dev_id; struct i2c_client *client; - struct st33zp24_platform_data *pin_infos; + struct tpm_stm_dev *tpm_dev; disable_irq_nosync(irq); - client = (struct i2c_client *)TPM_VPRIV(chip); - pin_infos = client->dev.platform_data; + tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); + client = tpm_dev->client; - complete(&pin_infos->irq_detection); + complete(&tpm_dev->irq_detection); return IRQ_HANDLED; } /* tpm_ioserirq_handler() */ @@ -475,13 +479,15 @@ static int tpm_stm_i2c_send(struct tpm_chip *chip, unsigned char *buf, int ret; u8 data; struct i2c_client *client; + struct tpm_stm_dev *tpm_dev; if (chip == NULL) return -EBUSY; if (len < TPM_HEADER_SIZE) return -EBUSY; - client = (struct i2c_client *)TPM_VPRIV(chip); + tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); + client = tpm_dev->client; client->flags = 0; @@ -505,7 +511,7 @@ static int tpm_stm_i2c_send(struct tpm_chip *chip, unsigned char *buf, if (burstcnt < 0) return burstcnt; size = min_t(int, len - i - 1, burstcnt); - ret = I2C_WRITE_DATA(client, TPM_DATA_FIFO, buf + i, size); + ret = I2C_WRITE_DATA(tpm_dev, TPM_DATA_FIFO, buf + i, size); if (ret < 0) goto out_err; @@ -518,7 +524,7 @@ static int tpm_stm_i2c_send(struct tpm_chip *chip, unsigned char *buf, goto out_err; } - ret = I2C_WRITE_DATA(client, TPM_DATA_FIFO, buf + len - 1, 1); + ret = I2C_WRITE_DATA(tpm_dev, TPM_DATA_FIFO, buf + len - 1, 1); if (ret < 0) goto out_err; @@ -529,7 +535,7 @@ static int tpm_stm_i2c_send(struct tpm_chip *chip, unsigned char *buf, } data = TPM_STS_GO; - I2C_WRITE_DATA(client, TPM_STS, &data, 1); + I2C_WRITE_DATA(tpm_dev, TPM_STS, &data, 1); return len; out_err: @@ -623,6 +629,7 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) u8 intmask; struct tpm_chip *chip; struct st33zp24_platform_data *platform_data; + struct tpm_stm_dev *tpm_dev; if (client == NULL) { pr_info("%s: i2c client is NULL. Device not accessible.\n", @@ -637,11 +644,11 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) goto end; } - chip = tpm_register_hardware(&client->dev, &st_i2c_tpm); - if (!chip) { - dev_info(&client->dev, "fail chip\n"); - err = -ENODEV; - goto end; + tpm_dev = devm_kzalloc(&client->dev, sizeof(struct tpm_stm_dev), + GFP_KERNEL); + if (!tpm_dev) { + err = -ENOMEM; + goto _tpm_clean_answer; } platform_data = client->dev.platform_data; @@ -652,20 +659,14 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) goto _tpm_clean_answer; } - platform_data->tpm_i2c_buffer[0] = - kmalloc(TPM_BUFSIZE, GFP_KERNEL); - if (platform_data->tpm_i2c_buffer[0] == NULL) { - err = -ENOMEM; - goto _tpm_clean_answer; - } - platform_data->tpm_i2c_buffer[1] = - kmalloc(TPM_BUFSIZE, GFP_KERNEL); - if (platform_data->tpm_i2c_buffer[1] == NULL) { - err = -ENOMEM; - goto _tpm_clean_response1; + chip = tpm_register_hardware(&client->dev, &st_i2c_tpm); + if (!chip) { + dev_info(&client->dev, "fail chip\n"); + return -ENODEV; } - TPM_VPRIV(chip) = client; + TPM_VPRIV(chip) = tpm_dev; + tpm_dev->client = client; chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT); chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT); @@ -682,16 +683,16 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) } if (interrupts) { - init_completion(&platform_data->irq_detection); + init_completion(&tpm_dev->irq_detection); if (request_locality(chip) != LOCALITY0) { err = -ENODEV; - goto _tpm_clean_response2; + goto _tpm_clean_answer; } err = gpio_request(platform_data->io_serirq, "TPM IO_SERIRQ"); if (err) goto _gpio_init2; - clear_interruption(client); + clear_interruption(tpm_dev); err = request_irq(gpio_to_irq(platform_data->io_serirq), &tpm_ioserirq_handler, IRQF_TRIGGER_HIGH, @@ -702,7 +703,7 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) goto _irq_set; } - err = I2C_READ_DATA(client, TPM_INT_ENABLE, &intmask, 1); + err = I2C_READ_DATA(tpm_dev, TPM_INT_ENABLE, &intmask, 1); if (err < 0) goto _irq_set; @@ -713,16 +714,17 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) | TPM_INTF_STS_VALID_INT | TPM_INTF_DATA_AVAIL_INT; - err = I2C_WRITE_DATA(client, TPM_INT_ENABLE, &intmask, 1); + err = I2C_WRITE_DATA(tpm_dev, TPM_INT_ENABLE, &intmask, 1); if (err < 0) goto _irq_set; intmask = TPM_GLOBAL_INT_ENABLE; - err = I2C_WRITE_DATA(client, (TPM_INT_ENABLE + 3), &intmask, 1); + err = I2C_WRITE_DATA(tpm_dev, (TPM_INT_ENABLE + 3), + &intmask, 1); if (err < 0) goto _irq_set; - err = I2C_READ_DATA(client, TPM_INT_STATUS, &intmask, 1); + err = I2C_READ_DATA(tpm_dev, TPM_INT_STATUS, &intmask, 1); if (err < 0) goto _irq_set; @@ -744,12 +746,6 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) _gpio_init1: if (power_mgt) gpio_free(platform_data->io_lpcpd); -_tpm_clean_response2: - kzfree(platform_data->tpm_i2c_buffer[1]); - platform_data->tpm_i2c_buffer[1] = NULL; -_tpm_clean_response1: - kzfree(platform_data->tpm_i2c_buffer[0]); - platform_data->tpm_i2c_buffer[0] = NULL; _tpm_clean_answer: tpm_remove_hardware(chip->dev); end: @@ -765,28 +761,12 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) */ static int tpm_st33_i2c_remove(struct i2c_client *client) { - struct tpm_chip *chip = (struct tpm_chip *)i2c_get_clientdata(client); - struct st33zp24_platform_data *pin_infos = - ((struct i2c_client *)TPM_VPRIV(chip))->dev.platform_data; - - if (pin_infos != NULL) { - free_irq(pin_infos->io_serirq, chip); - - gpio_free(pin_infos->io_serirq); - gpio_free(pin_infos->io_lpcpd); + struct tpm_chip *chip = + (struct tpm_chip *) i2c_get_clientdata(client); + if (chip) tpm_remove_hardware(chip->dev); - if (pin_infos->tpm_i2c_buffer[1] != NULL) { - kzfree(pin_infos->tpm_i2c_buffer[1]); - pin_infos->tpm_i2c_buffer[1] = NULL; - } - if (pin_infos->tpm_i2c_buffer[0] != NULL) { - kzfree(pin_infos->tpm_i2c_buffer[0]); - pin_infos->tpm_i2c_buffer[0] = NULL; - } - } - return 0; } diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.h b/include/linux/platform_data/tpm_i2c_stm_st33.h similarity index 81% rename from drivers/char/tpm/tpm_i2c_stm_st33.h rename to include/linux/platform_data/tpm_i2c_stm_st33.h index 3041271342eb78..88f9cb18113aa5 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.h +++ b/include/linux/platform_data/tpm_i2c_stm_st33.h @@ -12,9 +12,8 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . * * STMicroelectronics version 1.2.0, Copyright (C) 2010 * STMicroelectronics comes with ABSOLUTELY NO WARRANTY. @@ -30,15 +29,12 @@ #ifndef __STM_ST33_TPM_I2C_MAIN_H__ #define __STM_ST33_TPM_I2C_MAIN_H__ + #define TPM_ST33_I2C "st33zp24_i2c" struct st33zp24_platform_data { int io_serirq; int io_lpcpd; - struct i2c_client *client; - u8 *tpm_i2c_buffer[2]; /* 0 Request 1 Response */ - struct completion irq_detection; - struct mutex lock; }; #endif /* __STM_ST33_TPM_I2C_MAIN_H__ */ From 76182b6b008b694d095aec1e2eb6c07fae787128 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Mon, 1 Dec 2014 19:32:52 +0100 Subject: [PATCH 111/601] tpm/tpm_i2c_stm_st33: Remove reference to io_serirq The serirq gpio pin is used only as interrupt. After driver initialization, the serirq signal is always used through interrupt and never with gpio kernel API. The irq can then be initialized during the platform_data definition within the client->irq pin. Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 13 +++---------- include/linux/platform_data/tpm_i2c_stm_st33.h | 1 - 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index 4f725386385fd5..728611638d1ca9 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -103,7 +103,6 @@ struct tpm_stm_dev { struct completion irq_detection; struct tpm_chip *chip; u8 buf[TPM_BUFSIZE + 1]; - int io_serirq; int io_lpcpd; }; @@ -688,18 +687,15 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) err = -ENODEV; goto _tpm_clean_answer; } - err = gpio_request(platform_data->io_serirq, "TPM IO_SERIRQ"); - if (err) - goto _gpio_init2; clear_interruption(tpm_dev); - err = request_irq(gpio_to_irq(platform_data->io_serirq), + err = request_irq(client->irq, &tpm_ioserirq_handler, IRQF_TRIGGER_HIGH, "TPM SERIRQ management", chip); if (err < 0) { dev_err(chip->dev , "TPM SERIRQ signals %d not available\n", - gpio_to_irq(platform_data->io_serirq)); + client->irq); goto _irq_set; } @@ -739,10 +735,7 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) dev_info(chip->dev, "TPM I2C Initialized\n"); return 0; _irq_set: - free_irq(gpio_to_irq(platform_data->io_serirq), (void *)chip); -_gpio_init2: - if (interrupts) - gpio_free(platform_data->io_serirq); + free_irq(client->irq, (void *)chip); _gpio_init1: if (power_mgt) gpio_free(platform_data->io_lpcpd); diff --git a/include/linux/platform_data/tpm_i2c_stm_st33.h b/include/linux/platform_data/tpm_i2c_stm_st33.h index 88f9cb18113aa5..85775cf5f9a54a 100644 --- a/include/linux/platform_data/tpm_i2c_stm_st33.h +++ b/include/linux/platform_data/tpm_i2c_stm_st33.h @@ -33,7 +33,6 @@ #define TPM_ST33_I2C "st33zp24_i2c" struct st33zp24_platform_data { - int io_serirq; int io_lpcpd; }; From ca16b7671365d71f29973ca6931be821780f592e Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Mon, 1 Dec 2014 19:32:53 +0100 Subject: [PATCH 112/601] tpm/tpm_i2c_stm_st33: Replace err/rc/ret by ret for a function return code Some functions return err, rc or ret for a status code. Return ret instead for all of them. Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 61 ++++++++++++++--------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index 728611638d1ca9..d425fa1a25bc18 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -291,7 +291,7 @@ static int check_locality(struct tpm_chip *chip) static int request_locality(struct tpm_chip *chip) { unsigned long stop; - long rc; + long ret; struct tpm_stm_dev *tpm_dev; u8 data; @@ -301,15 +301,15 @@ static int request_locality(struct tpm_chip *chip) return chip->vendor.locality; data = TPM_ACCESS_REQUEST_USE; - rc = I2C_WRITE_DATA(tpm_dev, TPM_ACCESS, &data, 1); - if (rc < 0) + ret = I2C_WRITE_DATA(tpm_dev, TPM_ACCESS, &data, 1); + if (ret < 0) goto end; if (chip->vendor.irq) { - rc = wait_for_serirq_timeout(chip, (check_locality + ret = wait_for_serirq_timeout(chip, (check_locality (chip) >= 0), chip->vendor.timeout_a); - if (rc > 0) + if (ret > 0) return chip->vendor.locality; } else { stop = jiffies + chip->vendor.timeout_a; @@ -319,9 +319,9 @@ static int request_locality(struct tpm_chip *chip) msleep(TPM_TIMEOUT); } while (time_before(jiffies, stop)); } - rc = -EACCES; + ret = -EACCES; end: - return rc; + return ret; } /* request_locality() */ /* @@ -388,14 +388,14 @@ static int wait_for_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout, wait_queue_head_t *queue) { unsigned long stop; - long rc; + long ret; u8 status; if (chip->vendor.irq) { - rc = wait_for_serirq_timeout(chip, ((tpm_stm_i2c_status + ret = wait_for_serirq_timeout(chip, ((tpm_stm_i2c_status (chip) & mask) == mask), timeout); - if (rc > 0) + if (ret > 0) return 0; } else { stop = jiffies + timeout; @@ -624,7 +624,7 @@ MODULE_PARM_DESC(power_mgt, "Power Management"); static int tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) { - int err; + int ret; u8 intmask; struct tpm_chip *chip; struct st33zp24_platform_data *platform_data; @@ -633,20 +633,20 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) if (client == NULL) { pr_info("%s: i2c client is NULL. Device not accessible.\n", __func__); - err = -ENODEV; + ret = -ENODEV; goto end; } if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { dev_info(&client->dev, "client not i2c capable\n"); - err = -ENODEV; + ret = -ENODEV; goto end; } tpm_dev = devm_kzalloc(&client->dev, sizeof(struct tpm_stm_dev), GFP_KERNEL); if (!tpm_dev) { - err = -ENOMEM; + ret = -ENOMEM; goto _tpm_clean_answer; } @@ -654,7 +654,7 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) if (!platform_data) { dev_info(&client->dev, "chip not available\n"); - err = -ENODEV; + ret = -ENODEV; goto _tpm_clean_answer; } @@ -675,8 +675,8 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) chip->vendor.locality = LOCALITY0; if (power_mgt) { - err = gpio_request(platform_data->io_lpcpd, "TPM IO_LPCPD"); - if (err) + ret = gpio_request(platform_data->io_lpcpd, "TPM IO_LPCPD"); + if (ret) goto _gpio_init1; gpio_set_value(platform_data->io_lpcpd, 1); } @@ -684,23 +684,23 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) if (interrupts) { init_completion(&tpm_dev->irq_detection); if (request_locality(chip) != LOCALITY0) { - err = -ENODEV; + ret = -ENODEV; goto _tpm_clean_answer; } clear_interruption(tpm_dev); - err = request_irq(client->irq, + ret = request_irq(client->irq, &tpm_ioserirq_handler, IRQF_TRIGGER_HIGH, "TPM SERIRQ management", chip); - if (err < 0) { + if (ret < 0) { dev_err(chip->dev , "TPM SERIRQ signals %d not available\n", client->irq); goto _irq_set; } - err = I2C_READ_DATA(tpm_dev, TPM_INT_ENABLE, &intmask, 1); - if (err < 0) + ret = I2C_READ_DATA(tpm_dev, TPM_INT_ENABLE, &intmask, 1); + if (ret < 0) goto _irq_set; intmask |= TPM_INTF_CMD_READY_INT @@ -710,18 +710,18 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) | TPM_INTF_STS_VALID_INT | TPM_INTF_DATA_AVAIL_INT; - err = I2C_WRITE_DATA(tpm_dev, TPM_INT_ENABLE, &intmask, 1); - if (err < 0) + ret = I2C_WRITE_DATA(tpm_dev, TPM_INT_ENABLE, &intmask, 1); + if (ret < 0) goto _irq_set; intmask = TPM_GLOBAL_INT_ENABLE; - err = I2C_WRITE_DATA(tpm_dev, (TPM_INT_ENABLE + 3), + ret = I2C_WRITE_DATA(tpm_dev, (TPM_INT_ENABLE + 3), &intmask, 1); - if (err < 0) + if (ret < 0) goto _irq_set; - err = I2C_READ_DATA(tpm_dev, TPM_INT_STATUS, &intmask, 1); - if (err < 0) + ret = I2C_READ_DATA(tpm_dev, TPM_INT_STATUS, &intmask, 1); + if (ret < 0) goto _irq_set; chip->vendor.irq = interrupts; @@ -743,7 +743,7 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) tpm_remove_hardware(chip->dev); end: pr_info("TPM I2C initialisation fail\n"); - return err; + return ret; } /* @@ -779,7 +779,6 @@ static int tpm_st33_i2c_pm_suspend(struct device *dev) gpio_set_value(pin_infos->io_lpcpd, 0); else ret = tpm_pm_suspend(dev); - return ret; } /* tpm_st33_i2c_suspend() */ @@ -807,7 +806,7 @@ static int tpm_st33_i2c_pm_resume(struct device *dev) tpm_do_selftest(chip); } return ret; -} /* tpm_st33_i2c_pm_resume() */ +} /* tpm_st33_i2c_pm_resume() */ #endif static const struct i2c_device_id tpm_st33_i2c_id[] = { From 875edad528cf29b382e82837e652e997aec16c49 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Mon, 1 Dec 2014 19:32:54 +0100 Subject: [PATCH 113/601] tpm/tpm_i2c_stm_st33: Replace tpm_st33_* function with tpm_stm_* For sanity, replace every tpm_st33_* with tpm_stm_* Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 50 ++++++++++++++--------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index d425fa1a25bc18..7f036630e87a89 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -224,7 +224,7 @@ static int wait_for_serirq_timeout(struct tpm_chip *chip, bool condition, status = 1; } return status; -} +} /* wait_for_serirq_timeout() */ /* * tpm_stm_i2c_cancel, cancel is not implemented. @@ -241,7 +241,7 @@ static void tpm_stm_i2c_cancel(struct tpm_chip *chip) I2C_WRITE_DATA(tpm_dev, TPM_STS, &data, 1); if (chip->vendor.irq) wait_for_serirq_timeout(chip, 1, chip->vendor.timeout_a); -} /* tpm_stm_i2c_cancel() */ +} /* tpm_stm_i2c_cancel() */ /* * tpm_stm_spi_status return the TPM_STS register @@ -257,7 +257,7 @@ static u8 tpm_stm_i2c_status(struct tpm_chip *chip) I2C_READ_DATA(tpm_dev, TPM_STS, &data, 1); return data; -} /* tpm_stm_i2c_status() */ +} /* tpm_stm_i2c_status() */ /* @@ -591,7 +591,7 @@ static int tpm_stm_i2c_recv(struct tpm_chip *chip, unsigned char *buf, return size; } -static bool tpm_st33_i2c_req_canceled(struct tpm_chip *chip, u8 status) +static bool tpm_stm_i2c_req_canceled(struct tpm_chip *chip, u8 status) { return (status == TPM_STS_COMMAND_READY); } @@ -603,7 +603,7 @@ static const struct tpm_class_ops st_i2c_tpm = { .status = tpm_stm_i2c_status, .req_complete_mask = TPM_STS_DATA_AVAIL | TPM_STS_VALID, .req_complete_val = TPM_STS_DATA_AVAIL | TPM_STS_VALID, - .req_canceled = tpm_st33_i2c_req_canceled, + .req_canceled = tpm_stm_i2c_req_canceled, }; static int interrupts; @@ -615,14 +615,14 @@ module_param(power_mgt, int, 0444); MODULE_PARM_DESC(power_mgt, "Power Management"); /* - * tpm_st33_i2c_probe initialize the TPM device + * tpm_stm_i2c_probe initialize the TPM device * @param: client, the i2c_client drescription (TPM I2C description). * @param: id, the i2c_device_id struct. * @return: 0 in case of success. * -1 in other case. */ static int -tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) +tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) { int ret; u8 intmask; @@ -747,12 +747,12 @@ tpm_st33_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) } /* - * tpm_st33_i2c_remove remove the TPM device + * tpm_stm_i2c_remove remove the TPM device * @param: client, the i2c_client drescription (TPM I2C description). clear_bit(0, &chip->is_open); * @return: 0 in case of success. */ -static int tpm_st33_i2c_remove(struct i2c_client *client) +static int tpm_stm_i2c_remove(struct i2c_client *client) { struct tpm_chip *chip = (struct tpm_chip *) i2c_get_clientdata(client); @@ -765,12 +765,12 @@ static int tpm_st33_i2c_remove(struct i2c_client *client) #ifdef CONFIG_PM_SLEEP /* - * tpm_st33_i2c_pm_suspend suspend the TPM device + * tpm_stm_i2c_pm_suspend suspend the TPM device * @param: client, the i2c_client drescription (TPM I2C description). * @param: mesg, the power management message. * @return: 0 in case of success. */ -static int tpm_st33_i2c_pm_suspend(struct device *dev) +static int tpm_stm_i2c_pm_suspend(struct device *dev) { struct st33zp24_platform_data *pin_infos = dev->platform_data; int ret = 0; @@ -780,14 +780,14 @@ static int tpm_st33_i2c_pm_suspend(struct device *dev) else ret = tpm_pm_suspend(dev); return ret; -} /* tpm_st33_i2c_suspend() */ +} /* tpm_stm_i2c_suspend() */ /* - * tpm_st33_i2c_pm_resume resume the TPM device + * tpm_stm_i2c_pm_resume resume the TPM device * @param: client, the i2c_client drescription (TPM I2C description). * @return: 0 in case of success. */ -static int tpm_st33_i2c_pm_resume(struct device *dev) +static int tpm_stm_i2c_pm_resume(struct device *dev) { struct tpm_chip *chip = dev_get_drvdata(dev); struct st33zp24_platform_data *pin_infos = dev->platform_data; @@ -806,28 +806,28 @@ static int tpm_st33_i2c_pm_resume(struct device *dev) tpm_do_selftest(chip); } return ret; -} /* tpm_st33_i2c_pm_resume() */ +} /* tpm_stm_i2c_pm_resume() */ #endif -static const struct i2c_device_id tpm_st33_i2c_id[] = { +static const struct i2c_device_id tpm_stm_i2c_id[] = { {TPM_ST33_I2C, 0}, {} }; -MODULE_DEVICE_TABLE(i2c, tpm_st33_i2c_id); -static SIMPLE_DEV_PM_OPS(tpm_st33_i2c_ops, tpm_st33_i2c_pm_suspend, - tpm_st33_i2c_pm_resume); -static struct i2c_driver tpm_st33_i2c_driver = { +MODULE_DEVICE_TABLE(i2c, tpm_stm_i2c_id); +static SIMPLE_DEV_PM_OPS(tpm_stm_i2c_ops, tpm_stm_i2c_pm_suspend, + tpm_stm_i2c_pm_resume); +static struct i2c_driver tpm_stm_i2c_driver = { .driver = { .owner = THIS_MODULE, .name = TPM_ST33_I2C, - .pm = &tpm_st33_i2c_ops, + .pm = &tpm_stm_i2c_ops, }, - .probe = tpm_st33_i2c_probe, - .remove = tpm_st33_i2c_remove, - .id_table = tpm_st33_i2c_id + .probe = tpm_stm_i2c_probe, + .remove = tpm_stm_i2c_remove, + .id_table = tpm_stm_i2c_id }; -module_i2c_driver(tpm_st33_i2c_driver); +module_i2c_driver(tpm_stm_i2c_driver); MODULE_AUTHOR("Christophe Ricard (tpmsupport@st.com)"); MODULE_DESCRIPTION("STM TPM I2C ST33 Driver"); From c36b1b2d1fa5b24e21482f33694a8ca0c653ecd9 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Mon, 1 Dec 2014 19:32:55 +0100 Subject: [PATCH 114/601] tpm/tpm_i2c_stm_st33: Add devicetree structure Add tpm_stm_st33_i2c dts structure keeping backward compatibility with static platform_data support as well. In the mean time to easy this update and to make it much simpler, we: - Moved all gpio_request to devm_gpio_request_one primitive - Moved request_irq to devm_request_irq Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 166 +++++++++++++++++++--------- 1 file changed, 116 insertions(+), 50 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index 7f036630e87a89..3f722630ff9234 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -46,6 +46,8 @@ #include #include #include +#include +#include #include #include "tpm.h" @@ -606,13 +608,81 @@ static const struct tpm_class_ops st_i2c_tpm = { .req_canceled = tpm_stm_i2c_req_canceled, }; -static int interrupts; -module_param(interrupts, int, 0444); -MODULE_PARM_DESC(interrupts, "Enable interrupts"); +#ifdef CONFIG_OF +static int tpm_stm_i2c_of_request_resources(struct tpm_chip *chip) +{ + struct device_node *pp; + struct tpm_stm_dev *tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); + struct i2c_client *client = tpm_dev->client; + + int gpio; + int ret; + + pp = client->dev.of_node; + if (!pp) { + dev_err(chip->dev, "No platform data\n"); + return -ENODEV; + } + + /* Get GPIO from device tree */ + gpio = of_get_named_gpio(pp, "lpcpd-gpios", 0); + if (gpio < 0) { + dev_err(chip->dev, "Failed to retrieve lpcpd-gpios from dts.\n"); + tpm_dev->io_lpcpd = -1; + /* + * lpcpd pin is not specified. This is not an issue as + * power management can be also managed by TPM specific + * commands. So leave with a success status code. + */ + return 0; + } + /* GPIO request and configuration */ + ret = devm_gpio_request_one(&client->dev, gpio, + GPIOF_OUT_INIT_HIGH, "TPM IO LPCPD"); + if (ret) { + dev_err(chip->dev, "Failed to request lpcpd pin\n"); + return -ENODEV; + } + tpm_dev->io_lpcpd = gpio; + + return 0; +} +#else +static int tpm_stm_i2c_of_request_resources(struct tpm_chip *chip) +{ + return -ENODEV; +} +#endif + +static int tpm_stm_i2c_request_resources(struct i2c_client *client, + struct tpm_chip *chip) +{ + struct st33zp24_platform_data *pdata; + struct tpm_stm_dev *tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); + int ret; + + pdata = client->dev.platform_data; + if (pdata == NULL) { + pr_err("No platform data\n"); + return -EINVAL; + } -static int power_mgt = 1; -module_param(power_mgt, int, 0444); -MODULE_PARM_DESC(power_mgt, "Power Management"); + /* store for late use */ + tpm_dev->io_lpcpd = pdata->io_lpcpd; + + if (gpio_is_valid(pdata->io_lpcpd)) { + ret = devm_gpio_request_one(&client->dev, + pdata->io_lpcpd, GPIOF_OUT_INIT_HIGH, + "TPM IO_LPCPD"); + if (ret) { + dev_err(chip->dev, "%s : reset gpio_request failed\n", + __FILE__); + return ret; + } + } + + return 0; +} /* * tpm_stm_i2c_probe initialize the TPM device @@ -633,30 +703,18 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) if (client == NULL) { pr_info("%s: i2c client is NULL. Device not accessible.\n", __func__); - ret = -ENODEV; - goto end; + return -ENODEV; } if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) { dev_info(&client->dev, "client not i2c capable\n"); - ret = -ENODEV; - goto end; + return -ENODEV; } tpm_dev = devm_kzalloc(&client->dev, sizeof(struct tpm_stm_dev), GFP_KERNEL); - if (!tpm_dev) { - ret = -ENOMEM; - goto _tpm_clean_answer; - } - - platform_data = client->dev.platform_data; - - if (!platform_data) { - dev_info(&client->dev, "chip not available\n"); - ret = -ENODEV; - goto _tpm_clean_answer; - } + if (!tpm_dev) + return -ENOMEM; chip = tpm_register_hardware(&client->dev, &st_i2c_tpm); if (!chip) { @@ -667,6 +725,17 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) TPM_VPRIV(chip) = tpm_dev; tpm_dev->client = client; + platform_data = client->dev.platform_data; + if (!platform_data && client->dev.of_node) { + ret = tpm_stm_i2c_of_request_resources(chip); + if (ret) + goto _tpm_clean_answer; + } else if (platform_data) { + ret = tpm_stm_i2c_request_resources(client, chip); + if (ret) + goto _tpm_clean_answer; + } + chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT); chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT); chip->vendor.timeout_c = msecs_to_jiffies(TIS_SHORT_TIMEOUT); @@ -674,14 +743,7 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) chip->vendor.locality = LOCALITY0; - if (power_mgt) { - ret = gpio_request(platform_data->io_lpcpd, "TPM IO_LPCPD"); - if (ret) - goto _gpio_init1; - gpio_set_value(platform_data->io_lpcpd, 1); - } - - if (interrupts) { + if (client->irq) { init_completion(&tpm_dev->irq_detection); if (request_locality(chip) != LOCALITY0) { ret = -ENODEV; @@ -689,19 +751,19 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) } clear_interruption(tpm_dev); - ret = request_irq(client->irq, - &tpm_ioserirq_handler, + ret = devm_request_irq(&client->dev, client->irq, + tpm_ioserirq_handler, IRQF_TRIGGER_HIGH, "TPM SERIRQ management", chip); if (ret < 0) { dev_err(chip->dev , "TPM SERIRQ signals %d not available\n", client->irq); - goto _irq_set; + goto _tpm_clean_answer; } ret = I2C_READ_DATA(tpm_dev, TPM_INT_ENABLE, &intmask, 1); if (ret < 0) - goto _irq_set; + goto _tpm_clean_answer; intmask |= TPM_INTF_CMD_READY_INT | TPM_INTF_FIFO_AVALAIBLE_INT @@ -712,19 +774,19 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) ret = I2C_WRITE_DATA(tpm_dev, TPM_INT_ENABLE, &intmask, 1); if (ret < 0) - goto _irq_set; + goto _tpm_clean_answer; intmask = TPM_GLOBAL_INT_ENABLE; ret = I2C_WRITE_DATA(tpm_dev, (TPM_INT_ENABLE + 3), &intmask, 1); if (ret < 0) - goto _irq_set; + goto _tpm_clean_answer; ret = I2C_READ_DATA(tpm_dev, TPM_INT_STATUS, &intmask, 1); if (ret < 0) - goto _irq_set; + goto _tpm_clean_answer; - chip->vendor.irq = interrupts; + chip->vendor.irq = client->irq; tpm_gen_interrupt(chip); } @@ -734,15 +796,9 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) dev_info(chip->dev, "TPM I2C Initialized\n"); return 0; -_irq_set: - free_irq(client->irq, (void *)chip); -_gpio_init1: - if (power_mgt) - gpio_free(platform_data->io_lpcpd); _tpm_clean_answer: tpm_remove_hardware(chip->dev); -end: - pr_info("TPM I2C initialisation fail\n"); + dev_info(chip->dev, "TPM I2C initialisation fail\n"); return ret; } @@ -775,7 +831,7 @@ static int tpm_stm_i2c_pm_suspend(struct device *dev) struct st33zp24_platform_data *pin_infos = dev->platform_data; int ret = 0; - if (power_mgt) + if (gpio_is_valid(pin_infos->io_lpcpd)) gpio_set_value(pin_infos->io_lpcpd, 0); else ret = tpm_pm_suspend(dev); @@ -794,7 +850,7 @@ static int tpm_stm_i2c_pm_resume(struct device *dev) int ret = 0; - if (power_mgt) { + if (gpio_is_valid(pin_infos->io_lpcpd)) { gpio_set_value(pin_infos->io_lpcpd, 1); ret = wait_for_serirq_timeout(chip, (chip->ops->status(chip) & @@ -813,14 +869,24 @@ static const struct i2c_device_id tpm_stm_i2c_id[] = { {TPM_ST33_I2C, 0}, {} }; + +#ifdef CONFIG_OF +static const struct of_device_id of_st33zp24_i2c_match[] = { + { .compatible = "st,st33zp24-i2c", }, + {} +}; +MODULE_DEVICE_TABLE(of, of_st33zp24_i2c_match); +#endif + MODULE_DEVICE_TABLE(i2c, tpm_stm_i2c_id); static SIMPLE_DEV_PM_OPS(tpm_stm_i2c_ops, tpm_stm_i2c_pm_suspend, tpm_stm_i2c_pm_resume); static struct i2c_driver tpm_stm_i2c_driver = { .driver = { - .owner = THIS_MODULE, - .name = TPM_ST33_I2C, - .pm = &tpm_stm_i2c_ops, + .owner = THIS_MODULE, + .name = TPM_ST33_I2C, + .pm = &tpm_stm_i2c_ops, + .of_match_table = of_match_ptr(of_st33zp24_i2c_match), }, .probe = tpm_stm_i2c_probe, .remove = tpm_stm_i2c_remove, From e8f6f3b4d69956f66bba14a4d33aadf1fb209050 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Mon, 1 Dec 2014 19:32:56 +0100 Subject: [PATCH 115/601] tpm/tpm_i2c_stm_st33/dts/st33zp24_i2c: Add DTS Documentation st33zp24 tpm can be seen as a trivial i2c device as other i2c tpm. However several other properties needs to be documented such as lpcpd. Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe --- .../bindings/security/tpm/st33zp24.txt | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 Documentation/devicetree/bindings/security/tpm/st33zp24.txt diff --git a/Documentation/devicetree/bindings/security/tpm/st33zp24.txt b/Documentation/devicetree/bindings/security/tpm/st33zp24.txt new file mode 100644 index 00000000000000..0a7361dae65f57 --- /dev/null +++ b/Documentation/devicetree/bindings/security/tpm/st33zp24.txt @@ -0,0 +1,36 @@ +* STMicroelectronics SAS. ST33ZP24 TPM SoC + +Required properties: +- compatible: Should be "st,st33zp24-i2c". +- clock-frequency: I²C work frequency. +- reg: address on the bus + +Optional ST33ZP24 Properties: +- interrupt-parent: phandle for the interrupt gpio controller +- interrupts: GPIO interrupt to which the chip is connected +- lpcpd-gpios: Output GPIO pin used for ST33ZP24 power management D1/D2 state. +If set, power must be present when the platform is going into sleep/hibernate mode. + +Optional SoC Specific Properties: +- pinctrl-names: Contains only one value - "default". +- pintctrl-0: Specifies the pin control groups used for this controller. + +Example (for ARM-based BeagleBoard xM with ST33ZP24 on I2C2): + +&i2c2 { + + status = "okay"; + + st33zp24: st33zp24@13 { + + compatible = "st,st33zp24-i2c"; + + reg = <0x013>; + clock-frequency = <400000>; + + interrupt-parent = <&gpio5>; + interrupts = <7 IRQ_TYPE_LEVEL_HIGH>; + + lpcpd-gpios = <&gpio5 15 GPIO_ACTIVE_HIGH>; + }; +}; From 8dcd19874f31b7a806a2a2b129e94882918c3aaf Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Mon, 1 Dec 2014 19:32:57 +0100 Subject: [PATCH 116/601] tpm/tpm_i2c_stm_st33: Few code cleanup Cleanup code indentation, braces, test variable when NULL. Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 32 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index 3f722630ff9234..a444cd3cb17dda 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -26,7 +26,7 @@ * * @Synopsis: * 09/15/2010: First shot driver tpm_tis driver for - lpc is used as model. + * lpc is used as model. */ #include @@ -393,7 +393,7 @@ static int wait_for_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout, long ret; u8 status; - if (chip->vendor.irq) { + if (chip->vendor.irq) { ret = wait_for_serirq_timeout(chip, ((tpm_stm_i2c_status (chip) & mask) == mask), timeout); @@ -429,8 +429,7 @@ static int recv_data(struct tpm_chip *chip, u8 *buf, size_t count) wait_for_stat(chip, TPM_STS_DATA_AVAIL | TPM_STS_VALID, chip->vendor.timeout_c, - &chip->vendor.read_queue) - == 0) { + &chip->vendor.read_queue) == 0) { burstcnt = get_burstcount(chip); if (burstcnt < 0) return burstcnt; @@ -482,7 +481,7 @@ static int tpm_stm_i2c_send(struct tpm_chip *chip, unsigned char *buf, struct i2c_client *client; struct tpm_stm_dev *tpm_dev; - if (chip == NULL) + if (!chip) return -EBUSY; if (len < TPM_HEADER_SIZE) return -EBUSY; @@ -559,7 +558,7 @@ static int tpm_stm_i2c_recv(struct tpm_chip *chip, unsigned char *buf, int size = 0; int expected; - if (chip == NULL) + if (!chip) return -EBUSY; if (count < TPM_HEADER_SIZE) { @@ -580,7 +579,7 @@ static int tpm_stm_i2c_recv(struct tpm_chip *chip, unsigned char *buf, } size += recv_data(chip, &buf[TPM_HEADER_SIZE], - expected - TPM_HEADER_SIZE); + expected - TPM_HEADER_SIZE); if (size < expected) { dev_err(chip->dev, "Unable to read remainder of result\n"); size = -ETIME; @@ -662,9 +661,9 @@ static int tpm_stm_i2c_request_resources(struct i2c_client *client, int ret; pdata = client->dev.platform_data; - if (pdata == NULL) { - pr_err("No platform data\n"); - return -EINVAL; + if (!pdata) { + dev_err(chip->dev, "No platform data\n"); + return -ENODEV; } /* store for late use */ @@ -695,13 +694,13 @@ static int tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) { int ret; - u8 intmask; + u8 intmask = 0; struct tpm_chip *chip; struct st33zp24_platform_data *platform_data; struct tpm_stm_dev *tpm_dev; - if (client == NULL) { - pr_info("%s: i2c client is NULL. Device not accessible.\n", + if (!client) { + dev_info(&client->dev, "%s: i2c client is NULL. Device not accessible.\n", __func__); return -ENODEV; } @@ -805,7 +804,7 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) /* * tpm_stm_i2c_remove remove the TPM device * @param: client, the i2c_client drescription (TPM I2C description). - clear_bit(0, &chip->is_open); + * clear_bit(0, &chip->is_open); * @return: 0 in case of success. */ static int tpm_stm_i2c_remove(struct i2c_client *client) @@ -835,6 +834,7 @@ static int tpm_stm_i2c_pm_suspend(struct device *dev) gpio_set_value(pin_infos->io_lpcpd, 0); else ret = tpm_pm_suspend(dev); + return ret; } /* tpm_stm_i2c_suspend() */ @@ -869,6 +869,7 @@ static const struct i2c_device_id tpm_stm_i2c_id[] = { {TPM_ST33_I2C, 0}, {} }; +MODULE_DEVICE_TABLE(i2c, tpm_stm_i2c_id); #ifdef CONFIG_OF static const struct of_device_id of_st33zp24_i2c_match[] = { @@ -878,7 +879,6 @@ static const struct of_device_id of_st33zp24_i2c_match[] = { MODULE_DEVICE_TABLE(of, of_st33zp24_i2c_match); #endif -MODULE_DEVICE_TABLE(i2c, tpm_stm_i2c_id); static SIMPLE_DEV_PM_OPS(tpm_stm_i2c_ops, tpm_stm_i2c_pm_suspend, tpm_stm_i2c_pm_resume); static struct i2c_driver tpm_stm_i2c_driver = { @@ -887,7 +887,7 @@ static struct i2c_driver tpm_stm_i2c_driver = { .name = TPM_ST33_I2C, .pm = &tpm_stm_i2c_ops, .of_match_table = of_match_ptr(of_st33zp24_i2c_match), - }, + }, .probe = tpm_stm_i2c_probe, .remove = tpm_stm_i2c_remove, .id_table = tpm_stm_i2c_id From c3804b8cdbd2abb50cf87318b13660dff46b7fe8 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Mon, 1 Dec 2014 19:32:58 +0100 Subject: [PATCH 117/601] tpm/tpm_i2c_stm_st33: Interrupt management improvement Improve the irq management by using a new function wait_for_stat. Instead of using a completion struct, we rely on the waitqueue read_queue and int_queue from chip->vendor field. Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 197 +++++++++++++++------------- 1 file changed, 109 insertions(+), 88 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index a444cd3cb17dda..eb0244a2ec9d0a 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -102,9 +103,9 @@ enum tis_defaults { struct tpm_stm_dev { struct i2c_client *client; - struct completion irq_detection; struct tpm_chip *chip; u8 buf[TPM_BUFSIZE + 1]; + u32 intrs; int io_lpcpd; }; @@ -174,60 +175,17 @@ static int read8_reg(struct tpm_stm_dev *tpm_dev, u8 tpm_register, * clear_interruption * clear the TPM interrupt register. * @param: tpm, the chip description + * @return: the TPM_INT_STATUS value */ -static void clear_interruption(struct tpm_stm_dev *tpm_dev) +static u8 clear_interruption(struct tpm_stm_dev *tpm_dev) { u8 interrupt; I2C_READ_DATA(tpm_dev, TPM_INT_STATUS, &interrupt, 1); I2C_WRITE_DATA(tpm_dev, TPM_INT_STATUS, &interrupt, 1); - I2C_READ_DATA(tpm_dev, TPM_INT_STATUS, &interrupt, 1); + return interrupt; } /* clear_interruption() */ -/* - * _wait_for_interrupt_serirq_timeout - * @param: tpm, the chip description - * @param: timeout, the timeout of the interrupt - * @return: the status of the interruption. - */ -static long _wait_for_interrupt_serirq_timeout(struct tpm_chip *chip, - unsigned long timeout) -{ - long status; - struct i2c_client *client; - struct tpm_stm_dev *tpm_dev; - - tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); - client = tpm_dev->client; - - status = wait_for_completion_interruptible_timeout( - &tpm_dev->irq_detection, - timeout); - if (status > 0) - enable_irq(client->irq); - - return status; -} /* wait_for_interrupt_serirq_timeout() */ - -static int wait_for_serirq_timeout(struct tpm_chip *chip, bool condition, - unsigned long timeout) -{ - int status = 2; - struct tpm_stm_dev *tpm_dev; - - tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); - - status = _wait_for_interrupt_serirq_timeout(chip, timeout); - if (!status) { - status = -EBUSY; - } else { - clear_interruption(tpm_dev); - if (condition) - status = 1; - } - return status; -} /* wait_for_serirq_timeout() */ - /* * tpm_stm_i2c_cancel, cancel is not implemented. * @param: chip, the tpm_chip description as specified in driver/char/tpm/tpm.h @@ -241,8 +199,6 @@ static void tpm_stm_i2c_cancel(struct tpm_chip *chip) data = TPM_STS_COMMAND_READY; I2C_WRITE_DATA(tpm_dev, TPM_STS, &data, 1); - if (chip->vendor.irq) - wait_for_serirq_timeout(chip, 1, chip->vendor.timeout_a); } /* tpm_stm_i2c_cancel() */ /* @@ -297,30 +253,24 @@ static int request_locality(struct tpm_chip *chip) struct tpm_stm_dev *tpm_dev; u8 data; - tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); - if (check_locality(chip) == chip->vendor.locality) return chip->vendor.locality; + tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); + data = TPM_ACCESS_REQUEST_USE; ret = I2C_WRITE_DATA(tpm_dev, TPM_ACCESS, &data, 1); if (ret < 0) goto end; - if (chip->vendor.irq) { - ret = wait_for_serirq_timeout(chip, (check_locality - (chip) >= 0), - chip->vendor.timeout_a); - if (ret > 0) + stop = jiffies + chip->vendor.timeout_a; + + /* Request locality is usually effective after the request */ + do { + if (check_locality(chip) >= 0) return chip->vendor.locality; - } else { - stop = jiffies + chip->vendor.timeout_a; - do { - if (check_locality(chip) >= 0) - return chip->vendor.locality; - msleep(TPM_TIMEOUT); - } while (time_before(jiffies, stop)); - } + msleep(TPM_TIMEOUT); + } while (time_before(jiffies, stop)); ret = -EACCES; end: return ret; @@ -378,36 +328,107 @@ static int get_burstcount(struct tpm_chip *chip) return -EBUSY; } /* get_burstcount() */ +static bool wait_for_tpm_stat_cond(struct tpm_chip *chip, u8 mask, + bool check_cancel, bool *canceled) +{ + u8 status = chip->ops->status(chip); + + *canceled = false; + if ((status & mask) == mask) + return true; + if (check_cancel && chip->ops->req_canceled(chip, status)) { + *canceled = true; + return true; + } + return false; +} + +/* + * interrupt_to_status + * @param: irq_mask, the irq mask value to wait + * @return: the corresponding tpm_sts value + */ +static u8 interrupt_to_status(u8 irq_mask) +{ + u8 status = 0; + + if ((irq_mask & TPM_INTF_STS_VALID_INT) == TPM_INTF_STS_VALID_INT) + status |= TPM_STS_VALID; + if ((irq_mask & TPM_INTF_DATA_AVAIL_INT) == TPM_INTF_DATA_AVAIL_INT) + status |= TPM_STS_DATA_AVAIL; + if ((irq_mask & TPM_INTF_CMD_READY_INT) == TPM_INTF_CMD_READY_INT) + status |= TPM_STS_COMMAND_READY; + + return status; +} /* status_to_interrupt() */ + /* * wait_for_stat wait for a TPM_STS value * @param: chip, the tpm chip description * @param: mask, the value mask to wait * @param: timeout, the timeout * @param: queue, the wait queue. + * @param: check_cancel, does the command can be cancelled ? * @return: the tpm status, 0 if success, -ETIME if timeout is reached. */ static int wait_for_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout, - wait_queue_head_t *queue) + wait_queue_head_t *queue, bool check_cancel) { unsigned long stop; - long ret; - u8 status; + int r; + bool canceled = false; + bool condition; + u32 cur_intrs; + u8 interrupt, status; + struct tpm_stm_dev *tpm_dev; + + tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); + + /* check current status */ + status = tpm_stm_i2c_status(chip); + if ((status & mask) == mask) + return 0; + + stop = jiffies + timeout; if (chip->vendor.irq) { - ret = wait_for_serirq_timeout(chip, ((tpm_stm_i2c_status - (chip) & mask) == - mask), timeout); - if (ret > 0) + cur_intrs = tpm_dev->intrs; + interrupt = clear_interruption(tpm_dev); + enable_irq(chip->vendor.irq); + +again: + timeout = stop - jiffies; + if ((long) timeout <= 0) + return -1; + + r = wait_event_interruptible_timeout(*queue, + cur_intrs != tpm_dev->intrs, timeout); + + interrupt |= clear_interruption(tpm_dev); + status = interrupt_to_status(interrupt); + condition = wait_for_tpm_stat_cond(chip, mask, + check_cancel, &canceled); + + if (r >= 0 && condition) { + if (canceled) + return -ECANCELED; return 0; + } + if (r == -ERESTARTSYS && freezing(current)) { + clear_thread_flag(TIF_SIGPENDING); + goto again; + } + disable_irq_nosync(chip->vendor.irq); + } else { - stop = jiffies + timeout; do { msleep(TPM_TIMEOUT); - status = tpm_stm_i2c_status(chip); + status = chip->ops->status(chip); if ((status & mask) == mask) return 0; } while (time_before(jiffies, stop)); } + return -ETIME; } /* wait_for_stat() */ @@ -429,7 +450,7 @@ static int recv_data(struct tpm_chip *chip, u8 *buf, size_t count) wait_for_stat(chip, TPM_STS_DATA_AVAIL | TPM_STS_VALID, chip->vendor.timeout_c, - &chip->vendor.read_queue) == 0) { + &chip->vendor.read_queue, true) == 0) { burstcnt = get_burstcount(chip); if (burstcnt < 0) return burstcnt; @@ -449,15 +470,14 @@ static int recv_data(struct tpm_chip *chip, u8 *buf, size_t count) static irqreturn_t tpm_ioserirq_handler(int irq, void *dev_id) { struct tpm_chip *chip = dev_id; - struct i2c_client *client; struct tpm_stm_dev *tpm_dev; - disable_irq_nosync(irq); - tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); - client = tpm_dev->client; - complete(&tpm_dev->irq_detection); + tpm_dev->intrs++; + wake_up_interruptible(&chip->vendor.read_queue); + disable_irq_nosync(chip->vendor.irq); + return IRQ_HANDLED; } /* tpm_ioserirq_handler() */ @@ -500,7 +520,7 @@ static int tpm_stm_i2c_send(struct tpm_chip *chip, unsigned char *buf, tpm_stm_i2c_cancel(chip); if (wait_for_stat (chip, TPM_STS_COMMAND_READY, chip->vendor.timeout_b, - &chip->vendor.int_queue) < 0) { + &chip->vendor.read_queue, false) < 0) { ret = -ETIME; goto out_err; } @@ -743,7 +763,10 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) chip->vendor.locality = LOCALITY0; if (client->irq) { - init_completion(&tpm_dev->irq_detection); + /* INTERRUPT Setup */ + init_waitqueue_head(&chip->vendor.read_queue); + tpm_dev->intrs = 0; + if (request_locality(chip) != LOCALITY0) { ret = -ENODEV; goto _tpm_clean_answer; @@ -765,9 +788,6 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) goto _tpm_clean_answer; intmask |= TPM_INTF_CMD_READY_INT - | TPM_INTF_FIFO_AVALAIBLE_INT - | TPM_INTF_WAKE_UP_READY_INT - | TPM_INTF_LOCALITY_CHANGE_INT | TPM_INTF_STS_VALID_INT | TPM_INTF_DATA_AVAIL_INT; @@ -787,6 +807,8 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) chip->vendor.irq = client->irq; + disable_irq_nosync(chip->vendor.irq); + tpm_gen_interrupt(chip); } @@ -852,10 +874,9 @@ static int tpm_stm_i2c_pm_resume(struct device *dev) if (gpio_is_valid(pin_infos->io_lpcpd)) { gpio_set_value(pin_infos->io_lpcpd, 1); - ret = wait_for_serirq_timeout(chip, - (chip->ops->status(chip) & - TPM_STS_VALID) == TPM_STS_VALID, - chip->vendor.timeout_b); + ret = wait_for_stat(chip, + TPM_STS_VALID, chip->vendor.timeout_b, + &chip->vendor.read_queue, false); } else { ret = tpm_pm_resume(dev); if (!ret) From 00820e8207b9b87fe0e34062876696dd6d326b2d Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Mon, 1 Dec 2014 19:32:59 +0100 Subject: [PATCH 118/601] tpm/tpm_i2c_stm_st33: Remove useless i2c read on interrupt registers Remove useless i2c read on TPM_INT_ENABLE and TPM_INT_STATUS Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index eb0244a2ec9d0a..1d589e06e0dd8c 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -783,10 +783,6 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) goto _tpm_clean_answer; } - ret = I2C_READ_DATA(tpm_dev, TPM_INT_ENABLE, &intmask, 1); - if (ret < 0) - goto _tpm_clean_answer; - intmask |= TPM_INTF_CMD_READY_INT | TPM_INTF_STS_VALID_INT | TPM_INTF_DATA_AVAIL_INT; @@ -801,10 +797,6 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) if (ret < 0) goto _tpm_clean_answer; - ret = I2C_READ_DATA(tpm_dev, TPM_INT_STATUS, &intmask, 1); - if (ret < 0) - goto _tpm_clean_answer; - chip->vendor.irq = client->irq; disable_irq_nosync(chip->vendor.irq); From f2f083b5d478d409e6658392088e7e594f748c04 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Mon, 1 Dec 2014 19:33:00 +0100 Subject: [PATCH 119/601] tpm/tpm_i2c_stm_st33: Increment driver version to 1.2.1. Many changes were added to the driver so increment the version. Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index 1d589e06e0dd8c..e643c8627b7d3c 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -1,6 +1,6 @@ /* * STMicroelectronics TPM I2C Linux driver for TPM ST33ZP24 - * Copyright (C) 2009, 2010 STMicroelectronics + * Copyright (C) 2009, 2010, 2014 STMicroelectronics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -15,7 +15,7 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see . * - * STMicroelectronics version 1.2.0, Copyright (C) 2010 + * STMicroelectronics version 1.2.1, Copyright (C) 2014 * STMicroelectronics comes with ABSOLUTELY NO WARRANTY. * This is free software, and you are welcome to redistribute it * under certain conditions. @@ -910,5 +910,5 @@ module_i2c_driver(tpm_stm_i2c_driver); MODULE_AUTHOR("Christophe Ricard (tpmsupport@st.com)"); MODULE_DESCRIPTION("STM TPM I2C ST33 Driver"); -MODULE_VERSION("1.2.0"); +MODULE_VERSION("1.2.1"); MODULE_LICENSE("GPL"); From 67fe94175a3e2da2bb0897f644ed856b8528c633 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 2 Dec 2014 21:06:03 +0100 Subject: [PATCH 120/601] tpm/tpm_i2c_stm_st33: Fix coccinelle warnings. Possible NULL pointer dereference If !client the kernel mays oops in dev_info when doing client->dev. Reported-by: Peter Huewe Signed-off-by: Christophe Ricard Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index e643c8627b7d3c..86203b022d13d5 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -720,7 +720,7 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) struct tpm_stm_dev *tpm_dev; if (!client) { - dev_info(&client->dev, "%s: i2c client is NULL. Device not accessible.\n", + pr_info("%s: i2c client is NULL. Device not accessible.\n", __func__); return -ENODEV; } From 1a0f1b279c2c4e5747a682b055d27d88e35bb632 Mon Sep 17 00:00:00 2001 From: Ashley Lai Date: Thu, 4 Dec 2014 21:01:51 -0600 Subject: [PATCH 121/601] tpm_ibmvtpm: Update email address in maintainers list and ibmvtpm driver Added myself as a maintainer for the IBM vtpm driver and removed myself from the tpm maintainer list. Also, updated the tpm_ibmvtpm driver with my current email address. Signed-off-by: Ashley Lai Signed-off-by: Peter Huewe --- MAINTAINERS | 8 +++++++- drivers/char/tpm/tpm_ibmvtpm.c | 2 +- drivers/char/tpm/tpm_ibmvtpm.h | 2 +- drivers/char/tpm/tpm_of.c | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 600d2aad827660..e18aef54b4b27b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9720,13 +9720,19 @@ F: drivers/media/pci/tw68/ TPM DEVICE DRIVER M: Peter Huewe -M: Ashley Lai M: Marcel Selhorst W: http://tpmdd.sourceforge.net L: tpmdd-devel@lists.sourceforge.net (moderated for non-subscribers) S: Maintained F: drivers/char/tpm/ +TPM IBM_VTPM DEVICE DRIVER +M: Ashley Lai +W: http://tpmdd.sourceforge.net +L: tpmdd-devel@lists.sourceforge.net (moderated for non-subscribers) +S: Maintained +F: drivers/char/tpm/tpm_ibmvtpm* + TRACING M: Steven Rostedt M: Ingo Molnar diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 4109222f2878c1..96f5d448b84c53 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -1,7 +1,7 @@ /* * Copyright (C) 2012 IBM Corporation * - * Author: Ashley Lai + * Author: Ashley Lai * * Maintained by: * diff --git a/drivers/char/tpm/tpm_ibmvtpm.h b/drivers/char/tpm/tpm_ibmvtpm.h index bd82a791f995d6..f595f14426bf1f 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.h +++ b/drivers/char/tpm/tpm_ibmvtpm.h @@ -1,7 +1,7 @@ /* * Copyright (C) 2012 IBM Corporation * - * Author: Ashley Lai + * Author: Ashley Lai * * Maintained by: * diff --git a/drivers/char/tpm/tpm_of.c b/drivers/char/tpm/tpm_of.c index 98ba2bd1a355d9..c002d1bd9cafe3 100644 --- a/drivers/char/tpm/tpm_of.c +++ b/drivers/char/tpm/tpm_of.c @@ -1,7 +1,7 @@ /* * Copyright 2012 IBM Corporation * - * Author: Ashley Lai + * Author: Ashley Lai * * Maintained by: * From 87155b7311bfec75b590b823b11f77adf2a16412 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 12 Dec 2014 11:46:33 -0800 Subject: [PATCH 122/601] tpm: merge duplicate transmit_cmd() functions Merged transmit_cmd() functions in tpm-interface.c and tpm-sysfs.c. Added "tpm_" prefix for consistency sake. Changed cmd parameter as opaque. This enables to use separate command structures for TPM1 and TPM2 commands in future. Loose coupling works fine here. Signed-off-by: Jarkko Sakkinen Reviewed-by: Jasob Gunthorpe Reviewed-by: Stefan Berger Reviewed-by: Peter Huewe Tested-by: Scot Doyle Tested-by: Peter Huewe Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm-interface.c | 49 +++++++++++++++++--------------- drivers/char/tpm/tpm-sysfs.c | 23 ++------------- drivers/char/tpm/tpm.h | 3 +- 3 files changed, 30 insertions(+), 45 deletions(-) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index cfb9089887bd86..c17aa45024aaac 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -398,9 +398,10 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf, #define TPM_DIGEST_SIZE 20 #define TPM_RET_CODE_IDX 6 -static ssize_t transmit_cmd(struct tpm_chip *chip, struct tpm_cmd_t *cmd, - int len, const char *desc) +ssize_t tpm_transmit_cmd(struct tpm_chip *chip, void *cmd, + int len, const char *desc) { + struct tpm_output_header *header; int err; len = tpm_transmit(chip, (u8 *) cmd, len); @@ -409,7 +410,9 @@ static ssize_t transmit_cmd(struct tpm_chip *chip, struct tpm_cmd_t *cmd, else if (len < TPM_HEADER_SIZE) return -EFAULT; - err = be32_to_cpu(cmd->header.out.return_code); + header = cmd; + + err = be32_to_cpu(header->return_code); if (err != 0 && desc) dev_err(chip->dev, "A TPM error (%d) occurred %s\n", err, desc); @@ -448,7 +451,7 @@ ssize_t tpm_getcap(struct device *dev, __be32 subcap_id, cap_t *cap, tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4); tpm_cmd.params.getcap_in.subcap = subcap_id; } - rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, desc); + rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, desc); if (!rc) *cap = tpm_cmd.params.getcap_out.cap; return rc; @@ -464,8 +467,8 @@ void tpm_gen_interrupt(struct tpm_chip *chip) tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4); tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT; - rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, - "attempting to determine the timeouts"); + rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, + "attempting to determine the timeouts"); } EXPORT_SYMBOL_GPL(tpm_gen_interrupt); @@ -484,8 +487,8 @@ static int tpm_startup(struct tpm_chip *chip, __be16 startup_type) struct tpm_cmd_t start_cmd; start_cmd.header.in = tpm_startup_header; start_cmd.params.startup_in.startup_type = startup_type; - return transmit_cmd(chip, &start_cmd, TPM_INTERNAL_RESULT_SIZE, - "attempting to start the TPM"); + return tpm_transmit_cmd(chip, &start_cmd, TPM_INTERNAL_RESULT_SIZE, + "attempting to start the TPM"); } int tpm_get_timeouts(struct tpm_chip *chip) @@ -500,7 +503,7 @@ int tpm_get_timeouts(struct tpm_chip *chip) tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP; tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4); tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT; - rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, NULL); + rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, NULL); if (rc == TPM_ERR_INVALID_POSTINIT) { /* The TPM is not started, we are the first to talk to it. @@ -513,7 +516,7 @@ int tpm_get_timeouts(struct tpm_chip *chip) tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP; tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4); tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_TIMEOUT; - rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, + rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, NULL); } if (rc) { @@ -575,8 +578,8 @@ int tpm_get_timeouts(struct tpm_chip *chip) tpm_cmd.params.getcap_in.subcap_size = cpu_to_be32(4); tpm_cmd.params.getcap_in.subcap = TPM_CAP_PROP_TIS_DURATION; - rc = transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, - "attempting to determine the durations"); + rc = tpm_transmit_cmd(chip, &tpm_cmd, TPM_INTERNAL_RESULT_SIZE, + "attempting to determine the durations"); if (rc) return rc; @@ -631,8 +634,8 @@ static int tpm_continue_selftest(struct tpm_chip *chip) struct tpm_cmd_t cmd; cmd.header.in = continue_selftest_header; - rc = transmit_cmd(chip, &cmd, CONTINUE_SELFTEST_RESULT_SIZE, - "continue selftest"); + rc = tpm_transmit_cmd(chip, &cmd, CONTINUE_SELFTEST_RESULT_SIZE, + "continue selftest"); return rc; } @@ -672,8 +675,8 @@ int tpm_pcr_read_dev(struct tpm_chip *chip, int pcr_idx, u8 *res_buf) cmd.header.in = pcrread_header; cmd.params.pcrread_in.pcr_idx = cpu_to_be32(pcr_idx); - rc = transmit_cmd(chip, &cmd, READ_PCR_RESULT_SIZE, - "attempting to read a pcr value"); + rc = tpm_transmit_cmd(chip, &cmd, READ_PCR_RESULT_SIZE, + "attempting to read a pcr value"); if (rc == 0) memcpy(res_buf, cmd.params.pcrread_out.pcr_result, @@ -737,8 +740,8 @@ int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash) cmd.header.in = pcrextend_header; cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(pcr_idx); memcpy(cmd.params.pcrextend_in.hash, hash, TPM_DIGEST_SIZE); - rc = transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE, - "attempting extend a PCR value"); + rc = tpm_transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE, + "attempting extend a PCR value"); tpm_chip_put(chip); return rc; @@ -817,7 +820,7 @@ int tpm_send(u32 chip_num, void *cmd, size_t buflen) if (chip == NULL) return -ENODEV; - rc = transmit_cmd(chip, cmd, buflen, "attempting tpm_cmd"); + rc = tpm_transmit_cmd(chip, cmd, buflen, "attempting tpm_cmd"); tpm_chip_put(chip); return rc; @@ -938,14 +941,14 @@ int tpm_pm_suspend(struct device *dev) cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(tpm_suspend_pcr); memcpy(cmd.params.pcrextend_in.hash, dummy_hash, TPM_DIGEST_SIZE); - rc = transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE, - "extending dummy pcr before suspend"); + rc = tpm_transmit_cmd(chip, &cmd, EXTEND_PCR_RESULT_SIZE, + "extending dummy pcr before suspend"); } /* now do the actual savestate */ for (try = 0; try < TPM_RETRY; try++) { cmd.header.in = savestate_header; - rc = transmit_cmd(chip, &cmd, SAVESTATE_RESULT_SIZE, NULL); + rc = tpm_transmit_cmd(chip, &cmd, SAVESTATE_RESULT_SIZE, NULL); /* * If the TPM indicates that it is too busy to respond to @@ -1022,7 +1025,7 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max) tpm_cmd.header.in = tpm_getrandom_header; tpm_cmd.params.getrandom_in.num_bytes = cpu_to_be32(num_bytes); - err = transmit_cmd(chip, &tpm_cmd, + err = tpm_transmit_cmd(chip, &tpm_cmd, TPM_GETRANDOM_RESULT_SIZE + num_bytes, "attempting get random"); if (err) diff --git a/drivers/char/tpm/tpm-sysfs.c b/drivers/char/tpm/tpm-sysfs.c index 01730a27ae076c..8ecb052e290d32 100644 --- a/drivers/char/tpm/tpm-sysfs.c +++ b/drivers/char/tpm/tpm-sysfs.c @@ -20,25 +20,6 @@ #include #include "tpm.h" -/* XXX for now this helper is duplicated in tpm-interface.c */ -static ssize_t transmit_cmd(struct tpm_chip *chip, struct tpm_cmd_t *cmd, - int len, const char *desc) -{ - int err; - - len = tpm_transmit(chip, (u8 *) cmd, len); - if (len < 0) - return len; - else if (len < TPM_HEADER_SIZE) - return -EFAULT; - - err = be32_to_cpu(cmd->header.out.return_code); - if (err != 0 && desc) - dev_err(chip->dev, "A TPM error (%d) occurred %s\n", err, desc); - - return err; -} - #define READ_PUBEK_RESULT_SIZE 314 #define TPM_ORD_READPUBEK cpu_to_be32(124) static struct tpm_input_header tpm_readpubek_header = { @@ -58,8 +39,8 @@ static ssize_t pubek_show(struct device *dev, struct device_attribute *attr, struct tpm_chip *chip = dev_get_drvdata(dev); tpm_cmd.header.in = tpm_readpubek_header; - err = transmit_cmd(chip, &tpm_cmd, READ_PUBEK_RESULT_SIZE, - "attempting to read the PUBEK"); + err = tpm_transmit_cmd(chip, &tpm_cmd, READ_PUBEK_RESULT_SIZE, + "attempting to read the PUBEK"); if (err) goto out; diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index e4d0888d2eab58..e638eb016b90cb 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -314,9 +314,10 @@ struct tpm_cmd_t { } __packed; ssize_t tpm_getcap(struct device *, __be32, cap_t *, const char *); - ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf, size_t bufsiz); +ssize_t tpm_transmit_cmd(struct tpm_chip *chip, void *cmd, int len, + const char *desc); extern int tpm_get_timeouts(struct tpm_chip *); extern void tpm_gen_interrupt(struct tpm_chip *); extern int tpm_do_selftest(struct tpm_chip *); From afb5abc262e962089ef2d7c2bbf71bb6f53a2a78 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 12 Dec 2014 11:46:34 -0800 Subject: [PATCH 123/601] tpm: two-phase chip management functions tpm_register_hardware() and tpm_remove_hardware() are called often before initializing the device. The problem is that the device might not be fully initialized when it comes visible to the user space. This patch resolves the issue by diving initialization into two parts: - tpmm_chip_alloc() creates struct tpm_chip. - tpm_chip_register() sets up the character device and sysfs attributes. The framework takes care of freeing struct tpm_chip by using the devres API. The broken release callback has been wiped. ACPI drivers do not ever get this callback. Regards to Jason Gunthorpe for carefully reviewing this part of the code. Signed-off-by: Jarkko Sakkinen Reviewed-by: Jasob Gunthorpe Reviewed-by: Stefan Berger Tested-by: Scot Doyle Tested-by: Peter Huewe [phuewe: update to upstream changes] Signed-off-by: Peter Huewe --- drivers/char/tpm/Makefile | 2 +- drivers/char/tpm/tpm-chip.c | 199 ++++++++++++++++++++++++++++ drivers/char/tpm/tpm-interface.c | 148 +-------------------- drivers/char/tpm/tpm.h | 16 ++- drivers/char/tpm/tpm_atmel.c | 11 +- drivers/char/tpm/tpm_i2c_atmel.c | 39 ++---- drivers/char/tpm/tpm_i2c_infineon.c | 37 +----- drivers/char/tpm/tpm_i2c_nuvoton.c | 47 ++----- drivers/char/tpm/tpm_i2c_stm_st33.c | 14 +- drivers/char/tpm/tpm_ibmvtpm.c | 17 +-- drivers/char/tpm/tpm_infineon.c | 29 ++-- drivers/char/tpm/tpm_nsc.c | 14 +- drivers/char/tpm/tpm_tis.c | 80 +++++------ drivers/char/tpm/xen-tpmfront.c | 14 +- 14 files changed, 329 insertions(+), 338 deletions(-) create mode 100644 drivers/char/tpm/tpm-chip.c diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile index 7f54dae847e454..c715596acb7c07 100644 --- a/drivers/char/tpm/Makefile +++ b/drivers/char/tpm/Makefile @@ -2,7 +2,7 @@ # Makefile for the kernel tpm device drivers. # obj-$(CONFIG_TCG_TPM) += tpm.o -tpm-y := tpm-interface.o tpm-dev.o tpm-sysfs.o +tpm-y := tpm-interface.o tpm-dev.o tpm-sysfs.o tpm-chip.o tpm-$(CONFIG_ACPI) += tpm_ppi.o ifdef CONFIG_ACPI diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c new file mode 100644 index 00000000000000..7dc9999e2d54b2 --- /dev/null +++ b/drivers/char/tpm/tpm-chip.c @@ -0,0 +1,199 @@ +/* + * Copyright (C) 2004 IBM Corporation + * Copyright (C) 2014 Intel Corporation + * + * Authors: + * Jarkko Sakkinen + * Leendert van Doorn + * Dave Safford + * Reiner Sailer + * Kylene Hall + * + * Maintained by: + * + * TPM chip management routines. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. + * + */ + +#include +#include +#include +#include +#include +#include "tpm.h" +#include "tpm_eventlog.h" + +static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES); +static LIST_HEAD(tpm_chip_list); +static DEFINE_SPINLOCK(driver_lock); + +/* + * tpm_chip_find_get - return tpm_chip for a given chip number + * @chip_num the device number for the chip + */ +struct tpm_chip *tpm_chip_find_get(int chip_num) +{ + struct tpm_chip *pos, *chip = NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(pos, &tpm_chip_list, list) { + if (chip_num != TPM_ANY_NUM && chip_num != pos->dev_num) + continue; + + if (try_module_get(pos->dev->driver->owner)) { + chip = pos; + break; + } + } + rcu_read_unlock(); + return chip; +} + +/** + * tpmm_chip_remove() - free chip memory and device number + * @data: points to struct tpm_chip instance + * + * This is used internally by tpmm_chip_alloc() and called by devres + * when the device is released. This function does the opposite of + * tpmm_chip_alloc() freeing memory and the device number. + */ +static void tpmm_chip_remove(void *data) +{ + struct tpm_chip *chip = (struct tpm_chip *) data; + + spin_lock(&driver_lock); + clear_bit(chip->dev_num, dev_mask); + spin_unlock(&driver_lock); + kfree(chip); +} + +/** + * tpmm_chip_alloc() - allocate a new struct tpm_chip instance + * @dev: device to which the chip is associated + * @ops: struct tpm_class_ops instance + * + * Allocates a new struct tpm_chip instance and assigns a free + * device number for it. Caller does not have to worry about + * freeing the allocated resources. When the devices is removed + * devres calls tpmm_chip_remove() to do the job. + */ +struct tpm_chip *tpmm_chip_alloc(struct device *dev, + const struct tpm_class_ops *ops) +{ + struct tpm_chip *chip; + + chip = kzalloc(sizeof(*chip), GFP_KERNEL); + if (chip == NULL) + return ERR_PTR(-ENOMEM); + + mutex_init(&chip->tpm_mutex); + INIT_LIST_HEAD(&chip->list); + + chip->ops = ops; + + spin_lock(&driver_lock); + chip->dev_num = find_first_zero_bit(dev_mask, TPM_NUM_DEVICES); + spin_unlock(&driver_lock); + + if (chip->dev_num >= TPM_NUM_DEVICES) { + dev_err(dev, "No available tpm device numbers\n"); + kfree(chip); + return ERR_PTR(-ENOMEM); + } + + set_bit(chip->dev_num, dev_mask); + + scnprintf(chip->devname, sizeof(chip->devname), "tpm%d", chip->dev_num); + + chip->dev = dev; + devm_add_action(dev, tpmm_chip_remove, chip); + dev_set_drvdata(dev, chip); + + return chip; +} +EXPORT_SYMBOL_GPL(tpmm_chip_alloc); + +/* + * tpm_chip_register() - create a misc driver for the TPM chip + * @chip: TPM chip to use. + * + * Creates a misc driver for the TPM chip and adds sysfs interfaces for + * the device, PPI and TCPA. As the last step this function adds the + * chip to the list of TPM chips available for use. + * + * NOTE: This function should be only called after the chip initialization + * is complete. + * + * Called from tpm_.c probe function only for devices + * the driver has determined it should claim. Prior to calling + * this function the specific probe function has called pci_enable_device + * upon errant exit from this function specific probe function should call + * pci_disable_device + */ +int tpm_chip_register(struct tpm_chip *chip) +{ + int rc; + + rc = tpm_dev_add_device(chip); + if (rc) + return rc; + + rc = tpm_sysfs_add_device(chip); + if (rc) + goto del_misc; + + rc = tpm_add_ppi(&chip->dev->kobj); + if (rc) + goto del_sysfs; + + chip->bios_dir = tpm_bios_log_setup(chip->devname); + + /* Make the chip available. */ + spin_lock(&driver_lock); + list_add_rcu(&chip->list, &tpm_chip_list); + spin_unlock(&driver_lock); + + chip->flags |= TPM_CHIP_FLAG_REGISTERED; + + return 0; +del_sysfs: + tpm_sysfs_del_device(chip); +del_misc: + tpm_dev_del_device(chip); + return rc; +} +EXPORT_SYMBOL_GPL(tpm_chip_register); + +/* + * tpm_chip_unregister() - release the TPM driver + * @chip: TPM chip to use. + * + * Takes the chip first away from the list of available TPM chips and then + * cleans up all the resources reserved by tpm_chip_register(). + * + * NOTE: This function should be only called before deinitializing chip + * resources. + */ +void tpm_chip_unregister(struct tpm_chip *chip) +{ + if (!(chip->flags & TPM_CHIP_FLAG_REGISTERED)) + return; + + spin_lock(&driver_lock); + list_del_rcu(&chip->list); + spin_unlock(&driver_lock); + synchronize_rcu(); + + if (chip->bios_dir) + tpm_bios_log_teardown(chip->bios_dir); + tpm_remove_ppi(&chip->dev->kobj); + tpm_sysfs_del_device(chip); + + tpm_dev_del_device(chip); +} +EXPORT_SYMBOL_GPL(tpm_chip_unregister); diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index c17aa45024aaac..4dbed1e45abd7a 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2004 IBM Corporation + * Copyright (C) 2014 Intel Corporation * * Authors: * Leendert van Doorn @@ -47,10 +48,6 @@ module_param_named(suspend_pcr, tpm_suspend_pcr, uint, 0644); MODULE_PARM_DESC(suspend_pcr, "PCR to use for dummy writes to faciltate flush on suspend."); -static LIST_HEAD(tpm_chip_list); -static DEFINE_SPINLOCK(driver_lock); -static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES); - /* * Array with one entry per ordinal defining the maximum amount * of time the chip could take to return the result. The ordinal @@ -639,27 +636,6 @@ static int tpm_continue_selftest(struct tpm_chip *chip) return rc; } -/* - * tpm_chip_find_get - return tpm_chip for given chip number - */ -static struct tpm_chip *tpm_chip_find_get(int chip_num) -{ - struct tpm_chip *pos, *chip = NULL; - - rcu_read_lock(); - list_for_each_entry_rcu(pos, &tpm_chip_list, list) { - if (chip_num != TPM_ANY_NUM && chip_num != pos->dev_num) - continue; - - if (try_module_get(pos->dev->driver->owner)) { - chip = pos; - break; - } - } - rcu_read_unlock(); - return chip; -} - #define TPM_ORDINAL_PCRREAD cpu_to_be32(21) #define READ_PCR_RESULT_SIZE 30 static struct tpm_input_header pcrread_header = { @@ -887,30 +863,6 @@ int wait_for_tpm_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout, } EXPORT_SYMBOL_GPL(wait_for_tpm_stat); -void tpm_remove_hardware(struct device *dev) -{ - struct tpm_chip *chip = dev_get_drvdata(dev); - - if (chip == NULL) { - dev_err(dev, "No device data found\n"); - return; - } - - spin_lock(&driver_lock); - list_del_rcu(&chip->list); - spin_unlock(&driver_lock); - synchronize_rcu(); - - tpm_dev_del_device(chip); - tpm_sysfs_del_device(chip); - tpm_remove_ppi(&dev->kobj); - tpm_bios_log_teardown(chip->bios_dir); - - /* write it this way to be explicit (chip->dev == dev) */ - put_device(chip->dev); -} -EXPORT_SYMBOL_GPL(tpm_remove_hardware); - #define TPM_ORD_SAVESTATE cpu_to_be32(152) #define SAVESTATE_RESULT_SIZE 10 @@ -1044,104 +996,6 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max) } EXPORT_SYMBOL_GPL(tpm_get_random); -/* In case vendor provided release function, call it too.*/ - -void tpm_dev_vendor_release(struct tpm_chip *chip) -{ - if (!chip) - return; - - clear_bit(chip->dev_num, dev_mask); -} -EXPORT_SYMBOL_GPL(tpm_dev_vendor_release); - - -/* - * Once all references to platform device are down to 0, - * release all allocated structures. - */ -static void tpm_dev_release(struct device *dev) -{ - struct tpm_chip *chip = dev_get_drvdata(dev); - - if (!chip) - return; - - tpm_dev_vendor_release(chip); - - chip->release(dev); - kfree(chip); -} - -/* - * Called from tpm_.c probe function only for devices - * the driver has determined it should claim. Prior to calling - * this function the specific probe function has called pci_enable_device - * upon errant exit from this function specific probe function should call - * pci_disable_device - */ -struct tpm_chip *tpm_register_hardware(struct device *dev, - const struct tpm_class_ops *ops) -{ - struct tpm_chip *chip; - - /* Driver specific per-device data */ - chip = kzalloc(sizeof(*chip), GFP_KERNEL); - - if (chip == NULL) - return NULL; - - mutex_init(&chip->tpm_mutex); - INIT_LIST_HEAD(&chip->list); - - chip->ops = ops; - chip->dev_num = find_first_zero_bit(dev_mask, TPM_NUM_DEVICES); - - if (chip->dev_num >= TPM_NUM_DEVICES) { - dev_err(dev, "No available tpm device numbers\n"); - goto out_free; - } - - set_bit(chip->dev_num, dev_mask); - - scnprintf(chip->devname, sizeof(chip->devname), "%s%d", "tpm", - chip->dev_num); - - chip->dev = get_device(dev); - chip->release = dev->release; - dev->release = tpm_dev_release; - dev_set_drvdata(dev, chip); - - if (tpm_dev_add_device(chip)) - goto put_device; - - if (tpm_sysfs_add_device(chip)) - goto del_misc; - - if (tpm_add_ppi(&dev->kobj)) - goto del_sysfs; - - chip->bios_dir = tpm_bios_log_setup(chip->devname); - - /* Make chip available */ - spin_lock(&driver_lock); - list_add_tail_rcu(&chip->list, &tpm_chip_list); - spin_unlock(&driver_lock); - - return chip; - -del_sysfs: - tpm_sysfs_del_device(chip); -del_misc: - tpm_dev_del_device(chip); -put_device: - put_device(chip->dev); -out_free: - kfree(chip); - return NULL; -} -EXPORT_SYMBOL_GPL(tpm_register_hardware); - MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)"); MODULE_DESCRIPTION("TPM Driver"); MODULE_VERSION("2.0"); diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index e638eb016b90cb..72ff18c872d3e9 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -94,9 +94,14 @@ struct tpm_vendor_specific { #define TPM_VID_WINBOND 0x1050 #define TPM_VID_STM 0x104A +enum tpm_chip_flags { + TPM_CHIP_FLAG_REGISTERED = BIT(0), +}; + struct tpm_chip { struct device *dev; /* Device stuff */ const struct tpm_class_ops *ops; + unsigned int flags; int dev_num; /* /dev/tpm# */ char devname[7]; @@ -110,7 +115,6 @@ struct tpm_chip { struct dentry **bios_dir; struct list_head list; - void (*release) (struct device *); }; #define to_tpm_chip(n) container_of(n, struct tpm_chip, vendor) @@ -322,15 +326,17 @@ extern int tpm_get_timeouts(struct tpm_chip *); extern void tpm_gen_interrupt(struct tpm_chip *); extern int tpm_do_selftest(struct tpm_chip *); extern unsigned long tpm_calc_ordinal_duration(struct tpm_chip *, u32); -extern struct tpm_chip* tpm_register_hardware(struct device *, - const struct tpm_class_ops *ops); -extern void tpm_dev_vendor_release(struct tpm_chip *); -extern void tpm_remove_hardware(struct device *); extern int tpm_pm_suspend(struct device *); extern int tpm_pm_resume(struct device *); extern int wait_for_tpm_stat(struct tpm_chip *, u8, unsigned long, wait_queue_head_t *, bool); +struct tpm_chip *tpm_chip_find_get(int chip_num); +extern struct tpm_chip *tpmm_chip_alloc(struct device *dev, + const struct tpm_class_ops *ops); +extern int tpm_chip_register(struct tpm_chip *chip); +extern void tpm_chip_unregister(struct tpm_chip *chip); + int tpm_dev_add_device(struct tpm_chip *chip); void tpm_dev_del_device(struct tpm_chip *chip); int tpm_sysfs_add_device(struct tpm_chip *chip); diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c index 435c8b9dd2f8fe..3d4c6c3b0433ee 100644 --- a/drivers/char/tpm/tpm_atmel.c +++ b/drivers/char/tpm/tpm_atmel.c @@ -138,11 +138,11 @@ static void atml_plat_remove(void) struct tpm_chip *chip = dev_get_drvdata(&pdev->dev); if (chip) { + tpm_chip_unregister(chip); if (chip->vendor.have_region) atmel_release_region(chip->vendor.base, chip->vendor.region_size); atmel_put_base_addr(chip->vendor.iobase); - tpm_remove_hardware(chip->dev); platform_device_unregister(pdev); } } @@ -183,8 +183,9 @@ static int __init init_atmel(void) goto err_rel_reg; } - if (!(chip = tpm_register_hardware(&pdev->dev, &tpm_atmel))) { - rc = -ENODEV; + chip = tpmm_chip_alloc(&pdev->dev, &tpm_atmel); + if (IS_ERR(chip)) { + rc = PTR_ERR(chip); goto err_unreg_dev; } @@ -193,6 +194,10 @@ static int __init init_atmel(void) chip->vendor.have_region = have_region; chip->vendor.region_size = region_size; + rc = tpm_chip_register(chip); + if (rc) + goto err_unreg_dev; + return 0; err_unreg_dev: diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c index 5f448886417f50..643a9402e911da 100644 --- a/drivers/char/tpm/tpm_i2c_atmel.c +++ b/drivers/char/tpm/tpm_i2c_atmel.c @@ -153,25 +153,20 @@ static const struct tpm_class_ops i2c_atmel = { static int i2c_atmel_probe(struct i2c_client *client, const struct i2c_device_id *id) { - int rc; struct tpm_chip *chip; struct device *dev = &client->dev; if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) return -ENODEV; - chip = tpm_register_hardware(dev, &i2c_atmel); - if (!chip) { - dev_err(dev, "%s() error in tpm_register_hardware\n", __func__); - return -ENODEV; - } + chip = tpmm_chip_alloc(dev, &i2c_atmel); + if (IS_ERR(chip)) + return PTR_ERR(chip); chip->vendor.priv = devm_kzalloc(dev, sizeof(struct priv_data), GFP_KERNEL); - if (!chip->vendor.priv) { - rc = -ENOMEM; - goto out_err; - } + if (!chip->vendor.priv) + return -ENOMEM; /* Default timeouts */ chip->vendor.timeout_a = msecs_to_jiffies(TPM_I2C_SHORT_TIMEOUT); @@ -183,32 +178,20 @@ static int i2c_atmel_probe(struct i2c_client *client, /* There is no known way to probe for this device, and all version * information seems to be read via TPM commands. Thus we rely on the * TPM startup process in the common code to detect the device. */ - if (tpm_get_timeouts(chip)) { - rc = -ENODEV; - goto out_err; - } - - if (tpm_do_selftest(chip)) { - rc = -ENODEV; - goto out_err; - } + if (tpm_get_timeouts(chip)) + return -ENODEV; - return 0; + if (tpm_do_selftest(chip)) + return -ENODEV; -out_err: - tpm_dev_vendor_release(chip); - tpm_remove_hardware(chip->dev); - return rc; + return tpm_chip_register(chip); } static int i2c_atmel_remove(struct i2c_client *client) { struct device *dev = &(client->dev); struct tpm_chip *chip = dev_get_drvdata(dev); - - tpm_dev_vendor_release(chip); - tpm_remove_hardware(dev); - kfree(chip); + tpm_chip_unregister(chip); return 0; } diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c index 472af4bb1b6128..03708e6b309a39 100644 --- a/drivers/char/tpm/tpm_i2c_infineon.c +++ b/drivers/char/tpm/tpm_i2c_infineon.c @@ -581,12 +581,9 @@ static int tpm_tis_i2c_init(struct device *dev) int rc = 0; struct tpm_chip *chip; - chip = tpm_register_hardware(dev, &tpm_tis_i2c); - if (!chip) { - dev_err(dev, "could not register hardware\n"); - rc = -ENODEV; - goto out_err; - } + chip = tpmm_chip_alloc(dev, &tpm_tis_i2c); + if (IS_ERR(chip)) + return PTR_ERR(chip); /* Disable interrupts */ chip->vendor.irq = 0; @@ -600,7 +597,7 @@ static int tpm_tis_i2c_init(struct device *dev) if (request_locality(chip, 0) != 0) { dev_err(dev, "could not request locality\n"); rc = -ENODEV; - goto out_vendor; + goto out_err; } /* read four bytes from DID_VID register */ @@ -628,21 +625,9 @@ static int tpm_tis_i2c_init(struct device *dev) tpm_get_timeouts(chip); tpm_do_selftest(chip); - return 0; - + return tpm_chip_register(chip); out_release: release_locality(chip, chip->vendor.locality, 1); - -out_vendor: - /* close file handles */ - tpm_dev_vendor_release(chip); - - /* remove hardware */ - tpm_remove_hardware(chip->dev); - - /* reset these pointers, otherwise we oops */ - chip->dev->release = NULL; - chip->release = NULL; tpm_dev.client = NULL; out_err: return rc; @@ -712,17 +697,9 @@ static int tpm_tis_i2c_probe(struct i2c_client *client, static int tpm_tis_i2c_remove(struct i2c_client *client) { struct tpm_chip *chip = tpm_dev.chip; - release_locality(chip, chip->vendor.locality, 1); - /* close file handles */ - tpm_dev_vendor_release(chip); - - /* remove hardware */ - tpm_remove_hardware(chip->dev); - - /* reset these pointers, otherwise we oops */ - chip->dev->release = NULL; - chip->release = NULL; + tpm_chip_unregister(chip); + release_locality(chip, chip->vendor.locality, 1); tpm_dev.client = NULL; return 0; diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c index bbb4997438c3b4..8c23088d99ef30 100644 --- a/drivers/char/tpm/tpm_i2c_nuvoton.c +++ b/drivers/char/tpm/tpm_i2c_nuvoton.c @@ -530,18 +530,14 @@ static int i2c_nuvoton_probe(struct i2c_client *client, dev_info(dev, "VID: %04X DID: %02X RID: %02X\n", (u16) vid, (u8) (vid >> 16), (u8) (vid >> 24)); - chip = tpm_register_hardware(dev, &tpm_i2c); - if (!chip) { - dev_err(dev, "%s() error in tpm_register_hardware\n", __func__); - return -ENODEV; - } + chip = tpmm_chip_alloc(dev, &tpm_i2c); + if (IS_ERR(chip)) + return PTR_ERR(chip); chip->vendor.priv = devm_kzalloc(dev, sizeof(struct priv_data), GFP_KERNEL); - if (!chip->vendor.priv) { - rc = -ENOMEM; - goto out_err; - } + if (!chip->vendor.priv) + return -ENOMEM; init_waitqueue_head(&chip->vendor.read_queue); init_waitqueue_head(&chip->vendor.int_queue); @@ -589,7 +585,7 @@ static int i2c_nuvoton_probe(struct i2c_client *client, TPM_DATA_FIFO_W, 1, (u8 *) (&rc)); if (rc < 0) - goto out_err; + return rc; /* TPM_STS <- 0x40 (commandReady) */ i2c_nuvoton_ready(chip); } else { @@ -599,44 +595,29 @@ static int i2c_nuvoton_probe(struct i2c_client *client, * only TPM_STS_VALID should be set */ if (i2c_nuvoton_read_status(chip) != - TPM_STS_VALID) { - rc = -EIO; - goto out_err; - } + TPM_STS_VALID) + return -EIO; } } } - if (tpm_get_timeouts(chip)) { - rc = -ENODEV; - goto out_err; - } - - if (tpm_do_selftest(chip)) { - rc = -ENODEV; - goto out_err; - } + if (tpm_get_timeouts(chip)) + return -ENODEV; - return 0; + if (tpm_do_selftest(chip)) + return -ENODEV; -out_err: - tpm_dev_vendor_release(chip); - tpm_remove_hardware(chip->dev); - return rc; + return tpm_chip_register(chip); } static int i2c_nuvoton_remove(struct i2c_client *client) { struct device *dev = &(client->dev); struct tpm_chip *chip = dev_get_drvdata(dev); - - tpm_dev_vendor_release(chip); - tpm_remove_hardware(dev); - kfree(chip); + tpm_chip_unregister(chip); return 0; } - static const struct i2c_device_id i2c_nuvoton_id[] = { {I2C_DRIVER_NAME, 0}, {} diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index 86203b022d13d5..9a96d3704fe578 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -735,11 +735,9 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) if (!tpm_dev) return -ENOMEM; - chip = tpm_register_hardware(&client->dev, &st_i2c_tpm); - if (!chip) { - dev_info(&client->dev, "fail chip\n"); - return -ENODEV; - } + chip = tpmm_chip_alloc(&client->dev, &st_i2c_tpm); + if (IS_ERR(chip)) + return PTR_ERR(chip); TPM_VPRIV(chip) = tpm_dev; tpm_dev->client = client; @@ -807,10 +805,8 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) tpm_get_timeouts(chip); tpm_do_selftest(chip); - dev_info(chip->dev, "TPM I2C Initialized\n"); - return 0; + return tpm_chip_register(chip); _tpm_clean_answer: - tpm_remove_hardware(chip->dev); dev_info(chip->dev, "TPM I2C initialisation fail\n"); return ret; } @@ -827,7 +823,7 @@ static int tpm_stm_i2c_remove(struct i2c_client *client) (struct tpm_chip *) i2c_get_clientdata(client); if (chip) - tpm_remove_hardware(chip->dev); + tpm_chip_unregister(chip); return 0; } diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index 96f5d448b84c53..0840347e251c38 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -270,8 +270,11 @@ static int ibmvtpm_crq_send_init(struct ibmvtpm_dev *ibmvtpm) static int tpm_ibmvtpm_remove(struct vio_dev *vdev) { struct ibmvtpm_dev *ibmvtpm = ibmvtpm_get_data(&vdev->dev); + struct tpm_chip *chip = dev_get_drvdata(ibmvtpm->dev); int rc = 0; + tpm_chip_unregister(chip); + free_irq(vdev->irq, ibmvtpm); do { @@ -290,8 +293,6 @@ static int tpm_ibmvtpm_remove(struct vio_dev *vdev) kfree(ibmvtpm->rtce_buf); } - tpm_remove_hardware(ibmvtpm->dev); - kfree(ibmvtpm); return 0; @@ -563,11 +564,9 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev, struct tpm_chip *chip; int rc = -ENOMEM, rc1; - chip = tpm_register_hardware(dev, &tpm_ibmvtpm); - if (!chip) { - dev_err(dev, "tpm_register_hardware failed\n"); - return -ENODEV; - } + chip = tpmm_chip_alloc(dev, &tpm_ibmvtpm); + if (IS_ERR(chip)) + return PTR_ERR(chip); ibmvtpm = kzalloc(sizeof(struct ibmvtpm_dev), GFP_KERNEL); if (!ibmvtpm) { @@ -637,7 +636,7 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev, if (rc) goto init_irq_cleanup; - return rc; + return tpm_chip_register(chip); init_irq_cleanup: do { rc1 = plpar_hcall_norets(H_FREE_CRQ, vio_dev->unit_address); @@ -652,8 +651,6 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev, kfree(ibmvtpm); } - tpm_remove_hardware(dev); - return rc; } diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c index dc0a2554034ea2..dcdb671b2a5df1 100644 --- a/drivers/char/tpm/tpm_infineon.c +++ b/drivers/char/tpm/tpm_infineon.c @@ -546,7 +546,14 @@ static int tpm_inf_pnp_probe(struct pnp_dev *dev, vendorid[0], vendorid[1], productid[0], productid[1], chipname); - if (!(chip = tpm_register_hardware(&dev->dev, &tpm_inf))) + chip = tpmm_chip_alloc(&dev->dev, &tpm_inf); + if (IS_ERR(chip)) { + rc = PTR_ERR(chip); + goto err_release_region; + } + + rc = tpm_chip_register(chip); + if (rc) goto err_release_region; return 0; @@ -572,17 +579,15 @@ static void tpm_inf_pnp_remove(struct pnp_dev *dev) { struct tpm_chip *chip = pnp_get_drvdata(dev); - if (chip) { - if (tpm_dev.iotype == TPM_INF_IO_PORT) { - release_region(tpm_dev.data_regs, tpm_dev.data_size); - release_region(tpm_dev.config_port, - tpm_dev.config_size); - } else { - iounmap(tpm_dev.mem_base); - release_mem_region(tpm_dev.map_base, tpm_dev.map_size); - } - tpm_dev_vendor_release(chip); - tpm_remove_hardware(chip->dev); + tpm_chip_unregister(chip); + + if (tpm_dev.iotype == TPM_INF_IO_PORT) { + release_region(tpm_dev.data_regs, tpm_dev.data_size); + release_region(tpm_dev.config_port, + tpm_dev.config_size); + } else { + iounmap(tpm_dev.mem_base); + release_mem_region(tpm_dev.map_base, tpm_dev.map_size); } } diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c index 4d0a17ea8cde6b..6e2c2e64b292e7 100644 --- a/drivers/char/tpm/tpm_nsc.c +++ b/drivers/char/tpm/tpm_nsc.c @@ -247,10 +247,9 @@ static struct platform_device *pdev = NULL; static void tpm_nsc_remove(struct device *dev) { struct tpm_chip *chip = dev_get_drvdata(dev); - if ( chip ) { - release_region(chip->vendor.base, 2); - tpm_remove_hardware(chip->dev); - } + + tpm_chip_unregister(chip); + release_region(chip->vendor.base, 2); } static SIMPLE_DEV_PM_OPS(tpm_nsc_pm, tpm_pm_suspend, tpm_pm_resume); @@ -307,11 +306,16 @@ static int __init init_nsc(void) goto err_del_dev; } - if (!(chip = tpm_register_hardware(&pdev->dev, &tpm_nsc))) { + chip = tpmm_chip_alloc(&pdev->dev, &tpm_nsc); + if (IS_ERR(chip)) { rc = -ENODEV; goto err_rel_reg; } + rc = tpm_chip_register(chip); + if (rc) + goto err_rel_reg; + dev_dbg(&pdev->dev, "NSC TPM detected\n"); dev_dbg(&pdev->dev, "NSC LDN 0x%x, SID 0x%x, SRID 0x%x\n", diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index ccb140d605327d..36f4fec11c2aa0 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -79,9 +79,6 @@ struct priv_data { bool irq_tested; }; -static LIST_HEAD(tis_chips); -static DEFINE_MUTEX(tis_lock); - #if defined(CONFIG_PNP) && defined(CONFIG_ACPI) static int is_itpm(struct pnp_dev *dev) { @@ -572,6 +569,17 @@ static bool interrupts = true; module_param(interrupts, bool, 0444); MODULE_PARM_DESC(interrupts, "Enable interrupts"); +static void tpm_tis_remove(struct tpm_chip *chip) +{ + iowrite32(~TPM_GLOBAL_INT_ENABLE & + ioread32(chip->vendor.iobase + + TPM_INT_ENABLE(chip->vendor. + locality)), + chip->vendor.iobase + + TPM_INT_ENABLE(chip->vendor.locality)); + release_locality(chip, chip->vendor.locality, 1); +} + static int tpm_tis_init(struct device *dev, resource_size_t start, resource_size_t len, unsigned int irq) { @@ -583,15 +591,16 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, priv = devm_kzalloc(dev, sizeof(struct priv_data), GFP_KERNEL); if (priv == NULL) return -ENOMEM; - if (!(chip = tpm_register_hardware(dev, &tpm_tis))) - return -ENODEV; + + chip = tpmm_chip_alloc(dev, &tpm_tis); + if (IS_ERR(chip)) + return PTR_ERR(chip); + chip->vendor.priv = priv; - chip->vendor.iobase = ioremap(start, len); - if (!chip->vendor.iobase) { - rc = -EIO; - goto out_err; - } + chip->vendor.iobase = devm_ioremap(dev, start, len); + if (!chip->vendor.iobase) + return -EIO; /* Default timeouts */ chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT); @@ -685,8 +694,8 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, for (i = irq_s; i <= irq_e && chip->vendor.irq == 0; i++) { iowrite8(i, chip->vendor.iobase + TPM_INT_VECTOR(chip->vendor.locality)); - if (request_irq - (i, tis_int_probe, IRQF_SHARED, + if (devm_request_irq + (dev, i, tis_int_probe, IRQF_SHARED, chip->vendor.miscdev.name, chip) != 0) { dev_info(chip->dev, "Unable to request irq: %d for probe\n", @@ -726,15 +735,14 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, iowrite32(intmask, chip->vendor.iobase + TPM_INT_ENABLE(chip->vendor.locality)); - free_irq(i, chip); } } if (chip->vendor.irq) { iowrite8(chip->vendor.irq, chip->vendor.iobase + TPM_INT_VECTOR(chip->vendor.locality)); - if (request_irq - (chip->vendor.irq, tis_int_handler, IRQF_SHARED, + if (devm_request_irq + (dev, chip->vendor.irq, tis_int_handler, IRQF_SHARED, chip->vendor.miscdev.name, chip) != 0) { dev_info(chip->dev, "Unable to request irq: %d for use\n", @@ -767,17 +775,9 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, goto out_err; } - INIT_LIST_HEAD(&chip->vendor.list); - mutex_lock(&tis_lock); - list_add(&chip->vendor.list, &tis_chips); - mutex_unlock(&tis_lock); - - - return 0; + return tpm_chip_register(chip); out_err: - if (chip->vendor.iobase) - iounmap(chip->vendor.iobase); - tpm_remove_hardware(chip->dev); + tpm_tis_remove(chip); return rc; } @@ -859,13 +859,10 @@ MODULE_DEVICE_TABLE(pnp, tpm_pnp_tbl); static void tpm_tis_pnp_remove(struct pnp_dev *dev) { struct tpm_chip *chip = pnp_get_drvdata(dev); - - tpm_dev_vendor_release(chip); - - kfree(chip); + tpm_chip_unregister(chip); + tpm_tis_remove(chip); } - static struct pnp_driver tis_pnp_driver = { .name = "tpm_tis", .id_table = tpm_pnp_tbl, @@ -884,7 +881,7 @@ MODULE_PARM_DESC(hid, "Set additional specific HID for this driver to probe"); static struct platform_driver tis_drv = { .driver = { - .name = "tpm_tis", + .name = "tpm_tis", .pm = &tpm_tis_pm, }, }; @@ -923,31 +920,16 @@ static int __init init_tis(void) static void __exit cleanup_tis(void) { - struct tpm_vendor_specific *i, *j; struct tpm_chip *chip; - mutex_lock(&tis_lock); - list_for_each_entry_safe(i, j, &tis_chips, list) { - chip = to_tpm_chip(i); - tpm_remove_hardware(chip->dev); - iowrite32(~TPM_GLOBAL_INT_ENABLE & - ioread32(chip->vendor.iobase + - TPM_INT_ENABLE(chip->vendor. - locality)), - chip->vendor.iobase + - TPM_INT_ENABLE(chip->vendor.locality)); - release_locality(chip, chip->vendor.locality, 1); - if (chip->vendor.irq) - free_irq(chip->vendor.irq, chip); - iounmap(i->iobase); - list_del(&i->list); - } - mutex_unlock(&tis_lock); #ifdef CONFIG_PNP if (!force) { pnp_unregister_driver(&tis_pnp_driver); return; } #endif + chip = dev_get_drvdata(&pdev->dev); + tpm_chip_unregister(chip); + tpm_tis_remove(chip); platform_device_unregister(pdev); platform_driver_unregister(&tis_drv); } diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c index 441b44e5422691..c3b4f5a5ac1075 100644 --- a/drivers/char/tpm/xen-tpmfront.c +++ b/drivers/char/tpm/xen-tpmfront.c @@ -175,9 +175,9 @@ static int setup_chip(struct device *dev, struct tpm_private *priv) { struct tpm_chip *chip; - chip = tpm_register_hardware(dev, &tpm_vtpm); - if (!chip) - return -ENODEV; + chip = tpmm_chip_alloc(dev, &tpm_vtpm); + if (IS_ERR(chip)) + return PTR_ERR(chip); init_waitqueue_head(&chip->vendor.read_queue); @@ -286,6 +286,7 @@ static int tpmfront_probe(struct xenbus_device *dev, const struct xenbus_device_id *id) { struct tpm_private *priv; + struct tpm_chip *chip; int rv; priv = kzalloc(sizeof(*priv), GFP_KERNEL); @@ -302,21 +303,22 @@ static int tpmfront_probe(struct xenbus_device *dev, rv = setup_ring(dev, priv); if (rv) { - tpm_remove_hardware(&dev->dev); + chip = dev_get_drvdata(&dev->dev); + tpm_chip_unregister(chip); ring_free(priv); return rv; } tpm_get_timeouts(priv->chip); - return rv; + return tpm_chip_register(priv->chip); } static int tpmfront_remove(struct xenbus_device *dev) { struct tpm_chip *chip = dev_get_drvdata(&dev->dev); struct tpm_private *priv = TPM_VPRIV(chip); - tpm_remove_hardware(&dev->dev); + tpm_chip_unregister(chip); ring_free(priv); TPM_VPRIV(chip) = NULL; return 0; From 0dc553652102c55a43eb1ab52e2049e478469f53 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 12 Dec 2014 11:46:35 -0800 Subject: [PATCH 124/601] tpm: fix raciness of PPI interface lookup Traversal of the ACPI device tree was not done right. PPI interface should be looked up only from the ACPI device that is the platform device for the TPM. This could cause problems with systems with two TPM chips such as 4th gen Intel systems. In addition, added the missing license and copyright platter to the tpm_ppi.c. Signed-off-by: Jarkko Sakkinen Reviewed-by: Jasob Gunthorpe Reviewed-by: Stefan Berger Tested-by: Scot Doyle Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm-chip.c | 4 +- drivers/char/tpm/tpm.h | 17 ++++- drivers/char/tpm/tpm_ppi.c | 141 ++++++++++++++++++++++-------------- drivers/char/tpm/tpm_tis.c | 14 +++- 4 files changed, 112 insertions(+), 64 deletions(-) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 7dc9999e2d54b2..64102de91ca3c8 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -147,7 +147,7 @@ int tpm_chip_register(struct tpm_chip *chip) if (rc) goto del_misc; - rc = tpm_add_ppi(&chip->dev->kobj); + rc = tpm_add_ppi(chip); if (rc) goto del_sysfs; @@ -191,7 +191,7 @@ void tpm_chip_unregister(struct tpm_chip *chip) if (chip->bios_dir) tpm_bios_log_teardown(chip->bios_dir); - tpm_remove_ppi(&chip->dev->kobj); + tpm_remove_ppi(chip); tpm_sysfs_del_device(chip); tpm_dev_del_device(chip); diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 72ff18c872d3e9..3409acf953f316 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -27,6 +27,7 @@ #include #include #include +#include enum tpm_const { TPM_MINOR = 224, /* officially assigned */ @@ -94,8 +95,11 @@ struct tpm_vendor_specific { #define TPM_VID_WINBOND 0x1050 #define TPM_VID_STM 0x104A +#define TPM_PPI_VERSION_LEN 3 + enum tpm_chip_flags { TPM_CHIP_FLAG_REGISTERED = BIT(0), + TPM_CHIP_FLAG_PPI = BIT(1), }; struct tpm_chip { @@ -114,6 +118,11 @@ struct tpm_chip { struct dentry **bios_dir; +#ifdef CONFIG_ACPI + acpi_handle acpi_dev_handle; + char ppi_version[TPM_PPI_VERSION_LEN + 1]; +#endif /* CONFIG_ACPI */ + struct list_head list; }; @@ -345,15 +354,15 @@ void tpm_sysfs_del_device(struct tpm_chip *chip); int tpm_pcr_read_dev(struct tpm_chip *chip, int pcr_idx, u8 *res_buf); #ifdef CONFIG_ACPI -extern int tpm_add_ppi(struct kobject *); -extern void tpm_remove_ppi(struct kobject *); +extern int tpm_add_ppi(struct tpm_chip *chip); +extern void tpm_remove_ppi(struct tpm_chip *chip); #else -static inline int tpm_add_ppi(struct kobject *parent) +static inline int tpm_add_ppi(struct tpm_chip *chip) { return 0; } -static inline void tpm_remove_ppi(struct kobject *parent) +static inline void tpm_remove_ppi(struct tpm_chip *chip) { } #endif diff --git a/drivers/char/tpm/tpm_ppi.c b/drivers/char/tpm/tpm_ppi.c index 61dcc8011ec711..af48c56f642fb1 100644 --- a/drivers/char/tpm/tpm_ppi.c +++ b/drivers/char/tpm/tpm_ppi.c @@ -1,3 +1,22 @@ +/* + * Copyright (C) 2012-2014 Intel Corporation + * + * Authors: + * Xiaoyan Zhang + * Jiang Liu + * Jarkko Sakkinen + * + * Maintained by: + * + * This file contains implementation of the sysfs interface for PPI. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + + #include #include "tpm.h" @@ -12,7 +31,6 @@ #define PPI_TPM_REQ_MAX 22 #define PPI_VS_REQ_START 128 #define PPI_VS_REQ_END 255 -#define PPI_VERSION_LEN 3 static const u8 tpm_ppi_uuid[] = { 0xA6, 0xFA, 0xDD, 0x3D, @@ -22,45 +40,22 @@ static const u8 tpm_ppi_uuid[] = { 0x8D, 0x10, 0x08, 0x9D, 0x16, 0x53 }; -static char tpm_ppi_version[PPI_VERSION_LEN + 1]; -static acpi_handle tpm_ppi_handle; - -static acpi_status ppi_callback(acpi_handle handle, u32 level, void *context, - void **return_value) -{ - union acpi_object *obj; - - if (!acpi_check_dsm(handle, tpm_ppi_uuid, TPM_PPI_REVISION_ID, - 1 << TPM_PPI_FN_VERSION)) - return AE_OK; - - /* Cache version string */ - obj = acpi_evaluate_dsm_typed(handle, tpm_ppi_uuid, - TPM_PPI_REVISION_ID, TPM_PPI_FN_VERSION, - NULL, ACPI_TYPE_STRING); - if (obj) { - strlcpy(tpm_ppi_version, obj->string.pointer, - PPI_VERSION_LEN + 1); - ACPI_FREE(obj); - } - - *return_value = handle; - - return AE_CTRL_TERMINATE; -} - static inline union acpi_object * -tpm_eval_dsm(int func, acpi_object_type type, union acpi_object *argv4) +tpm_eval_dsm(acpi_handle ppi_handle, int func, acpi_object_type type, + union acpi_object *argv4) { - BUG_ON(!tpm_ppi_handle); - return acpi_evaluate_dsm_typed(tpm_ppi_handle, tpm_ppi_uuid, - TPM_PPI_REVISION_ID, func, argv4, type); + BUG_ON(!ppi_handle); + return acpi_evaluate_dsm_typed(ppi_handle, tpm_ppi_uuid, + TPM_PPI_REVISION_ID, + func, argv4, type); } static ssize_t tpm_show_ppi_version(struct device *dev, struct device_attribute *attr, char *buf) { - return scnprintf(buf, PAGE_SIZE, "%s\n", tpm_ppi_version); + struct tpm_chip *chip = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%s\n", chip->ppi_version); } static ssize_t tpm_show_ppi_request(struct device *dev, @@ -68,8 +63,10 @@ static ssize_t tpm_show_ppi_request(struct device *dev, { ssize_t size = -EINVAL; union acpi_object *obj; + struct tpm_chip *chip = dev_get_drvdata(dev); - obj = tpm_eval_dsm(TPM_PPI_FN_GETREQ, ACPI_TYPE_PACKAGE, NULL); + obj = tpm_eval_dsm(chip->acpi_dev_handle, TPM_PPI_FN_GETREQ, + ACPI_TYPE_PACKAGE, NULL); if (!obj) return -ENXIO; @@ -103,14 +100,15 @@ static ssize_t tpm_store_ppi_request(struct device *dev, int func = TPM_PPI_FN_SUBREQ; union acpi_object *obj, tmp; union acpi_object argv4 = ACPI_INIT_DSM_ARGV4(1, &tmp); + struct tpm_chip *chip = dev_get_drvdata(dev); /* * the function to submit TPM operation request to pre-os environment * is updated with function index from SUBREQ to SUBREQ2 since PPI * version 1.1 */ - if (acpi_check_dsm(tpm_ppi_handle, tpm_ppi_uuid, TPM_PPI_REVISION_ID, - 1 << TPM_PPI_FN_SUBREQ2)) + if (acpi_check_dsm(chip->acpi_dev_handle, tpm_ppi_uuid, + TPM_PPI_REVISION_ID, 1 << TPM_PPI_FN_SUBREQ2)) func = TPM_PPI_FN_SUBREQ2; /* @@ -119,7 +117,7 @@ static ssize_t tpm_store_ppi_request(struct device *dev, * string/package type. For PPI version 1.0 and 1.1, use buffer type * for compatibility, and use package type since 1.2 according to spec. */ - if (strcmp(tpm_ppi_version, "1.2") < 0) { + if (strcmp(chip->ppi_version, "1.2") < 0) { if (sscanf(buf, "%d", &req) != 1) return -EINVAL; argv4.type = ACPI_TYPE_BUFFER; @@ -131,7 +129,8 @@ static ssize_t tpm_store_ppi_request(struct device *dev, return -EINVAL; } - obj = tpm_eval_dsm(func, ACPI_TYPE_INTEGER, &argv4); + obj = tpm_eval_dsm(chip->acpi_dev_handle, func, ACPI_TYPE_INTEGER, + &argv4); if (!obj) { return -ENXIO; } else { @@ -157,6 +156,7 @@ static ssize_t tpm_show_ppi_transition_action(struct device *dev, .buffer.length = 0, .buffer.pointer = NULL }; + struct tpm_chip *chip = dev_get_drvdata(dev); static char *info[] = { "None", @@ -171,9 +171,10 @@ static ssize_t tpm_show_ppi_transition_action(struct device *dev, * (e.g. Capella with PPI 1.0) need integer/string/buffer type, so for * compatibility, define params[3].type as buffer, if PPI version < 1.2 */ - if (strcmp(tpm_ppi_version, "1.2") < 0) + if (strcmp(chip->ppi_version, "1.2") < 0) obj = &tmp; - obj = tpm_eval_dsm(TPM_PPI_FN_GETACT, ACPI_TYPE_INTEGER, obj); + obj = tpm_eval_dsm(chip->acpi_dev_handle, TPM_PPI_FN_GETACT, + ACPI_TYPE_INTEGER, obj); if (!obj) { return -ENXIO; } else { @@ -196,8 +197,10 @@ static ssize_t tpm_show_ppi_response(struct device *dev, acpi_status status = -EINVAL; union acpi_object *obj, *ret_obj; u64 req, res; + struct tpm_chip *chip = dev_get_drvdata(dev); - obj = tpm_eval_dsm(TPM_PPI_FN_GETRSP, ACPI_TYPE_PACKAGE, NULL); + obj = tpm_eval_dsm(chip->acpi_dev_handle, TPM_PPI_FN_GETRSP, + ACPI_TYPE_PACKAGE, NULL); if (!obj) return -ENXIO; @@ -248,7 +251,8 @@ static ssize_t tpm_show_ppi_response(struct device *dev, return status; } -static ssize_t show_ppi_operations(char *buf, u32 start, u32 end) +static ssize_t show_ppi_operations(acpi_handle dev_handle, char *buf, u32 start, + u32 end) { int i; u32 ret; @@ -264,14 +268,15 @@ static ssize_t show_ppi_operations(char *buf, u32 start, u32 end) "User not required", }; - if (!acpi_check_dsm(tpm_ppi_handle, tpm_ppi_uuid, TPM_PPI_REVISION_ID, + if (!acpi_check_dsm(dev_handle, tpm_ppi_uuid, TPM_PPI_REVISION_ID, 1 << TPM_PPI_FN_GETOPR)) return -EPERM; tmp.integer.type = ACPI_TYPE_INTEGER; for (i = start; i <= end; i++) { tmp.integer.value = i; - obj = tpm_eval_dsm(TPM_PPI_FN_GETOPR, ACPI_TYPE_INTEGER, &argv); + obj = tpm_eval_dsm(dev_handle, TPM_PPI_FN_GETOPR, + ACPI_TYPE_INTEGER, &argv); if (!obj) { return -ENOMEM; } else { @@ -291,14 +296,20 @@ static ssize_t tpm_show_ppi_tcg_operations(struct device *dev, struct device_attribute *attr, char *buf) { - return show_ppi_operations(buf, 0, PPI_TPM_REQ_MAX); + struct tpm_chip *chip = dev_get_drvdata(dev); + + return show_ppi_operations(chip->acpi_dev_handle, buf, 0, + PPI_TPM_REQ_MAX); } static ssize_t tpm_show_ppi_vs_operations(struct device *dev, struct device_attribute *attr, char *buf) { - return show_ppi_operations(buf, PPI_VS_REQ_START, PPI_VS_REQ_END); + struct tpm_chip *chip = dev_get_drvdata(dev); + + return show_ppi_operations(chip->acpi_dev_handle, buf, PPI_VS_REQ_START, + PPI_VS_REQ_END); } static DEVICE_ATTR(version, S_IRUGO, tpm_show_ppi_version, NULL); @@ -323,16 +334,38 @@ static struct attribute_group ppi_attr_grp = { .attrs = ppi_attrs }; -int tpm_add_ppi(struct kobject *parent) +int tpm_add_ppi(struct tpm_chip *chip) { - /* Cache TPM ACPI handle and version string */ - acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, - ppi_callback, NULL, NULL, &tpm_ppi_handle); - return tpm_ppi_handle ? sysfs_create_group(parent, &ppi_attr_grp) : 0; + union acpi_object *obj; + int rc; + + if (!chip->acpi_dev_handle) + return 0; + + if (!acpi_check_dsm(chip->acpi_dev_handle, tpm_ppi_uuid, + TPM_PPI_REVISION_ID, 1 << TPM_PPI_FN_VERSION)) + return 0; + + /* Cache PPI version string. */ + obj = acpi_evaluate_dsm_typed(chip->acpi_dev_handle, tpm_ppi_uuid, + TPM_PPI_REVISION_ID, TPM_PPI_FN_VERSION, + NULL, ACPI_TYPE_STRING); + if (obj) { + strlcpy(chip->ppi_version, obj->string.pointer, + sizeof(chip->ppi_version)); + ACPI_FREE(obj); + } + + rc = sysfs_create_group(&chip->dev->kobj, &ppi_attr_grp); + + if (!rc) + chip->flags |= TPM_CHIP_FLAG_PPI; + + return rc; } -void tpm_remove_ppi(struct kobject *parent) +void tpm_remove_ppi(struct tpm_chip *chip) { - if (tpm_ppi_handle) - sysfs_remove_group(parent, &ppi_attr_grp); + if (chip->flags & TPM_CHIP_FLAG_PPI) + sysfs_remove_group(&chip->dev->kobj, &ppi_attr_grp); } diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 36f4fec11c2aa0..9695850d2afa19 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -580,8 +580,9 @@ static void tpm_tis_remove(struct tpm_chip *chip) release_locality(chip, chip->vendor.locality, 1); } -static int tpm_tis_init(struct device *dev, resource_size_t start, - resource_size_t len, unsigned int irq) +static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle, + resource_size_t start, resource_size_t len, + unsigned int irq) { u32 vendor, intfcaps, intmask; int rc, i, irq_s, irq_e, probe; @@ -597,6 +598,7 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, return PTR_ERR(chip); chip->vendor.priv = priv; + chip->acpi_dev_handle = acpi_dev_handle; chip->vendor.iobase = devm_ioremap(dev, start, len); if (!chip->vendor.iobase) @@ -827,6 +829,7 @@ static int tpm_tis_pnp_init(struct pnp_dev *pnp_dev, { resource_size_t start, len; unsigned int irq = 0; + acpi_handle acpi_dev_handle = NULL; start = pnp_mem_start(pnp_dev, 0); len = pnp_mem_len(pnp_dev, 0); @@ -839,7 +842,10 @@ static int tpm_tis_pnp_init(struct pnp_dev *pnp_dev, if (is_itpm(pnp_dev)) itpm = true; - return tpm_tis_init(&pnp_dev->dev, start, len, irq); + if (pnp_acpi_device(pnp_dev)) + acpi_dev_handle = pnp_acpi_device(pnp_dev)->handle; + + return tpm_tis_init(&pnp_dev->dev, acpi_dev_handle, start, len, irq); } static struct pnp_device_id tpm_pnp_tbl[] = { @@ -907,7 +913,7 @@ static int __init init_tis(void) rc = PTR_ERR(pdev); goto err_dev; } - rc = tpm_tis_init(&pdev->dev, TIS_MEM_BASE, TIS_MEM_LEN, 0); + rc = tpm_tis_init(&pdev->dev, NULL, TIS_MEM_BASE, TIS_MEM_LEN, 0); if (rc) goto err_init; return 0; From 71ed848fd791bc0b53a1b7a04f29eb9e994c7cbb Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 12 Dec 2014 11:46:36 -0800 Subject: [PATCH 125/601] tpm: rename chip->dev to chip->pdev Rename chip->dev to chip->pdev to make it explicit that this not the character device but actually represents the platform device. Signed-off-by: Jarkko Sakkinen Reviewed-by: Jasob Gunthorpe Reviewed-by: Stefan Berger Reviewed-by: Peter Huewe Tested-by: Scot Doyle Tested-by: Peter Huewe Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm-chip.c | 4 ++-- drivers/char/tpm/tpm-dev.c | 10 +++++----- drivers/char/tpm/tpm-interface.c | 29 +++++++++++++++-------------- drivers/char/tpm/tpm-sysfs.c | 6 +++--- drivers/char/tpm/tpm.h | 4 ++-- drivers/char/tpm/tpm_atmel.c | 14 +++++++------- drivers/char/tpm/tpm_i2c_atmel.c | 16 ++++++++-------- drivers/char/tpm/tpm_i2c_infineon.c | 6 +++--- drivers/char/tpm/tpm_i2c_nuvoton.c | 22 +++++++++++----------- drivers/char/tpm/tpm_i2c_stm_st33.c | 18 +++++++++--------- drivers/char/tpm/tpm_infineon.c | 22 +++++++++++----------- drivers/char/tpm/tpm_nsc.c | 20 ++++++++++---------- drivers/char/tpm/tpm_ppi.c | 4 ++-- drivers/char/tpm/tpm_tis.c | 14 +++++++------- 14 files changed, 95 insertions(+), 94 deletions(-) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 64102de91ca3c8..e72b042aa867f0 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -45,7 +45,7 @@ struct tpm_chip *tpm_chip_find_get(int chip_num) if (chip_num != TPM_ANY_NUM && chip_num != pos->dev_num) continue; - if (try_module_get(pos->dev->driver->owner)) { + if (try_module_get(pos->pdev->driver->owner)) { chip = pos; break; } @@ -110,7 +110,7 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev, scnprintf(chip->devname, sizeof(chip->devname), "tpm%d", chip->dev_num); - chip->dev = dev; + chip->pdev = dev; devm_add_action(dev, tpmm_chip_remove, chip); dev_set_drvdata(dev, chip); diff --git a/drivers/char/tpm/tpm-dev.c b/drivers/char/tpm/tpm-dev.c index d9b774e02a1fa6..356832146788ba 100644 --- a/drivers/char/tpm/tpm-dev.c +++ b/drivers/char/tpm/tpm-dev.c @@ -63,7 +63,7 @@ static int tpm_open(struct inode *inode, struct file *file) * by the check of is_open variable, which is protected * by driver_lock. */ if (test_and_set_bit(0, &chip->is_open)) { - dev_dbg(chip->dev, "Another process owns this TPM\n"); + dev_dbg(chip->pdev, "Another process owns this TPM\n"); return -EBUSY; } @@ -81,7 +81,7 @@ static int tpm_open(struct inode *inode, struct file *file) INIT_WORK(&priv->work, timeout_work); file->private_data = priv; - get_device(chip->dev); + get_device(chip->pdev); return 0; } @@ -168,7 +168,7 @@ static int tpm_release(struct inode *inode, struct file *file) file->private_data = NULL; atomic_set(&priv->data_pending, 0); clear_bit(0, &priv->chip->is_open); - put_device(priv->chip->dev); + put_device(priv->chip->pdev); kfree(priv); return 0; } @@ -193,12 +193,12 @@ int tpm_dev_add_device(struct tpm_chip *chip) chip->vendor.miscdev.minor = MISC_DYNAMIC_MINOR; chip->vendor.miscdev.name = chip->devname; - chip->vendor.miscdev.parent = chip->dev; + chip->vendor.miscdev.parent = chip->pdev; rc = misc_register(&chip->vendor.miscdev); if (rc) { chip->vendor.miscdev.name = NULL; - dev_err(chip->dev, + dev_err(chip->pdev, "unable to misc_register %s, minor %d err=%d\n", chip->vendor.miscdev.name, chip->vendor.miscdev.minor, rc); diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 4dbed1e45abd7a..e2af28fd63cb60 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -343,7 +343,7 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf, if (count == 0) return -ENODATA; if (count > bufsiz) { - dev_err(chip->dev, + dev_err(chip->pdev, "invalid count value %x %zx\n", count, bufsiz); return -E2BIG; } @@ -352,7 +352,7 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf, rc = chip->ops->send(chip, (u8 *) buf, count); if (rc < 0) { - dev_err(chip->dev, + dev_err(chip->pdev, "tpm_transmit: tpm_send: error %zd\n", rc); goto out; } @@ -368,7 +368,7 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf, goto out_recv; if (chip->ops->req_canceled(chip, status)) { - dev_err(chip->dev, "Operation Canceled\n"); + dev_err(chip->pdev, "Operation Canceled\n"); rc = -ECANCELED; goto out; } @@ -378,14 +378,14 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf, } while (time_before(jiffies, stop)); chip->ops->cancel(chip); - dev_err(chip->dev, "Operation Timed out\n"); + dev_err(chip->pdev, "Operation Timed out\n"); rc = -ETIME; goto out; out_recv: rc = chip->ops->recv(chip, (u8 *) buf, bufsiz); if (rc < 0) - dev_err(chip->dev, + dev_err(chip->pdev, "tpm_transmit: tpm_recv: error %zd\n", rc); out: mutex_unlock(&chip->tpm_mutex); @@ -411,7 +411,8 @@ ssize_t tpm_transmit_cmd(struct tpm_chip *chip, void *cmd, err = be32_to_cpu(header->return_code); if (err != 0 && desc) - dev_err(chip->dev, "A TPM error (%d) occurred %s\n", err, desc); + dev_err(chip->pdev, "A TPM error (%d) occurred %s\n", err, + desc); return err; } @@ -505,7 +506,7 @@ int tpm_get_timeouts(struct tpm_chip *chip) if (rc == TPM_ERR_INVALID_POSTINIT) { /* The TPM is not started, we are the first to talk to it. Execute a startup command. */ - dev_info(chip->dev, "Issuing TPM_STARTUP"); + dev_info(chip->pdev, "Issuing TPM_STARTUP"); if (tpm_startup(chip, TPM_ST_CLEAR)) return rc; @@ -517,7 +518,7 @@ int tpm_get_timeouts(struct tpm_chip *chip) NULL); } if (rc) { - dev_err(chip->dev, + dev_err(chip->pdev, "A TPM error (%zd) occurred attempting to determine the timeouts\n", rc); goto duration; @@ -556,7 +557,7 @@ int tpm_get_timeouts(struct tpm_chip *chip) /* Report adjusted timeouts */ if (chip->vendor.timeout_adjusted) { - dev_info(chip->dev, + dev_info(chip->pdev, HW_ERR "Adjusting reported timeouts: A %lu->%luus B %lu->%luus C %lu->%luus D %lu->%luus\n", old_timeout[0], new_timeout[0], old_timeout[1], new_timeout[1], @@ -603,7 +604,7 @@ int tpm_get_timeouts(struct tpm_chip *chip) chip->vendor.duration[TPM_MEDIUM] *= 1000; chip->vendor.duration[TPM_LONG] *= 1000; chip->vendor.duration_adjusted = true; - dev_info(chip->dev, "Adjusting TPM timeout parameters."); + dev_info(chip->pdev, "Adjusting TPM timeout parameters."); } return 0; } @@ -760,7 +761,7 @@ int tpm_do_selftest(struct tpm_chip *chip) * around 300ms while the self test is ongoing, keep trying * until the self test duration expires. */ if (rc == -ETIME) { - dev_info(chip->dev, HW_ERR "TPM command timed out during continue self test"); + dev_info(chip->pdev, HW_ERR "TPM command timed out during continue self test"); msleep(delay_msec); continue; } @@ -770,7 +771,7 @@ int tpm_do_selftest(struct tpm_chip *chip) rc = be32_to_cpu(cmd.header.out.return_code); if (rc == TPM_ERR_DISABLED || rc == TPM_ERR_DEACTIVATED) { - dev_info(chip->dev, + dev_info(chip->pdev, "TPM is disabled/deactivated (0x%X)\n", rc); /* TPM is disabled and/or deactivated; driver can * proceed and TPM does handle commands for @@ -918,10 +919,10 @@ int tpm_pm_suspend(struct device *dev) } if (rc) - dev_err(chip->dev, + dev_err(chip->pdev, "Error (%d) sending savestate before suspend\n", rc); else if (try > 0) - dev_warn(chip->dev, "TPM savestate took %dms\n", + dev_warn(chip->pdev, "TPM savestate took %dms\n", try * TPM_TIMEOUT_RETRY); return rc; diff --git a/drivers/char/tpm/tpm-sysfs.c b/drivers/char/tpm/tpm-sysfs.c index 8ecb052e290d32..ee66fd4673f359 100644 --- a/drivers/char/tpm/tpm-sysfs.c +++ b/drivers/char/tpm/tpm-sysfs.c @@ -284,16 +284,16 @@ static const struct attribute_group tpm_dev_group = { int tpm_sysfs_add_device(struct tpm_chip *chip) { int err; - err = sysfs_create_group(&chip->dev->kobj, + err = sysfs_create_group(&chip->pdev->kobj, &tpm_dev_group); if (err) - dev_err(chip->dev, + dev_err(chip->pdev, "failed to create sysfs attributes, %d\n", err); return err; } void tpm_sysfs_del_device(struct tpm_chip *chip) { - sysfs_remove_group(&chip->dev->kobj, &tpm_dev_group); + sysfs_remove_group(&chip->pdev->kobj, &tpm_dev_group); } diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 3409acf953f316..adf6af835329fe 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -103,7 +103,7 @@ enum tpm_chip_flags { }; struct tpm_chip { - struct device *dev; /* Device stuff */ + struct device *pdev; /* Device stuff */ const struct tpm_class_ops *ops; unsigned int flags; @@ -130,7 +130,7 @@ struct tpm_chip { static inline void tpm_chip_put(struct tpm_chip *chip) { - module_put(chip->dev->driver->owner); + module_put(chip->pdev->driver->owner); } static inline int tpm_read_index(int base, int index) diff --git a/drivers/char/tpm/tpm_atmel.c b/drivers/char/tpm/tpm_atmel.c index 3d4c6c3b0433ee..dfadad0916a1e8 100644 --- a/drivers/char/tpm/tpm_atmel.c +++ b/drivers/char/tpm/tpm_atmel.c @@ -49,7 +49,7 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count) for (i = 0; i < 6; i++) { status = ioread8(chip->vendor.iobase + 1); if ((status & ATML_STATUS_DATA_AVAIL) == 0) { - dev_err(chip->dev, "error reading header\n"); + dev_err(chip->pdev, "error reading header\n"); return -EIO; } *buf++ = ioread8(chip->vendor.iobase); @@ -60,12 +60,12 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count) size = be32_to_cpu(*native_size); if (count < size) { - dev_err(chip->dev, + dev_err(chip->pdev, "Recv size(%d) less than available space\n", size); for (; i < size; i++) { /* clear the waiting data anyway */ status = ioread8(chip->vendor.iobase + 1); if ((status & ATML_STATUS_DATA_AVAIL) == 0) { - dev_err(chip->dev, "error reading data\n"); + dev_err(chip->pdev, "error reading data\n"); return -EIO; } } @@ -76,7 +76,7 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count) for (; i < size; i++) { status = ioread8(chip->vendor.iobase + 1); if ((status & ATML_STATUS_DATA_AVAIL) == 0) { - dev_err(chip->dev, "error reading data\n"); + dev_err(chip->pdev, "error reading data\n"); return -EIO; } *buf++ = ioread8(chip->vendor.iobase); @@ -86,7 +86,7 @@ static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count) status = ioread8(chip->vendor.iobase + 1); if (status & ATML_STATUS_DATA_AVAIL) { - dev_err(chip->dev, "data available is stuck\n"); + dev_err(chip->pdev, "data available is stuck\n"); return -EIO; } @@ -97,9 +97,9 @@ static int tpm_atml_send(struct tpm_chip *chip, u8 *buf, size_t count) { int i; - dev_dbg(chip->dev, "tpm_atml_send:\n"); + dev_dbg(chip->pdev, "tpm_atml_send:\n"); for (i = 0; i < count; i++) { - dev_dbg(chip->dev, "%d 0x%x(%d)\n", i, buf[i], buf[i]); + dev_dbg(chip->pdev, "%d 0x%x(%d)\n", i, buf[i], buf[i]); iowrite8(buf[i], chip->vendor.iobase); } diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c index 643a9402e911da..7a0ca78ad3c68a 100644 --- a/drivers/char/tpm/tpm_i2c_atmel.c +++ b/drivers/char/tpm/tpm_i2c_atmel.c @@ -52,7 +52,7 @@ struct priv_data { static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len) { struct priv_data *priv = chip->vendor.priv; - struct i2c_client *client = to_i2c_client(chip->dev); + struct i2c_client *client = to_i2c_client(chip->pdev); s32 status; priv->len = 0; @@ -62,7 +62,7 @@ static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len) status = i2c_master_send(client, buf, len); - dev_dbg(chip->dev, + dev_dbg(chip->pdev, "%s(buf=%*ph len=%0zx) -> sts=%d\n", __func__, (int)min_t(size_t, 64, len), buf, len, status); return status; @@ -71,7 +71,7 @@ static int i2c_atmel_send(struct tpm_chip *chip, u8 *buf, size_t len) static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count) { struct priv_data *priv = chip->vendor.priv; - struct i2c_client *client = to_i2c_client(chip->dev); + struct i2c_client *client = to_i2c_client(chip->pdev); struct tpm_output_header *hdr = (struct tpm_output_header *)priv->buffer; u32 expected_len; @@ -88,7 +88,7 @@ static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count) return -ENOMEM; if (priv->len >= expected_len) { - dev_dbg(chip->dev, + dev_dbg(chip->pdev, "%s early(buf=%*ph count=%0zx) -> ret=%d\n", __func__, (int)min_t(size_t, 64, expected_len), buf, count, expected_len); @@ -97,7 +97,7 @@ static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count) } rc = i2c_master_recv(client, buf, expected_len); - dev_dbg(chip->dev, + dev_dbg(chip->pdev, "%s reread(buf=%*ph count=%0zx) -> ret=%d\n", __func__, (int)min_t(size_t, 64, expected_len), buf, count, expected_len); @@ -106,13 +106,13 @@ static int i2c_atmel_recv(struct tpm_chip *chip, u8 *buf, size_t count) static void i2c_atmel_cancel(struct tpm_chip *chip) { - dev_err(chip->dev, "TPM operation cancellation was requested, but is not supported"); + dev_err(chip->pdev, "TPM operation cancellation was requested, but is not supported"); } static u8 i2c_atmel_read_status(struct tpm_chip *chip) { struct priv_data *priv = chip->vendor.priv; - struct i2c_client *client = to_i2c_client(chip->dev); + struct i2c_client *client = to_i2c_client(chip->pdev); int rc; /* The TPM fails the I2C read until it is ready, so we do the entire @@ -125,7 +125,7 @@ static u8 i2c_atmel_read_status(struct tpm_chip *chip) /* Once the TPM has completed the command the command remains readable * until another command is issued. */ rc = i2c_master_recv(client, priv->buffer, sizeof(priv->buffer)); - dev_dbg(chip->dev, + dev_dbg(chip->pdev, "%s: sts=%d", __func__, rc); if (rc <= 0) return 0; diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c index 03708e6b309a39..33c5f360ab01e9 100644 --- a/drivers/char/tpm/tpm_i2c_infineon.c +++ b/drivers/char/tpm/tpm_i2c_infineon.c @@ -446,7 +446,7 @@ static int tpm_tis_i2c_recv(struct tpm_chip *chip, u8 *buf, size_t count) /* read first 10 bytes, including tag, paramsize, and result */ size = recv_data(chip, buf, TPM_HEADER_SIZE); if (size < TPM_HEADER_SIZE) { - dev_err(chip->dev, "Unable to read header\n"); + dev_err(chip->pdev, "Unable to read header\n"); goto out; } @@ -459,14 +459,14 @@ static int tpm_tis_i2c_recv(struct tpm_chip *chip, u8 *buf, size_t count) size += recv_data(chip, &buf[TPM_HEADER_SIZE], expected - TPM_HEADER_SIZE); if (size < expected) { - dev_err(chip->dev, "Unable to read remainder of result\n"); + dev_err(chip->pdev, "Unable to read remainder of result\n"); size = -ETIME; goto out; } wait_for_stat(chip, TPM_STS_VALID, chip->vendor.timeout_c, &status); if (status & TPM_STS_DATA_AVAIL) { /* retry? */ - dev_err(chip->dev, "Error left over data\n"); + dev_err(chip->pdev, "Error left over data\n"); size = -EIO; goto out; } diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c index 8c23088d99ef30..e1eadb0c156894 100644 --- a/drivers/char/tpm/tpm_i2c_nuvoton.c +++ b/drivers/char/tpm/tpm_i2c_nuvoton.c @@ -96,13 +96,13 @@ static s32 i2c_nuvoton_write_buf(struct i2c_client *client, u8 offset, u8 size, /* read TPM_STS register */ static u8 i2c_nuvoton_read_status(struct tpm_chip *chip) { - struct i2c_client *client = to_i2c_client(chip->dev); + struct i2c_client *client = to_i2c_client(chip->pdev); s32 status; u8 data; status = i2c_nuvoton_read_buf(client, TPM_STS, 1, &data); if (status <= 0) { - dev_err(chip->dev, "%s() error return %d\n", __func__, + dev_err(chip->pdev, "%s() error return %d\n", __func__, status); data = TPM_STS_ERR_VAL; } @@ -127,13 +127,13 @@ static s32 i2c_nuvoton_write_status(struct i2c_client *client, u8 data) /* write commandReady to TPM_STS register */ static void i2c_nuvoton_ready(struct tpm_chip *chip) { - struct i2c_client *client = to_i2c_client(chip->dev); + struct i2c_client *client = to_i2c_client(chip->pdev); s32 status; /* this causes the current command to be aborted */ status = i2c_nuvoton_write_status(client, TPM_STS_COMMAND_READY); if (status < 0) - dev_err(chip->dev, + dev_err(chip->pdev, "%s() fail to write TPM_STS.commandReady\n", __func__); } @@ -212,7 +212,7 @@ static int i2c_nuvoton_wait_for_stat(struct tpm_chip *chip, u8 mask, u8 value, return 0; } while (time_before(jiffies, stop)); } - dev_err(chip->dev, "%s(%02x, %02x) -> timeout\n", __func__, mask, + dev_err(chip->pdev, "%s(%02x, %02x) -> timeout\n", __func__, mask, value); return -ETIMEDOUT; } @@ -240,7 +240,7 @@ static int i2c_nuvoton_recv_data(struct i2c_client *client, &chip->vendor.read_queue) == 0) { burst_count = i2c_nuvoton_get_burstcount(client, chip); if (burst_count < 0) { - dev_err(chip->dev, + dev_err(chip->pdev, "%s() fail to read burstCount=%d\n", __func__, burst_count); return -EIO; @@ -249,12 +249,12 @@ static int i2c_nuvoton_recv_data(struct i2c_client *client, rc = i2c_nuvoton_read_buf(client, TPM_DATA_FIFO_R, bytes2read, &buf[size]); if (rc < 0) { - dev_err(chip->dev, + dev_err(chip->pdev, "%s() fail on i2c_nuvoton_read_buf()=%d\n", __func__, rc); return -EIO; } - dev_dbg(chip->dev, "%s(%d):", __func__, bytes2read); + dev_dbg(chip->pdev, "%s(%d):", __func__, bytes2read); size += bytes2read; } @@ -264,7 +264,7 @@ static int i2c_nuvoton_recv_data(struct i2c_client *client, /* Read TPM command results */ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count) { - struct device *dev = chip->dev; + struct device *dev = chip->pdev; struct i2c_client *client = to_i2c_client(dev); s32 rc; int expected, status, burst_count, retries, size = 0; @@ -334,7 +334,7 @@ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count) break; } i2c_nuvoton_ready(chip); - dev_dbg(chip->dev, "%s() -> %d\n", __func__, size); + dev_dbg(chip->pdev, "%s() -> %d\n", __func__, size); return size; } @@ -347,7 +347,7 @@ static int i2c_nuvoton_recv(struct tpm_chip *chip, u8 *buf, size_t count) */ static int i2c_nuvoton_send(struct tpm_chip *chip, u8 *buf, size_t len) { - struct device *dev = chip->dev; + struct device *dev = chip->pdev; struct i2c_client *client = to_i2c_client(dev); u32 ordinal; size_t count = 0; diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index 9a96d3704fe578..48c4808d5a7a0c 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -588,7 +588,7 @@ static int tpm_stm_i2c_recv(struct tpm_chip *chip, unsigned char *buf, size = recv_data(chip, buf, TPM_HEADER_SIZE); if (size < TPM_HEADER_SIZE) { - dev_err(chip->dev, "Unable to read header\n"); + dev_err(chip->pdev, "Unable to read header\n"); goto out; } @@ -601,7 +601,7 @@ static int tpm_stm_i2c_recv(struct tpm_chip *chip, unsigned char *buf, size += recv_data(chip, &buf[TPM_HEADER_SIZE], expected - TPM_HEADER_SIZE); if (size < expected) { - dev_err(chip->dev, "Unable to read remainder of result\n"); + dev_err(chip->pdev, "Unable to read remainder of result\n"); size = -ETIME; goto out; } @@ -639,14 +639,14 @@ static int tpm_stm_i2c_of_request_resources(struct tpm_chip *chip) pp = client->dev.of_node; if (!pp) { - dev_err(chip->dev, "No platform data\n"); + dev_err(chip->pdev, "No platform data\n"); return -ENODEV; } /* Get GPIO from device tree */ gpio = of_get_named_gpio(pp, "lpcpd-gpios", 0); if (gpio < 0) { - dev_err(chip->dev, "Failed to retrieve lpcpd-gpios from dts.\n"); + dev_err(chip->pdev, "Failed to retrieve lpcpd-gpios from dts.\n"); tpm_dev->io_lpcpd = -1; /* * lpcpd pin is not specified. This is not an issue as @@ -659,7 +659,7 @@ static int tpm_stm_i2c_of_request_resources(struct tpm_chip *chip) ret = devm_gpio_request_one(&client->dev, gpio, GPIOF_OUT_INIT_HIGH, "TPM IO LPCPD"); if (ret) { - dev_err(chip->dev, "Failed to request lpcpd pin\n"); + dev_err(chip->pdev, "Failed to request lpcpd pin\n"); return -ENODEV; } tpm_dev->io_lpcpd = gpio; @@ -682,7 +682,7 @@ static int tpm_stm_i2c_request_resources(struct i2c_client *client, pdata = client->dev.platform_data; if (!pdata) { - dev_err(chip->dev, "No platform data\n"); + dev_err(chip->pdev, "No platform data\n"); return -ENODEV; } @@ -694,7 +694,7 @@ static int tpm_stm_i2c_request_resources(struct i2c_client *client, pdata->io_lpcpd, GPIOF_OUT_INIT_HIGH, "TPM IO_LPCPD"); if (ret) { - dev_err(chip->dev, "%s : reset gpio_request failed\n", + dev_err(chip->pdev, "%s : reset gpio_request failed\n", __FILE__); return ret; } @@ -776,7 +776,7 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) IRQF_TRIGGER_HIGH, "TPM SERIRQ management", chip); if (ret < 0) { - dev_err(chip->dev , "TPM SERIRQ signals %d not available\n", + dev_err(chip->pdev , "TPM SERIRQ signals %d not available\n", client->irq); goto _tpm_clean_answer; } @@ -807,7 +807,7 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) return tpm_chip_register(chip); _tpm_clean_answer: - dev_info(chip->dev, "TPM I2C initialisation fail\n"); + dev_info(chip->pdev, "TPM I2C initialisation fail\n"); return ret; } diff --git a/drivers/char/tpm/tpm_infineon.c b/drivers/char/tpm/tpm_infineon.c index dcdb671b2a5df1..6d492132ad2b75 100644 --- a/drivers/char/tpm/tpm_infineon.c +++ b/drivers/char/tpm/tpm_infineon.c @@ -195,9 +195,9 @@ static int wait(struct tpm_chip *chip, int wait_for_bit) } if (i == TPM_MAX_TRIES) { /* timeout occurs */ if (wait_for_bit == STAT_XFE) - dev_err(chip->dev, "Timeout in wait(STAT_XFE)\n"); + dev_err(chip->pdev, "Timeout in wait(STAT_XFE)\n"); if (wait_for_bit == STAT_RDA) - dev_err(chip->dev, "Timeout in wait(STAT_RDA)\n"); + dev_err(chip->pdev, "Timeout in wait(STAT_RDA)\n"); return -EIO; } return 0; @@ -220,7 +220,7 @@ static void wait_and_send(struct tpm_chip *chip, u8 sendbyte) static void tpm_wtx(struct tpm_chip *chip) { number_of_wtx++; - dev_info(chip->dev, "Granting WTX (%02d / %02d)\n", + dev_info(chip->pdev, "Granting WTX (%02d / %02d)\n", number_of_wtx, TPM_MAX_WTX_PACKAGES); wait_and_send(chip, TPM_VL_VER); wait_and_send(chip, TPM_CTRL_WTX); @@ -231,7 +231,7 @@ static void tpm_wtx(struct tpm_chip *chip) static void tpm_wtx_abort(struct tpm_chip *chip) { - dev_info(chip->dev, "Aborting WTX\n"); + dev_info(chip->pdev, "Aborting WTX\n"); wait_and_send(chip, TPM_VL_VER); wait_and_send(chip, TPM_CTRL_WTX_ABORT); wait_and_send(chip, 0x00); @@ -257,7 +257,7 @@ static int tpm_inf_recv(struct tpm_chip *chip, u8 * buf, size_t count) } if (buf[0] != TPM_VL_VER) { - dev_err(chip->dev, + dev_err(chip->pdev, "Wrong transport protocol implementation!\n"); return -EIO; } @@ -272,7 +272,7 @@ static int tpm_inf_recv(struct tpm_chip *chip, u8 * buf, size_t count) } if ((size == 0x6D00) && (buf[1] == 0x80)) { - dev_err(chip->dev, "Error handling on vendor layer!\n"); + dev_err(chip->pdev, "Error handling on vendor layer!\n"); return -EIO; } @@ -284,7 +284,7 @@ static int tpm_inf_recv(struct tpm_chip *chip, u8 * buf, size_t count) } if (buf[1] == TPM_CTRL_WTX) { - dev_info(chip->dev, "WTX-package received\n"); + dev_info(chip->pdev, "WTX-package received\n"); if (number_of_wtx < TPM_MAX_WTX_PACKAGES) { tpm_wtx(chip); goto recv_begin; @@ -295,14 +295,14 @@ static int tpm_inf_recv(struct tpm_chip *chip, u8 * buf, size_t count) } if (buf[1] == TPM_CTRL_WTX_ABORT_ACK) { - dev_info(chip->dev, "WTX-abort acknowledged\n"); + dev_info(chip->pdev, "WTX-abort acknowledged\n"); return size; } if (buf[1] == TPM_CTRL_ERROR) { - dev_err(chip->dev, "ERROR-package received:\n"); + dev_err(chip->pdev, "ERROR-package received:\n"); if (buf[4] == TPM_INF_NAK) - dev_err(chip->dev, + dev_err(chip->pdev, "-> Negative acknowledgement" " - retransmit command!\n"); return -EIO; @@ -321,7 +321,7 @@ static int tpm_inf_send(struct tpm_chip *chip, u8 * buf, size_t count) ret = empty_fifo(chip, 1); if (ret) { - dev_err(chip->dev, "Timeout while clearing FIFO\n"); + dev_err(chip->pdev, "Timeout while clearing FIFO\n"); return -EIO; } diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c index 6e2c2e64b292e7..289389ecef84f0 100644 --- a/drivers/char/tpm/tpm_nsc.c +++ b/drivers/char/tpm/tpm_nsc.c @@ -113,7 +113,7 @@ static int nsc_wait_for_ready(struct tpm_chip *chip) } while (time_before(jiffies, stop)); - dev_info(chip->dev, "wait for ready failed\n"); + dev_info(chip->pdev, "wait for ready failed\n"); return -EBUSY; } @@ -129,12 +129,12 @@ static int tpm_nsc_recv(struct tpm_chip *chip, u8 * buf, size_t count) return -EIO; if (wait_for_stat(chip, NSC_STATUS_F0, NSC_STATUS_F0, &data) < 0) { - dev_err(chip->dev, "F0 timeout\n"); + dev_err(chip->pdev, "F0 timeout\n"); return -EIO; } if ((data = inb(chip->vendor.base + NSC_DATA)) != NSC_COMMAND_NORMAL) { - dev_err(chip->dev, "not in normal mode (0x%x)\n", + dev_err(chip->pdev, "not in normal mode (0x%x)\n", data); return -EIO; } @@ -143,7 +143,7 @@ static int tpm_nsc_recv(struct tpm_chip *chip, u8 * buf, size_t count) for (p = buffer; p < &buffer[count]; p++) { if (wait_for_stat (chip, NSC_STATUS_OBF, NSC_STATUS_OBF, &data) < 0) { - dev_err(chip->dev, + dev_err(chip->pdev, "OBF timeout (while reading data)\n"); return -EIO; } @@ -154,11 +154,11 @@ static int tpm_nsc_recv(struct tpm_chip *chip, u8 * buf, size_t count) if ((data & NSC_STATUS_F0) == 0 && (wait_for_stat(chip, NSC_STATUS_F0, NSC_STATUS_F0, &data) < 0)) { - dev_err(chip->dev, "F0 not set\n"); + dev_err(chip->pdev, "F0 not set\n"); return -EIO; } if ((data = inb(chip->vendor.base + NSC_DATA)) != NSC_COMMAND_EOC) { - dev_err(chip->dev, + dev_err(chip->pdev, "expected end of command(0x%x)\n", data); return -EIO; } @@ -189,19 +189,19 @@ static int tpm_nsc_send(struct tpm_chip *chip, u8 * buf, size_t count) return -EIO; if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { - dev_err(chip->dev, "IBF timeout\n"); + dev_err(chip->pdev, "IBF timeout\n"); return -EIO; } outb(NSC_COMMAND_NORMAL, chip->vendor.base + NSC_COMMAND); if (wait_for_stat(chip, NSC_STATUS_IBR, NSC_STATUS_IBR, &data) < 0) { - dev_err(chip->dev, "IBR timeout\n"); + dev_err(chip->pdev, "IBR timeout\n"); return -EIO; } for (i = 0; i < count; i++) { if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { - dev_err(chip->dev, + dev_err(chip->pdev, "IBF timeout (while writing data)\n"); return -EIO; } @@ -209,7 +209,7 @@ static int tpm_nsc_send(struct tpm_chip *chip, u8 * buf, size_t count) } if (wait_for_stat(chip, NSC_STATUS_IBF, 0, &data) < 0) { - dev_err(chip->dev, "IBF timeout\n"); + dev_err(chip->pdev, "IBF timeout\n"); return -EIO; } outb(NSC_COMMAND_EOC, chip->vendor.base + NSC_COMMAND); diff --git a/drivers/char/tpm/tpm_ppi.c b/drivers/char/tpm/tpm_ppi.c index af48c56f642fb1..6ca9b5d78144bb 100644 --- a/drivers/char/tpm/tpm_ppi.c +++ b/drivers/char/tpm/tpm_ppi.c @@ -356,7 +356,7 @@ int tpm_add_ppi(struct tpm_chip *chip) ACPI_FREE(obj); } - rc = sysfs_create_group(&chip->dev->kobj, &ppi_attr_grp); + rc = sysfs_create_group(&chip->pdev->kobj, &ppi_attr_grp); if (!rc) chip->flags |= TPM_CHIP_FLAG_PPI; @@ -367,5 +367,5 @@ int tpm_add_ppi(struct tpm_chip *chip) void tpm_remove_ppi(struct tpm_chip *chip) { if (chip->flags & TPM_CHIP_FLAG_PPI) - sysfs_remove_group(&chip->dev->kobj, &ppi_attr_grp); + sysfs_remove_group(&chip->pdev->kobj, &ppi_attr_grp); } diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 9695850d2afa19..9e02489a94f3b6 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -242,7 +242,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count) /* read first 10 bytes, including tag, paramsize, and result */ if ((size = recv_data(chip, buf, TPM_HEADER_SIZE)) < TPM_HEADER_SIZE) { - dev_err(chip->dev, "Unable to read header\n"); + dev_err(chip->pdev, "Unable to read header\n"); goto out; } @@ -255,7 +255,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count) if ((size += recv_data(chip, &buf[TPM_HEADER_SIZE], expected - TPM_HEADER_SIZE)) < expected) { - dev_err(chip->dev, "Unable to read remainder of result\n"); + dev_err(chip->pdev, "Unable to read remainder of result\n"); size = -ETIME; goto out; } @@ -264,7 +264,7 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count) &chip->vendor.int_queue, false); status = tpm_tis_status(chip); if (status & TPM_STS_DATA_AVAIL) { /* retry? */ - dev_err(chip->dev, "Error left over data\n"); + dev_err(chip->pdev, "Error left over data\n"); size = -EIO; goto out; } @@ -406,7 +406,7 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) msleep(1); if (!priv->irq_tested) { disable_interrupts(chip); - dev_err(chip->dev, + dev_err(chip->pdev, FW_BUG "TPM interrupt not working, polling instead\n"); } priv->irq_tested = true; @@ -476,7 +476,7 @@ static int probe_itpm(struct tpm_chip *chip) rc = tpm_tis_send_data(chip, cmd_getticks, len); if (rc == 0) { - dev_info(chip->dev, "Detected an iTPM.\n"); + dev_info(chip->pdev, "Detected an iTPM.\n"); rc = 1; } else rc = -EFAULT; @@ -699,7 +699,7 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle, if (devm_request_irq (dev, i, tis_int_probe, IRQF_SHARED, chip->vendor.miscdev.name, chip) != 0) { - dev_info(chip->dev, + dev_info(chip->pdev, "Unable to request irq: %d for probe\n", i); continue; @@ -746,7 +746,7 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle, if (devm_request_irq (dev, chip->vendor.irq, tis_int_handler, IRQF_SHARED, chip->vendor.miscdev.name, chip) != 0) { - dev_info(chip->dev, + dev_info(chip->pdev, "Unable to request irq: %d for use\n", chip->vendor.irq); chip->vendor.irq = 0; From 313d21eeab9282e01fdcecd40e9ca87e0953627f Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 12 Dec 2014 11:46:37 -0800 Subject: [PATCH 126/601] tpm: device class for tpm Added own device class for TPM. Uses MISC_MAJOR:TPM_MINOR for the first character device in order to retain backwards compatibility. Added tpm_dev_release() back attached to the character device. I've been running this code now for a while on my laptop (Lenovo T430S) TrouSerS works perfectly without modifications. I don't believe it breaks anything significantly. The sysfs attributes that have been placed under the wrong place and are against sysfs-rules.txt should be probably left to stagnate under platform device directory and start defining new sysfs attributes to the char device directory. Guidelines for future TPM sysfs attributes should be probably along the lines of - Single flat set of mandatory sysfs attributes. For example, current PPI interface is way way too rich when you only want to use it to clear and activate the TPM. - Define sysfs attribute if and only if there's no way to get the value from ring-3. No attributes for TPM properties. It's just unnecessary maintenance hurdle that we don't want. Signed-off-by: Jarkko Sakkinen Reviewed-by: Jasob Gunthorpe Reviewed-by: Stefan Berger Tested-by: Scot Doyle Tested-by: Peter Huewe Signed-off-by: Peter Huewe --- Documentation/ABI/stable/sysfs-class-tpm | 22 ++++---- drivers/char/tpm/tpm-chip.c | 72 ++++++++++++++++++++---- drivers/char/tpm/tpm-dev.c | 36 +----------- drivers/char/tpm/tpm-interface.c | 29 ++++++++++ drivers/char/tpm/tpm.h | 12 ++-- drivers/char/tpm/tpm_i2c_nuvoton.c | 2 +- drivers/char/tpm/tpm_tis.c | 4 +- 7 files changed, 116 insertions(+), 61 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-class-tpm b/Documentation/ABI/stable/sysfs-class-tpm index a60b45e2493b66..9f790eebb5d2b0 100644 --- a/Documentation/ABI/stable/sysfs-class-tpm +++ b/Documentation/ABI/stable/sysfs-class-tpm @@ -1,4 +1,4 @@ -What: /sys/class/misc/tpmX/device/ +What: /sys/class/tpm/tpmX/device/ Date: April 2005 KernelVersion: 2.6.12 Contact: tpmdd-devel@lists.sf.net @@ -6,7 +6,7 @@ Description: The device/ directory under a specific TPM instance exposes the properties of that TPM chip -What: /sys/class/misc/tpmX/device/active +What: /sys/class/tpm/tpmX/device/active Date: April 2006 KernelVersion: 2.6.17 Contact: tpmdd-devel@lists.sf.net @@ -18,7 +18,7 @@ Description: The "active" property prints a '1' if the TPM chip is accepting section 17 for more information on which commands are available. -What: /sys/class/misc/tpmX/device/cancel +What: /sys/class/tpm/tpmX/device/cancel Date: June 2005 KernelVersion: 2.6.13 Contact: tpmdd-devel@lists.sf.net @@ -26,7 +26,7 @@ Description: The "cancel" property allows you to cancel the currently pending TPM command. Writing any value to cancel will call the TPM vendor specific cancel operation. -What: /sys/class/misc/tpmX/device/caps +What: /sys/class/tpm/tpmX/device/caps Date: April 2005 KernelVersion: 2.6.12 Contact: tpmdd-devel@lists.sf.net @@ -43,7 +43,7 @@ Description: The "caps" property contains TPM manufacturer and version info. the chip supports. Firmware version is that of the chip and is manufacturer specific. -What: /sys/class/misc/tpmX/device/durations +What: /sys/class/tpm/tpmX/device/durations Date: March 2011 KernelVersion: 3.1 Contact: tpmdd-devel@lists.sf.net @@ -66,7 +66,7 @@ Description: The "durations" property shows the 3 vendor-specific values scaled to be displayed in usecs. In this case "[adjusted]" will be displayed in place of "[original]". -What: /sys/class/misc/tpmX/device/enabled +What: /sys/class/tpm/tpmX/device/enabled Date: April 2006 KernelVersion: 2.6.17 Contact: tpmdd-devel@lists.sf.net @@ -75,7 +75,7 @@ Description: The "enabled" property prints a '1' if the TPM chip is enabled, may be visible but produce a '0' after some operation that disables the TPM. -What: /sys/class/misc/tpmX/device/owned +What: /sys/class/tpm/tpmX/device/owned Date: April 2006 KernelVersion: 2.6.17 Contact: tpmdd-devel@lists.sf.net @@ -83,7 +83,7 @@ Description: The "owned" property produces a '1' if the TPM_TakeOwnership ordinal has been executed successfully in the chip. A '0' indicates that ownership hasn't been taken. -What: /sys/class/misc/tpmX/device/pcrs +What: /sys/class/tpm/tpmX/device/pcrs Date: April 2005 KernelVersion: 2.6.12 Contact: tpmdd-devel@lists.sf.net @@ -106,7 +106,7 @@ Description: The "pcrs" property will dump the current value of all Platform 1.2 chips, PCRs represent SHA-1 hashes, which are 20 bytes long. Use the "caps" property to determine TPM version. -What: /sys/class/misc/tpmX/device/pubek +What: /sys/class/tpm/tpmX/device/pubek Date: April 2005 KernelVersion: 2.6.12 Contact: tpmdd-devel@lists.sf.net @@ -158,7 +158,7 @@ Description: The "pubek" property will return the TPM's public endorsement Modulus Length: 256 (bytes) Modulus: The 256 byte Endorsement Key modulus -What: /sys/class/misc/tpmX/device/temp_deactivated +What: /sys/class/tpm/tpmX/device/temp_deactivated Date: April 2006 KernelVersion: 2.6.17 Contact: tpmdd-devel@lists.sf.net @@ -167,7 +167,7 @@ Description: The "temp_deactivated" property returns a '1' if the chip has cycle. Whether a warm boot (reboot) will clear a TPM chip from a temp_deactivated state is platform specific. -What: /sys/class/misc/tpmX/device/timeouts +What: /sys/class/tpm/tpmX/device/timeouts Date: March 2011 KernelVersion: 3.1 Contact: tpmdd-devel@lists.sf.net diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index e72b042aa867f0..7596eef3bff9e4 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "tpm.h" #include "tpm_eventlog.h" @@ -32,6 +33,9 @@ static DECLARE_BITMAP(dev_mask, TPM_NUM_DEVICES); static LIST_HEAD(tpm_chip_list); static DEFINE_SPINLOCK(driver_lock); +struct class *tpm_class; +dev_t tpm_devt; + /* * tpm_chip_find_get - return tpm_chip for a given chip number * @chip_num the device number for the chip @@ -55,16 +59,14 @@ struct tpm_chip *tpm_chip_find_get(int chip_num) } /** - * tpmm_chip_remove() - free chip memory and device number - * @data: points to struct tpm_chip instance + * tpm_dev_release() - free chip memory and the device number + * @dev: the character device for the TPM chip * - * This is used internally by tpmm_chip_alloc() and called by devres - * when the device is released. This function does the opposite of - * tpmm_chip_alloc() freeing memory and the device number. + * This is used as the release function for the character device. */ -static void tpmm_chip_remove(void *data) +static void tpm_dev_release(struct device *dev) { - struct tpm_chip *chip = (struct tpm_chip *) data; + struct tpm_chip *chip = container_of(dev, struct tpm_chip, dev); spin_lock(&driver_lock); clear_bit(chip->dev_num, dev_mask); @@ -111,18 +113,68 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev, scnprintf(chip->devname, sizeof(chip->devname), "tpm%d", chip->dev_num); chip->pdev = dev; - devm_add_action(dev, tpmm_chip_remove, chip); + dev_set_drvdata(dev, chip); + chip->dev.class = tpm_class; + chip->dev.release = tpm_dev_release; + chip->dev.parent = chip->pdev; + + if (chip->dev_num == 0) + chip->dev.devt = MKDEV(MISC_MAJOR, TPM_MINOR); + else + chip->dev.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num); + + dev_set_name(&chip->dev, chip->devname); + + device_initialize(&chip->dev); + + chip->cdev.owner = chip->pdev->driver->owner; + cdev_init(&chip->cdev, &tpm_fops); + return chip; } EXPORT_SYMBOL_GPL(tpmm_chip_alloc); +static int tpm_dev_add_device(struct tpm_chip *chip) +{ + int rc; + + rc = device_add(&chip->dev); + if (rc) { + dev_err(&chip->dev, + "unable to device_register() %s, major %d, minor %d, err=%d\n", + chip->devname, MAJOR(chip->dev.devt), + MINOR(chip->dev.devt), rc); + + return rc; + } + + rc = cdev_add(&chip->cdev, chip->dev.devt, 1); + if (rc) { + dev_err(&chip->dev, + "unable to cdev_add() %s, major %d, minor %d, err=%d\n", + chip->devname, MAJOR(chip->dev.devt), + MINOR(chip->dev.devt), rc); + + device_unregister(&chip->dev); + return rc; + } + + return rc; +} + +static void tpm_dev_del_device(struct tpm_chip *chip) +{ + cdev_del(&chip->cdev); + device_unregister(&chip->dev); +} + /* - * tpm_chip_register() - create a misc driver for the TPM chip + * tpm_chip_register() - create a character device for the TPM chip * @chip: TPM chip to use. * - * Creates a misc driver for the TPM chip and adds sysfs interfaces for + * Creates a character device for the TPM chip and adds sysfs interfaces for * the device, PPI and TCPA. As the last step this function adds the * chip to the list of TPM chips available for use. * diff --git a/drivers/char/tpm/tpm-dev.c b/drivers/char/tpm/tpm-dev.c index 356832146788ba..de0337ebd6585b 100644 --- a/drivers/char/tpm/tpm-dev.c +++ b/drivers/char/tpm/tpm-dev.c @@ -17,7 +17,6 @@ * License. * */ -#include #include #include #include "tpm.h" @@ -54,9 +53,8 @@ static void timeout_work(struct work_struct *work) static int tpm_open(struct inode *inode, struct file *file) { - struct miscdevice *misc = file->private_data; - struct tpm_chip *chip = container_of(misc, struct tpm_chip, - vendor.miscdev); + struct tpm_chip *chip = + container_of(inode->i_cdev, struct tpm_chip, cdev); struct file_priv *priv; /* It's assured that the chip will be opened just once, @@ -173,7 +171,7 @@ static int tpm_release(struct inode *inode, struct file *file) return 0; } -static const struct file_operations tpm_fops = { +const struct file_operations tpm_fops = { .owner = THIS_MODULE, .llseek = no_llseek, .open = tpm_open, @@ -182,32 +180,4 @@ static const struct file_operations tpm_fops = { .release = tpm_release, }; -int tpm_dev_add_device(struct tpm_chip *chip) -{ - int rc; - chip->vendor.miscdev.fops = &tpm_fops; - if (chip->dev_num == 0) - chip->vendor.miscdev.minor = TPM_MINOR; - else - chip->vendor.miscdev.minor = MISC_DYNAMIC_MINOR; - - chip->vendor.miscdev.name = chip->devname; - chip->vendor.miscdev.parent = chip->pdev; - - rc = misc_register(&chip->vendor.miscdev); - if (rc) { - chip->vendor.miscdev.name = NULL; - dev_err(chip->pdev, - "unable to misc_register %s, minor %d err=%d\n", - chip->vendor.miscdev.name, - chip->vendor.miscdev.minor, rc); - } - return rc; -} - -void tpm_dev_del_device(struct tpm_chip *chip) -{ - if (chip->vendor.miscdev.name) - misc_deregister(&chip->vendor.miscdev); -} diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index e2af28fd63cb60..b6f6b17392fd7d 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -997,6 +997,35 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max) } EXPORT_SYMBOL_GPL(tpm_get_random); +static int __init tpm_init(void) +{ + int rc; + + tpm_class = class_create(THIS_MODULE, "tpm"); + if (IS_ERR(tpm_class)) { + pr_err("couldn't create tpm class\n"); + return PTR_ERR(tpm_class); + } + + rc = alloc_chrdev_region(&tpm_devt, 0, TPM_NUM_DEVICES, "tpm"); + if (rc < 0) { + pr_err("tpm: failed to allocate char dev region\n"); + class_destroy(tpm_class); + return rc; + } + + return 0; +} + +static void __exit tpm_exit(void) +{ + class_destroy(tpm_class); + unregister_chrdev_region(tpm_devt, TPM_NUM_DEVICES); +} + +subsys_initcall(tpm_init); +module_exit(tpm_exit); + MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)"); MODULE_DESCRIPTION("TPM Driver"); MODULE_VERSION("2.0"); diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index adf6af835329fe..d46765b4c97e12 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -23,11 +23,11 @@ #include #include #include -#include #include #include #include #include +#include enum tpm_const { TPM_MINOR = 224, /* officially assigned */ @@ -74,7 +74,6 @@ struct tpm_vendor_specific { int region_size; int have_region; - struct miscdevice miscdev; struct list_head list; int locality; unsigned long timeout_a, timeout_b, timeout_c, timeout_d; /* jiffies */ @@ -104,6 +103,9 @@ enum tpm_chip_flags { struct tpm_chip { struct device *pdev; /* Device stuff */ + struct device dev; + struct cdev cdev; + const struct tpm_class_ops *ops; unsigned int flags; @@ -326,6 +328,10 @@ struct tpm_cmd_t { tpm_cmd_params params; } __packed; +extern struct class *tpm_class; +extern dev_t tpm_devt; +extern const struct file_operations tpm_fops; + ssize_t tpm_getcap(struct device *, __be32, cap_t *, const char *); ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf, size_t bufsiz); @@ -346,8 +352,6 @@ extern struct tpm_chip *tpmm_chip_alloc(struct device *dev, extern int tpm_chip_register(struct tpm_chip *chip); extern void tpm_chip_unregister(struct tpm_chip *chip); -int tpm_dev_add_device(struct tpm_chip *chip); -void tpm_dev_del_device(struct tpm_chip *chip); int tpm_sysfs_add_device(struct tpm_chip *chip); void tpm_sysfs_del_device(struct tpm_chip *chip); diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c index e1eadb0c156894..9d42b7d78e501d 100644 --- a/drivers/char/tpm/tpm_i2c_nuvoton.c +++ b/drivers/char/tpm/tpm_i2c_nuvoton.c @@ -560,7 +560,7 @@ static int i2c_nuvoton_probe(struct i2c_client *client, rc = devm_request_irq(dev, chip->vendor.irq, i2c_nuvoton_int_handler, IRQF_TRIGGER_LOW, - chip->vendor.miscdev.name, + chip->devname, chip); if (rc) { dev_err(dev, "%s() Unable to request irq: %d for use\n", diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 9e02489a94f3b6..239cf0bbc1a1d5 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -698,7 +698,7 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle, TPM_INT_VECTOR(chip->vendor.locality)); if (devm_request_irq (dev, i, tis_int_probe, IRQF_SHARED, - chip->vendor.miscdev.name, chip) != 0) { + chip->devname, chip) != 0) { dev_info(chip->pdev, "Unable to request irq: %d for probe\n", i); @@ -745,7 +745,7 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle, TPM_INT_VECTOR(chip->vendor.locality)); if (devm_request_irq (dev, chip->vendor.irq, tis_int_handler, IRQF_SHARED, - chip->vendor.miscdev.name, chip) != 0) { + chip->devname, chip) != 0) { dev_info(chip->pdev, "Unable to request irq: %d for use\n", chip->vendor.irq); From 7a1d7e6dd76a2070e2d86826391468edc33bb6d6 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 12 Dec 2014 11:46:38 -0800 Subject: [PATCH 127/601] tpm: TPM 2.0 baseline support TPM 2.0 devices are separated by adding a field 'flags' to struct tpm_chip and defining a flag TPM_CHIP_FLAG_TPM2 for tagging them. This patch adds the following internal functions: - tpm2_get_random() - tpm2_get_tpm_pt() - tpm2_pcr_extend() - tpm2_pcr_read() - tpm2_startup() Additionally, the following exported functions are implemented for implementing TPM 2.0 device drivers: - tpm2_do_selftest() - tpm2_calc_ordinal_durations() - tpm2_gen_interrupt() The existing functions that are exported for the use for existing subsystems have been changed to check the flags field in struct tpm_chip and use appropriate TPM 2.0 counterpart if TPM_CHIP_FLAG_TPM2 is est. The code for tpm2_calc_ordinal_duration() and tpm2_startup() were originally written by Will Arthur. Signed-off-by: Jarkko Sakkinen Signed-off-by: Will Arthur Reviewed-by: Jasob Gunthorpe Reviewed-by: Stefan Berger Reviewed-by: Peter Huewe Tested-by: Peter Huewe [phuewe: Fixed copy paste error * 2] Signed-off-by: Peter Huewe --- drivers/char/tpm/Makefile | 2 +- drivers/char/tpm/tpm-chip.c | 27 +- drivers/char/tpm/tpm-interface.c | 23 +- drivers/char/tpm/tpm.h | 66 ++++ drivers/char/tpm/tpm2-cmd.c | 617 +++++++++++++++++++++++++++++++ 5 files changed, 721 insertions(+), 14 deletions(-) create mode 100644 drivers/char/tpm/tpm2-cmd.c diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile index c715596acb7c07..88848edb081c11 100644 --- a/drivers/char/tpm/Makefile +++ b/drivers/char/tpm/Makefile @@ -2,7 +2,7 @@ # Makefile for the kernel tpm device drivers. # obj-$(CONFIG_TCG_TPM) += tpm.o -tpm-y := tpm-interface.o tpm-dev.o tpm-sysfs.o tpm-chip.o +tpm-y := tpm-interface.o tpm-dev.o tpm-sysfs.o tpm-chip.o tpm2-cmd.o tpm-$(CONFIG_ACPI) += tpm_ppi.o ifdef CONFIG_ACPI diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 7596eef3bff9e4..6459af7c164604 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -195,15 +195,18 @@ int tpm_chip_register(struct tpm_chip *chip) if (rc) return rc; - rc = tpm_sysfs_add_device(chip); - if (rc) - goto del_misc; + /* Populate sysfs for TPM1 devices. */ + if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) { + rc = tpm_sysfs_add_device(chip); + if (rc) + goto del_misc; - rc = tpm_add_ppi(chip); - if (rc) - goto del_sysfs; + rc = tpm_add_ppi(chip); + if (rc) + goto del_sysfs; - chip->bios_dir = tpm_bios_log_setup(chip->devname); + chip->bios_dir = tpm_bios_log_setup(chip->devname); + } /* Make the chip available. */ spin_lock(&driver_lock); @@ -241,10 +244,12 @@ void tpm_chip_unregister(struct tpm_chip *chip) spin_unlock(&driver_lock); synchronize_rcu(); - if (chip->bios_dir) - tpm_bios_log_teardown(chip->bios_dir); - tpm_remove_ppi(chip); - tpm_sysfs_del_device(chip); + if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) { + if (chip->bios_dir) + tpm_bios_log_teardown(chip->bios_dir); + tpm_remove_ppi(chip); + tpm_sysfs_del_device(chip); + } tpm_dev_del_device(chip); } diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index b6f6b17392fd7d..20cf94d3138636 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -360,7 +360,10 @@ ssize_t tpm_transmit(struct tpm_chip *chip, const char *buf, if (chip->vendor.irq) goto out_recv; - stop = jiffies + tpm_calc_ordinal_duration(chip, ordinal); + if (chip->flags & TPM_CHIP_FLAG_TPM2) + stop = jiffies + tpm2_calc_ordinal_duration(chip, ordinal); + else + stop = jiffies + tpm_calc_ordinal_duration(chip, ordinal); do { u8 status = chip->ops->status(chip); if ((status & chip->ops->req_complete_mask) == @@ -484,6 +487,7 @@ static int tpm_startup(struct tpm_chip *chip, __be16 startup_type) { struct tpm_cmd_t start_cmd; start_cmd.header.in = tpm_startup_header; + start_cmd.params.startup_in.startup_type = startup_type; return tpm_transmit_cmd(chip, &start_cmd, TPM_INTERNAL_RESULT_SIZE, "attempting to start the TPM"); @@ -680,7 +684,10 @@ int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf) chip = tpm_chip_find_get(chip_num); if (chip == NULL) return -ENODEV; - rc = tpm_pcr_read_dev(chip, pcr_idx, res_buf); + if (chip->flags & TPM_CHIP_FLAG_TPM2) + rc = tpm2_pcr_read(chip, pcr_idx, res_buf); + else + rc = tpm_pcr_read_dev(chip, pcr_idx, res_buf); tpm_chip_put(chip); return rc; } @@ -714,6 +721,12 @@ int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash) if (chip == NULL) return -ENODEV; + if (chip->flags & TPM_CHIP_FLAG_TPM2) { + rc = tpm2_pcr_extend(chip, pcr_idx, hash); + tpm_chip_put(chip); + return rc; + } + cmd.header.in = pcrextend_header; cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(pcr_idx); memcpy(cmd.params.pcrextend_in.hash, hash, TPM_DIGEST_SIZE); @@ -974,6 +987,12 @@ int tpm_get_random(u32 chip_num, u8 *out, size_t max) if (chip == NULL) return -ENODEV; + if (chip->flags & TPM_CHIP_FLAG_TPM2) { + err = tpm2_get_random(chip, out, max); + tpm_chip_put(chip); + return err; + } + do { tpm_cmd.header.in = tpm_getrandom_header; tpm_cmd.params.getrandom_in.num_bytes = cpu_to_be32(num_bytes); diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index d46765b4c97e12..cc421cfde389d8 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -62,6 +62,59 @@ enum tpm_duration { #define TPM_ERR_INVALID_POSTINIT 38 #define TPM_HEADER_SIZE 10 + +enum tpm2_const { + TPM2_PLATFORM_PCR = 24, + TPM2_PCR_SELECT_MIN = ((TPM2_PLATFORM_PCR + 7) / 8), + TPM2_TIMEOUT_A = 750, + TPM2_TIMEOUT_B = 2000, + TPM2_TIMEOUT_C = 200, + TPM2_TIMEOUT_D = 30, + TPM2_DURATION_SHORT = 20, + TPM2_DURATION_MEDIUM = 750, + TPM2_DURATION_LONG = 2000, +}; + +enum tpm2_structures { + TPM2_ST_NO_SESSIONS = 0x8001, + TPM2_ST_SESSIONS = 0x8002, +}; + +enum tpm2_return_codes { + TPM2_RC_INITIALIZE = 0x0100, + TPM2_RC_TESTING = 0x090A, + TPM2_RC_DISABLED = 0x0120, +}; + +enum tpm2_algorithms { + TPM2_ALG_SHA1 = 0x0004, +}; + +enum tpm2_command_codes { + TPM2_CC_FIRST = 0x011F, + TPM2_CC_SELF_TEST = 0x0143, + TPM2_CC_STARTUP = 0x0144, + TPM2_CC_SHUTDOWN = 0x0145, + TPM2_CC_GET_CAPABILITY = 0x017A, + TPM2_CC_GET_RANDOM = 0x017B, + TPM2_CC_PCR_READ = 0x017E, + TPM2_CC_PCR_EXTEND = 0x0182, + TPM2_CC_LAST = 0x018F, +}; + +enum tpm2_permanent_handles { + TPM2_RS_PW = 0x40000009, +}; + +enum tpm2_capabilities { + TPM2_CAP_TPM_PROPERTIES = 6, +}; + +enum tpm2_startup_types { + TPM2_SU_CLEAR = 0x0000, + TPM2_SU_STATE = 0x0001, +}; + struct tpm_chip; struct tpm_vendor_specific { @@ -99,6 +152,7 @@ struct tpm_vendor_specific { enum tpm_chip_flags { TPM_CHIP_FLAG_REGISTERED = BIT(0), TPM_CHIP_FLAG_PPI = BIT(1), + TPM_CHIP_FLAG_TPM2 = BIT(2), }; struct tpm_chip { @@ -370,3 +424,15 @@ static inline void tpm_remove_ppi(struct tpm_chip *chip) { } #endif + +int tpm2_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf); +int tpm2_pcr_extend(struct tpm_chip *chip, int pcr_idx, const u8 *hash); +int tpm2_get_random(struct tpm_chip *chip, u8 *out, size_t max); +ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id, + u32 *value, const char *desc); + +extern int tpm2_startup(struct tpm_chip *chip, u16 startup_type); +extern int tpm2_shutdown(struct tpm_chip *chip, u16 shutdown_type); +extern unsigned long tpm2_calc_ordinal_duration(struct tpm_chip *, u32); +extern int tpm2_do_selftest(struct tpm_chip *chip); +extern int tpm2_gen_interrupt(struct tpm_chip *chip, bool quiet); diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c new file mode 100644 index 00000000000000..1abe6502219f25 --- /dev/null +++ b/drivers/char/tpm/tpm2-cmd.c @@ -0,0 +1,617 @@ +/* + * Copyright (C) 2014 Intel Corporation + * + * Authors: + * Jarkko Sakkinen + * + * Maintained by: + * + * This file contains TPM2 protocol implementations of the commands + * used by the kernel internally. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include "tpm.h" + +struct tpm2_startup_in { + __be16 startup_type; +} __packed; + +struct tpm2_self_test_in { + u8 full_test; +} __packed; + +struct tpm2_pcr_read_in { + __be32 pcr_selects_cnt; + __be16 hash_alg; + u8 pcr_select_size; + u8 pcr_select[TPM2_PCR_SELECT_MIN]; +} __packed; + +struct tpm2_pcr_read_out { + __be32 update_cnt; + __be32 pcr_selects_cnt; + __be16 hash_alg; + u8 pcr_select_size; + u8 pcr_select[TPM2_PCR_SELECT_MIN]; + __be32 digests_cnt; + __be16 digest_size; + u8 digest[TPM_DIGEST_SIZE]; +} __packed; + +struct tpm2_null_auth_area { + __be32 handle; + __be16 nonce_size; + u8 attributes; + __be16 auth_size; +} __packed; + +struct tpm2_pcr_extend_in { + __be32 pcr_idx; + __be32 auth_area_size; + struct tpm2_null_auth_area auth_area; + __be32 digest_cnt; + __be16 hash_alg; + u8 digest[TPM_DIGEST_SIZE]; +} __packed; + +struct tpm2_get_tpm_pt_in { + __be32 cap_id; + __be32 property_id; + __be32 property_cnt; +} __packed; + +struct tpm2_get_tpm_pt_out { + u8 more_data; + __be32 subcap_id; + __be32 property_cnt; + __be32 property_id; + __be32 value; +} __packed; + +struct tpm2_get_random_in { + __be16 size; +} __packed; + +struct tpm2_get_random_out { + __be16 size; + u8 buffer[TPM_MAX_RNG_DATA]; +} __packed; + +union tpm2_cmd_params { + struct tpm2_startup_in startup_in; + struct tpm2_self_test_in selftest_in; + struct tpm2_pcr_read_in pcrread_in; + struct tpm2_pcr_read_out pcrread_out; + struct tpm2_pcr_extend_in pcrextend_in; + struct tpm2_get_tpm_pt_in get_tpm_pt_in; + struct tpm2_get_tpm_pt_out get_tpm_pt_out; + struct tpm2_get_random_in getrandom_in; + struct tpm2_get_random_out getrandom_out; +}; + +struct tpm2_cmd { + tpm_cmd_header header; + union tpm2_cmd_params params; +} __packed; + +/* + * Array with one entry per ordinal defining the maximum amount + * of time the chip could take to return the result. The values + * of the SHORT, MEDIUM, and LONG durations are taken from the + * PC Client Profile (PTP) specification. + */ +static const u8 tpm2_ordinal_duration[TPM2_CC_LAST - TPM2_CC_FIRST + 1] = { + TPM_UNDEFINED, /* 11F */ + TPM_UNDEFINED, /* 120 */ + TPM_LONG, /* 121 */ + TPM_UNDEFINED, /* 122 */ + TPM_UNDEFINED, /* 123 */ + TPM_UNDEFINED, /* 124 */ + TPM_UNDEFINED, /* 125 */ + TPM_UNDEFINED, /* 126 */ + TPM_UNDEFINED, /* 127 */ + TPM_UNDEFINED, /* 128 */ + TPM_LONG, /* 129 */ + TPM_UNDEFINED, /* 12a */ + TPM_UNDEFINED, /* 12b */ + TPM_UNDEFINED, /* 12c */ + TPM_UNDEFINED, /* 12d */ + TPM_UNDEFINED, /* 12e */ + TPM_UNDEFINED, /* 12f */ + TPM_UNDEFINED, /* 130 */ + TPM_UNDEFINED, /* 131 */ + TPM_UNDEFINED, /* 132 */ + TPM_UNDEFINED, /* 133 */ + TPM_UNDEFINED, /* 134 */ + TPM_UNDEFINED, /* 135 */ + TPM_UNDEFINED, /* 136 */ + TPM_UNDEFINED, /* 137 */ + TPM_UNDEFINED, /* 138 */ + TPM_UNDEFINED, /* 139 */ + TPM_UNDEFINED, /* 13a */ + TPM_UNDEFINED, /* 13b */ + TPM_UNDEFINED, /* 13c */ + TPM_UNDEFINED, /* 13d */ + TPM_MEDIUM, /* 13e */ + TPM_UNDEFINED, /* 13f */ + TPM_UNDEFINED, /* 140 */ + TPM_UNDEFINED, /* 141 */ + TPM_UNDEFINED, /* 142 */ + TPM_LONG, /* 143 */ + TPM_MEDIUM, /* 144 */ + TPM_UNDEFINED, /* 145 */ + TPM_UNDEFINED, /* 146 */ + TPM_UNDEFINED, /* 147 */ + TPM_UNDEFINED, /* 148 */ + TPM_UNDEFINED, /* 149 */ + TPM_UNDEFINED, /* 14a */ + TPM_UNDEFINED, /* 14b */ + TPM_UNDEFINED, /* 14c */ + TPM_UNDEFINED, /* 14d */ + TPM_LONG, /* 14e */ + TPM_UNDEFINED, /* 14f */ + TPM_UNDEFINED, /* 150 */ + TPM_UNDEFINED, /* 151 */ + TPM_UNDEFINED, /* 152 */ + TPM_UNDEFINED, /* 153 */ + TPM_UNDEFINED, /* 154 */ + TPM_UNDEFINED, /* 155 */ + TPM_UNDEFINED, /* 156 */ + TPM_UNDEFINED, /* 157 */ + TPM_UNDEFINED, /* 158 */ + TPM_UNDEFINED, /* 159 */ + TPM_UNDEFINED, /* 15a */ + TPM_UNDEFINED, /* 15b */ + TPM_MEDIUM, /* 15c */ + TPM_UNDEFINED, /* 15d */ + TPM_UNDEFINED, /* 15e */ + TPM_UNDEFINED, /* 15f */ + TPM_UNDEFINED, /* 160 */ + TPM_UNDEFINED, /* 161 */ + TPM_UNDEFINED, /* 162 */ + TPM_UNDEFINED, /* 163 */ + TPM_UNDEFINED, /* 164 */ + TPM_UNDEFINED, /* 165 */ + TPM_UNDEFINED, /* 166 */ + TPM_UNDEFINED, /* 167 */ + TPM_UNDEFINED, /* 168 */ + TPM_UNDEFINED, /* 169 */ + TPM_UNDEFINED, /* 16a */ + TPM_UNDEFINED, /* 16b */ + TPM_UNDEFINED, /* 16c */ + TPM_UNDEFINED, /* 16d */ + TPM_UNDEFINED, /* 16e */ + TPM_UNDEFINED, /* 16f */ + TPM_UNDEFINED, /* 170 */ + TPM_UNDEFINED, /* 171 */ + TPM_UNDEFINED, /* 172 */ + TPM_UNDEFINED, /* 173 */ + TPM_UNDEFINED, /* 174 */ + TPM_UNDEFINED, /* 175 */ + TPM_UNDEFINED, /* 176 */ + TPM_LONG, /* 177 */ + TPM_UNDEFINED, /* 178 */ + TPM_UNDEFINED, /* 179 */ + TPM_MEDIUM, /* 17a */ + TPM_LONG, /* 17b */ + TPM_UNDEFINED, /* 17c */ + TPM_UNDEFINED, /* 17d */ + TPM_UNDEFINED, /* 17e */ + TPM_UNDEFINED, /* 17f */ + TPM_UNDEFINED, /* 180 */ + TPM_UNDEFINED, /* 181 */ + TPM_MEDIUM, /* 182 */ + TPM_UNDEFINED, /* 183 */ + TPM_UNDEFINED, /* 184 */ + TPM_MEDIUM, /* 185 */ + TPM_MEDIUM, /* 186 */ + TPM_UNDEFINED, /* 187 */ + TPM_UNDEFINED, /* 188 */ + TPM_UNDEFINED, /* 189 */ + TPM_UNDEFINED, /* 18a */ + TPM_UNDEFINED, /* 18b */ + TPM_UNDEFINED, /* 18c */ + TPM_UNDEFINED, /* 18d */ + TPM_UNDEFINED, /* 18e */ + TPM_UNDEFINED /* 18f */ +}; + +#define TPM2_PCR_READ_IN_SIZE \ + (sizeof(struct tpm_input_header) + \ + sizeof(struct tpm2_pcr_read_in)) + +static const struct tpm_input_header tpm2_pcrread_header = { + .tag = cpu_to_be16(TPM2_ST_NO_SESSIONS), + .length = cpu_to_be32(TPM2_PCR_READ_IN_SIZE), + .ordinal = cpu_to_be32(TPM2_CC_PCR_READ) +}; + +/** + * tpm2_pcr_read() - read a PCR value + * @chip: TPM chip to use. + * @pcr_idx: index of the PCR to read. + * @ref_buf: buffer to store the resulting hash, + * + * 0 is returned when the operation is successful. If a negative number is + * returned it remarks a POSIX error code. If a positive number is returned + * it remarks a TPM error. + */ +int tpm2_pcr_read(struct tpm_chip *chip, int pcr_idx, u8 *res_buf) +{ + int rc; + struct tpm2_cmd cmd; + u8 *buf; + + if (pcr_idx >= TPM2_PLATFORM_PCR) + return -EINVAL; + + cmd.header.in = tpm2_pcrread_header; + cmd.params.pcrread_in.pcr_selects_cnt = cpu_to_be32(1); + cmd.params.pcrread_in.hash_alg = cpu_to_be16(TPM2_ALG_SHA1); + cmd.params.pcrread_in.pcr_select_size = TPM2_PCR_SELECT_MIN; + + memset(cmd.params.pcrread_in.pcr_select, 0, + sizeof(cmd.params.pcrread_in.pcr_select)); + cmd.params.pcrread_in.pcr_select[pcr_idx >> 3] = 1 << (pcr_idx & 0x7); + + rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), + "attempting to read a pcr value"); + if (rc == 0) { + buf = cmd.params.pcrread_out.digest; + memcpy(res_buf, buf, TPM_DIGEST_SIZE); + } + + return rc; +} + +#define TPM2_GET_PCREXTEND_IN_SIZE \ + (sizeof(struct tpm_input_header) + \ + sizeof(struct tpm2_pcr_extend_in)) + +static const struct tpm_input_header tpm2_pcrextend_header = { + .tag = cpu_to_be16(TPM2_ST_SESSIONS), + .length = cpu_to_be32(TPM2_GET_PCREXTEND_IN_SIZE), + .ordinal = cpu_to_be32(TPM2_CC_PCR_EXTEND) +}; + +/** + * tpm2_pcr_extend() - extend a PCR value + * @chip: TPM chip to use. + * @pcr_idx: index of the PCR. + * @hash: hash value to use for the extend operation. + * + * 0 is returned when the operation is successful. If a negative number is + * returned it remarks a POSIX error code. If a positive number is returned + * it remarks a TPM error. + */ +int tpm2_pcr_extend(struct tpm_chip *chip, int pcr_idx, const u8 *hash) +{ + struct tpm2_cmd cmd; + int rc; + + cmd.header.in = tpm2_pcrextend_header; + cmd.params.pcrextend_in.pcr_idx = cpu_to_be32(pcr_idx); + cmd.params.pcrextend_in.auth_area_size = + cpu_to_be32(sizeof(struct tpm2_null_auth_area)); + cmd.params.pcrextend_in.auth_area.handle = + cpu_to_be32(TPM2_RS_PW); + cmd.params.pcrextend_in.auth_area.nonce_size = 0; + cmd.params.pcrextend_in.auth_area.attributes = 0; + cmd.params.pcrextend_in.auth_area.auth_size = 0; + cmd.params.pcrextend_in.digest_cnt = cpu_to_be32(1); + cmd.params.pcrextend_in.hash_alg = cpu_to_be16(TPM2_ALG_SHA1); + memcpy(cmd.params.pcrextend_in.digest, hash, TPM_DIGEST_SIZE); + + rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), + "attempting extend a PCR value"); + + return rc; +} + +#define TPM2_GETRANDOM_IN_SIZE \ + (sizeof(struct tpm_input_header) + \ + sizeof(struct tpm2_get_random_in)) + +static const struct tpm_input_header tpm2_getrandom_header = { + .tag = cpu_to_be16(TPM2_ST_NO_SESSIONS), + .length = cpu_to_be32(TPM2_GETRANDOM_IN_SIZE), + .ordinal = cpu_to_be32(TPM2_CC_GET_RANDOM) +}; + +/** + * tpm2_get_random() - get random bytes from the TPM RNG + * @chip: TPM chip to use + * @out: destination buffer for the random bytes + * @max: the max number of bytes to write to @out + * + * 0 is returned when the operation is successful. If a negative number is + * returned it remarks a POSIX error code. If a positive number is returned + * it remarks a TPM error. + */ +int tpm2_get_random(struct tpm_chip *chip, u8 *out, size_t max) +{ + struct tpm2_cmd cmd; + u32 recd; + u32 num_bytes; + int err; + int total = 0; + int retries = 5; + u8 *dest = out; + + num_bytes = min_t(u32, max, sizeof(cmd.params.getrandom_out.buffer)); + + if (!out || !num_bytes || + max > sizeof(cmd.params.getrandom_out.buffer)) + return -EINVAL; + + do { + cmd.header.in = tpm2_getrandom_header; + cmd.params.getrandom_in.size = cpu_to_be16(num_bytes); + + err = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), + "attempting get random"); + if (err) + break; + + recd = min_t(u32, be16_to_cpu(cmd.params.getrandom_out.size), + num_bytes); + memcpy(dest, cmd.params.getrandom_out.buffer, recd); + + dest += recd; + total += recd; + num_bytes -= recd; + } while (retries-- && total < max); + + return total ? total : -EIO; +} + +#define TPM2_GET_TPM_PT_IN_SIZE \ + (sizeof(struct tpm_input_header) + \ + sizeof(struct tpm2_get_tpm_pt_in)) + +static const struct tpm_input_header tpm2_get_tpm_pt_header = { + .tag = cpu_to_be16(TPM2_ST_NO_SESSIONS), + .length = cpu_to_be32(TPM2_GET_TPM_PT_IN_SIZE), + .ordinal = cpu_to_be32(TPM2_CC_GET_CAPABILITY) +}; + +/** + * tpm2_get_tpm_pt() - get value of a TPM_CAP_TPM_PROPERTIES type property + * @chip: TPM chip to use. + * @property_id: property ID. + * @value: output variable. + * @desc: passed to tpm_transmit_cmd() + * + * 0 is returned when the operation is successful. If a negative number is + * returned it remarks a POSIX error code. If a positive number is returned + * it remarks a TPM error. + */ +ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id, u32 *value, + const char *desc) +{ + struct tpm2_cmd cmd; + int rc; + + cmd.header.in = tpm2_get_tpm_pt_header; + cmd.params.get_tpm_pt_in.cap_id = cpu_to_be32(TPM2_CAP_TPM_PROPERTIES); + cmd.params.get_tpm_pt_in.property_id = cpu_to_be32(property_id); + cmd.params.get_tpm_pt_in.property_cnt = cpu_to_be32(1); + + rc = tpm_transmit_cmd(chip, &cmd, sizeof(cmd), desc); + if (!rc) + *value = cmd.params.get_tpm_pt_out.value; + + return rc; +} + +#define TPM2_STARTUP_IN_SIZE \ + (sizeof(struct tpm_input_header) + \ + sizeof(struct tpm2_startup_in)) + +static const struct tpm_input_header tpm2_startup_header = { + .tag = cpu_to_be16(TPM2_ST_NO_SESSIONS), + .length = cpu_to_be32(TPM2_STARTUP_IN_SIZE), + .ordinal = cpu_to_be32(TPM2_CC_STARTUP) +}; + +/** + * tpm2_startup() - send startup command to the TPM chip + * @chip: TPM chip to use. + * @startup_type startup type. The value is either + * TPM_SU_CLEAR or TPM_SU_STATE. + * + * 0 is returned when the operation is successful. If a negative number is + * returned it remarks a POSIX error code. If a positive number is returned + * it remarks a TPM error. + */ +int tpm2_startup(struct tpm_chip *chip, u16 startup_type) +{ + struct tpm2_cmd cmd; + + cmd.header.in = tpm2_startup_header; + + cmd.params.startup_in.startup_type = cpu_to_be16(startup_type); + return tpm_transmit_cmd(chip, &cmd, sizeof(cmd), + "attempting to start the TPM"); +} +EXPORT_SYMBOL_GPL(tpm2_startup); + +#define TPM2_SHUTDOWN_IN_SIZE \ + (sizeof(struct tpm_input_header) + \ + sizeof(struct tpm2_startup_in)) + +static const struct tpm_input_header tpm2_shutdown_header = { + .tag = cpu_to_be16(TPM2_ST_NO_SESSIONS), + .length = cpu_to_be32(TPM2_SHUTDOWN_IN_SIZE), + .ordinal = cpu_to_be32(TPM2_CC_SHUTDOWN) +}; + +/** + * tpm2_shutdown() - send shutdown command to the TPM chip + * @chip: TPM chip to use. + * @shutdown_type shutdown type. The value is either + * TPM_SU_CLEAR or TPM_SU_STATE. + * + * 0 is returned when the operation is successful. If a negative number is + * returned it remarks a POSIX error code. If a positive number is returned + * it remarks a TPM error. + */ +int tpm2_shutdown(struct tpm_chip *chip, u16 shutdown_type) +{ + struct tpm2_cmd cmd; + + cmd.header.in = tpm2_shutdown_header; + + cmd.params.startup_in.startup_type = cpu_to_be16(shutdown_type); + return tpm_transmit_cmd(chip, &cmd, sizeof(cmd), + "stopping the TPM"); +} +EXPORT_SYMBOL_GPL(tpm2_shutdown); + +/* + * tpm2_calc_ordinal_duration() - maximum duration for a command + * @chip: TPM chip to use. + * @ordinal: command code number. + * + * 0 is returned when the operation is successful. If a negative number is + * returned it remarks a POSIX error code. If a positive number is returned + * it remarks a TPM error. + */ +unsigned long tpm2_calc_ordinal_duration(struct tpm_chip *chip, u32 ordinal) +{ + int index = TPM_UNDEFINED; + int duration = 0; + + if (ordinal >= TPM2_CC_FIRST && ordinal <= TPM2_CC_LAST) + index = tpm2_ordinal_duration[ordinal - TPM2_CC_FIRST]; + + if (index != TPM_UNDEFINED) + duration = chip->vendor.duration[index]; + + if (duration <= 0) + duration = 2 * 60 * HZ; + + return duration; +} +EXPORT_SYMBOL_GPL(tpm2_calc_ordinal_duration); + +#define TPM2_SELF_TEST_IN_SIZE \ + (sizeof(struct tpm_input_header) + \ + sizeof(struct tpm2_self_test_in)) + +static const struct tpm_input_header tpm2_selftest_header = { + .tag = cpu_to_be16(TPM2_ST_NO_SESSIONS), + .length = cpu_to_be32(TPM2_SELF_TEST_IN_SIZE), + .ordinal = cpu_to_be32(TPM2_CC_SELF_TEST) +}; + +/** + * tpm2_continue_selftest() - start a self test + * @chip: TPM chip to use + * @full: test all commands instead of testing only those that were not + * previously tested. + * + * 0 is returned when the operation is successful. If a negative number is + * returned it remarks a POSIX error code. If a positive number is returned + * it remarks a TPM error. + */ +static int tpm2_start_selftest(struct tpm_chip *chip, bool full) +{ + int rc; + struct tpm2_cmd cmd; + + cmd.header.in = tpm2_selftest_header; + cmd.params.selftest_in.full_test = full; + + rc = tpm_transmit_cmd(chip, &cmd, TPM2_SELF_TEST_IN_SIZE, + "continue selftest"); + + /* At least some prototype chips seem to give RC_TESTING error + * immediately. This is a workaround for that. + */ + if (rc == TPM2_RC_TESTING) { + dev_warn(chip->pdev, "Got RC_TESTING, ignoring\n"); + rc = 0; + } + + return rc; +} + +/** + * tpm2_do_selftest() - run a full self test + * @chip: TPM chip to use + * + * During the self test TPM2 commands return with the error code RC_TESTING. + * Waiting is done by issuing PCR read until it executes successfully. + * + * 0 is returned when the operation is successful. If a negative number is + * returned it remarks a POSIX error code. If a positive number is returned + * it remarks a TPM error. + */ +int tpm2_do_selftest(struct tpm_chip *chip) +{ + int rc; + unsigned int loops; + unsigned int delay_msec = 100; + unsigned long duration; + struct tpm2_cmd cmd; + int i; + + duration = tpm2_calc_ordinal_duration(chip, TPM2_CC_SELF_TEST); + + loops = jiffies_to_msecs(duration) / delay_msec; + + rc = tpm2_start_selftest(chip, true); + if (rc) + return rc; + + for (i = 0; i < loops; i++) { + /* Attempt to read a PCR value */ + cmd.header.in = tpm2_pcrread_header; + cmd.params.pcrread_in.pcr_selects_cnt = cpu_to_be32(1); + cmd.params.pcrread_in.hash_alg = cpu_to_be16(TPM2_ALG_SHA1); + cmd.params.pcrread_in.pcr_select_size = TPM2_PCR_SELECT_MIN; + cmd.params.pcrread_in.pcr_select[0] = 0x01; + cmd.params.pcrread_in.pcr_select[1] = 0x00; + cmd.params.pcrread_in.pcr_select[2] = 0x00; + + rc = tpm_transmit_cmd(chip, (u8 *) &cmd, sizeof(cmd), NULL); + if (rc < 0) + break; + + rc = be32_to_cpu(cmd.header.out.return_code); + if (rc != TPM2_RC_TESTING) + break; + + msleep(delay_msec); + } + + return rc; +} +EXPORT_SYMBOL_GPL(tpm2_do_selftest); + +/** + * tpm2_gen_interrupt() - generate an interrupt + * @chip: TPM chip to use + * @quiet: surpress the error message + * + * 0 is returned when the operation is successful. If a negative number is + * returned it remarks a POSIX error code. If a positive number is returned + * it remarks a TPM error. + */ +int tpm2_gen_interrupt(struct tpm_chip *chip, bool quiet) +{ + const char *desc = NULL; + u32 dummy; + + if (!quiet) + desc = "attempting to generate an interrupt"; + + return tpm2_get_tpm_pt(chip, TPM2_CAP_TPM_PROPERTIES, &dummy, desc); +} +EXPORT_SYMBOL_GPL(tpm2_gen_interrupt); From 30fc8d138e9123f374a3c3867e7c7c5cd4004941 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 12 Dec 2014 11:46:39 -0800 Subject: [PATCH 128/601] tpm: TPM 2.0 CRB Interface tpm_crb is a driver for TPM 2.0 Command Response Buffer (CRB) Interface as defined in PC Client Platform TPM Profile (PTP) Specification. Only polling and single locality is supported as these are the limitations of the available hardware, Platform Trust Techonlogy (PTT) in Haswell CPUs. The driver always applies CRB with ACPI start because PTT reports using only ACPI start as start method but as a result of my testing it requires also CRB start. Signed-off-by: Jarkko Sakkinen Reviewed-by: Jasob Gunthorpe Reviewed-by: Stefan Berger Signed-off-by: Peter Huewe --- drivers/char/tpm/Kconfig | 9 + drivers/char/tpm/Makefile | 1 + drivers/char/tpm/tpm-interface.c | 3 + drivers/char/tpm/tpm_crb.c | 354 +++++++++++++++++++++++++++++++ 4 files changed, 367 insertions(+) create mode 100644 drivers/char/tpm/tpm_crb.c diff --git a/drivers/char/tpm/Kconfig b/drivers/char/tpm/Kconfig index 3d0873b1ab5b59..9d4e37549eb289 100644 --- a/drivers/char/tpm/Kconfig +++ b/drivers/char/tpm/Kconfig @@ -122,4 +122,13 @@ config TCG_XEN To compile this driver as a module, choose M here; the module will be called xen-tpmfront. +config TCG_CRB + tristate "TPM 2.0 CRB Interface" + depends on X86 && ACPI + ---help--- + If you have a TPM security chip that is compliant with the + TCG CRB 2.0 TPM specification say Yes and it will be accessible + from within Linux. To compile this driver as a module, choose + M here; the module will be called tpm_crb. + endif # TCG_TPM diff --git a/drivers/char/tpm/Makefile b/drivers/char/tpm/Makefile index 88848edb081c11..990cf183931d5c 100644 --- a/drivers/char/tpm/Makefile +++ b/drivers/char/tpm/Makefile @@ -22,3 +22,4 @@ obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o obj-$(CONFIG_TCG_IBMVTPM) += tpm_ibmvtpm.o obj-$(CONFIG_TCG_TIS_I2C_ST33) += tpm_i2c_stm_st33.o obj-$(CONFIG_TCG_XEN) += xen-tpmfront.o +obj-$(CONFIG_TCG_CRB) += tpm_crb.o diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 20cf94d3138636..bf53a3771da530 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -901,6 +901,9 @@ int tpm_pm_suspend(struct device *dev) if (chip == NULL) return -ENODEV; + if (chip->flags & TPM_CHIP_FLAG_TPM2) + return tpm2_shutdown(chip, TPM2_SU_CLEAR); + /* for buggy tpm, flush pcrs with extend to selected dummy */ if (tpm_suspend_pcr) { cmd.header.in = pcrextend_header; diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c new file mode 100644 index 00000000000000..c248a356d5c9e6 --- /dev/null +++ b/drivers/char/tpm/tpm_crb.c @@ -0,0 +1,354 @@ +/* + * Copyright (C) 2014 Intel Corporation + * + * Authors: + * Jarkko Sakkinen + * + * Maintained by: + * + * This device driver implements the TPM interface as defined in + * the TCG CRB 2.0 TPM specification. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include +#include +#include +#include +#include "tpm.h" + +#define ACPI_SIG_TPM2 "TPM2" + +static const u8 CRB_ACPI_START_UUID[] = { + /* 0000 */ 0xAB, 0x6C, 0xBF, 0x6B, 0x63, 0x54, 0x14, 0x47, + /* 0008 */ 0xB7, 0xCD, 0xF0, 0x20, 0x3C, 0x03, 0x68, 0xD4 +}; + +enum crb_defaults { + CRB_ACPI_START_REVISION_ID = 1, + CRB_ACPI_START_INDEX = 1, +}; + +enum crb_start_method { + CRB_SM_ACPI_START = 2, + CRB_SM_CRB = 7, + CRB_SM_CRB_WITH_ACPI_START = 8, +}; + +struct acpi_tpm2 { + struct acpi_table_header hdr; + u16 platform_class; + u16 reserved; + u64 control_area_pa; + u32 start_method; +} __packed; + +enum crb_ca_request { + CRB_CA_REQ_GO_IDLE = BIT(0), + CRB_CA_REQ_CMD_READY = BIT(1), +}; + +enum crb_ca_status { + CRB_CA_STS_ERROR = BIT(0), + CRB_CA_STS_TPM_IDLE = BIT(1), +}; + +enum crb_start { + CRB_START_INVOKE = BIT(0), +}; + +enum crb_cancel { + CRB_CANCEL_INVOKE = BIT(0), +}; + +struct crb_control_area { + u32 req; + u32 sts; + u32 cancel; + u32 start; + u32 int_enable; + u32 int_sts; + u32 cmd_size; + u64 cmd_pa; + u32 rsp_size; + u64 rsp_pa; +} __packed; + +enum crb_status { + CRB_STS_COMPLETE = BIT(0), +}; + +enum crb_flags { + CRB_FL_ACPI_START = BIT(0), + CRB_FL_CRB_START = BIT(1), +}; + +struct crb_priv { + unsigned int flags; + struct crb_control_area __iomem *cca; + u8 __iomem *cmd; + u8 __iomem *rsp; +}; + +#ifdef CONFIG_PM_SLEEP +static int crb_resume(struct device *dev) +{ + int rc; + struct tpm_chip *chip = dev_get_drvdata(dev); + + rc = tpm2_shutdown(chip, TPM2_SU_STATE); + if (!rc) + rc = tpm2_do_selftest(chip); + + return rc; +} + +static SIMPLE_DEV_PM_OPS(crb_pm, tpm_pm_suspend, crb_resume); +#endif + +static u8 crb_status(struct tpm_chip *chip) +{ + struct crb_priv *priv = chip->vendor.priv; + u8 sts = 0; + + if ((le32_to_cpu(ioread32(&priv->cca->start)) & CRB_START_INVOKE) != + CRB_START_INVOKE) + sts |= CRB_STS_COMPLETE; + + return sts; +} + +static int crb_recv(struct tpm_chip *chip, u8 *buf, size_t count) +{ + struct crb_priv *priv = chip->vendor.priv; + unsigned int expected; + + /* sanity check */ + if (count < 6) + return -EIO; + + if (le32_to_cpu(ioread32(&priv->cca->sts)) & CRB_CA_STS_ERROR) + return -EIO; + + memcpy_fromio(buf, priv->rsp, 6); + expected = be32_to_cpup((__be32 *) &buf[2]); + + if (expected > count) + return -EIO; + + memcpy_fromio(&buf[6], &priv->rsp[6], expected - 6); + + return expected; +} + +static int crb_do_acpi_start(struct tpm_chip *chip) +{ + union acpi_object *obj; + int rc; + + obj = acpi_evaluate_dsm(chip->acpi_dev_handle, + CRB_ACPI_START_UUID, + CRB_ACPI_START_REVISION_ID, + CRB_ACPI_START_INDEX, + NULL); + if (!obj) + return -ENXIO; + rc = obj->integer.value == 0 ? 0 : -ENXIO; + ACPI_FREE(obj); + return rc; +} + +static int crb_send(struct tpm_chip *chip, u8 *buf, size_t len) +{ + struct crb_priv *priv = chip->vendor.priv; + int rc = 0; + + if (len > le32_to_cpu(ioread32(&priv->cca->cmd_size))) { + dev_err(&chip->dev, + "invalid command count value %x %zx\n", + (unsigned int) len, + (size_t) le32_to_cpu(ioread32(&priv->cca->cmd_size))); + return -E2BIG; + } + + memcpy_toio(priv->cmd, buf, len); + + /* Make sure that cmd is populated before issuing start. */ + wmb(); + + if (priv->flags & CRB_FL_CRB_START) + iowrite32(cpu_to_le32(CRB_START_INVOKE), &priv->cca->start); + + if (priv->flags & CRB_FL_ACPI_START) + rc = crb_do_acpi_start(chip); + + return rc; +} + +static void crb_cancel(struct tpm_chip *chip) +{ + struct crb_priv *priv = chip->vendor.priv; + + iowrite32(cpu_to_le32(CRB_CANCEL_INVOKE), &priv->cca->cancel); + + /* Make sure that cmd is populated before issuing cancel. */ + wmb(); + + if ((priv->flags & CRB_FL_ACPI_START) && crb_do_acpi_start(chip)) + dev_err(&chip->dev, "ACPI Start failed\n"); + + iowrite32(0, &priv->cca->cancel); +} + +static bool crb_req_canceled(struct tpm_chip *chip, u8 status) +{ + struct crb_priv *priv = chip->vendor.priv; + u32 cancel = le32_to_cpu(ioread32(&priv->cca->cancel)); + + return (cancel & CRB_CANCEL_INVOKE) == CRB_CANCEL_INVOKE; +} + +static const struct tpm_class_ops tpm_crb = { + .status = crb_status, + .recv = crb_recv, + .send = crb_send, + .cancel = crb_cancel, + .req_canceled = crb_req_canceled, + .req_complete_mask = CRB_STS_COMPLETE, + .req_complete_val = CRB_STS_COMPLETE, +}; + +static int crb_acpi_add(struct acpi_device *device) +{ + struct tpm_chip *chip; + struct acpi_tpm2 *buf; + struct crb_priv *priv; + struct device *dev = &device->dev; + acpi_status status; + u32 sm; + u64 pa; + int rc; + + chip = tpmm_chip_alloc(dev, &tpm_crb); + if (IS_ERR(chip)) + return PTR_ERR(chip); + + chip->flags = TPM_CHIP_FLAG_TPM2; + + status = acpi_get_table(ACPI_SIG_TPM2, 1, + (struct acpi_table_header **) &buf); + if (ACPI_FAILURE(status)) { + dev_err(dev, "failed to get TPM2 ACPI table\n"); + return -ENODEV; + } + + if (buf->hdr.length < sizeof(struct acpi_tpm2)) { + dev_err(dev, "TPM2 ACPI table has wrong size"); + return -EINVAL; + } + + priv = (struct crb_priv *) devm_kzalloc(dev, sizeof(struct crb_priv), + GFP_KERNEL); + if (!priv) { + dev_err(dev, "failed to devm_kzalloc for private data\n"); + return -ENOMEM; + } + + sm = le32_to_cpu(buf->start_method); + + /* The reason for the extra quirk is that the PTT in 4th Gen Core CPUs + * report only ACPI start but in practice seems to require both + * ACPI start and CRB start. + */ + if (sm == CRB_SM_CRB || sm == CRB_SM_CRB_WITH_ACPI_START || + !strcmp(acpi_device_hid(device), "MSFT0101")) + priv->flags |= CRB_FL_CRB_START; + + if (sm == CRB_SM_ACPI_START || sm == CRB_SM_CRB_WITH_ACPI_START) + priv->flags |= CRB_FL_ACPI_START; + + priv->cca = (struct crb_control_area __iomem *) + devm_ioremap_nocache(dev, buf->control_area_pa, 0x1000); + if (!priv->cca) { + dev_err(dev, "ioremap of the control area failed\n"); + return -ENOMEM; + } + + memcpy_fromio(&pa, &priv->cca->cmd_pa, 8); + pa = le64_to_cpu(pa); + priv->cmd = devm_ioremap_nocache(dev, le64_to_cpu(pa), + ioread32(&priv->cca->cmd_size)); + if (!priv->cmd) { + dev_err(dev, "ioremap of the command buffer failed\n"); + return -ENOMEM; + } + + memcpy_fromio(&pa, &priv->cca->rsp_pa, 8); + pa = le64_to_cpu(pa); + priv->rsp = devm_ioremap_nocache(dev, le64_to_cpu(pa), + ioread32(&priv->cca->rsp_size)); + if (!priv->rsp) { + dev_err(dev, "ioremap of the response buffer failed\n"); + return -ENOMEM; + } + + chip->vendor.priv = priv; + + /* Default timeouts and durations */ + chip->vendor.timeout_a = msecs_to_jiffies(TPM2_TIMEOUT_A); + chip->vendor.timeout_b = msecs_to_jiffies(TPM2_TIMEOUT_B); + chip->vendor.timeout_c = msecs_to_jiffies(TPM2_TIMEOUT_C); + chip->vendor.timeout_d = msecs_to_jiffies(TPM2_TIMEOUT_D); + chip->vendor.duration[TPM_SHORT] = + msecs_to_jiffies(TPM2_DURATION_SHORT); + chip->vendor.duration[TPM_MEDIUM] = + msecs_to_jiffies(TPM2_DURATION_MEDIUM); + chip->vendor.duration[TPM_LONG] = + msecs_to_jiffies(TPM2_DURATION_LONG); + + chip->acpi_dev_handle = device->handle; + + rc = tpm2_do_selftest(chip); + if (rc) + return rc; + + return tpm_chip_register(chip); +} + +static int crb_acpi_remove(struct acpi_device *device) +{ + struct device *dev = &device->dev; + struct tpm_chip *chip = dev_get_drvdata(dev); + + tpm_chip_unregister(chip); + return 0; +} + +static struct acpi_device_id crb_device_ids[] = { + {"MSFT0101", 0}, + {"", 0}, +}; +MODULE_DEVICE_TABLE(acpi, crb_device_ids); + +static struct acpi_driver crb_acpi_driver = { + .name = "tpm_crb", + .ids = crb_device_ids, + .ops = { + .add = crb_acpi_add, + .remove = crb_acpi_remove, + }, + .drv = { + .pm = &crb_pm, + }, +}; + +module_acpi_driver(crb_acpi_driver); +MODULE_AUTHOR("Jarkko Sakkinen "); +MODULE_DESCRIPTION("TPM2 Driver"); +MODULE_VERSION("0.1"); +MODULE_LICENSE("GPL"); From aec04cbdf7231c1b0da76172de82dfa2388a80d7 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 12 Dec 2014 11:46:40 -0800 Subject: [PATCH 129/601] tpm: TPM 2.0 FIFO Interface Detect TPM 2.0 by sending idempotent TPM 2.x command. Ordinals for TPM 2.0 are higher than TPM 1.x commands so this should be fail-safe. Using STS3 is unreliable because some chips just report 0xff and not what the spec says. Before TPM family is detected, timeouts are set to the maximum values for both TPM 1.x and TPM 2.x. In addition to this, suspend/resume functionality is implemented for TPM 2.x. Signed-off-by: Jarkko Sakkinen Signed-off-by: Will Arthur Reviewed-by: Jasob Gunthorpe Reviewed-by: Stefan Berger Reviewed-by: Peter Huewe Tested-by: Peter Huewe Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_tis.c | 112 +++++++++++++++++++++++++++++-------- 1 file changed, 89 insertions(+), 23 deletions(-) diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 239cf0bbc1a1d5..20a61bc98db873 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2005, 2006 IBM Corporation + * Copyright (C) 2014 Intel Corporation * * Authors: * Leendert van Doorn @@ -64,12 +65,22 @@ enum tis_defaults { TIS_LONG_TIMEOUT = 2000, /* 2 sec */ }; + +/* Some timeout values are needed before it is known whether the chip is + * TPM 1.0 or TPM 2.0. + */ +#define TIS_TIMEOUT_A_MAX max(TIS_SHORT_TIMEOUT, TPM2_TIMEOUT_A) +#define TIS_TIMEOUT_B_MAX max(TIS_LONG_TIMEOUT, TPM2_TIMEOUT_B) +#define TIS_TIMEOUT_C_MAX max(TIS_SHORT_TIMEOUT, TPM2_TIMEOUT_C) +#define TIS_TIMEOUT_D_MAX max(TIS_SHORT_TIMEOUT, TPM2_TIMEOUT_D) + #define TPM_ACCESS(l) (0x0000 | ((l) << 12)) #define TPM_INT_ENABLE(l) (0x0008 | ((l) << 12)) #define TPM_INT_VECTOR(l) (0x000C | ((l) << 12)) #define TPM_INT_STATUS(l) (0x0010 | ((l) << 12)) #define TPM_INTF_CAPS(l) (0x0014 | ((l) << 12)) #define TPM_STS(l) (0x0018 | ((l) << 12)) +#define TPM_STS3(l) (0x001b | ((l) << 12)) #define TPM_DATA_FIFO(l) (0x0024 | ((l) << 12)) #define TPM_DID_VID(l) (0x0F00 | ((l) << 12)) @@ -363,6 +374,7 @@ static int tpm_tis_send_main(struct tpm_chip *chip, u8 *buf, size_t len) { int rc; u32 ordinal; + unsigned long dur; rc = tpm_tis_send_data(chip, buf, len); if (rc < 0) @@ -374,9 +386,14 @@ static int tpm_tis_send_main(struct tpm_chip *chip, u8 *buf, size_t len) if (chip->vendor.irq) { ordinal = be32_to_cpu(*((__be32 *) (buf + 6))); + + if (chip->flags & TPM_CHIP_FLAG_TPM2) + dur = tpm2_calc_ordinal_duration(chip, ordinal); + else + dur = tpm_calc_ordinal_duration(chip, ordinal); + if (wait_for_tpm_stat - (chip, TPM_STS_DATA_AVAIL | TPM_STS_VALID, - tpm_calc_ordinal_duration(chip, ordinal), + (chip, TPM_STS_DATA_AVAIL | TPM_STS_VALID, dur, &chip->vendor.read_queue, false) < 0) { rc = -ETIME; goto out_err; @@ -598,17 +615,19 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle, return PTR_ERR(chip); chip->vendor.priv = priv; +#ifdef CONFIG_ACPI chip->acpi_dev_handle = acpi_dev_handle; +#endif chip->vendor.iobase = devm_ioremap(dev, start, len); if (!chip->vendor.iobase) return -EIO; - /* Default timeouts */ - chip->vendor.timeout_a = msecs_to_jiffies(TIS_SHORT_TIMEOUT); - chip->vendor.timeout_b = msecs_to_jiffies(TIS_LONG_TIMEOUT); - chip->vendor.timeout_c = msecs_to_jiffies(TIS_SHORT_TIMEOUT); - chip->vendor.timeout_d = msecs_to_jiffies(TIS_SHORT_TIMEOUT); + /* Maximum timeouts */ + chip->vendor.timeout_a = TIS_TIMEOUT_A_MAX; + chip->vendor.timeout_b = TIS_TIMEOUT_B_MAX; + chip->vendor.timeout_c = TIS_TIMEOUT_C_MAX; + chip->vendor.timeout_d = TIS_TIMEOUT_D_MAX; if (wait_startup(chip, 0) != 0) { rc = -ENODEV; @@ -620,11 +639,18 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle, goto out_err; } + /* Every TPM 2.x command has a higher ordinal than TPM 1.x commands. + * Therefore, we can use an idempotent TPM 2.x command to probe TPM 2.x. + */ + rc = tpm2_gen_interrupt(chip, true); + if (rc == 0 || rc == TPM2_RC_INITIALIZE) + chip->flags |= TPM_CHIP_FLAG_TPM2; + vendor = ioread32(chip->vendor.iobase + TPM_DID_VID(0)); chip->vendor.manufacturer_id = vendor; - dev_info(dev, - "1.2 TPM (device-id 0x%X, rev-id %d)\n", + dev_info(dev, "%s TPM (device-id 0x%X, rev-id %d)\n", + (chip->flags & TPM_CHIP_FLAG_TPM2) ? "2.0" : "1.2", vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0))); if (!itpm) { @@ -720,7 +746,10 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle, chip->vendor.probed_irq = 0; /* Generate Interrupts */ - tpm_gen_interrupt(chip); + if (chip->flags & TPM_CHIP_FLAG_TPM2) + tpm2_gen_interrupt(chip, false); + else + tpm_gen_interrupt(chip); chip->vendor.irq = chip->vendor.probed_irq; @@ -765,16 +794,44 @@ static int tpm_tis_init(struct device *dev, acpi_handle acpi_dev_handle, } } - if (tpm_get_timeouts(chip)) { - dev_err(dev, "Could not get TPM timeouts and durations\n"); - rc = -ENODEV; - goto out_err; - } + if (chip->flags & TPM_CHIP_FLAG_TPM2) { + chip->vendor.timeout_a = msecs_to_jiffies(TPM2_TIMEOUT_A); + chip->vendor.timeout_b = msecs_to_jiffies(TPM2_TIMEOUT_B); + chip->vendor.timeout_c = msecs_to_jiffies(TPM2_TIMEOUT_C); + chip->vendor.timeout_d = msecs_to_jiffies(TPM2_TIMEOUT_D); + chip->vendor.duration[TPM_SHORT] = + msecs_to_jiffies(TPM2_DURATION_SHORT); + chip->vendor.duration[TPM_MEDIUM] = + msecs_to_jiffies(TPM2_DURATION_MEDIUM); + chip->vendor.duration[TPM_LONG] = + msecs_to_jiffies(TPM2_DURATION_LONG); + + rc = tpm2_do_selftest(chip); + if (rc == TPM2_RC_INITIALIZE) { + dev_warn(dev, "Firmware has not started TPM\n"); + rc = tpm2_startup(chip, TPM2_SU_CLEAR); + if (!rc) + rc = tpm2_do_selftest(chip); + } - if (tpm_do_selftest(chip)) { - dev_err(dev, "TPM self test failed\n"); - rc = -ENODEV; - goto out_err; + if (rc) { + dev_err(dev, "TPM self test failed\n"); + if (rc > 0) + rc = -ENODEV; + goto out_err; + } + } else { + if (tpm_get_timeouts(chip)) { + dev_err(dev, "Could not get TPM timeouts and durations\n"); + rc = -ENODEV; + goto out_err; + } + + if (tpm_do_selftest(chip)) { + dev_err(dev, "TPM self test failed\n"); + rc = -ENODEV; + goto out_err; + } } return tpm_chip_register(chip); @@ -808,14 +865,23 @@ static void tpm_tis_reenable_interrupts(struct tpm_chip *chip) static int tpm_tis_resume(struct device *dev) { struct tpm_chip *chip = dev_get_drvdata(dev); - int ret; + int ret = 0; if (chip->vendor.irq) tpm_tis_reenable_interrupts(chip); - ret = tpm_pm_resume(dev); - if (!ret) - tpm_do_selftest(chip); + if (chip->flags & TPM_CHIP_FLAG_TPM2) { + /* NOP if firmware properly does this. */ + tpm2_startup(chip, TPM2_SU_STATE); + + ret = tpm2_shutdown(chip, TPM2_SU_STATE); + if (!ret) + ret = tpm2_do_selftest(chip); + } else { + ret = tpm_pm_resume(dev); + if (!ret) + tpm_do_selftest(chip); + } return ret; } From c4eadfafb91d5501095c55ffadaa1168743f39d3 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 13 Jan 2015 23:13:14 +0100 Subject: [PATCH 130/601] tpm/tpm_i2c_stm_st33: Add status check when reading data on the FIFO Add a return value check when reading data from the FIFO register. Cc: Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Reviewed-by: Peter Huewe Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index 48c4808d5a7a0c..4f70296f3a7fb3 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -441,7 +441,7 @@ static int wait_for_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout, */ static int recv_data(struct tpm_chip *chip, u8 *buf, size_t count) { - int size = 0, burstcnt, len; + int size = 0, burstcnt, len, ret; struct tpm_stm_dev *tpm_dev; tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); @@ -455,7 +455,10 @@ static int recv_data(struct tpm_chip *chip, u8 *buf, size_t count) if (burstcnt < 0) return burstcnt; len = min_t(int, burstcnt, count - size); - I2C_READ_DATA(tpm_dev, TPM_DATA_FIFO, buf + size, len); + ret = I2C_READ_DATA(tpm_dev, TPM_DATA_FIFO, buf + size, len); + if (ret < 0) + return ret; + size += len; } return size; From ac77d33e91efcccb7b5b555a41481a6d775b6f69 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 13 Jan 2015 23:13:09 +0100 Subject: [PATCH 131/601] tpm/tpm_i2c_stm_st33: Remove sparse spaces Remove some useless spaces (new line or space) Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Reviewed-by: Peter Huewe Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index 4f70296f3a7fb3..f05b0a43d87b13 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -238,7 +238,6 @@ static int check_locality(struct tpm_chip *chip) return chip->vendor.locality; return -EACCES; - } /* check_locality() */ /* @@ -352,7 +351,7 @@ static u8 interrupt_to_status(u8 irq_mask) { u8 status = 0; - if ((irq_mask & TPM_INTF_STS_VALID_INT) == TPM_INTF_STS_VALID_INT) + if ((irq_mask & TPM_INTF_STS_VALID_INT) == TPM_INTF_STS_VALID_INT) status |= TPM_STS_VALID; if ((irq_mask & TPM_INTF_DATA_AVAIL_INT) == TPM_INTF_DATA_AVAIL_INT) status |= TPM_STS_DATA_AVAIL; @@ -573,7 +572,7 @@ static int tpm_stm_i2c_send(struct tpm_chip *chip, unsigned char *buf, * @param: buf, the buffer to store datas. * @param: count, the number of bytes to send. * @return: In case of success the number of bytes received. - * In other case, a < 0 value describing the issue. + * In other case, a < 0 value describing the issue. */ static int tpm_stm_i2c_recv(struct tpm_chip *chip, unsigned char *buf, size_t count) @@ -636,7 +635,6 @@ static int tpm_stm_i2c_of_request_resources(struct tpm_chip *chip) struct device_node *pp; struct tpm_stm_dev *tpm_dev = (struct tpm_stm_dev *)TPM_VPRIV(chip); struct i2c_client *client = tpm_dev->client; - int gpio; int ret; @@ -849,7 +847,7 @@ static int tpm_stm_i2c_pm_suspend(struct device *dev) ret = tpm_pm_suspend(dev); return ret; -} /* tpm_stm_i2c_suspend() */ +} /* tpm_stm_i2c_suspend() */ /* * tpm_stm_i2c_pm_resume resume the TPM device From fc9ad777c3ebbeaf423f75e0d8b04db42b3c9ebf Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 13 Jan 2015 23:13:10 +0100 Subject: [PATCH 132/601] tpm/tpm_i2c_stm_st33: Sanity cleanup Cleanup header description and correct some indent. Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Reviewed-by: Peter Huewe Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index f05b0a43d87b13..d6e87d73f94942 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -814,8 +814,7 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) /* * tpm_stm_i2c_remove remove the TPM device - * @param: client, the i2c_client drescription (TPM I2C description). - * clear_bit(0, &chip->is_open); + * @param: client, the i2c_client description (TPM I2C description). * @return: 0 in case of success. */ static int tpm_stm_i2c_remove(struct i2c_client *client) @@ -890,7 +889,8 @@ MODULE_DEVICE_TABLE(of, of_st33zp24_i2c_match); #endif static SIMPLE_DEV_PM_OPS(tpm_stm_i2c_ops, tpm_stm_i2c_pm_suspend, - tpm_stm_i2c_pm_resume); + tpm_stm_i2c_pm_resume); + static struct i2c_driver tpm_stm_i2c_driver = { .driver = { .owner = THIS_MODULE, From 7b1ee96f074cc931a2c226b33607fe7a74d5cba3 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 13 Jan 2015 23:13:11 +0100 Subject: [PATCH 133/601] tpm/tpm_i2c_stm_st33: Replace remaining r by ret Some places are still using r instead of ret. Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Reviewed-by: Peter Huewe Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index d6e87d73f94942..86a24ced66c005 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -374,7 +374,7 @@ static int wait_for_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout, wait_queue_head_t *queue, bool check_cancel) { unsigned long stop; - int r; + int ret; bool canceled = false; bool condition; u32 cur_intrs; @@ -400,7 +400,7 @@ static int wait_for_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout, if ((long) timeout <= 0) return -1; - r = wait_event_interruptible_timeout(*queue, + ret = wait_event_interruptible_timeout(*queue, cur_intrs != tpm_dev->intrs, timeout); interrupt |= clear_interruption(tpm_dev); @@ -408,12 +408,12 @@ static int wait_for_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout, condition = wait_for_tpm_stat_cond(chip, mask, check_cancel, &canceled); - if (r >= 0 && condition) { + if (ret >= 0 && condition) { if (canceled) return -ECANCELED; return 0; } - if (r == -ERESTARTSYS && freezing(current)) { + if (ret == -ERESTARTSYS && freezing(current)) { clear_thread_flag(TIF_SIGPENDING); goto again; } From 3eda7d0ea3a0365aa72a2007f9450f314d92f065 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 13 Jan 2015 23:13:13 +0100 Subject: [PATCH 134/601] tpm/tpm_i2c_stm_st33: Change tpm_i2c_stm_st33.h to tpm_stm_st33.h include/linux/platform_data/tpm_i2c_stm_st33.h can be used by other st33 tpm device driver not using i2c protocol. Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Reviewed-by: Peter Huewe Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_i2c_stm_st33.c | 2 +- .../{tpm_i2c_stm_st33.h => tpm_stm_st33.h} | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) rename include/linux/platform_data/{tpm_i2c_stm_st33.h => tpm_stm_st33.h} (85%) diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index 86a24ced66c005..dbab8d0d875eca 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -50,7 +50,7 @@ #include #include -#include +#include #include "tpm.h" #define TPM_ACCESS 0x0 diff --git a/include/linux/platform_data/tpm_i2c_stm_st33.h b/include/linux/platform_data/tpm_stm_st33.h similarity index 85% rename from include/linux/platform_data/tpm_i2c_stm_st33.h rename to include/linux/platform_data/tpm_stm_st33.h index 85775cf5f9a54a..ff75310c0f4746 100644 --- a/include/linux/platform_data/tpm_i2c_stm_st33.h +++ b/include/linux/platform_data/tpm_stm_st33.h @@ -22,18 +22,18 @@ * * @Author: Christophe RICARD tpmsupport@st.com * - * @File: stm_st33_tpm_i2c.h + * @File: stm_st33_tpm.h * * @Date: 09/15/2010 */ -#ifndef __STM_ST33_TPM_I2C_MAIN_H__ -#define __STM_ST33_TPM_I2C_MAIN_H__ +#ifndef __STM_ST33_TPM_H__ +#define __STM_ST33_TPM_H__ - -#define TPM_ST33_I2C "st33zp24_i2c" +#define TPM_ST33_I2C "st33zp24-i2c" +#define TPM_ST33_SPI "st33zp24-spi" struct st33zp24_platform_data { int io_lpcpd; }; -#endif /* __STM_ST33_TPM_I2C_MAIN_H__ */ +#endif /* __STM_ST33_TPM_H__ */ From 961be6650bd4de3ac0dbbb997fa35aabc553d52b Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 13 Jan 2015 23:13:15 +0100 Subject: [PATCH 135/601] tpm/tpm_i2c_stm_st33/dts/st33zp24-i2c: Rename st33zp24 dts documentation st33zp24 exists in i2c and spi version. Both have different possible configuration. st33zp24.txt is renamed st33zp24-i2c.txt. Reviewed-by: Jason Gunthorpe Signed-off-by: Christophe Ricard Reviewed-by: Peter Huewe Signed-off-by: Peter Huewe --- .../bindings/security/tpm/{st33zp24.txt => st33zp24-i2c.txt} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename Documentation/devicetree/bindings/security/tpm/{st33zp24.txt => st33zp24-i2c.txt} (97%) diff --git a/Documentation/devicetree/bindings/security/tpm/st33zp24.txt b/Documentation/devicetree/bindings/security/tpm/st33zp24-i2c.txt similarity index 97% rename from Documentation/devicetree/bindings/security/tpm/st33zp24.txt rename to Documentation/devicetree/bindings/security/tpm/st33zp24-i2c.txt index 0a7361dae65f57..3ad115efed1ef9 100644 --- a/Documentation/devicetree/bindings/security/tpm/st33zp24.txt +++ b/Documentation/devicetree/bindings/security/tpm/st33zp24-i2c.txt @@ -25,7 +25,7 @@ Example (for ARM-based BeagleBoard xM with ST33ZP24 on I2C2): compatible = "st,st33zp24-i2c"; - reg = <0x013>; + reg = <0x13>; clock-frequency = <400000>; interrupt-parent = <&gpio5>; From f78c81b429ccb0a0574a1f169d208ce159007cb1 Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Sat, 17 Jan 2015 15:17:51 +0100 Subject: [PATCH 136/601] MAINTAINERS: Add Patchwork and Git URL for TPMDD Maybe it helps people finding the right tree and also simplifies tracking of their patches Signed-off-by: Peter Huewe --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index e18aef54b4b27b..ddf762511cb3ca 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9723,6 +9723,8 @@ M: Peter Huewe M: Marcel Selhorst W: http://tpmdd.sourceforge.net L: tpmdd-devel@lists.sourceforge.net (moderated for non-subscribers) +Q: git git://github.com/PeterHuewe/linux-tpmdd.git +T: https://github.com/PeterHuewe/linux-tpmdd S: Maintained F: drivers/char/tpm/ From 3b09825dd8758a59c1a70e18df5bc8d04f8e5ced Mon Sep 17 00:00:00 2001 From: Bruno E O Meneguele Date: Sat, 17 Jan 2015 17:03:30 +0100 Subject: [PATCH 137/601] char/tpm: fixed white spaces coding style issues Fixed some coding style issues reported by checkpatch. Signed-off-by: Bruno E O Meneguele [phuewe: ported to latest code] Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm.h | 6 +++--- drivers/char/tpm/tpm_i2c_stm_st33.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index cc421cfde389d8..7b0727c5e803ee 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -10,13 +10,13 @@ * Maintained by: * * Device driver for TCG/TCPA TPM (trusted platform module). - * Specifications at www.trustedcomputinggroup.org + * Specifications at www.trustedcomputinggroup.org * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation, version 2 of the * License. - * + * */ #include #include @@ -141,7 +141,7 @@ struct tpm_vendor_specific { u16 manufacturer_id; }; -#define TPM_VPRIV(c) (c)->vendor.priv +#define TPM_VPRIV(c) ((c)->vendor.priv) #define TPM_VID_INTEL 0x8086 #define TPM_VID_WINBOND 0x1050 diff --git a/drivers/char/tpm/tpm_i2c_stm_st33.c b/drivers/char/tpm/tpm_i2c_stm_st33.c index dbab8d0d875eca..612845b36c299e 100644 --- a/drivers/char/tpm/tpm_i2c_stm_st33.c +++ b/drivers/char/tpm/tpm_i2c_stm_st33.c @@ -777,7 +777,7 @@ tpm_stm_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) IRQF_TRIGGER_HIGH, "TPM SERIRQ management", chip); if (ret < 0) { - dev_err(chip->pdev , "TPM SERIRQ signals %d not available\n", + dev_err(chip->pdev, "TPM SERIRQ signals %d not available\n", client->irq); goto _tpm_clean_answer; } From 114150d8f4fc9e7a003be2bdb0efd31796d241ff Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 17:51:13 +0000 Subject: [PATCH 138/601] iommu: Allow building iova.c independently In preparation for sharing the IOVA allocator, split it out under its own Kconfig symbol. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 4 ++++ drivers/iommu/Makefile | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 325188eef1c183..a839ca93376b85 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -13,6 +13,9 @@ menuconfig IOMMU_SUPPORT if IOMMU_SUPPORT +config IOMMU_IOVA + bool + config OF_IOMMU def_bool y depends on OF && IOMMU_API @@ -91,6 +94,7 @@ config INTEL_IOMMU bool "Support for Intel IOMMU using DMA Remapping Devices" depends on PCI_MSI && ACPI && (X86 || IA64_GENERIC) select IOMMU_API + select IOMMU_IOVA select DMAR_TABLE help DMA remapping (DMAR) devices support enables independent address diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 7b976f294a6959..0b1b94ef13abf9 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,13 +1,14 @@ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o +obj-$(CONFIG_IOMMU_IOVA) += iova.o obj-$(CONFIG_OF_IOMMU) += of_iommu.o obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o obj-$(CONFIG_ARM_SMMU) += arm-smmu.o obj-$(CONFIG_DMAR_TABLE) += dmar.o -obj-$(CONFIG_INTEL_IOMMU) += iova.o intel-iommu.o +obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o From 85b4545629663486b7f71047ce3b54fa0ad3eb28 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 17:51:14 +0000 Subject: [PATCH 139/601] iommu: Consolidate IOVA allocator code In order to share the IOVA allocator with other architectures, break the unnecssary dependency on the Intel IOMMU driver and move the remaining IOVA internals to iova.c Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 33 ++------------------------------- drivers/iommu/iova.c | 35 +++++++++++++++++++++++++++++++++++ include/linux/iova.h | 3 +++ 3 files changed, 40 insertions(+), 31 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 40dfbc0444c0ea..e758d8ed8fb556 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -485,7 +485,6 @@ __setup("intel_iommu=", intel_iommu_setup); static struct kmem_cache *iommu_domain_cache; static struct kmem_cache *iommu_devinfo_cache; -static struct kmem_cache *iommu_iova_cache; static inline void *alloc_pgtable_page(int node) { @@ -523,16 +522,6 @@ static inline void free_devinfo_mem(void *vaddr) kmem_cache_free(iommu_devinfo_cache, vaddr); } -struct iova *alloc_iova_mem(void) -{ - return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC); -} - -void free_iova_mem(struct iova *iova) -{ - kmem_cache_free(iommu_iova_cache, iova); -} - static inline int domain_type_is_vm(struct dmar_domain *domain) { return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE; @@ -3427,23 +3416,6 @@ static inline int iommu_devinfo_cache_init(void) return ret; } -static inline int iommu_iova_cache_init(void) -{ - int ret = 0; - - iommu_iova_cache = kmem_cache_create("iommu_iova", - sizeof(struct iova), - 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!iommu_iova_cache) { - printk(KERN_ERR "Couldn't create iova cache\n"); - ret = -ENOMEM; - } - - return ret; -} - static int __init iommu_init_mempool(void) { int ret; @@ -3461,7 +3433,7 @@ static int __init iommu_init_mempool(void) kmem_cache_destroy(iommu_domain_cache); domain_error: - kmem_cache_destroy(iommu_iova_cache); + iommu_iova_cache_destroy(); return -ENOMEM; } @@ -3470,8 +3442,7 @@ static void __init iommu_exit_mempool(void) { kmem_cache_destroy(iommu_devinfo_cache); kmem_cache_destroy(iommu_domain_cache); - kmem_cache_destroy(iommu_iova_cache); - + iommu_iova_cache_destroy(); } static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index f6b17e6af2fb26..520b8c8ae0c4df 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -18,6 +18,41 @@ */ #include +#include + +static struct kmem_cache *iommu_iova_cache; + +int iommu_iova_cache_init(void) +{ + int ret = 0; + + iommu_iova_cache = kmem_cache_create("iommu_iova", + sizeof(struct iova), + 0, + SLAB_HWCACHE_ALIGN, + NULL); + if (!iommu_iova_cache) { + pr_err("Couldn't create iova cache\n"); + ret = -ENOMEM; + } + + return ret; +} + +void iommu_iova_cache_destroy(void) +{ + kmem_cache_destroy(iommu_iova_cache); +} + +struct iova *alloc_iova_mem(void) +{ + return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC); +} + +void free_iova_mem(struct iova *iova) +{ + kmem_cache_free(iommu_iova_cache, iova); +} void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit) diff --git a/include/linux/iova.h b/include/linux/iova.h index 19e81d5ccb6dd8..ad0507c61cc7cc 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -39,6 +39,9 @@ static inline unsigned long iova_size(struct iova *iova) return iova->pfn_hi - iova->pfn_lo + 1; } +int iommu_iova_cache_init(void); +void iommu_iova_cache_destroy(void); + struct iova *alloc_iova_mem(void); void free_iova_mem(struct iova *iova); void free_iova(struct iova_domain *iovad, unsigned long pfn); From 1b72250076dde4276acecf3a7da722b185703e78 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 17:51:15 +0000 Subject: [PATCH 140/601] iommu: Make IOVA domain low limit flexible To share the IOVA allocator with other architectures, it needs to accommodate more general aperture restrictions; move the lower limit from a compile-time constant to a runtime domain property to allow IOVA domains with different requirements to co-exist. Also reword the slightly unclear description of alloc_iova since we're touching it anyway. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 9 ++++++--- drivers/iommu/iova.c | 10 ++++++---- include/linux/iova.h | 7 +++---- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index e758d8ed8fb556..86f9e82b015b60 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -71,6 +71,9 @@ __DOMAIN_MAX_PFN(gaw), (unsigned long)-1)) #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT) +/* IO virtual address start page frame number */ +#define IOVA_START_PFN (1) + #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) #define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) @@ -1632,7 +1635,7 @@ static int dmar_init_reserved_ranges(void) struct iova *iova; int i; - init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); + init_iova_domain(&reserved_iova_list, IOVA_START_PFN, DMA_32BIT_PFN); lockdep_set_class(&reserved_iova_list.iova_rbtree_lock, &reserved_rbtree_key); @@ -1690,7 +1693,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width) int adjust_width, agaw; unsigned long sagaw; - init_iova_domain(&domain->iovad, DMA_32BIT_PFN); + init_iova_domain(&domain->iovad, IOVA_START_PFN, DMA_32BIT_PFN); domain_reserve_special_ranges(domain); /* calculate AGAW */ @@ -4313,7 +4316,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) { int adjust_width; - init_iova_domain(&domain->iovad, DMA_32BIT_PFN); + init_iova_domain(&domain->iovad, IOVA_START_PFN, DMA_32BIT_PFN); domain_reserve_special_ranges(domain); /* calculate AGAW */ diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 520b8c8ae0c4df..a3dbba8caa19f8 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -55,11 +55,13 @@ void free_iova_mem(struct iova *iova) } void -init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit) +init_iova_domain(struct iova_domain *iovad, unsigned long start_pfn, + unsigned long pfn_32bit) { spin_lock_init(&iovad->iova_rbtree_lock); iovad->rbroot = RB_ROOT; iovad->cached32_node = NULL; + iovad->start_pfn = start_pfn; iovad->dma_32bit_pfn = pfn_32bit; } @@ -162,7 +164,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, if (!curr) { if (size_aligned) pad_size = iova_get_pad_size(size, limit_pfn); - if ((IOVA_START_PFN + size + pad_size) > limit_pfn) { + if ((iovad->start_pfn + size + pad_size) > limit_pfn) { spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); return -ENOMEM; } @@ -237,8 +239,8 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova) * @size: - size of page frames to allocate * @limit_pfn: - max limit address * @size_aligned: - set if size_aligned address range is required - * This function allocates an iova in the range limit_pfn to IOVA_START_PFN - * looking from limit_pfn instead from IOVA_START_PFN. If the size_aligned + * This function allocates an iova in the range iovad->start_pfn to limit_pfn, + * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned * flag is set then the allocated address iova->pfn_lo will be naturally * aligned on roundup_power_of_two(size). */ diff --git a/include/linux/iova.h b/include/linux/iova.h index ad0507c61cc7cc..591b19626b4653 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -16,9 +16,6 @@ #include #include -/* IO virtual address start page frame number */ -#define IOVA_START_PFN (1) - /* iova structure */ struct iova { struct rb_node node; @@ -31,6 +28,7 @@ struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ struct rb_node *cached32_node; /* Save last alloced node */ + unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; }; @@ -52,7 +50,8 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); -void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit); +void init_iova_domain(struct iova_domain *iovad, unsigned long start_pfn, + unsigned long pfn_32bit); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, From 0fb5fe874c42942e16c450ae05da453e13a1c09e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 17:51:16 +0000 Subject: [PATCH 141/601] iommu: Make IOVA domain page size explicit Systems may contain heterogeneous IOMMUs supporting differing minimum page sizes, which may also not be common with the CPU page size. Thus it is practical to have an explicit notion of IOVA granularity to simplify handling of mapping and allocation constraints. As an initial step, move the IOVA page granularity from an implicit compile-time constant to a per-domain property so we can make use of it in IOVA domain context at runtime. To keep the abstraction tidy, extend the little API of inline iova_* helpers to parallel some of the equivalent PAGE_* macros. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 9 ++++++--- drivers/iommu/iova.c | 12 ++++++++++-- include/linux/iova.h | 35 +++++++++++++++++++++++++++++++++-- 3 files changed, 49 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 86f9e82b015b60..ae4c1a854e5789 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1635,7 +1635,8 @@ static int dmar_init_reserved_ranges(void) struct iova *iova; int i; - init_iova_domain(&reserved_iova_list, IOVA_START_PFN, DMA_32BIT_PFN); + init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN, + DMA_32BIT_PFN); lockdep_set_class(&reserved_iova_list.iova_rbtree_lock, &reserved_rbtree_key); @@ -1693,7 +1694,8 @@ static int domain_init(struct dmar_domain *domain, int guest_width) int adjust_width, agaw; unsigned long sagaw; - init_iova_domain(&domain->iovad, IOVA_START_PFN, DMA_32BIT_PFN); + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, + DMA_32BIT_PFN); domain_reserve_special_ranges(domain); /* calculate AGAW */ @@ -4316,7 +4318,8 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) { int adjust_width; - init_iova_domain(&domain->iovad, IOVA_START_PFN, DMA_32BIT_PFN); + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, + DMA_32BIT_PFN); domain_reserve_special_ranges(domain); /* calculate AGAW */ diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index a3dbba8caa19f8..9dd8208312c2e7 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -55,12 +55,20 @@ void free_iova_mem(struct iova *iova) } void -init_iova_domain(struct iova_domain *iovad, unsigned long start_pfn, - unsigned long pfn_32bit) +init_iova_domain(struct iova_domain *iovad, unsigned long granule, + unsigned long start_pfn, unsigned long pfn_32bit) { + /* + * IOVA granularity will normally be equal to the smallest + * supported IOMMU page size; both *must* be capable of + * representing individual CPU pages exactly. + */ + BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule)); + spin_lock_init(&iovad->iova_rbtree_lock); iovad->rbroot = RB_ROOT; iovad->cached32_node = NULL; + iovad->granule = granule; iovad->start_pfn = start_pfn; iovad->dma_32bit_pfn = pfn_32bit; } diff --git a/include/linux/iova.h b/include/linux/iova.h index 591b19626b4653..3920a19d819415 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -28,6 +28,7 @@ struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ struct rb_node *cached32_node; /* Save last alloced node */ + unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; }; @@ -37,6 +38,36 @@ static inline unsigned long iova_size(struct iova *iova) return iova->pfn_hi - iova->pfn_lo + 1; } +static inline unsigned long iova_shift(struct iova_domain *iovad) +{ + return __ffs(iovad->granule); +} + +static inline unsigned long iova_mask(struct iova_domain *iovad) +{ + return iovad->granule - 1; +} + +static inline size_t iova_offset(struct iova_domain *iovad, dma_addr_t iova) +{ + return iova & iova_mask(iovad); +} + +static inline size_t iova_align(struct iova_domain *iovad, size_t size) +{ + return ALIGN(size, iovad->granule); +} + +static inline dma_addr_t iova_dma_addr(struct iova_domain *iovad, struct iova *iova) +{ + return (dma_addr_t)iova->pfn_lo << iova_shift(iovad); +} + +static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) +{ + return iova >> iova_shift(iovad); +} + int iommu_iova_cache_init(void); void iommu_iova_cache_destroy(void); @@ -50,8 +81,8 @@ struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); -void init_iova_domain(struct iova_domain *iovad, unsigned long start_pfn, - unsigned long pfn_32bit); +void init_iova_domain(struct iova_domain *iovad, unsigned long granule, + unsigned long start_pfn, unsigned long pfn_32bit); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, From 6fd492fd746d9858a41dc85eef44bd627b809109 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 16 Jan 2015 16:47:19 -0700 Subject: [PATCH 142/601] iommu: Fix trace_unmap() to report original iova iommu_unmap() calls trace_unmap() with changed iova and original size. trace_unmap() should report original iova instead. Change iommu_unmap() to call trace_unmap() with original iova. Signed-off-by: Shuah Khan Reported-by: Alex Williamson Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index f7718d73e98470..d4c3db5abf25a5 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1094,6 +1094,7 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { size_t unmapped_page, unmapped = 0; unsigned int min_pagesz; + unsigned long orig_iova = iova; if (unlikely(domain->ops->unmap == NULL || domain->ops->pgsize_bitmap == 0UL)) @@ -1133,7 +1134,7 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) unmapped += unmapped_page; } - trace_unmap(iova, 0, size); + trace_unmap(orig_iova, 0, size); return unmapped; } EXPORT_SYMBOL_GPL(iommu_unmap); From db8614d35bb8fc6d032792c801bd5b38ce860f19 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Fri, 16 Jan 2015 20:53:17 -0700 Subject: [PATCH 143/601] iommu: Change trace unmap api to report unmapped size Currently map and unmap are implemented as events under a common trace class declaration. The common class forces trace_unmap() to require a bogus physical address argument that it doesn't use. Changing unmap to report unmapped size will provide useful information for debugging. Remove common map_unmap trace class and change map and unmap into separate events as opposed to events under the same class to allow for differences in the reporting information. In addition, map and unmap are changed to handle size value as size_t instead of int to match the passed size value and avoid overflow. Signed-off-by: Shuah Khan Suggested-by: Alex Williamson Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 2 +- include/trace/events/iommu.h | 31 ++++++++++++++++++------------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index d4c3db5abf25a5..3a4fb6274c9939 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1134,7 +1134,7 @@ size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) unmapped += unmapped_page; } - trace_unmap(orig_iova, 0, size); + trace_unmap(orig_iova, size, unmapped); return unmapped; } EXPORT_SYMBOL_GPL(iommu_unmap); diff --git a/include/trace/events/iommu.h b/include/trace/events/iommu.h index a8f5c32d174b73..2c7befb10f13e3 100644 --- a/include/trace/events/iommu.h +++ b/include/trace/events/iommu.h @@ -83,7 +83,7 @@ DEFINE_EVENT(iommu_device_event, detach_device_from_domain, TP_ARGS(dev) ); -DECLARE_EVENT_CLASS(iommu_map_unmap, +TRACE_EVENT(map, TP_PROTO(unsigned long iova, phys_addr_t paddr, size_t size), @@ -92,7 +92,7 @@ DECLARE_EVENT_CLASS(iommu_map_unmap, TP_STRUCT__entry( __field(u64, iova) __field(u64, paddr) - __field(int, size) + __field(size_t, size) ), TP_fast_assign( @@ -101,26 +101,31 @@ DECLARE_EVENT_CLASS(iommu_map_unmap, __entry->size = size; ), - TP_printk("IOMMU: iova=0x%016llx paddr=0x%016llx size=0x%x", + TP_printk("IOMMU: iova=0x%016llx paddr=0x%016llx size=%zu", __entry->iova, __entry->paddr, __entry->size ) ); -DEFINE_EVENT(iommu_map_unmap, map, +TRACE_EVENT(unmap, - TP_PROTO(unsigned long iova, phys_addr_t paddr, size_t size), - - TP_ARGS(iova, paddr, size) -); + TP_PROTO(unsigned long iova, size_t size, size_t unmapped_size), -DEFINE_EVENT_PRINT(iommu_map_unmap, unmap, + TP_ARGS(iova, size, unmapped_size), - TP_PROTO(unsigned long iova, phys_addr_t paddr, size_t size), + TP_STRUCT__entry( + __field(u64, iova) + __field(size_t, size) + __field(size_t, unmapped_size) + ), - TP_ARGS(iova, paddr, size), + TP_fast_assign( + __entry->iova = iova; + __entry->size = size; + __entry->unmapped_size = unmapped_size; + ), - TP_printk("IOMMU: iova=0x%016llx size=0x%x", - __entry->iova, __entry->size + TP_printk("IOMMU: iova=0x%016llx size=%zu unmapped_size=%zu", + __entry->iova, __entry->size, __entry->unmapped_size ) ); From fdb1d7be7c4d452e9735aeb2b60ae8a2fcf0a514 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 14 Nov 2014 17:16:49 +0000 Subject: [PATCH 144/601] iommu: introduce generic page table allocation framework This patch introduces a generic framework for allocating page tables for an IOMMU. There are a number of reasons we want to do this: - It avoids duplication of complex table management code in IOMMU drivers that use the same page table format - It removes any coupling with the CPU table format (and even the architecture!) - It defines an API for IOMMU TLB maintenance Tested-by: Laurent Pinchart Signed-off-by: Will Deacon --- drivers/iommu/Kconfig | 8 +++ drivers/iommu/Makefile | 1 + drivers/iommu/io-pgtable.c | 71 ++++++++++++++++++++ drivers/iommu/io-pgtable.h | 128 +++++++++++++++++++++++++++++++++++++ 4 files changed, 208 insertions(+) create mode 100644 drivers/iommu/io-pgtable.c create mode 100644 drivers/iommu/io-pgtable.h diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 325188eef1c183..3faaa41db8ffd6 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -13,6 +13,14 @@ menuconfig IOMMU_SUPPORT if IOMMU_SUPPORT +menu "Generic IOMMU Pagetable Support" + +# Selected by the actual pagetable implementations +config IOMMU_IO_PGTABLE + bool + +endmenu + config OF_IOMMU def_bool y depends on OF && IOMMU_API diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 7b976f294a6959..701c9516e2ae18 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o +obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o obj-$(CONFIG_OF_IOMMU) += of_iommu.o obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c new file mode 100644 index 00000000000000..f664a1ca49cf8f --- /dev/null +++ b/drivers/iommu/io-pgtable.c @@ -0,0 +1,71 @@ +/* + * Generic page table allocator for IOMMUs. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Copyright (C) 2014 ARM Limited + * + * Author: Will Deacon + */ + +#include +#include +#include + +#include "io-pgtable.h" + +static const struct io_pgtable_init_fns * +io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = +{ +}; + +struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, + struct io_pgtable_cfg *cfg, + void *cookie) +{ + struct io_pgtable *iop; + const struct io_pgtable_init_fns *fns; + + if (fmt >= IO_PGTABLE_NUM_FMTS) + return NULL; + + fns = io_pgtable_init_table[fmt]; + if (!fns) + return NULL; + + iop = fns->alloc(cfg, cookie); + if (!iop) + return NULL; + + iop->fmt = fmt; + iop->cookie = cookie; + iop->cfg = *cfg; + + return &iop->ops; +} + +/* + * It is the IOMMU driver's responsibility to ensure that the page table + * is no longer accessible to the walker by this point. + */ +void free_io_pgtable_ops(struct io_pgtable_ops *ops) +{ + struct io_pgtable *iop; + + if (!ops) + return; + + iop = container_of(ops, struct io_pgtable, ops); + iop->cfg.tlb->tlb_flush_all(iop->cookie); + io_pgtable_init_table[iop->fmt]->free(iop); +} diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h new file mode 100644 index 00000000000000..fdd792c3769825 --- /dev/null +++ b/drivers/iommu/io-pgtable.h @@ -0,0 +1,128 @@ +#ifndef __IO_PGTABLE_H +#define __IO_PGTABLE_H + +/* + * Public API for use by IOMMU drivers + */ +enum io_pgtable_fmt { + IO_PGTABLE_NUM_FMTS, +}; + +/** + * struct iommu_gather_ops - IOMMU callbacks for TLB and page table management. + * + * @tlb_flush_all: Synchronously invalidate the entire TLB context. + * @tlb_add_flush: Queue up a TLB invalidation for a virtual address range. + * @tlb_sync: Ensure any queue TLB invalidation has taken effect. + * @flush_pgtable: Ensure page table updates are visible to the IOMMU. + * + * Note that these can all be called in atomic context and must therefore + * not block. + */ +struct iommu_gather_ops { + void (*tlb_flush_all)(void *cookie); + void (*tlb_add_flush)(unsigned long iova, size_t size, bool leaf, + void *cookie); + void (*tlb_sync)(void *cookie); + void (*flush_pgtable)(void *ptr, size_t size, void *cookie); +}; + +/** + * struct io_pgtable_cfg - Configuration data for a set of page tables. + * + * @quirks: A bitmap of hardware quirks that require some special + * action by the low-level page table allocator. + * @pgsize_bitmap: A bitmap of page sizes supported by this set of page + * tables. + * @ias: Input address (iova) size, in bits. + * @oas: Output address (paddr) size, in bits. + * @tlb: TLB management callbacks for this set of tables. + */ +struct io_pgtable_cfg { + int quirks; /* IO_PGTABLE_QUIRK_* */ + unsigned long pgsize_bitmap; + unsigned int ias; + unsigned int oas; + const struct iommu_gather_ops *tlb; + + /* Low-level data specific to the table format */ + union { + }; +}; + +/** + * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers. + * + * @map: Map a physically contiguous memory region. + * @unmap: Unmap a physically contiguous memory region. + * @iova_to_phys: Translate iova to physical address. + * + * These functions map directly onto the iommu_ops member functions with + * the same names. + */ +struct io_pgtable_ops { + int (*map)(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t size, int prot); + int (*unmap)(struct io_pgtable_ops *ops, unsigned long iova, + size_t size); + phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops, + unsigned long iova); +}; + +/** + * alloc_io_pgtable_ops() - Allocate a page table allocator for use by an IOMMU. + * + * @fmt: The page table format. + * @cfg: The page table configuration. This will be modified to represent + * the configuration actually provided by the allocator (e.g. the + * pgsize_bitmap may be restricted). + * @cookie: An opaque token provided by the IOMMU driver and passed back to + * the callback routines in cfg->tlb. + */ +struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, + struct io_pgtable_cfg *cfg, + void *cookie); + +/** + * free_io_pgtable_ops() - Free an io_pgtable_ops structure. The caller + * *must* ensure that the page table is no longer + * live, but the TLB can be dirty. + * + * @ops: The ops returned from alloc_io_pgtable_ops. + */ +void free_io_pgtable_ops(struct io_pgtable_ops *ops); + + +/* + * Internal structures for page table allocator implementations. + */ + +/** + * struct io_pgtable - Internal structure describing a set of page tables. + * + * @fmt: The page table format. + * @cookie: An opaque token provided by the IOMMU driver and passed back to + * any callback routines. + * @cfg: A copy of the page table configuration. + * @ops: The page table operations in use for this set of page tables. + */ +struct io_pgtable { + enum io_pgtable_fmt fmt; + void *cookie; + struct io_pgtable_cfg cfg; + struct io_pgtable_ops ops; +}; + +/** + * struct io_pgtable_init_fns - Alloc/free a set of page tables for a + * particular format. + * + * @alloc: Allocate a set of page tables described by cfg. + * @free: Free the page tables associated with iop. + */ +struct io_pgtable_init_fns { + struct io_pgtable *(*alloc)(struct io_pgtable_cfg *cfg, void *cookie); + void (*free)(struct io_pgtable *iop); +}; + +#endif /* __IO_PGTABLE_H */ From e1d3c0fd701df831169b116cd5c5d6203ac07f70 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 14 Nov 2014 17:18:23 +0000 Subject: [PATCH 145/601] iommu: add ARM LPAE page table allocator A number of IOMMUs found in ARM SoCs can walk architecture-compatible page tables. This patch adds a generic allocator for Stage-1 and Stage-2 v7/v8 long-descriptor page tables. 4k, 16k and 64k pages are supported, with up to 4-levels of walk to cover a 48-bit address space. Tested-by: Laurent Pinchart Signed-off-by: Will Deacon --- MAINTAINERS | 1 + drivers/iommu/Kconfig | 9 + drivers/iommu/Makefile | 1 + drivers/iommu/io-pgtable-arm.c | 781 +++++++++++++++++++++++++++++++++ drivers/iommu/io-pgtable.c | 11 + drivers/iommu/io-pgtable.h | 14 + 6 files changed, 817 insertions(+) create mode 100644 drivers/iommu/io-pgtable-arm.c diff --git a/MAINTAINERS b/MAINTAINERS index 3589d67437f867..00b27863384e62 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1589,6 +1589,7 @@ M: Will Deacon L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: drivers/iommu/arm-smmu.c +F: drivers/iommu/io-pgtable-arm.c ARM64 PORT (AARCH64 ARCHITECTURE) M: Catalin Marinas diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 3faaa41db8ffd6..306454fbc52d7d 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -19,6 +19,15 @@ menu "Generic IOMMU Pagetable Support" config IOMMU_IO_PGTABLE bool +config IOMMU_IO_PGTABLE_LPAE + bool "ARMv7/v8 Long Descriptor Format" + select IOMMU_IO_PGTABLE + help + Enable support for the ARM long descriptor pagetable format. + This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page + sizes at both stage-1 and stage-2, as well as address spaces + up to 48-bits in size. + endmenu config OF_IOMMU diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index 701c9516e2ae18..d6889b487d5571 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o +obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o obj-$(CONFIG_OF_IOMMU) += of_iommu.o obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c new file mode 100644 index 00000000000000..dbe6178a53e943 --- /dev/null +++ b/drivers/iommu/io-pgtable-arm.c @@ -0,0 +1,781 @@ +/* + * CPU-agnostic ARM page table allocator. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * Copyright (C) 2014 ARM Limited + * + * Author: Will Deacon + */ + +#define pr_fmt(fmt) "arm-lpae io-pgtable: " fmt + +#include +#include +#include +#include +#include + +#include "io-pgtable.h" + +#define ARM_LPAE_MAX_ADDR_BITS 48 +#define ARM_LPAE_S2_MAX_CONCAT_PAGES 16 +#define ARM_LPAE_MAX_LEVELS 4 + +/* Struct accessors */ +#define io_pgtable_to_data(x) \ + container_of((x), struct arm_lpae_io_pgtable, iop) + +#define io_pgtable_ops_to_pgtable(x) \ + container_of((x), struct io_pgtable, ops) + +#define io_pgtable_ops_to_data(x) \ + io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) + +/* + * For consistency with the architecture, we always consider + * ARM_LPAE_MAX_LEVELS levels, with the walk starting at level n >=0 + */ +#define ARM_LPAE_START_LVL(d) (ARM_LPAE_MAX_LEVELS - (d)->levels) + +/* + * Calculate the right shift amount to get to the portion describing level l + * in a virtual address mapped by the pagetable in d. + */ +#define ARM_LPAE_LVL_SHIFT(l,d) \ + ((((d)->levels - ((l) - ARM_LPAE_START_LVL(d) + 1)) \ + * (d)->bits_per_level) + (d)->pg_shift) + +#define ARM_LPAE_PAGES_PER_PGD(d) ((d)->pgd_size >> (d)->pg_shift) + +/* + * Calculate the index at level l used to map virtual address a using the + * pagetable in d. + */ +#define ARM_LPAE_PGD_IDX(l,d) \ + ((l) == ARM_LPAE_START_LVL(d) ? ilog2(ARM_LPAE_PAGES_PER_PGD(d)) : 0) + +#define ARM_LPAE_LVL_IDX(a,l,d) \ + (((a) >> ARM_LPAE_LVL_SHIFT(l,d)) & \ + ((1 << ((d)->bits_per_level + ARM_LPAE_PGD_IDX(l,d))) - 1)) + +/* Calculate the block/page mapping size at level l for pagetable in d. */ +#define ARM_LPAE_BLOCK_SIZE(l,d) \ + (1 << (ilog2(sizeof(arm_lpae_iopte)) + \ + ((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level))) + +/* Page table bits */ +#define ARM_LPAE_PTE_TYPE_SHIFT 0 +#define ARM_LPAE_PTE_TYPE_MASK 0x3 + +#define ARM_LPAE_PTE_TYPE_BLOCK 1 +#define ARM_LPAE_PTE_TYPE_TABLE 3 +#define ARM_LPAE_PTE_TYPE_PAGE 3 + +#define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53) +#define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10) +#define ARM_LPAE_PTE_SH_NS (((arm_lpae_iopte)0) << 8) +#define ARM_LPAE_PTE_SH_OS (((arm_lpae_iopte)2) << 8) +#define ARM_LPAE_PTE_SH_IS (((arm_lpae_iopte)3) << 8) +#define ARM_LPAE_PTE_VALID (((arm_lpae_iopte)1) << 0) + +#define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2) +/* Ignore the contiguous bit for block splitting */ +#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)6) << 52) +#define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK | \ + ARM_LPAE_PTE_ATTR_HI_MASK) + +/* Stage-1 PTE */ +#define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6) +#define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)2) << 6) +#define ARM_LPAE_PTE_ATTRINDX_SHIFT 2 +#define ARM_LPAE_PTE_nG (((arm_lpae_iopte)1) << 11) + +/* Stage-2 PTE */ +#define ARM_LPAE_PTE_HAP_FAULT (((arm_lpae_iopte)0) << 6) +#define ARM_LPAE_PTE_HAP_READ (((arm_lpae_iopte)1) << 6) +#define ARM_LPAE_PTE_HAP_WRITE (((arm_lpae_iopte)2) << 6) +#define ARM_LPAE_PTE_MEMATTR_OIWB (((arm_lpae_iopte)0xf) << 2) +#define ARM_LPAE_PTE_MEMATTR_NC (((arm_lpae_iopte)0x5) << 2) +#define ARM_LPAE_PTE_MEMATTR_DEV (((arm_lpae_iopte)0x1) << 2) + +/* Register bits */ +#define ARM_32_LPAE_TCR_EAE (1 << 31) +#define ARM_64_LPAE_S2_TCR_RES1 (1 << 31) + +#define ARM_LPAE_TCR_TG0_4K (0 << 14) +#define ARM_LPAE_TCR_TG0_64K (1 << 14) +#define ARM_LPAE_TCR_TG0_16K (2 << 14) + +#define ARM_LPAE_TCR_SH0_SHIFT 12 +#define ARM_LPAE_TCR_SH0_MASK 0x3 +#define ARM_LPAE_TCR_SH_NS 0 +#define ARM_LPAE_TCR_SH_OS 2 +#define ARM_LPAE_TCR_SH_IS 3 + +#define ARM_LPAE_TCR_ORGN0_SHIFT 10 +#define ARM_LPAE_TCR_IRGN0_SHIFT 8 +#define ARM_LPAE_TCR_RGN_MASK 0x3 +#define ARM_LPAE_TCR_RGN_NC 0 +#define ARM_LPAE_TCR_RGN_WBWA 1 +#define ARM_LPAE_TCR_RGN_WT 2 +#define ARM_LPAE_TCR_RGN_WB 3 + +#define ARM_LPAE_TCR_SL0_SHIFT 6 +#define ARM_LPAE_TCR_SL0_MASK 0x3 + +#define ARM_LPAE_TCR_T0SZ_SHIFT 0 +#define ARM_LPAE_TCR_SZ_MASK 0xf + +#define ARM_LPAE_TCR_PS_SHIFT 16 +#define ARM_LPAE_TCR_PS_MASK 0x7 + +#define ARM_LPAE_TCR_IPS_SHIFT 32 +#define ARM_LPAE_TCR_IPS_MASK 0x7 + +#define ARM_LPAE_TCR_PS_32_BIT 0x0ULL +#define ARM_LPAE_TCR_PS_36_BIT 0x1ULL +#define ARM_LPAE_TCR_PS_40_BIT 0x2ULL +#define ARM_LPAE_TCR_PS_42_BIT 0x3ULL +#define ARM_LPAE_TCR_PS_44_BIT 0x4ULL +#define ARM_LPAE_TCR_PS_48_BIT 0x5ULL + +#define ARM_LPAE_MAIR_ATTR_SHIFT(n) ((n) << 3) +#define ARM_LPAE_MAIR_ATTR_MASK 0xff +#define ARM_LPAE_MAIR_ATTR_DEVICE 0x04 +#define ARM_LPAE_MAIR_ATTR_NC 0x44 +#define ARM_LPAE_MAIR_ATTR_WBRWA 0xff +#define ARM_LPAE_MAIR_ATTR_IDX_NC 0 +#define ARM_LPAE_MAIR_ATTR_IDX_CACHE 1 +#define ARM_LPAE_MAIR_ATTR_IDX_DEV 2 + +/* IOPTE accessors */ +#define iopte_deref(pte,d) \ + (__va((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1) \ + & ~((1ULL << (d)->pg_shift) - 1))) + +#define iopte_type(pte,l) \ + (((pte) >> ARM_LPAE_PTE_TYPE_SHIFT) & ARM_LPAE_PTE_TYPE_MASK) + +#define iopte_prot(pte) ((pte) & ARM_LPAE_PTE_ATTR_MASK) + +#define iopte_leaf(pte,l) \ + (l == (ARM_LPAE_MAX_LEVELS - 1) ? \ + (iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_PAGE) : \ + (iopte_type(pte,l) == ARM_LPAE_PTE_TYPE_BLOCK)) + +#define iopte_to_pfn(pte,d) \ + (((pte) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1)) >> (d)->pg_shift) + +#define pfn_to_iopte(pfn,d) \ + (((pfn) << (d)->pg_shift) & ((1ULL << ARM_LPAE_MAX_ADDR_BITS) - 1)) + +struct arm_lpae_io_pgtable { + struct io_pgtable iop; + + int levels; + size_t pgd_size; + unsigned long pg_shift; + unsigned long bits_per_level; + + void *pgd; +}; + +typedef u64 arm_lpae_iopte; + +static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, + unsigned long iova, phys_addr_t paddr, + arm_lpae_iopte prot, int lvl, + arm_lpae_iopte *ptep) +{ + arm_lpae_iopte pte = prot; + + /* We require an unmap first */ + if (WARN_ON(iopte_leaf(*ptep, lvl))) + return -EEXIST; + + if (lvl == ARM_LPAE_MAX_LEVELS - 1) + pte |= ARM_LPAE_PTE_TYPE_PAGE; + else + pte |= ARM_LPAE_PTE_TYPE_BLOCK; + + pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS; + pte |= pfn_to_iopte(paddr >> data->pg_shift, data); + + *ptep = pte; + data->iop.cfg.tlb->flush_pgtable(ptep, sizeof(*ptep), data->iop.cookie); + return 0; +} + +static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, + phys_addr_t paddr, size_t size, arm_lpae_iopte prot, + int lvl, arm_lpae_iopte *ptep) +{ + arm_lpae_iopte *cptep, pte; + void *cookie = data->iop.cookie; + size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data); + + /* Find our entry at the current level */ + ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); + + /* If we can install a leaf entry at this level, then do so */ + if (size == block_size && (size & data->iop.cfg.pgsize_bitmap)) + return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep); + + /* We can't allocate tables at the final level */ + if (WARN_ON(lvl >= ARM_LPAE_MAX_LEVELS - 1)) + return -EINVAL; + + /* Grab a pointer to the next level */ + pte = *ptep; + if (!pte) { + cptep = alloc_pages_exact(1UL << data->pg_shift, + GFP_ATOMIC | __GFP_ZERO); + if (!cptep) + return -ENOMEM; + + data->iop.cfg.tlb->flush_pgtable(cptep, 1UL << data->pg_shift, + cookie); + pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE; + *ptep = pte; + data->iop.cfg.tlb->flush_pgtable(ptep, sizeof(*ptep), cookie); + } else { + cptep = iopte_deref(pte, data); + } + + /* Rinse, repeat */ + return __arm_lpae_map(data, iova, paddr, size, prot, lvl + 1, cptep); +} + +static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, + int prot) +{ + arm_lpae_iopte pte; + + if (data->iop.fmt == ARM_64_LPAE_S1 || + data->iop.fmt == ARM_32_LPAE_S1) { + pte = ARM_LPAE_PTE_AP_UNPRIV | ARM_LPAE_PTE_nG; + + if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) + pte |= ARM_LPAE_PTE_AP_RDONLY; + + if (prot & IOMMU_CACHE) + pte |= (ARM_LPAE_MAIR_ATTR_IDX_CACHE + << ARM_LPAE_PTE_ATTRINDX_SHIFT); + } else { + pte = ARM_LPAE_PTE_HAP_FAULT; + if (prot & IOMMU_READ) + pte |= ARM_LPAE_PTE_HAP_READ; + if (prot & IOMMU_WRITE) + pte |= ARM_LPAE_PTE_HAP_WRITE; + if (prot & IOMMU_CACHE) + pte |= ARM_LPAE_PTE_MEMATTR_OIWB; + else + pte |= ARM_LPAE_PTE_MEMATTR_NC; + } + + if (prot & IOMMU_NOEXEC) + pte |= ARM_LPAE_PTE_XN; + + return pte; +} + +static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t size, int iommu_prot) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + arm_lpae_iopte *ptep = data->pgd; + int lvl = ARM_LPAE_START_LVL(data); + arm_lpae_iopte prot; + + /* If no access, then nothing to do */ + if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE))) + return 0; + + prot = arm_lpae_prot_to_pte(data, iommu_prot); + return __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep); +} + +static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl, + arm_lpae_iopte *ptep) +{ + arm_lpae_iopte *start, *end; + unsigned long table_size; + + /* Only leaf entries at the last level */ + if (lvl == ARM_LPAE_MAX_LEVELS - 1) + return; + + if (lvl == ARM_LPAE_START_LVL(data)) + table_size = data->pgd_size; + else + table_size = 1UL << data->pg_shift; + + start = ptep; + end = (void *)ptep + table_size; + + while (ptep != end) { + arm_lpae_iopte pte = *ptep++; + + if (!pte || iopte_leaf(pte, lvl)) + continue; + + __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data)); + } + + free_pages_exact(start, table_size); +} + +static void arm_lpae_free_pgtable(struct io_pgtable *iop) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_to_data(iop); + + __arm_lpae_free_pgtable(data, ARM_LPAE_START_LVL(data), data->pgd); + kfree(data); +} + +static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, + unsigned long iova, size_t size, + arm_lpae_iopte prot, int lvl, + arm_lpae_iopte *ptep, size_t blk_size) +{ + unsigned long blk_start, blk_end; + phys_addr_t blk_paddr; + arm_lpae_iopte table = 0; + void *cookie = data->iop.cookie; + const struct iommu_gather_ops *tlb = data->iop.cfg.tlb; + + blk_start = iova & ~(blk_size - 1); + blk_end = blk_start + blk_size; + blk_paddr = iopte_to_pfn(*ptep, data) << data->pg_shift; + + for (; blk_start < blk_end; blk_start += size, blk_paddr += size) { + arm_lpae_iopte *tablep; + + /* Unmap! */ + if (blk_start == iova) + continue; + + /* __arm_lpae_map expects a pointer to the start of the table */ + tablep = &table - ARM_LPAE_LVL_IDX(blk_start, lvl, data); + if (__arm_lpae_map(data, blk_start, blk_paddr, size, prot, lvl, + tablep) < 0) { + if (table) { + /* Free the table we allocated */ + tablep = iopte_deref(table, data); + __arm_lpae_free_pgtable(data, lvl + 1, tablep); + } + return 0; /* Bytes unmapped */ + } + } + + *ptep = table; + tlb->flush_pgtable(ptep, sizeof(*ptep), cookie); + iova &= ~(blk_size - 1); + tlb->tlb_add_flush(iova, blk_size, true, cookie); + return size; +} + +static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, + unsigned long iova, size_t size, int lvl, + arm_lpae_iopte *ptep) +{ + arm_lpae_iopte pte; + const struct iommu_gather_ops *tlb = data->iop.cfg.tlb; + void *cookie = data->iop.cookie; + size_t blk_size = ARM_LPAE_BLOCK_SIZE(lvl, data); + + ptep += ARM_LPAE_LVL_IDX(iova, lvl, data); + pte = *ptep; + + /* Something went horribly wrong and we ran out of page table */ + if (WARN_ON(!pte || (lvl == ARM_LPAE_MAX_LEVELS))) + return 0; + + /* If the size matches this level, we're in the right place */ + if (size == blk_size) { + *ptep = 0; + tlb->flush_pgtable(ptep, sizeof(*ptep), cookie); + + if (!iopte_leaf(pte, lvl)) { + /* Also flush any partial walks */ + tlb->tlb_add_flush(iova, size, false, cookie); + tlb->tlb_sync(data->iop.cookie); + ptep = iopte_deref(pte, data); + __arm_lpae_free_pgtable(data, lvl + 1, ptep); + } else { + tlb->tlb_add_flush(iova, size, true, cookie); + } + + return size; + } else if (iopte_leaf(pte, lvl)) { + /* + * Insert a table at the next level to map the old region, + * minus the part we want to unmap + */ + return arm_lpae_split_blk_unmap(data, iova, size, + iopte_prot(pte), lvl, ptep, + blk_size); + } + + /* Keep on walkin' */ + ptep = iopte_deref(pte, data); + return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep); +} + +static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, + size_t size) +{ + size_t unmapped; + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable *iop = &data->iop; + arm_lpae_iopte *ptep = data->pgd; + int lvl = ARM_LPAE_START_LVL(data); + + unmapped = __arm_lpae_unmap(data, iova, size, lvl, ptep); + if (unmapped) + iop->cfg.tlb->tlb_sync(iop->cookie); + + return unmapped; +} + +static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, + unsigned long iova) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + arm_lpae_iopte pte, *ptep = data->pgd; + int lvl = ARM_LPAE_START_LVL(data); + + do { + /* Valid IOPTE pointer? */ + if (!ptep) + return 0; + + /* Grab the IOPTE we're interested in */ + pte = *(ptep + ARM_LPAE_LVL_IDX(iova, lvl, data)); + + /* Valid entry? */ + if (!pte) + return 0; + + /* Leaf entry? */ + if (iopte_leaf(pte,lvl)) + goto found_translation; + + /* Take it to the next level */ + ptep = iopte_deref(pte, data); + } while (++lvl < ARM_LPAE_MAX_LEVELS); + + /* Ran out of page tables to walk */ + return 0; + +found_translation: + iova &= ((1 << data->pg_shift) - 1); + return ((phys_addr_t)iopte_to_pfn(pte,data) << data->pg_shift) | iova; +} + +static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) +{ + unsigned long granule; + + /* + * We need to restrict the supported page sizes to match the + * translation regime for a particular granule. Aim to match + * the CPU page size if possible, otherwise prefer smaller sizes. + * While we're at it, restrict the block sizes to match the + * chosen granule. + */ + if (cfg->pgsize_bitmap & PAGE_SIZE) + granule = PAGE_SIZE; + else if (cfg->pgsize_bitmap & ~PAGE_MASK) + granule = 1UL << __fls(cfg->pgsize_bitmap & ~PAGE_MASK); + else if (cfg->pgsize_bitmap & PAGE_MASK) + granule = 1UL << __ffs(cfg->pgsize_bitmap & PAGE_MASK); + else + granule = 0; + + switch (granule) { + case SZ_4K: + cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); + break; + case SZ_16K: + cfg->pgsize_bitmap &= (SZ_16K | SZ_32M); + break; + case SZ_64K: + cfg->pgsize_bitmap &= (SZ_64K | SZ_512M); + break; + default: + cfg->pgsize_bitmap = 0; + } +} + +static struct arm_lpae_io_pgtable * +arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) +{ + unsigned long va_bits, pgd_bits; + struct arm_lpae_io_pgtable *data; + + arm_lpae_restrict_pgsizes(cfg); + + if (!(cfg->pgsize_bitmap & (SZ_4K | SZ_16K | SZ_64K))) + return NULL; + + if (cfg->ias > ARM_LPAE_MAX_ADDR_BITS) + return NULL; + + if (cfg->oas > ARM_LPAE_MAX_ADDR_BITS) + return NULL; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return NULL; + + data->pg_shift = __ffs(cfg->pgsize_bitmap); + data->bits_per_level = data->pg_shift - ilog2(sizeof(arm_lpae_iopte)); + + va_bits = cfg->ias - data->pg_shift; + data->levels = DIV_ROUND_UP(va_bits, data->bits_per_level); + + /* Calculate the actual size of our pgd (without concatenation) */ + pgd_bits = va_bits - (data->bits_per_level * (data->levels - 1)); + data->pgd_size = 1UL << (pgd_bits + ilog2(sizeof(arm_lpae_iopte))); + + data->iop.ops = (struct io_pgtable_ops) { + .map = arm_lpae_map, + .unmap = arm_lpae_unmap, + .iova_to_phys = arm_lpae_iova_to_phys, + }; + + return data; +} + +static struct io_pgtable * +arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) +{ + u64 reg; + struct arm_lpae_io_pgtable *data = arm_lpae_alloc_pgtable(cfg); + + if (!data) + return NULL; + + /* TCR */ + reg = (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) | + (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | + (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); + + switch (1 << data->pg_shift) { + case SZ_4K: + reg |= ARM_LPAE_TCR_TG0_4K; + break; + case SZ_16K: + reg |= ARM_LPAE_TCR_TG0_16K; + break; + case SZ_64K: + reg |= ARM_LPAE_TCR_TG0_64K; + break; + } + + switch (cfg->oas) { + case 32: + reg |= (ARM_LPAE_TCR_PS_32_BIT << ARM_LPAE_TCR_IPS_SHIFT); + break; + case 36: + reg |= (ARM_LPAE_TCR_PS_36_BIT << ARM_LPAE_TCR_IPS_SHIFT); + break; + case 40: + reg |= (ARM_LPAE_TCR_PS_40_BIT << ARM_LPAE_TCR_IPS_SHIFT); + break; + case 42: + reg |= (ARM_LPAE_TCR_PS_42_BIT << ARM_LPAE_TCR_IPS_SHIFT); + break; + case 44: + reg |= (ARM_LPAE_TCR_PS_44_BIT << ARM_LPAE_TCR_IPS_SHIFT); + break; + case 48: + reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_IPS_SHIFT); + break; + default: + goto out_free_data; + } + + reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT; + cfg->arm_lpae_s1_cfg.tcr = reg; + + /* MAIRs */ + reg = (ARM_LPAE_MAIR_ATTR_NC + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_NC)) | + (ARM_LPAE_MAIR_ATTR_WBRWA + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_CACHE)) | + (ARM_LPAE_MAIR_ATTR_DEVICE + << ARM_LPAE_MAIR_ATTR_SHIFT(ARM_LPAE_MAIR_ATTR_IDX_DEV)); + + cfg->arm_lpae_s1_cfg.mair[0] = reg; + cfg->arm_lpae_s1_cfg.mair[1] = 0; + + /* Looking good; allocate a pgd */ + data->pgd = alloc_pages_exact(data->pgd_size, GFP_KERNEL | __GFP_ZERO); + if (!data->pgd) + goto out_free_data; + + cfg->tlb->flush_pgtable(data->pgd, data->pgd_size, cookie); + + /* TTBRs */ + cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd); + cfg->arm_lpae_s1_cfg.ttbr[1] = 0; + return &data->iop; + +out_free_data: + kfree(data); + return NULL; +} + +static struct io_pgtable * +arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) +{ + u64 reg, sl; + struct arm_lpae_io_pgtable *data = arm_lpae_alloc_pgtable(cfg); + + if (!data) + return NULL; + + /* + * Concatenate PGDs at level 1 if possible in order to reduce + * the depth of the stage-2 walk. + */ + if (data->levels == ARM_LPAE_MAX_LEVELS) { + unsigned long pgd_pages; + + pgd_pages = data->pgd_size >> ilog2(sizeof(arm_lpae_iopte)); + if (pgd_pages <= ARM_LPAE_S2_MAX_CONCAT_PAGES) { + data->pgd_size = pgd_pages << data->pg_shift; + data->levels--; + } + } + + /* VTCR */ + reg = ARM_64_LPAE_S2_TCR_RES1 | + (ARM_LPAE_TCR_SH_IS << ARM_LPAE_TCR_SH0_SHIFT) | + (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN0_SHIFT) | + (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN0_SHIFT); + + sl = ARM_LPAE_START_LVL(data); + + switch (1 << data->pg_shift) { + case SZ_4K: + reg |= ARM_LPAE_TCR_TG0_4K; + sl++; /* SL0 format is different for 4K granule size */ + break; + case SZ_16K: + reg |= ARM_LPAE_TCR_TG0_16K; + break; + case SZ_64K: + reg |= ARM_LPAE_TCR_TG0_64K; + break; + } + + switch (cfg->oas) { + case 32: + reg |= (ARM_LPAE_TCR_PS_32_BIT << ARM_LPAE_TCR_PS_SHIFT); + break; + case 36: + reg |= (ARM_LPAE_TCR_PS_36_BIT << ARM_LPAE_TCR_PS_SHIFT); + break; + case 40: + reg |= (ARM_LPAE_TCR_PS_40_BIT << ARM_LPAE_TCR_PS_SHIFT); + break; + case 42: + reg |= (ARM_LPAE_TCR_PS_42_BIT << ARM_LPAE_TCR_PS_SHIFT); + break; + case 44: + reg |= (ARM_LPAE_TCR_PS_44_BIT << ARM_LPAE_TCR_PS_SHIFT); + break; + case 48: + reg |= (ARM_LPAE_TCR_PS_48_BIT << ARM_LPAE_TCR_PS_SHIFT); + break; + default: + goto out_free_data; + } + + reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT; + reg |= (~sl & ARM_LPAE_TCR_SL0_MASK) << ARM_LPAE_TCR_SL0_SHIFT; + cfg->arm_lpae_s2_cfg.vtcr = reg; + + /* Allocate pgd pages */ + data->pgd = alloc_pages_exact(data->pgd_size, GFP_KERNEL | __GFP_ZERO); + if (!data->pgd) + goto out_free_data; + + cfg->tlb->flush_pgtable(data->pgd, data->pgd_size, cookie); + + /* VTTBR */ + cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd); + return &data->iop; + +out_free_data: + kfree(data); + return NULL; +} + +static struct io_pgtable * +arm_32_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct io_pgtable *iop; + + if (cfg->ias > 32 || cfg->oas > 40) + return NULL; + + cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); + iop = arm_64_lpae_alloc_pgtable_s1(cfg, cookie); + if (iop) { + cfg->arm_lpae_s1_cfg.tcr |= ARM_32_LPAE_TCR_EAE; + cfg->arm_lpae_s1_cfg.tcr &= 0xffffffff; + } + + return iop; +} + +static struct io_pgtable * +arm_32_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct io_pgtable *iop; + + if (cfg->ias > 40 || cfg->oas > 40) + return NULL; + + cfg->pgsize_bitmap &= (SZ_4K | SZ_2M | SZ_1G); + iop = arm_64_lpae_alloc_pgtable_s2(cfg, cookie); + if (iop) + cfg->arm_lpae_s2_cfg.vtcr &= 0xffffffff; + + return iop; +} + +struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = { + .alloc = arm_64_lpae_alloc_pgtable_s1, + .free = arm_lpae_free_pgtable, +}; + +struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns = { + .alloc = arm_64_lpae_alloc_pgtable_s2, + .free = arm_lpae_free_pgtable, +}; + +struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns = { + .alloc = arm_32_lpae_alloc_pgtable_s1, + .free = arm_lpae_free_pgtable, +}; + +struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns = { + .alloc = arm_32_lpae_alloc_pgtable_s2, + .free = arm_lpae_free_pgtable, +}; diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index f664a1ca49cf8f..6436fe24bc2f6f 100644 --- a/drivers/iommu/io-pgtable.c +++ b/drivers/iommu/io-pgtable.c @@ -24,9 +24,20 @@ #include "io-pgtable.h" +extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s1_init_fns; +extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns; +extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns; +extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns; + static const struct io_pgtable_init_fns * io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = { +#ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE + [ARM_32_LPAE_S1] = &io_pgtable_arm_32_lpae_s1_init_fns, + [ARM_32_LPAE_S2] = &io_pgtable_arm_32_lpae_s2_init_fns, + [ARM_64_LPAE_S1] = &io_pgtable_arm_64_lpae_s1_init_fns, + [ARM_64_LPAE_S2] = &io_pgtable_arm_64_lpae_s2_init_fns, +#endif }; struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h index fdd792c3769825..05c4e593a25f68 100644 --- a/drivers/iommu/io-pgtable.h +++ b/drivers/iommu/io-pgtable.h @@ -5,6 +5,10 @@ * Public API for use by IOMMU drivers */ enum io_pgtable_fmt { + ARM_32_LPAE_S1, + ARM_32_LPAE_S2, + ARM_64_LPAE_S1, + ARM_64_LPAE_S2, IO_PGTABLE_NUM_FMTS, }; @@ -47,6 +51,16 @@ struct io_pgtable_cfg { /* Low-level data specific to the table format */ union { + struct { + u64 ttbr[2]; + u64 tcr; + u64 mair[2]; + } arm_lpae_s1_cfg; + + struct { + u64 vttbr; + u64 vtcr; + } arm_lpae_s2_cfg; }; }; From fe4b991dcd84e0104cf2e29223a819335ed048a7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 17 Nov 2014 23:31:12 +0000 Subject: [PATCH 146/601] iommu: add self-consistency tests to ARM LPAE IO page table allocator This patch adds a series of basic self-consistency tests to the ARM LPAE IO page table allocator that exercise corner cases in map/unmap, as well as testing all valid configurations of pagesize, ias and stage. Signed-off-by: Will Deacon --- drivers/iommu/Kconfig | 9 ++ drivers/iommu/io-pgtable-arm.c | 200 ++++++++++++++++++++++++++++++++- 2 files changed, 208 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 306454fbc52d7d..9fd9909867cd8e 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -28,6 +28,15 @@ config IOMMU_IO_PGTABLE_LPAE sizes at both stage-1 and stage-2, as well as address spaces up to 48-bits in size. +config IOMMU_IO_PGTABLE_LPAE_SELFTEST + bool "LPAE selftests" + depends on IOMMU_IO_PGTABLE_LPAE + help + Enable self-tests for LPAE page table allocator. This performs + a series of page-table consistency checks during boot. + + If unsure, say N here. + endmenu config OF_IOMMU diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index dbe6178a53e943..52fb21487f02de 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -193,6 +193,8 @@ struct arm_lpae_io_pgtable { typedef u64 arm_lpae_iopte; +static bool selftest_running = false; + static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, unsigned long iova, phys_addr_t paddr, arm_lpae_iopte prot, int lvl, @@ -201,8 +203,10 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, arm_lpae_iopte pte = prot; /* We require an unmap first */ - if (WARN_ON(iopte_leaf(*ptep, lvl))) + if (iopte_leaf(*ptep, lvl)) { + WARN_ON(!selftest_running); return -EEXIST; + } if (lvl == ARM_LPAE_MAX_LEVELS - 1) pte |= ARM_LPAE_PTE_TYPE_PAGE; @@ -779,3 +783,197 @@ struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns = { .alloc = arm_32_lpae_alloc_pgtable_s2, .free = arm_lpae_free_pgtable, }; + +#ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST + +static struct io_pgtable_cfg *cfg_cookie; + +static void dummy_tlb_flush_all(void *cookie) +{ + WARN_ON(cookie != cfg_cookie); +} + +static void dummy_tlb_add_flush(unsigned long iova, size_t size, bool leaf, + void *cookie) +{ + WARN_ON(cookie != cfg_cookie); + WARN_ON(!(size & cfg_cookie->pgsize_bitmap)); +} + +static void dummy_tlb_sync(void *cookie) +{ + WARN_ON(cookie != cfg_cookie); +} + +static void dummy_flush_pgtable(void *ptr, size_t size, void *cookie) +{ + WARN_ON(cookie != cfg_cookie); +} + +static struct iommu_gather_ops dummy_tlb_ops __initdata = { + .tlb_flush_all = dummy_tlb_flush_all, + .tlb_add_flush = dummy_tlb_add_flush, + .tlb_sync = dummy_tlb_sync, + .flush_pgtable = dummy_flush_pgtable, +}; + +static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &data->iop.cfg; + + pr_err("cfg: pgsize_bitmap 0x%lx, ias %u-bit\n", + cfg->pgsize_bitmap, cfg->ias); + pr_err("data: %d levels, 0x%zx pgd_size, %lu pg_shift, %lu bits_per_level, pgd @ %p\n", + data->levels, data->pgd_size, data->pg_shift, + data->bits_per_level, data->pgd); +} + +#define __FAIL(ops, i) ({ \ + WARN(1, "selftest: test failed for fmt idx %d\n", (i)); \ + arm_lpae_dump_ops(ops); \ + selftest_running = false; \ + -EFAULT; \ +}) + +static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) +{ + static const enum io_pgtable_fmt fmts[] = { + ARM_64_LPAE_S1, + ARM_64_LPAE_S2, + }; + + int i, j; + unsigned long iova; + size_t size; + struct io_pgtable_ops *ops; + + selftest_running = true; + + for (i = 0; i < ARRAY_SIZE(fmts); ++i) { + cfg_cookie = cfg; + ops = alloc_io_pgtable_ops(fmts[i], cfg, cfg); + if (!ops) { + pr_err("selftest: failed to allocate io pgtable ops\n"); + return -ENOMEM; + } + + /* + * Initial sanity checks. + * Empty page tables shouldn't provide any translations. + */ + if (ops->iova_to_phys(ops, 42)) + return __FAIL(ops, i); + + if (ops->iova_to_phys(ops, SZ_1G + 42)) + return __FAIL(ops, i); + + if (ops->iova_to_phys(ops, SZ_2G + 42)) + return __FAIL(ops, i); + + /* + * Distinct mappings of different granule sizes. + */ + iova = 0; + j = find_first_bit(&cfg->pgsize_bitmap, BITS_PER_LONG); + while (j != BITS_PER_LONG) { + size = 1UL << j; + + if (ops->map(ops, iova, iova, size, IOMMU_READ | + IOMMU_WRITE | + IOMMU_NOEXEC | + IOMMU_CACHE)) + return __FAIL(ops, i); + + /* Overlapping mappings */ + if (!ops->map(ops, iova, iova + size, size, + IOMMU_READ | IOMMU_NOEXEC)) + return __FAIL(ops, i); + + if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) + return __FAIL(ops, i); + + iova += SZ_1G; + j++; + j = find_next_bit(&cfg->pgsize_bitmap, BITS_PER_LONG, j); + } + + /* Partial unmap */ + size = 1UL << __ffs(cfg->pgsize_bitmap); + if (ops->unmap(ops, SZ_1G + size, size) != size) + return __FAIL(ops, i); + + /* Remap of partial unmap */ + if (ops->map(ops, SZ_1G + size, size, size, IOMMU_READ)) + return __FAIL(ops, i); + + if (ops->iova_to_phys(ops, SZ_1G + size + 42) != (size + 42)) + return __FAIL(ops, i); + + /* Full unmap */ + iova = 0; + j = find_first_bit(&cfg->pgsize_bitmap, BITS_PER_LONG); + while (j != BITS_PER_LONG) { + size = 1UL << j; + + if (ops->unmap(ops, iova, size) != size) + return __FAIL(ops, i); + + if (ops->iova_to_phys(ops, iova + 42)) + return __FAIL(ops, i); + + /* Remap full block */ + if (ops->map(ops, iova, iova, size, IOMMU_WRITE)) + return __FAIL(ops, i); + + if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) + return __FAIL(ops, i); + + iova += SZ_1G; + j++; + j = find_next_bit(&cfg->pgsize_bitmap, BITS_PER_LONG, j); + } + + free_io_pgtable_ops(ops); + } + + selftest_running = false; + return 0; +} + +static int __init arm_lpae_do_selftests(void) +{ + static const unsigned long pgsize[] = { + SZ_4K | SZ_2M | SZ_1G, + SZ_16K | SZ_32M, + SZ_64K | SZ_512M, + }; + + static const unsigned int ias[] = { + 32, 36, 40, 42, 44, 48, + }; + + int i, j, pass = 0, fail = 0; + struct io_pgtable_cfg cfg = { + .tlb = &dummy_tlb_ops, + .oas = 48, + }; + + for (i = 0; i < ARRAY_SIZE(pgsize); ++i) { + for (j = 0; j < ARRAY_SIZE(ias); ++j) { + cfg.pgsize_bitmap = pgsize[i]; + cfg.ias = ias[j]; + pr_info("selftest: pgsize_bitmap 0x%08lx, IAS %u\n", + pgsize[i], ias[j]); + if (arm_lpae_run_tests(&cfg)) + fail++; + else + pass++; + } + } + + pr_info("selftest: completed with %d PASS %d FAIL\n", pass, fail); + return fail ? -EFAULT : 0; +} +subsys_initcall(arm_lpae_do_selftests); +#endif From c896c132b01895fd1445d178e36155b671c6f9ee Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sun, 14 Dec 2014 23:34:50 +0200 Subject: [PATCH 147/601] iommu: io-pgtable-arm: add non-secure quirk The quirk causes the Non-Secure bit to be set in all page table entries. Signed-off-by: Laurent Pinchart Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 7 +++++++ drivers/iommu/io-pgtable.h | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 52fb21487f02de..5a500edf00cc14 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -82,11 +82,13 @@ #define ARM_LPAE_PTE_TYPE_TABLE 3 #define ARM_LPAE_PTE_TYPE_PAGE 3 +#define ARM_LPAE_PTE_NSTABLE (((arm_lpae_iopte)1) << 63) #define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53) #define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10) #define ARM_LPAE_PTE_SH_NS (((arm_lpae_iopte)0) << 8) #define ARM_LPAE_PTE_SH_OS (((arm_lpae_iopte)2) << 8) #define ARM_LPAE_PTE_SH_IS (((arm_lpae_iopte)3) << 8) +#define ARM_LPAE_PTE_NS (((arm_lpae_iopte)1) << 5) #define ARM_LPAE_PTE_VALID (((arm_lpae_iopte)1) << 0) #define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2) @@ -208,6 +210,9 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, return -EEXIST; } + if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) + pte |= ARM_LPAE_PTE_NS; + if (lvl == ARM_LPAE_MAX_LEVELS - 1) pte |= ARM_LPAE_PTE_TYPE_PAGE; else @@ -251,6 +256,8 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, data->iop.cfg.tlb->flush_pgtable(cptep, 1UL << data->pg_shift, cookie); pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE; + if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) + pte |= ARM_LPAE_PTE_NSTABLE; *ptep = pte; data->iop.cfg.tlb->flush_pgtable(ptep, sizeof(*ptep), cookie); } else { diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h index 05c4e593a25f68..10e32f69c66813 100644 --- a/drivers/iommu/io-pgtable.h +++ b/drivers/iommu/io-pgtable.h @@ -43,7 +43,8 @@ struct iommu_gather_ops { * @tlb: TLB management callbacks for this set of tables. */ struct io_pgtable_cfg { - int quirks; /* IO_PGTABLE_QUIRK_* */ + #define IO_PGTABLE_QUIRK_ARM_NS (1 << 0) /* Set NS bit in PTEs */ + int quirks; unsigned long pgsize_bitmap; unsigned int ias; unsigned int oas; From 518f7136244c167538f732691be589959310b295 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 14 Nov 2014 17:17:54 +0000 Subject: [PATCH 148/601] iommu/arm-smmu: make use of generic LPAE allocator The ARM SMMU can walk LPAE page tables, so make use of the generic allocator. Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 - drivers/iommu/Kconfig | 6 +- drivers/iommu/arm-smmu.c | 886 ++++++++++++--------------------------- 3 files changed, 266 insertions(+), 627 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b1f9a20a367746..528c3fd2d4c1a5 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -349,7 +349,6 @@ config ARM64_VA_BITS_42 config ARM64_VA_BITS_48 bool "48-bit" - depends on !ARM_SMMU endchoice diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 9fd9909867cd8e..87060ad6829d78 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -330,13 +330,13 @@ config SPAPR_TCE_IOMMU config ARM_SMMU bool "ARM Ltd. System MMU (SMMU) Support" - depends on ARM64 || (ARM_LPAE && OF) + depends on ARM64 || ARM select IOMMU_API + select IOMMU_IO_PGTABLE_LPAE select ARM_DMA_USE_IOMMU if ARM help Support for implementations of the ARM System MMU architecture - versions 1 and 2. The driver supports both v7l and v8l table - formats with 4k and 64k page sizes. + versions 1 and 2. Say Y here if your SoC includes an IOMMU device implementing the ARM SMMU architecture. diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 6cd47b75286f98..919ba433d219b2 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -23,8 +23,6 @@ * - Stream-matching and stream-indexing * - v7/v8 long-descriptor format * - Non-secure access to the SMMU - * - 4k and 64k pages, with contiguous pte hints. - * - Up to 48-bit addressing (dependent on VA_BITS) * - Context fault reporting */ @@ -36,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -46,7 +43,7 @@ #include -#include +#include "io-pgtable.h" /* Maximum number of stream IDs assigned to a single device */ #define MAX_MASTER_STREAMIDS MAX_PHANDLE_ARGS @@ -71,40 +68,6 @@ ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \ ? 0x400 : 0)) -/* Page table bits */ -#define ARM_SMMU_PTE_XN (((pteval_t)3) << 53) -#define ARM_SMMU_PTE_CONT (((pteval_t)1) << 52) -#define ARM_SMMU_PTE_AF (((pteval_t)1) << 10) -#define ARM_SMMU_PTE_SH_NS (((pteval_t)0) << 8) -#define ARM_SMMU_PTE_SH_OS (((pteval_t)2) << 8) -#define ARM_SMMU_PTE_SH_IS (((pteval_t)3) << 8) -#define ARM_SMMU_PTE_PAGE (((pteval_t)3) << 0) - -#if PAGE_SIZE == SZ_4K -#define ARM_SMMU_PTE_CONT_ENTRIES 16 -#elif PAGE_SIZE == SZ_64K -#define ARM_SMMU_PTE_CONT_ENTRIES 32 -#else -#define ARM_SMMU_PTE_CONT_ENTRIES 1 -#endif - -#define ARM_SMMU_PTE_CONT_SIZE (PAGE_SIZE * ARM_SMMU_PTE_CONT_ENTRIES) -#define ARM_SMMU_PTE_CONT_MASK (~(ARM_SMMU_PTE_CONT_SIZE - 1)) - -/* Stage-1 PTE */ -#define ARM_SMMU_PTE_AP_UNPRIV (((pteval_t)1) << 6) -#define ARM_SMMU_PTE_AP_RDONLY (((pteval_t)2) << 6) -#define ARM_SMMU_PTE_ATTRINDX_SHIFT 2 -#define ARM_SMMU_PTE_nG (((pteval_t)1) << 11) - -/* Stage-2 PTE */ -#define ARM_SMMU_PTE_HAP_FAULT (((pteval_t)0) << 6) -#define ARM_SMMU_PTE_HAP_READ (((pteval_t)1) << 6) -#define ARM_SMMU_PTE_HAP_WRITE (((pteval_t)2) << 6) -#define ARM_SMMU_PTE_MEMATTR_OIWB (((pteval_t)0xf) << 2) -#define ARM_SMMU_PTE_MEMATTR_NC (((pteval_t)0x5) << 2) -#define ARM_SMMU_PTE_MEMATTR_DEV (((pteval_t)0x1) << 2) - /* Configuration registers */ #define ARM_SMMU_GR0_sCR0 0x0 #define sCR0_CLIENTPD (1 << 0) @@ -132,17 +95,11 @@ #define ARM_SMMU_GR0_sGFSYNR0 0x50 #define ARM_SMMU_GR0_sGFSYNR1 0x54 #define ARM_SMMU_GR0_sGFSYNR2 0x58 -#define ARM_SMMU_GR0_PIDR0 0xfe0 -#define ARM_SMMU_GR0_PIDR1 0xfe4 -#define ARM_SMMU_GR0_PIDR2 0xfe8 #define ID0_S1TS (1 << 30) #define ID0_S2TS (1 << 29) #define ID0_NTS (1 << 28) #define ID0_SMS (1 << 27) -#define ID0_PTFS_SHIFT 24 -#define ID0_PTFS_MASK 0x2 -#define ID0_PTFS_V8_ONLY 0x2 #define ID0_CTTW (1 << 14) #define ID0_NUMIRPT_SHIFT 16 #define ID0_NUMIRPT_MASK 0xff @@ -169,9 +126,6 @@ #define ID2_PTFS_16K (1 << 13) #define ID2_PTFS_64K (1 << 14) -#define PIDR2_ARCH_SHIFT 4 -#define PIDR2_ARCH_MASK 0xf - /* Global TLB invalidation */ #define ARM_SMMU_GR0_STLBIALL 0x60 #define ARM_SMMU_GR0_TLBIVMID 0x64 @@ -231,13 +185,20 @@ #define ARM_SMMU_CB_TTBCR2 0x10 #define ARM_SMMU_CB_TTBR0_LO 0x20 #define ARM_SMMU_CB_TTBR0_HI 0x24 +#define ARM_SMMU_CB_TTBR1_LO 0x28 +#define ARM_SMMU_CB_TTBR1_HI 0x2c #define ARM_SMMU_CB_TTBCR 0x30 #define ARM_SMMU_CB_S1_MAIR0 0x38 +#define ARM_SMMU_CB_S1_MAIR1 0x3c #define ARM_SMMU_CB_FSR 0x58 #define ARM_SMMU_CB_FAR_LO 0x60 #define ARM_SMMU_CB_FAR_HI 0x64 #define ARM_SMMU_CB_FSYNR0 0x68 +#define ARM_SMMU_CB_S1_TLBIVA 0x600 #define ARM_SMMU_CB_S1_TLBIASID 0x610 +#define ARM_SMMU_CB_S1_TLBIVAL 0x620 +#define ARM_SMMU_CB_S2_TLBIIPAS2 0x630 +#define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638 #define SCTLR_S1_ASIDPNE (1 << 12) #define SCTLR_CFCFG (1 << 7) @@ -252,44 +213,9 @@ #define RESUME_RETRY (0 << 0) #define RESUME_TERMINATE (1 << 0) -#define TTBCR_EAE (1 << 31) - -#define TTBCR_PASIZE_SHIFT 16 -#define TTBCR_PASIZE_MASK 0x7 - -#define TTBCR_TG0_4K (0 << 14) -#define TTBCR_TG0_64K (1 << 14) - -#define TTBCR_SH0_SHIFT 12 -#define TTBCR_SH0_MASK 0x3 -#define TTBCR_SH_NS 0 -#define TTBCR_SH_OS 2 -#define TTBCR_SH_IS 3 - -#define TTBCR_ORGN0_SHIFT 10 -#define TTBCR_IRGN0_SHIFT 8 -#define TTBCR_RGN_MASK 0x3 -#define TTBCR_RGN_NC 0 -#define TTBCR_RGN_WBWA 1 -#define TTBCR_RGN_WT 2 -#define TTBCR_RGN_WB 3 - -#define TTBCR_SL0_SHIFT 6 -#define TTBCR_SL0_MASK 0x3 -#define TTBCR_SL0_LVL_2 0 -#define TTBCR_SL0_LVL_1 1 - -#define TTBCR_T1SZ_SHIFT 16 -#define TTBCR_T0SZ_SHIFT 0 -#define TTBCR_SZ_MASK 0xf - #define TTBCR2_SEP_SHIFT 15 #define TTBCR2_SEP_MASK 0x7 -#define TTBCR2_PASIZE_SHIFT 0 -#define TTBCR2_PASIZE_MASK 0x7 - -/* Common definitions for PASize and SEP fields */ #define TTBCR2_ADDR_32 0 #define TTBCR2_ADDR_36 1 #define TTBCR2_ADDR_40 2 @@ -297,16 +223,7 @@ #define TTBCR2_ADDR_44 4 #define TTBCR2_ADDR_48 5 -#define TTBRn_HI_ASID_SHIFT 16 - -#define MAIR_ATTR_SHIFT(n) ((n) << 3) -#define MAIR_ATTR_MASK 0xff -#define MAIR_ATTR_DEVICE 0x04 -#define MAIR_ATTR_NC 0x44 -#define MAIR_ATTR_WBRWA 0xff -#define MAIR_ATTR_IDX_NC 0 -#define MAIR_ATTR_IDX_CACHE 1 -#define MAIR_ATTR_IDX_DEV 2 +#define TTBRn_HI_ASID_SHIFT 16 #define FSR_MULTI (1 << 31) #define FSR_SS (1 << 30) @@ -380,10 +297,9 @@ struct arm_smmu_device { u32 num_mapping_groups; DECLARE_BITMAP(smr_map, ARM_SMMU_MAX_SMRS); - unsigned long s1_input_size; - unsigned long s1_output_size; - unsigned long s2_input_size; - unsigned long s2_output_size; + unsigned long va_size; + unsigned long ipa_size; + unsigned long pa_size; u32 num_global_irqs; u32 num_context_irqs; @@ -397,7 +313,6 @@ struct arm_smmu_cfg { u8 cbndx; u8 irptndx; u32 cbar; - pgd_t *pgd; }; #define INVALID_IRPTNDX 0xff @@ -412,11 +327,15 @@ enum arm_smmu_domain_stage { struct arm_smmu_domain { struct arm_smmu_device *smmu; + struct io_pgtable_ops *pgtbl_ops; + spinlock_t pgtbl_lock; struct arm_smmu_cfg cfg; enum arm_smmu_domain_stage stage; - spinlock_t lock; + struct mutex init_mutex; /* Protects smmu pointer */ }; +static struct iommu_ops arm_smmu_ops; + static DEFINE_SPINLOCK(arm_smmu_devices_lock); static LIST_HEAD(arm_smmu_devices); @@ -597,7 +516,7 @@ static void __arm_smmu_free_bitmap(unsigned long *map, int idx) } /* Wait for any pending TLB invalidations to complete */ -static void arm_smmu_tlb_sync(struct arm_smmu_device *smmu) +static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) { int count = 0; void __iomem *gr0_base = ARM_SMMU_GR0(smmu); @@ -615,12 +534,19 @@ static void arm_smmu_tlb_sync(struct arm_smmu_device *smmu) } } -static void arm_smmu_tlb_inv_context(struct arm_smmu_domain *smmu_domain) +static void arm_smmu_tlb_sync(void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + __arm_smmu_tlb_sync(smmu_domain->smmu); +} + +static void arm_smmu_tlb_inv_context(void *cookie) { + struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; struct arm_smmu_device *smmu = smmu_domain->smmu; - void __iomem *base = ARM_SMMU_GR0(smmu); bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; + void __iomem *base; if (stage1) { base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); @@ -632,9 +558,76 @@ static void arm_smmu_tlb_inv_context(struct arm_smmu_domain *smmu_domain) base + ARM_SMMU_GR0_TLBIVMID); } - arm_smmu_tlb_sync(smmu); + __arm_smmu_tlb_sync(smmu); +} + +static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, + bool leaf, void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + struct arm_smmu_device *smmu = smmu_domain->smmu; + bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; + void __iomem *reg; + + if (stage1) { + reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; + + if (!IS_ENABLED(CONFIG_64BIT) || smmu->version == ARM_SMMU_V1) { + iova &= ~12UL; + iova |= ARM_SMMU_CB_ASID(cfg); + writel_relaxed(iova, reg); +#ifdef CONFIG_64BIT + } else { + iova >>= 12; + iova |= (u64)ARM_SMMU_CB_ASID(cfg) << 48; + writeq_relaxed(iova, reg); +#endif + } +#ifdef CONFIG_64BIT + } else if (smmu->version == ARM_SMMU_V2) { + reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : + ARM_SMMU_CB_S2_TLBIIPAS2; + writeq_relaxed(iova >> 12, reg); +#endif + } else { + reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID; + writel_relaxed(ARM_SMMU_CB_VMID(cfg), reg); + } +} + +static void arm_smmu_flush_pgtable(void *addr, size_t size, void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + struct arm_smmu_device *smmu = smmu_domain->smmu; + unsigned long offset = (unsigned long)addr & ~PAGE_MASK; + + + /* Ensure new page tables are visible to the hardware walker */ + if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) { + dsb(ishst); + } else { + /* + * If the SMMU can't walk tables in the CPU caches, treat them + * like non-coherent DMA since we need to flush the new entries + * all the way out to memory. There's no possibility of + * recursion here as the SMMU table walker will not be wired + * through another SMMU. + */ + dma_map_page(smmu->dev, virt_to_page(addr), offset, size, + DMA_TO_DEVICE); + } } +static struct iommu_gather_ops arm_smmu_gather_ops = { + .tlb_flush_all = arm_smmu_tlb_inv_context, + .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, + .tlb_sync = arm_smmu_tlb_sync, + .flush_pgtable = arm_smmu_flush_pgtable, +}; + static irqreturn_t arm_smmu_context_fault(int irq, void *dev) { int flags, ret; @@ -712,29 +705,8 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev) return IRQ_HANDLED; } -static void arm_smmu_flush_pgtable(struct arm_smmu_device *smmu, void *addr, - size_t size) -{ - unsigned long offset = (unsigned long)addr & ~PAGE_MASK; - - - /* Ensure new page tables are visible to the hardware walker */ - if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) { - dsb(ishst); - } else { - /* - * If the SMMU can't walk tables in the CPU caches, treat them - * like non-coherent DMA since we need to flush the new entries - * all the way out to memory. There's no possibility of - * recursion here as the SMMU table walker will not be wired - * through another SMMU. - */ - dma_map_page(smmu->dev, virt_to_page(addr), offset, size, - DMA_TO_DEVICE); - } -} - -static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) +static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, + struct io_pgtable_cfg *pgtbl_cfg) { u32 reg; bool stage1; @@ -771,124 +743,68 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) #else reg = CBA2R_RW64_32BIT; #endif - writel_relaxed(reg, - gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx)); - - /* TTBCR2 */ - switch (smmu->s1_input_size) { - case 32: - reg = (TTBCR2_ADDR_32 << TTBCR2_SEP_SHIFT); - break; - case 36: - reg = (TTBCR2_ADDR_36 << TTBCR2_SEP_SHIFT); - break; - case 39: - case 40: - reg = (TTBCR2_ADDR_40 << TTBCR2_SEP_SHIFT); - break; - case 42: - reg = (TTBCR2_ADDR_42 << TTBCR2_SEP_SHIFT); - break; - case 44: - reg = (TTBCR2_ADDR_44 << TTBCR2_SEP_SHIFT); - break; - case 48: - reg = (TTBCR2_ADDR_48 << TTBCR2_SEP_SHIFT); - break; - } - - switch (smmu->s1_output_size) { - case 32: - reg |= (TTBCR2_ADDR_32 << TTBCR2_PASIZE_SHIFT); - break; - case 36: - reg |= (TTBCR2_ADDR_36 << TTBCR2_PASIZE_SHIFT); - break; - case 39: - case 40: - reg |= (TTBCR2_ADDR_40 << TTBCR2_PASIZE_SHIFT); - break; - case 42: - reg |= (TTBCR2_ADDR_42 << TTBCR2_PASIZE_SHIFT); - break; - case 44: - reg |= (TTBCR2_ADDR_44 << TTBCR2_PASIZE_SHIFT); - break; - case 48: - reg |= (TTBCR2_ADDR_48 << TTBCR2_PASIZE_SHIFT); - break; - } - - if (stage1) - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR2); + writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx)); } - /* TTBR0 */ - arm_smmu_flush_pgtable(smmu, cfg->pgd, - PTRS_PER_PGD * sizeof(pgd_t)); - reg = __pa(cfg->pgd); - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); - reg = (phys_addr_t)__pa(cfg->pgd) >> 32; - if (stage1) + /* TTBRs */ + if (stage1) { + reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); + reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32; reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT; - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); - - /* - * TTBCR - * We use long descriptor, with inner-shareable WBWA tables in TTBR0. - */ - if (smmu->version > ARM_SMMU_V1) { - if (PAGE_SIZE == SZ_4K) - reg = TTBCR_TG0_4K; - else - reg = TTBCR_TG0_64K; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); - if (!stage1) { - reg |= (64 - smmu->s2_input_size) << TTBCR_T0SZ_SHIFT; + reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO); + reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32; + reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI); + } else { + reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO); + reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr >> 32; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI); + } - switch (smmu->s2_output_size) { + /* TTBCR */ + if (stage1) { + reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR); + if (smmu->version > ARM_SMMU_V1) { + reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32; + switch (smmu->va_size) { case 32: - reg |= (TTBCR2_ADDR_32 << TTBCR_PASIZE_SHIFT); + reg |= (TTBCR2_ADDR_32 << TTBCR2_SEP_SHIFT); break; case 36: - reg |= (TTBCR2_ADDR_36 << TTBCR_PASIZE_SHIFT); + reg |= (TTBCR2_ADDR_36 << TTBCR2_SEP_SHIFT); break; case 40: - reg |= (TTBCR2_ADDR_40 << TTBCR_PASIZE_SHIFT); + reg |= (TTBCR2_ADDR_40 << TTBCR2_SEP_SHIFT); break; case 42: - reg |= (TTBCR2_ADDR_42 << TTBCR_PASIZE_SHIFT); + reg |= (TTBCR2_ADDR_42 << TTBCR2_SEP_SHIFT); break; case 44: - reg |= (TTBCR2_ADDR_44 << TTBCR_PASIZE_SHIFT); + reg |= (TTBCR2_ADDR_44 << TTBCR2_SEP_SHIFT); break; case 48: - reg |= (TTBCR2_ADDR_48 << TTBCR_PASIZE_SHIFT); + reg |= (TTBCR2_ADDR_48 << TTBCR2_SEP_SHIFT); break; } - } else { - reg |= (64 - smmu->s1_input_size) << TTBCR_T0SZ_SHIFT; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR2); } } else { - reg = 0; + reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR); } - reg |= TTBCR_EAE | - (TTBCR_SH_IS << TTBCR_SH0_SHIFT) | - (TTBCR_RGN_WBWA << TTBCR_ORGN0_SHIFT) | - (TTBCR_RGN_WBWA << TTBCR_IRGN0_SHIFT); - - if (!stage1) - reg |= (TTBCR_SL0_LVL_1 << TTBCR_SL0_SHIFT); - - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR); - - /* MAIR0 (stage-1 only) */ + /* MAIRs (stage-1 only) */ if (stage1) { - reg = (MAIR_ATTR_NC << MAIR_ATTR_SHIFT(MAIR_ATTR_IDX_NC)) | - (MAIR_ATTR_WBRWA << MAIR_ATTR_SHIFT(MAIR_ATTR_IDX_CACHE)) | - (MAIR_ATTR_DEVICE << MAIR_ATTR_SHIFT(MAIR_ATTR_IDX_DEV)); + reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[0]; writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0); + reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[1]; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR1); } /* SCTLR */ @@ -905,11 +821,14 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, struct arm_smmu_device *smmu) { int irq, start, ret = 0; - unsigned long flags; + unsigned long ias, oas; + struct io_pgtable_ops *pgtbl_ops; + struct io_pgtable_cfg pgtbl_cfg; + enum io_pgtable_fmt fmt; struct arm_smmu_domain *smmu_domain = domain->priv; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - spin_lock_irqsave(&smmu_domain->lock, flags); + mutex_lock(&smmu_domain->init_mutex); if (smmu_domain->smmu) goto out_unlock; @@ -940,6 +859,12 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, case ARM_SMMU_DOMAIN_S1: cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS; start = smmu->num_s2_context_banks; + ias = smmu->va_size; + oas = smmu->ipa_size; + if (IS_ENABLED(CONFIG_64BIT)) + fmt = ARM_64_LPAE_S1; + else + fmt = ARM_32_LPAE_S1; break; case ARM_SMMU_DOMAIN_NESTED: /* @@ -949,6 +874,12 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, case ARM_SMMU_DOMAIN_S2: cfg->cbar = CBAR_TYPE_S2_TRANS; start = 0; + ias = smmu->ipa_size; + oas = smmu->pa_size; + if (IS_ENABLED(CONFIG_64BIT)) + fmt = ARM_64_LPAE_S2; + else + fmt = ARM_32_LPAE_S2; break; default: ret = -EINVAL; @@ -968,10 +899,30 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, cfg->irptndx = cfg->cbndx; } - ACCESS_ONCE(smmu_domain->smmu) = smmu; - arm_smmu_init_context_bank(smmu_domain); - spin_unlock_irqrestore(&smmu_domain->lock, flags); + pgtbl_cfg = (struct io_pgtable_cfg) { + .pgsize_bitmap = arm_smmu_ops.pgsize_bitmap, + .ias = ias, + .oas = oas, + .tlb = &arm_smmu_gather_ops, + }; + + smmu_domain->smmu = smmu; + pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); + if (!pgtbl_ops) { + ret = -ENOMEM; + goto out_clear_smmu; + } + + /* Update our support page sizes to reflect the page table format */ + arm_smmu_ops.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; + + /* Initialise the context bank with our page table cfg */ + arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg); + /* + * Request context fault interrupt. Do this last to avoid the + * handler seeing a half-initialised domain state. + */ irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; ret = request_irq(irq, arm_smmu_context_fault, IRQF_SHARED, "arm-smmu-context-fault", domain); @@ -981,10 +932,16 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, cfg->irptndx = INVALID_IRPTNDX; } + mutex_unlock(&smmu_domain->init_mutex); + + /* Publish page table ops for map/unmap */ + smmu_domain->pgtbl_ops = pgtbl_ops; return 0; +out_clear_smmu: + smmu_domain->smmu = NULL; out_unlock: - spin_unlock_irqrestore(&smmu_domain->lock, flags); + mutex_unlock(&smmu_domain->init_mutex); return ret; } @@ -999,23 +956,27 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) if (!smmu) return; - /* Disable the context bank and nuke the TLB before freeing it. */ + /* + * Disable the context bank and free the page tables before freeing + * it. + */ cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); - arm_smmu_tlb_inv_context(smmu_domain); if (cfg->irptndx != INVALID_IRPTNDX) { irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx]; free_irq(irq, domain); } + if (smmu_domain->pgtbl_ops) + free_io_pgtable_ops(smmu_domain->pgtbl_ops); + __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx); } static int arm_smmu_domain_init(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain; - pgd_t *pgd; /* * Allocate the domain and initialise some of its data structures. @@ -1026,81 +987,10 @@ static int arm_smmu_domain_init(struct iommu_domain *domain) if (!smmu_domain) return -ENOMEM; - pgd = kcalloc(PTRS_PER_PGD, sizeof(pgd_t), GFP_KERNEL); - if (!pgd) - goto out_free_domain; - smmu_domain->cfg.pgd = pgd; - - spin_lock_init(&smmu_domain->lock); + mutex_init(&smmu_domain->init_mutex); + spin_lock_init(&smmu_domain->pgtbl_lock); domain->priv = smmu_domain; return 0; - -out_free_domain: - kfree(smmu_domain); - return -ENOMEM; -} - -static void arm_smmu_free_ptes(pmd_t *pmd) -{ - pgtable_t table = pmd_pgtable(*pmd); - - __free_page(table); -} - -static void arm_smmu_free_pmds(pud_t *pud) -{ - int i; - pmd_t *pmd, *pmd_base = pmd_offset(pud, 0); - - pmd = pmd_base; - for (i = 0; i < PTRS_PER_PMD; ++i) { - if (pmd_none(*pmd)) - continue; - - arm_smmu_free_ptes(pmd); - pmd++; - } - - pmd_free(NULL, pmd_base); -} - -static void arm_smmu_free_puds(pgd_t *pgd) -{ - int i; - pud_t *pud, *pud_base = pud_offset(pgd, 0); - - pud = pud_base; - for (i = 0; i < PTRS_PER_PUD; ++i) { - if (pud_none(*pud)) - continue; - - arm_smmu_free_pmds(pud); - pud++; - } - - pud_free(NULL, pud_base); -} - -static void arm_smmu_free_pgtables(struct arm_smmu_domain *smmu_domain) -{ - int i; - struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - pgd_t *pgd, *pgd_base = cfg->pgd; - - /* - * Recursively free the page tables for this domain. We don't - * care about speculative TLB filling because the tables should - * not be active in any context bank at this point (SCTLR.M is 0). - */ - pgd = pgd_base; - for (i = 0; i < PTRS_PER_PGD; ++i) { - if (pgd_none(*pgd)) - continue; - arm_smmu_free_puds(pgd); - pgd++; - } - - kfree(pgd_base); } static void arm_smmu_domain_destroy(struct iommu_domain *domain) @@ -1112,7 +1002,6 @@ static void arm_smmu_domain_destroy(struct iommu_domain *domain) * already been detached. */ arm_smmu_destroy_domain_context(domain); - arm_smmu_free_pgtables(smmu_domain); kfree(smmu_domain); } @@ -1244,7 +1133,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) { int ret; struct arm_smmu_domain *smmu_domain = domain->priv; - struct arm_smmu_device *smmu, *dom_smmu; + struct arm_smmu_device *smmu; struct arm_smmu_master_cfg *cfg; smmu = find_smmu_for_device(dev); @@ -1258,21 +1147,16 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return -EEXIST; } + /* Ensure that the domain is finalised */ + ret = arm_smmu_init_domain_context(domain, smmu); + if (IS_ERR_VALUE(ret)) + return ret; + /* * Sanity check the domain. We don't support domains across * different SMMUs. */ - dom_smmu = ACCESS_ONCE(smmu_domain->smmu); - if (!dom_smmu) { - /* Now that we have a master, we can finalise the domain */ - ret = arm_smmu_init_domain_context(domain, smmu); - if (IS_ERR_VALUE(ret)) - return ret; - - dom_smmu = smmu_domain->smmu; - } - - if (dom_smmu != smmu) { + if (smmu_domain->smmu != smmu) { dev_err(dev, "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n", dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev)); @@ -1303,293 +1187,55 @@ static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev) arm_smmu_domain_remove_master(smmu_domain, cfg); } -static bool arm_smmu_pte_is_contiguous_range(unsigned long addr, - unsigned long end) -{ - return !(addr & ~ARM_SMMU_PTE_CONT_MASK) && - (addr + ARM_SMMU_PTE_CONT_SIZE <= end); -} - -static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd, - unsigned long addr, unsigned long end, - unsigned long pfn, int prot, int stage) -{ - pte_t *pte, *start; - pteval_t pteval = ARM_SMMU_PTE_PAGE | ARM_SMMU_PTE_AF; - - if (pmd_none(*pmd)) { - /* Allocate a new set of tables */ - pgtable_t table = alloc_page(GFP_ATOMIC|__GFP_ZERO); - - if (!table) - return -ENOMEM; - - arm_smmu_flush_pgtable(smmu, page_address(table), PAGE_SIZE); - pmd_populate(NULL, pmd, table); - arm_smmu_flush_pgtable(smmu, pmd, sizeof(*pmd)); - } - - if (stage == 1) { - pteval |= ARM_SMMU_PTE_AP_UNPRIV | ARM_SMMU_PTE_nG; - if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) - pteval |= ARM_SMMU_PTE_AP_RDONLY; - - if (prot & IOMMU_CACHE) - pteval |= (MAIR_ATTR_IDX_CACHE << - ARM_SMMU_PTE_ATTRINDX_SHIFT); - } else { - pteval |= ARM_SMMU_PTE_HAP_FAULT; - if (prot & IOMMU_READ) - pteval |= ARM_SMMU_PTE_HAP_READ; - if (prot & IOMMU_WRITE) - pteval |= ARM_SMMU_PTE_HAP_WRITE; - if (prot & IOMMU_CACHE) - pteval |= ARM_SMMU_PTE_MEMATTR_OIWB; - else - pteval |= ARM_SMMU_PTE_MEMATTR_NC; - } - - if (prot & IOMMU_NOEXEC) - pteval |= ARM_SMMU_PTE_XN; - - /* If no access, create a faulting entry to avoid TLB fills */ - if (!(prot & (IOMMU_READ | IOMMU_WRITE))) - pteval &= ~ARM_SMMU_PTE_PAGE; - - pteval |= ARM_SMMU_PTE_SH_IS; - start = pmd_page_vaddr(*pmd) + pte_index(addr); - pte = start; - - /* - * Install the page table entries. This is fairly complicated - * since we attempt to make use of the contiguous hint in the - * ptes where possible. The contiguous hint indicates a series - * of ARM_SMMU_PTE_CONT_ENTRIES ptes mapping a physically - * contiguous region with the following constraints: - * - * - The region start is aligned to ARM_SMMU_PTE_CONT_SIZE - * - Each pte in the region has the contiguous hint bit set - * - * This complicates unmapping (also handled by this code, when - * neither IOMMU_READ or IOMMU_WRITE are set) because it is - * possible, yet highly unlikely, that a client may unmap only - * part of a contiguous range. This requires clearing of the - * contiguous hint bits in the range before installing the new - * faulting entries. - * - * Note that re-mapping an address range without first unmapping - * it is not supported, so TLB invalidation is not required here - * and is instead performed at unmap and domain-init time. - */ - do { - int i = 1; - - pteval &= ~ARM_SMMU_PTE_CONT; - - if (arm_smmu_pte_is_contiguous_range(addr, end)) { - i = ARM_SMMU_PTE_CONT_ENTRIES; - pteval |= ARM_SMMU_PTE_CONT; - } else if (pte_val(*pte) & - (ARM_SMMU_PTE_CONT | ARM_SMMU_PTE_PAGE)) { - int j; - pte_t *cont_start; - unsigned long idx = pte_index(addr); - - idx &= ~(ARM_SMMU_PTE_CONT_ENTRIES - 1); - cont_start = pmd_page_vaddr(*pmd) + idx; - for (j = 0; j < ARM_SMMU_PTE_CONT_ENTRIES; ++j) - pte_val(*(cont_start + j)) &= - ~ARM_SMMU_PTE_CONT; - - arm_smmu_flush_pgtable(smmu, cont_start, - sizeof(*pte) * - ARM_SMMU_PTE_CONT_ENTRIES); - } - - do { - *pte = pfn_pte(pfn, __pgprot(pteval)); - } while (pte++, pfn++, addr += PAGE_SIZE, --i); - } while (addr != end); - - arm_smmu_flush_pgtable(smmu, start, sizeof(*pte) * (pte - start)); - return 0; -} - -static int arm_smmu_alloc_init_pmd(struct arm_smmu_device *smmu, pud_t *pud, - unsigned long addr, unsigned long end, - phys_addr_t phys, int prot, int stage) -{ - int ret; - pmd_t *pmd; - unsigned long next, pfn = __phys_to_pfn(phys); - -#ifndef __PAGETABLE_PMD_FOLDED - if (pud_none(*pud)) { - pmd = (pmd_t *)get_zeroed_page(GFP_ATOMIC); - if (!pmd) - return -ENOMEM; - - arm_smmu_flush_pgtable(smmu, pmd, PAGE_SIZE); - pud_populate(NULL, pud, pmd); - arm_smmu_flush_pgtable(smmu, pud, sizeof(*pud)); - - pmd += pmd_index(addr); - } else -#endif - pmd = pmd_offset(pud, addr); - - do { - next = pmd_addr_end(addr, end); - ret = arm_smmu_alloc_init_pte(smmu, pmd, addr, next, pfn, - prot, stage); - phys += next - addr; - pfn = __phys_to_pfn(phys); - } while (pmd++, addr = next, addr < end); - - return ret; -} - -static int arm_smmu_alloc_init_pud(struct arm_smmu_device *smmu, pgd_t *pgd, - unsigned long addr, unsigned long end, - phys_addr_t phys, int prot, int stage) -{ - int ret = 0; - pud_t *pud; - unsigned long next; - -#ifndef __PAGETABLE_PUD_FOLDED - if (pgd_none(*pgd)) { - pud = (pud_t *)get_zeroed_page(GFP_ATOMIC); - if (!pud) - return -ENOMEM; - - arm_smmu_flush_pgtable(smmu, pud, PAGE_SIZE); - pgd_populate(NULL, pgd, pud); - arm_smmu_flush_pgtable(smmu, pgd, sizeof(*pgd)); - - pud += pud_index(addr); - } else -#endif - pud = pud_offset(pgd, addr); - - do { - next = pud_addr_end(addr, end); - ret = arm_smmu_alloc_init_pmd(smmu, pud, addr, next, phys, - prot, stage); - phys += next - addr; - } while (pud++, addr = next, addr < end); - - return ret; -} - -static int arm_smmu_handle_mapping(struct arm_smmu_domain *smmu_domain, - unsigned long iova, phys_addr_t paddr, - size_t size, int prot) -{ - int ret, stage; - unsigned long end; - phys_addr_t input_mask, output_mask; - struct arm_smmu_device *smmu = smmu_domain->smmu; - struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - pgd_t *pgd = cfg->pgd; - unsigned long flags; - - if (cfg->cbar == CBAR_TYPE_S2_TRANS) { - stage = 2; - input_mask = (1ULL << smmu->s2_input_size) - 1; - output_mask = (1ULL << smmu->s2_output_size) - 1; - } else { - stage = 1; - input_mask = (1ULL << smmu->s1_input_size) - 1; - output_mask = (1ULL << smmu->s1_output_size) - 1; - } - - if (!pgd) - return -EINVAL; - - if (size & ~PAGE_MASK) - return -EINVAL; - - if ((phys_addr_t)iova & ~input_mask) - return -ERANGE; - - if (paddr & ~output_mask) - return -ERANGE; - - spin_lock_irqsave(&smmu_domain->lock, flags); - pgd += pgd_index(iova); - end = iova + size; - do { - unsigned long next = pgd_addr_end(iova, end); - - ret = arm_smmu_alloc_init_pud(smmu, pgd, iova, next, paddr, - prot, stage); - if (ret) - goto out_unlock; - - paddr += next - iova; - iova = next; - } while (pgd++, iova != end); - -out_unlock: - spin_unlock_irqrestore(&smmu_domain->lock, flags); - - return ret; -} - static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { + int ret; + unsigned long flags; struct arm_smmu_domain *smmu_domain = domain->priv; + struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; - if (!smmu_domain) + if (!ops) return -ENODEV; - return arm_smmu_handle_mapping(smmu_domain, iova, paddr, size, prot); + spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); + ret = ops->map(ops, iova, paddr, size, prot); + spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); + return ret; } static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { - int ret; + size_t ret; + unsigned long flags; struct arm_smmu_domain *smmu_domain = domain->priv; + struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; - ret = arm_smmu_handle_mapping(smmu_domain, iova, 0, size, 0); - arm_smmu_tlb_inv_context(smmu_domain); - return ret ? 0 : size; + if (!ops) + return 0; + + spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); + ret = ops->unmap(ops, iova, size); + spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); + return ret; } static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { - pgd_t *pgdp, pgd; - pud_t pud; - pmd_t pmd; - pte_t pte; + phys_addr_t ret; + unsigned long flags; struct arm_smmu_domain *smmu_domain = domain->priv; - struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; - pgdp = cfg->pgd; - if (!pgdp) + if (!ops) return 0; - pgd = *(pgdp + pgd_index(iova)); - if (pgd_none(pgd)) - return 0; - - pud = *pud_offset(&pgd, iova); - if (pud_none(pud)) - return 0; - - pmd = *pmd_offset(&pud, iova); - if (pmd_none(pmd)) - return 0; - - pte = *(pmd_page_vaddr(pmd) + pte_index(iova)); - if (pte_none(pte)) - return 0; - - return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK); + spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); + ret = ops->iova_to_phys(ops, iova); + spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); + return ret; } static bool arm_smmu_capable(enum iommu_cap cap) @@ -1698,24 +1344,34 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, static int arm_smmu_domain_set_attr(struct iommu_domain *domain, enum iommu_attr attr, void *data) { + int ret = 0; struct arm_smmu_domain *smmu_domain = domain->priv; + mutex_lock(&smmu_domain->init_mutex); + switch (attr) { case DOMAIN_ATTR_NESTING: - if (smmu_domain->smmu) - return -EPERM; + if (smmu_domain->smmu) { + ret = -EPERM; + goto out_unlock; + } + if (*(int *)data) smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; else smmu_domain->stage = ARM_SMMU_DOMAIN_S1; - return 0; + break; default: - return -ENODEV; + ret = -ENODEV; } + +out_unlock: + mutex_unlock(&smmu_domain->init_mutex); + return ret; } -static const struct iommu_ops arm_smmu_ops = { +static struct iommu_ops arm_smmu_ops = { .capable = arm_smmu_capable, .domain_init = arm_smmu_domain_init, .domain_destroy = arm_smmu_domain_destroy, @@ -1729,9 +1385,7 @@ static const struct iommu_ops arm_smmu_ops = { .remove_device = arm_smmu_remove_device, .domain_get_attr = arm_smmu_domain_get_attr, .domain_set_attr = arm_smmu_domain_set_attr, - .pgsize_bitmap = (SECTION_SIZE | - ARM_SMMU_PTE_CONT_SIZE | - PAGE_SIZE), + .pgsize_bitmap = -1UL, /* Restricted during device attach */ }; static void arm_smmu_device_reset(struct arm_smmu_device *smmu) @@ -1782,7 +1436,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT); /* Push the button */ - arm_smmu_tlb_sync(smmu); + __arm_smmu_tlb_sync(smmu); writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); } @@ -1816,12 +1470,6 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) /* ID0 */ id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0); -#ifndef CONFIG_64BIT - if (((id >> ID0_PTFS_SHIFT) & ID0_PTFS_MASK) == ID0_PTFS_V8_ONLY) { - dev_err(smmu->dev, "\tno v7 descriptor support!\n"); - return -ENODEV; - } -#endif /* Restrict available stages based on module parameter */ if (force_stage == 1) @@ -1894,16 +1542,14 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12; /* Check for size mismatch of SMMU address space from mapped region */ - size = 1 << - (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1); + size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1); size *= 2 << smmu->pgshift; if (smmu->size != size) dev_warn(smmu->dev, "SMMU address space size (0x%lx) differs from mapped region size (0x%lx)!\n", size, smmu->size); - smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & - ID1_NUMS2CB_MASK; + smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK; smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK; if (smmu->num_s2_context_banks > smmu->num_context_banks) { dev_err(smmu->dev, "impossible number of S2 context banks!\n"); @@ -1915,46 +1561,40 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) /* ID2 */ id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2); size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK); - smmu->s1_output_size = min_t(unsigned long, PHYS_MASK_SHIFT, size); + smmu->ipa_size = size; - /* Stage-2 input size limited due to pgd allocation (PTRS_PER_PGD) */ -#ifdef CONFIG_64BIT - smmu->s2_input_size = min_t(unsigned long, VA_BITS, size); -#else - smmu->s2_input_size = min(32UL, size); -#endif - - /* The stage-2 output mask is also applied for bypass */ + /* The output mask is also applied for bypass */ size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK); - smmu->s2_output_size = min_t(unsigned long, PHYS_MASK_SHIFT, size); + smmu->pa_size = size; if (smmu->version == ARM_SMMU_V1) { - smmu->s1_input_size = 32; + smmu->va_size = smmu->ipa_size; + size = SZ_4K | SZ_2M | SZ_1G; } else { -#ifdef CONFIG_64BIT size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK; - size = min(VA_BITS, arm_smmu_id_size_to_bits(size)); -#else - size = 32; + smmu->va_size = arm_smmu_id_size_to_bits(size); +#ifndef CONFIG_64BIT + smmu->va_size = min(32UL, smmu->va_size); #endif - smmu->s1_input_size = size; - - if ((PAGE_SIZE == SZ_4K && !(id & ID2_PTFS_4K)) || - (PAGE_SIZE == SZ_64K && !(id & ID2_PTFS_64K)) || - (PAGE_SIZE != SZ_4K && PAGE_SIZE != SZ_64K)) { - dev_err(smmu->dev, "CPU page size 0x%lx unsupported\n", - PAGE_SIZE); - return -ENODEV; - } + size = 0; + if (id & ID2_PTFS_4K) + size |= SZ_4K | SZ_2M | SZ_1G; + if (id & ID2_PTFS_16K) + size |= SZ_16K | SZ_32M; + if (id & ID2_PTFS_64K) + size |= SZ_64K | SZ_512M; } + arm_smmu_ops.pgsize_bitmap &= size; + dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n", size); + if (smmu->features & ARM_SMMU_FEAT_TRANS_S1) dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n", - smmu->s1_input_size, smmu->s1_output_size); + smmu->va_size, smmu->ipa_size); if (smmu->features & ARM_SMMU_FEAT_TRANS_S2) dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n", - smmu->s2_input_size, smmu->s2_output_size); + smmu->ipa_size, smmu->pa_size); return 0; } From 3c8567d1cac0fa4c44ddcf0a956cae0bf2a348f3 Mon Sep 17 00:00:00 2001 From: Mitchel Humpherys Date: Tue, 23 Dec 2014 17:39:22 +0000 Subject: [PATCH 149/601] iommu/arm-smmu: don't touch the secure STLBIALL register Currently we do a STLBIALL when we initialize the SMMU. However, in some configurations that register is not supposed to be touched and is marked as "Secure only" in the spec. Rip it out. Signed-off-by: Mitchel Humpherys Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 919ba433d219b2..006f006c35e967 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -127,7 +127,6 @@ #define ID2_PTFS_64K (1 << 14) /* Global TLB invalidation */ -#define ARM_SMMU_GR0_STLBIALL 0x60 #define ARM_SMMU_GR0_TLBIVMID 0x64 #define ARM_SMMU_GR0_TLBIALLNSNH 0x68 #define ARM_SMMU_GR0_TLBIALLH 0x6c @@ -1414,7 +1413,6 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) } /* Invalidate the TLB, just in case */ - writel_relaxed(0, gr0_base + ARM_SMMU_GR0_STLBIALL); writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH); writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH); From 54c523127bcca986c6f9b04c7b56a949ea011899 Mon Sep 17 00:00:00 2001 From: Matt Wagantall Date: Mon, 15 Dec 2014 23:47:23 +0000 Subject: [PATCH 150/601] iopoll: Introduce memory-mapped IO polling macros It is sometimes necessary to poll a memory-mapped register until its value satisfies some condition. Introduce a family of convenience macros that do this. Tight-looping, sleeping, and timing out can all be accomplished using these macros. Cc: Thierry Reding Cc: Andrew Morton Cc: Robert Elliott Acked-by: Arnd Bergmann Acked-by: Will Deacon Signed-off-by: Matt Wagantall Signed-off-by: Mitchel Humpherys Signed-off-by: Will Deacon --- include/linux/iopoll.h | 144 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 include/linux/iopoll.h diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h new file mode 100644 index 00000000000000..1c30014ed17608 --- /dev/null +++ b/include/linux/iopoll.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2012-2014 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef _LINUX_IOPOLL_H +#define _LINUX_IOPOLL_H + +#include +#include +#include +#include +#include +#include + +/** + * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs + * @op: accessor function (takes @addr as its only argument) + * @addr: Address to poll + * @val: Variable to read the value into + * @cond: Break condition (usually involving @val) + * @sleep_us: Maximum time to sleep between reads in us (0 + * tight-loops). Should be less than ~20ms since usleep_range + * is used (see Documentation/timers/timers-howto.txt). + * @timeout_us: Timeout in us, 0 means never timeout + * + * Returns 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @addr is stored in @val. Must not + * be called from atomic context if sleep_us or timeout_us are used. + * + * When available, you'll probably want to use one of the specialized + * macros defined below rather than this macro directly. + */ +#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us) \ +({ \ + ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \ + might_sleep_if(sleep_us); \ + for (;;) { \ + (val) = op(addr); \ + if (cond) \ + break; \ + if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \ + (val) = op(addr); \ + break; \ + } \ + if (sleep_us) \ + usleep_range((sleep_us >> 2) + 1, sleep_us); \ + } \ + (cond) ? 0 : -ETIMEDOUT; \ +}) + +/** + * readx_poll_timeout_atomic - Periodically poll an address until a condition is met or a timeout occurs + * @op: accessor function (takes @addr as its only argument) + * @addr: Address to poll + * @val: Variable to read the value into + * @cond: Break condition (usually involving @val) + * @delay_us: Time to udelay between reads in us (0 tight-loops). Should + * be less than ~10us since udelay is used (see + * Documentation/timers/timers-howto.txt). + * @timeout_us: Timeout in us, 0 means never timeout + * + * Returns 0 on success and -ETIMEDOUT upon a timeout. In either + * case, the last read value at @addr is stored in @val. + * + * When available, you'll probably want to use one of the specialized + * macros defined below rather than this macro directly. + */ +#define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \ +({ \ + ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \ + for (;;) { \ + (val) = op(addr); \ + if (cond) \ + break; \ + if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \ + (val) = op(addr); \ + break; \ + } \ + if (delay_us) \ + udelay(delay_us); \ + } \ + (cond) ? 0 : -ETIMEDOUT; \ +}) + + +#define readb_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readb, addr, val, cond, delay_us, timeout_us) + +#define readb_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readb, addr, val, cond, delay_us, timeout_us) + +#define readw_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readw, addr, val, cond, delay_us, timeout_us) + +#define readw_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readw, addr, val, cond, delay_us, timeout_us) + +#define readl_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readl, addr, val, cond, delay_us, timeout_us) + +#define readl_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readl, addr, val, cond, delay_us, timeout_us) + +#define readq_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readq, addr, val, cond, delay_us, timeout_us) + +#define readq_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readq, addr, val, cond, delay_us, timeout_us) + +#define readb_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readb_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readb_relaxed_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readb_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readw_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readw_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readw_relaxed_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readw_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readl_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readl_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readl_relaxed_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readl_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readq_relaxed_poll_timeout(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout(readq_relaxed, addr, val, cond, delay_us, timeout_us) + +#define readq_relaxed_poll_timeout_atomic(addr, val, cond, delay_us, timeout_us) \ + readx_poll_timeout_atomic(readq_relaxed, addr, val, cond, delay_us, timeout_us) + +#endif /* _LINUX_IOPOLL_H */ From 1a28979b322bb28d8f95f76f080c53dbb9a8222d Mon Sep 17 00:00:00 2001 From: Lukasz Pawelczyk Date: Wed, 26 Nov 2014 15:31:06 +0100 Subject: [PATCH 151/601] smack: miscellaneous small fixes in function comments Signed-off-by: Lukasz Pawelczyk --- security/smack/smack_lsm.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index f1b17a476e12d1..dcfaddd955d1e7 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -202,6 +202,7 @@ static int smk_bu_credfile(const struct cred *cred, struct file *file, /** * smk_fetch - Fetch the smack label from a file. + * @name: type of the label (attribute) * @ip: a pointer to the inode * @dp: a pointer to the dentry * @@ -254,7 +255,9 @@ struct inode_smack *new_inode_smack(struct smack_known *skp) /** * new_task_smack - allocate a task security blob - * @smack: a pointer to the Smack label to use in the blob + * @task: a pointer to the Smack label for the running task + * @forked: a pointer to the Smack label for the forked task + * @gfp: type of the memory for the allocation * * Returns the new blob or NULL if there's no memory available */ @@ -277,8 +280,9 @@ static struct task_smack *new_task_smack(struct smack_known *task, /** * smk_copy_rules - copy a rule set - * @nhead - new rules header pointer - * @ohead - old rules header pointer + * @nhead: new rules header pointer + * @ohead: old rules header pointer + * @gfp: type of the memory for the allocation * * Returns 0 on success, -ENOMEM on error */ @@ -3834,11 +3838,11 @@ static void smack_key_free(struct key *key) key->security = NULL; } -/* +/** * smack_key_permission - Smack access on a key * @key_ref: gets to the object * @cred: the credentials to use - * @perm: unused + * @perm: requested key permissions * * Return 0 if the task has read and write to the object, * an error code otherwise From 68390ccf8b0a3470032f053d50379cfd49fbe952 Mon Sep 17 00:00:00 2001 From: Lukasz Pawelczyk Date: Wed, 26 Nov 2014 15:31:07 +0100 Subject: [PATCH 152/601] smack: fix logic in smack_inode_init_security function In principle if this function was called with "value" == NULL and "len" not NULL it could return different results for the "len" compared to a case where "name" was not NULL. This is a hypothetical case that does not exist in the kernel, but it's a logic bug nonetheless. Signed-off-by: Lukasz Pawelczyk --- security/smack/smack_lsm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index dcfaddd955d1e7..048d92e81a3406 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -800,7 +800,7 @@ static int smack_inode_init_security(struct inode *inode, struct inode *dir, if (name) *name = XATTR_SMACK_SUFFIX; - if (value) { + if (value && len) { rcu_read_lock(); may = smk_access_entry(skp->smk_known, dsp->smk_known, &skp->smk_rules); @@ -821,10 +821,9 @@ static int smack_inode_init_security(struct inode *inode, struct inode *dir, *value = kstrdup(isp->smk_known, GFP_NOFS); if (*value == NULL) return -ENOMEM; - } - if (len) *len = strlen(isp->smk_known); + } return 0; } From 1d8c2326a4a2a4d942f9165b5702fe6f869ccf48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stelmach?= Date: Tue, 16 Dec 2014 16:53:08 +0100 Subject: [PATCH 153/601] smack: introduce a special case for tmpfs in smack_d_instantiate() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Files created with __shmem_file_stup() appear to have somewhat fake dentries which make them look like root directories and not get the label the current process or ("*") star meant for tmpfs files. Signed-off-by: Łukasz Stelmach --- security/smack/smack_lsm.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 048d92e81a3406..2160e88a2e4e8c 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -3036,7 +3036,8 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode) * of the superblock. */ if (opt_dentry->d_parent == opt_dentry) { - if (sbp->s_magic == CGROUP_SUPER_MAGIC) { + switch (sbp->s_magic) { + case CGROUP_SUPER_MAGIC: /* * The cgroup filesystem is never mounted, * so there's no opportunity to set the mount @@ -3044,8 +3045,19 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode) */ sbsp->smk_root = &smack_known_star; sbsp->smk_default = &smack_known_star; + isp->smk_inode = sbsp->smk_root; + break; + case TMPFS_MAGIC: + /* + * What about shmem/tmpfs anonymous files with dentry + * obtained from d_alloc_pseudo()? + */ + isp->smk_inode = smk_of_current(); + break; + default: + isp->smk_inode = sbsp->smk_root; + break; } - isp->smk_inode = sbsp->smk_root; isp->smk_flags |= SMK_INODE_INSTANT; goto unlockandout; } From 96be7b5424948ae39d29d5149eaec0bd6edd7404 Mon Sep 17 00:00:00 2001 From: Zbigniew Jasinski Date: Mon, 29 Dec 2014 15:34:58 +0100 Subject: [PATCH 154/601] smack: Fix a bidirectional UDS connect check typo The 54e70ec5eb090193b03e69d551fa6771a5a217c4 commit introduced a bidirectional check that should have checked for mutual WRITE access between two labels. Due to a typo subject's OUT label is checked with object's OUT. Should be OUT to IN. Signed-off-by: Zbigniew Jasinski --- security/smack/smack_lsm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 2160e88a2e4e8c..654345de62e7e6 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -3312,7 +3312,7 @@ static int smack_unix_stream_connect(struct sock *sock, if (!smack_privileged(CAP_MAC_OVERRIDE)) { skp = ssp->smk_out; - okp = osp->smk_out; + okp = osp->smk_in; #ifdef CONFIG_AUDIT smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net); smk_ad_setfield_u_net_sk(&ad, other); @@ -3320,6 +3320,8 @@ static int smack_unix_stream_connect(struct sock *sock, rc = smk_access(skp, okp, MAY_WRITE, &ad); rc = smk_bu_note("UDS connect", skp, okp, MAY_WRITE, rc); if (rc == 0) { + okp = osp->smk_out; + skp = ssp->smk_in; rc = smk_access(okp, skp, MAY_WRITE, NULL); rc = smk_bu_note("UDS connect", okp, skp, MAY_WRITE, rc); From 859a732e4f713270152c78df6e09accbde006734 Mon Sep 17 00:00:00 2001 From: Mitchel Humpherys Date: Wed, 29 Oct 2014 21:13:40 +0000 Subject: [PATCH 155/601] iommu/arm-smmu: add support for iova_to_phys through ATS1PR Currently, we provide the iommu_ops.iova_to_phys service by doing a table walk in software to translate IO virtual addresses to physical addresses. On SMMUs that support it, it can be useful to ask the SMMU itself to do the translation. This can be used to warm the TLBs for an SMMU. It can also be useful for testing and hardware validation. Since the address translation registers are optional on SMMUv2, only enable hardware translations when using SMMUv1 or when SMMU_IDR0.S1TS=1 and SMMU_IDR0.ATOSNS=0, as described in the ARM SMMU v1-v2 spec. Signed-off-by: Mitchel Humpherys [will: reworked on top of generic iopgtbl changes] Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 69 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 006f006c35e967..1d6d43bb339529 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -100,6 +101,7 @@ #define ID0_S2TS (1 << 29) #define ID0_NTS (1 << 28) #define ID0_SMS (1 << 27) +#define ID0_ATOSNS (1 << 26) #define ID0_CTTW (1 << 14) #define ID0_NUMIRPT_SHIFT 16 #define ID0_NUMIRPT_MASK 0xff @@ -189,6 +191,8 @@ #define ARM_SMMU_CB_TTBCR 0x30 #define ARM_SMMU_CB_S1_MAIR0 0x38 #define ARM_SMMU_CB_S1_MAIR1 0x3c +#define ARM_SMMU_CB_PAR_LO 0x50 +#define ARM_SMMU_CB_PAR_HI 0x54 #define ARM_SMMU_CB_FSR 0x58 #define ARM_SMMU_CB_FAR_LO 0x60 #define ARM_SMMU_CB_FAR_HI 0x64 @@ -198,6 +202,9 @@ #define ARM_SMMU_CB_S1_TLBIVAL 0x620 #define ARM_SMMU_CB_S2_TLBIIPAS2 0x630 #define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638 +#define ARM_SMMU_CB_ATS1PR_LO 0x800 +#define ARM_SMMU_CB_ATS1PR_HI 0x804 +#define ARM_SMMU_CB_ATSR 0x8f0 #define SCTLR_S1_ASIDPNE (1 << 12) #define SCTLR_CFCFG (1 << 7) @@ -209,6 +216,10 @@ #define SCTLR_M (1 << 0) #define SCTLR_EAE_SBOP (SCTLR_AFE | SCTLR_TRE) +#define CB_PAR_F (1 << 0) + +#define ATSR_ACTIVE (1 << 0) + #define RESUME_RETRY (0 << 0) #define RESUME_TERMINATE (1 << 0) @@ -282,6 +293,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_TRANS_S1 (1 << 2) #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3) #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4) +#define ARM_SMMU_FEAT_TRANS_OPS (1 << 5) u32 features; #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0) @@ -1220,8 +1232,52 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, return ret; } +static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct arm_smmu_domain *smmu_domain = domain->priv; + struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; + struct device *dev = smmu->dev; + void __iomem *cb_base; + u32 tmp; + u64 phys; + + cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + + if (smmu->version == 1) { + u32 reg = iova & ~0xfff; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO); + } else { + u32 reg = iova & ~0xfff; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO); + reg = (iova & ~0xfff) >> 32; + writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_HI); + } + + if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp, + !(tmp & ATSR_ACTIVE), 5, 50)) { + dev_err(dev, + "iova to phys timed out on 0x%pad. Falling back to software table walk.\n", + &iova); + return ops->iova_to_phys(ops, iova); + } + + phys = readl_relaxed(cb_base + ARM_SMMU_CB_PAR_LO); + phys |= ((u64)readl_relaxed(cb_base + ARM_SMMU_CB_PAR_HI)) << 32; + + if (phys & CB_PAR_F) { + dev_err(dev, "translation fault!\n"); + dev_err(dev, "PAR = 0x%llx\n", phys); + return 0; + } + + return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff); +} + static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, - dma_addr_t iova) + dma_addr_t iova) { phys_addr_t ret; unsigned long flags; @@ -1232,8 +1288,12 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, return 0; spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags); - ret = ops->iova_to_phys(ops, iova); + if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS) + ret = arm_smmu_iova_to_phys_hard(domain, iova); + else + ret = ops->iova_to_phys(ops, iova); spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags); + return ret; } @@ -1496,6 +1556,11 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) return -ENODEV; } + if (smmu->version == 1 || (!(id & ID0_ATOSNS) && (id & ID0_S1TS))) { + smmu->features |= ARM_SMMU_FEAT_TRANS_OPS; + dev_notice(smmu->dev, "\taddress translation ops\n"); + } + if (id & ID0_CTTW) { smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK; dev_notice(smmu->dev, "\tcoherent table walk\n"); From 914309995ed5c6c737df4135ac5ba2b67cff194d Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Mon, 19 Jan 2015 10:43:40 +0200 Subject: [PATCH 156/601] char/tpm/tpm_crb: fix build error SIMPLE_DEV_PM_OPS() was inside #ifdef CONFIG_PM_SLEEP. Fixes: 30fc8d1 ("tpm: TPM 2.0 CRB Interface") Signed-off-by: Jarkko Sakkinen Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm_crb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index c248a356d5c9e6..3dd23cfae4feef 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -107,9 +107,9 @@ static int crb_resume(struct device *dev) return rc; } +#endif static SIMPLE_DEV_PM_OPS(crb_pm, tpm_pm_suspend, crb_resume); -#endif static u8 crb_status(struct tpm_chip *chip) { From a85cade6762bf566698e625ec1a4461245e40768 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 20 Jan 2015 17:36:53 +1100 Subject: [PATCH 157/601] powerpc: Update all configs using savedefconfig It looks like it's ~4 years since we updated some of these, so do a bulk update. Verified that the before and after generated configs are exactly the same. Which begs the question why update them? The answer is that it can be confusing when the stored defconfig drifts too far from the generated result. Signed-off-by: Michael Ellerman --- arch/powerpc/configs/40x/acadia_defconfig | 21 +- arch/powerpc/configs/40x/ep405_defconfig | 18 +- arch/powerpc/configs/40x/kilauea_defconfig | 27 +- arch/powerpc/configs/40x/klondike_defconfig | 4 - arch/powerpc/configs/40x/makalu_defconfig | 20 +- arch/powerpc/configs/40x/obs600_defconfig | 13 +- arch/powerpc/configs/40x/virtex_defconfig | 21 +- arch/powerpc/configs/40x/walnut_defconfig | 18 +- arch/powerpc/configs/44x/akebono_defconfig | 7 - arch/powerpc/configs/44x/arches_defconfig | 23 +- arch/powerpc/configs/44x/bamboo_defconfig | 15 +- arch/powerpc/configs/44x/bluestone_defconfig | 16 +- .../powerpc/configs/44x/canyonlands_defconfig | 26 +- arch/powerpc/configs/44x/currituck_defconfig | 13 +- arch/powerpc/configs/44x/ebony_defconfig | 17 +- arch/powerpc/configs/44x/eiger_defconfig | 26 +- arch/powerpc/configs/44x/icon_defconfig | 20 - arch/powerpc/configs/44x/iss476-smp_defconfig | 16 +- arch/powerpc/configs/44x/katmai_defconfig | 15 - arch/powerpc/configs/44x/rainier_defconfig | 15 +- arch/powerpc/configs/44x/redwood_defconfig | 26 +- arch/powerpc/configs/44x/sam440ep_defconfig | 19 +- arch/powerpc/configs/44x/sequoia_defconfig | 21 +- arch/powerpc/configs/44x/taishan_defconfig | 16 +- arch/powerpc/configs/44x/virtex5_defconfig | 21 +- arch/powerpc/configs/44x/warp_defconfig | 23 +- arch/powerpc/configs/52xx/cm5200_defconfig | 19 +- arch/powerpc/configs/52xx/lite5200b_defconfig | 20 +- arch/powerpc/configs/52xx/motionpro_defconfig | 22 +- arch/powerpc/configs/52xx/pcm030_defconfig | 21 +- arch/powerpc/configs/52xx/tqm5200_defconfig | 22 +- arch/powerpc/configs/83xx/asp8347_defconfig | 22 +- arch/powerpc/configs/83xx/kmeter1_defconfig | 4 - .../configs/83xx/mpc8313_rdb_defconfig | 25 +- .../configs/83xx/mpc8315_rdb_defconfig | 22 +- .../configs/83xx/mpc832x_mds_defconfig | 21 +- .../configs/83xx/mpc832x_rdb_defconfig | 23 +- .../configs/83xx/mpc834x_itx_defconfig | 20 +- .../configs/83xx/mpc834x_itxgp_defconfig | 20 +- .../configs/83xx/mpc834x_mds_defconfig | 23 +- .../configs/83xx/mpc836x_mds_defconfig | 22 +- .../configs/83xx/mpc836x_rdk_defconfig | 16 +- .../configs/83xx/mpc837x_mds_defconfig | 15 +- .../configs/83xx/mpc837x_rdb_defconfig | 17 +- arch/powerpc/configs/83xx/sbc834x_defconfig | 16 +- arch/powerpc/configs/85xx/ge_imp3a_defconfig | 16 +- arch/powerpc/configs/85xx/kmp204x_defconfig | 3 - arch/powerpc/configs/85xx/ksi8560_defconfig | 17 +- .../configs/85xx/mpc8540_ads_defconfig | 19 +- .../configs/85xx/mpc8560_ads_defconfig | 19 +- .../configs/85xx/mpc85xx_cds_defconfig | 21 +- arch/powerpc/configs/85xx/ppa8548_defconfig | 62 ++-- arch/powerpc/configs/85xx/sbc8548_defconfig | 40 +- arch/powerpc/configs/85xx/socrates_defconfig | 24 +- arch/powerpc/configs/85xx/stx_gp3_defconfig | 15 +- arch/powerpc/configs/85xx/tqm8540_defconfig | 17 +- arch/powerpc/configs/85xx/tqm8541_defconfig | 17 +- arch/powerpc/configs/85xx/tqm8548_defconfig | 22 +- arch/powerpc/configs/85xx/tqm8555_defconfig | 17 +- arch/powerpc/configs/85xx/tqm8560_defconfig | 17 +- .../configs/85xx/xes_mpc85xx_defconfig | 34 +- arch/powerpc/configs/86xx/gef_ppc9a_defconfig | 36 +- .../powerpc/configs/86xx/gef_sbc310_defconfig | 35 +- .../powerpc/configs/86xx/gef_sbc610_defconfig | 53 +-- .../configs/86xx/mpc8610_hpcd_defconfig | 32 +- .../configs/86xx/mpc8641_hpcn_defconfig | 33 +- arch/powerpc/configs/86xx/sbc8641d_defconfig | 54 +-- arch/powerpc/configs/adder875_defconfig | 27 +- arch/powerpc/configs/amigaone_defconfig | 36 +- arch/powerpc/configs/c2k_defconfig | 82 +---- arch/powerpc/configs/cell_defconfig | 30 +- arch/powerpc/configs/celleb_defconfig | 21 +- arch/powerpc/configs/chrp32_defconfig | 37 +- arch/powerpc/configs/corenet32_smp_defconfig | 19 +- arch/powerpc/configs/corenet64_smp_defconfig | 1 - arch/powerpc/configs/ep8248e_defconfig | 17 +- arch/powerpc/configs/ep88xc_defconfig | 29 +- arch/powerpc/configs/g5_defconfig | 60 +-- arch/powerpc/configs/gamecube_defconfig | 15 +- arch/powerpc/configs/holly_defconfig | 15 +- arch/powerpc/configs/linkstation_defconfig | 28 +- arch/powerpc/configs/maple_defconfig | 26 +- arch/powerpc/configs/mgcoge_defconfig | 12 +- arch/powerpc/configs/mpc512x_defconfig | 2 - arch/powerpc/configs/mpc5200_defconfig | 18 +- arch/powerpc/configs/mpc7448_hpc2_defconfig | 18 +- arch/powerpc/configs/mpc8272_ads_defconfig | 24 +- arch/powerpc/configs/mpc83xx_defconfig | 7 - arch/powerpc/configs/mpc85xx_defconfig | 58 +-- arch/powerpc/configs/mpc85xx_smp_defconfig | 62 +--- arch/powerpc/configs/mpc866_ads_defconfig | 22 +- arch/powerpc/configs/mpc86xx_defconfig | 39 +- arch/powerpc/configs/mpc885_ads_defconfig | 29 +- arch/powerpc/configs/mvme5100_defconfig | 9 - arch/powerpc/configs/pasemi_defconfig | 3 - arch/powerpc/configs/pmac32_defconfig | 65 +--- arch/powerpc/configs/ppc40x_defconfig | 17 +- arch/powerpc/configs/ppc44x_defconfig | 14 - arch/powerpc/configs/ppc64_defconfig | 12 +- arch/powerpc/configs/ppc64e_defconfig | 4 - arch/powerpc/configs/ppc6xx_defconfig | 348 +++++++----------- arch/powerpc/configs/pq2fads_defconfig | 28 +- arch/powerpc/configs/ps3_defconfig | 16 +- arch/powerpc/configs/pseries_defconfig | 12 +- arch/powerpc/configs/pseries_le_defconfig | 14 +- arch/powerpc/configs/storcenter_defconfig | 12 +- arch/powerpc/configs/tqm8xx_defconfig | 32 +- arch/powerpc/configs/wii_defconfig | 18 +- 108 files changed, 581 insertions(+), 2207 deletions(-) diff --git a/arch/powerpc/configs/40x/acadia_defconfig b/arch/powerpc/configs/40x/acadia_defconfig index 69e06eeae6a661..9110a5cb1bb718 100644 --- a/arch/powerpc/configs/40x/acadia_defconfig +++ b/arch/powerpc/configs/40x/acadia_defconfig @@ -1,19 +1,15 @@ CONFIG_40x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y -CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_ACADIA=y # CONFIG_WALNUT is not set -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -31,27 +27,18 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=m CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y -CONFIG_MII=y CONFIG_IBM_EMAC=y CONFIG_IBM_EMAC_RXB=256 CONFIG_IBM_EMAC_TXB=256 CONFIG_IBM_EMAC_DEBUG=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -65,20 +52,14 @@ CONFIG_SERIAL_OF_PLATFORM=y CONFIG_THERMAL=y # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y diff --git a/arch/powerpc/configs/40x/ep405_defconfig b/arch/powerpc/configs/40x/ep405_defconfig index e9d84b5d0ab650..790366652ba309 100644 --- a/arch/powerpc/configs/40x/ep405_defconfig +++ b/arch/powerpc/configs/40x/ep405_defconfig @@ -1,19 +1,15 @@ CONFIG_40x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y -CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_EP405=y # CONFIG_WALNUT is not set -CONFIG_SPARSE_IRQ=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -30,19 +26,14 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=m CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y CONFIG_IBM_EMAC=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set @@ -55,27 +46,20 @@ CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set CONFIG_THERMAL=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_USB=y CONFIG_USB_MON=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PPC_OF_BE=y CONFIG_USB_OHCI_HCD_PPC_OF_LE=y CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y diff --git a/arch/powerpc/configs/40x/kilauea_defconfig b/arch/powerpc/configs/40x/kilauea_defconfig index 5ff338f6443f06..01bd71bac027ea 100644 --- a/arch/powerpc/configs/40x/kilauea_defconfig +++ b/arch/powerpc/configs/40x/kilauea_defconfig @@ -1,21 +1,17 @@ CONFIG_40x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y -CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_KILAUEA=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y # CONFIG_WALNUT is not set -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -33,8 +29,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y @@ -42,21 +36,12 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_NDFC=y -CONFIG_PROC_DEVICETREE=y -CONFIG_PM=y -CONFIG_SUSPEND=y -CONFIG_PPC4xx_CPM=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y CONFIG_IBM_EMAC=y CONFIG_IBM_EMAC_RXB=256 CONFIG_IBM_EMAC_TXB=256 -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -75,20 +60,14 @@ CONFIG_THERMAL=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y diff --git a/arch/powerpc/configs/40x/klondike_defconfig b/arch/powerpc/configs/40x/klondike_defconfig index c0d228dc73dc52..e2036b7c7edb71 100644 --- a/arch/powerpc/configs/40x/klondike_defconfig +++ b/arch/powerpc/configs/40x/klondike_defconfig @@ -1,5 +1,4 @@ CONFIG_40x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_SYSFS_DEPRECATED=y @@ -14,10 +13,8 @@ CONFIG_MODULE_UNLOAD=y CONFIG_APM8018X=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_MATH_EMULATION=y -# CONFIG_MIGRATION is not set # CONFIG_SUSPEND is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 CONFIG_SCSI=y @@ -51,5 +48,4 @@ CONFIG_AVERAGE=y CONFIG_MAGIC_SYSRQ=y # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_BUGVERBOSE is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_FTRACE is not set diff --git a/arch/powerpc/configs/40x/makalu_defconfig b/arch/powerpc/configs/40x/makalu_defconfig index 84505e3aa0fb2f..efd51701fb4d49 100644 --- a/arch/powerpc/configs/40x/makalu_defconfig +++ b/arch/powerpc/configs/40x/makalu_defconfig @@ -1,19 +1,15 @@ CONFIG_40x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y -CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_MAKALU=y # CONFIG_WALNUT is not set -CONFIG_SPARSE_IRQ=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -30,25 +26,17 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=m CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y CONFIG_IBM_EMAC=y CONFIG_IBM_EMAC_RXB=256 CONFIG_IBM_EMAC_TXB=256 -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -62,20 +50,14 @@ CONFIG_SERIAL_OF_PLATFORM=y CONFIG_THERMAL=y # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y diff --git a/arch/powerpc/configs/40x/obs600_defconfig b/arch/powerpc/configs/40x/obs600_defconfig index 91c110dad2d6a7..5ded3dcdf60a08 100644 --- a/arch/powerpc/configs/40x/obs600_defconfig +++ b/arch/powerpc/configs/40x/obs600_defconfig @@ -1,7 +1,8 @@ CONFIG_40x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y @@ -11,8 +12,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_WALNUT is not set CONFIG_OBS600=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_MATH_EMULATION=y CONFIG_NET=y CONFIG_PACKET=y @@ -30,8 +29,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y @@ -39,7 +36,6 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_NDFC=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 CONFIG_NETDEVICES=y @@ -68,13 +64,10 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y diff --git a/arch/powerpc/configs/40x/virtex_defconfig b/arch/powerpc/configs/40x/virtex_defconfig index 0a81e1f7dd5937..bcb0c4d854db7d 100644 --- a/arch/powerpc/configs/40x/virtex_defconfig +++ b/arch/powerpc/configs/40x/virtex_defconfig @@ -1,5 +1,4 @@ CONFIG_40x=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -7,7 +6,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -35,16 +33,11 @@ CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_MANGLE=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 CONFIG_XILINX_SYSACE=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -# CONFIG_NETDEV_10000 is not set -# CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set CONFIG_SERIO_XILINX_XPS_PS2=y CONFIG_SERIAL_8250=y @@ -60,15 +53,9 @@ CONFIG_GPIO_XILINX=y CONFIG_FB=y CONFIG_FB_XILINX=y CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FONTS=y -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y CONFIG_LOGO=y -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y -CONFIG_AUTOFS_FS=y CONFIG_AUTOFS4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y @@ -76,16 +63,16 @@ CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_ROMFS_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m CONFIG_CRC_CCITT=y +CONFIG_FONTS=y +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_DEBUG_KERNEL=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/40x/walnut_defconfig b/arch/powerpc/configs/40x/walnut_defconfig index 0a19f4386ee9fe..37c838f26e53fe 100644 --- a/arch/powerpc/configs/40x/walnut_defconfig +++ b/arch/powerpc/configs/40x/walnut_defconfig @@ -1,17 +1,13 @@ CONFIG_40x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y -CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set -CONFIG_SPARSE_IRQ=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -28,19 +24,14 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=m CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y CONFIG_IBM_EMAC=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set @@ -53,22 +44,15 @@ CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set CONFIG_THERMAL=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig index 7e2530cd9d3070..ea4ef02a057895 100644 --- a/arch/powerpc/configs/44x/akebono_defconfig +++ b/arch/powerpc/configs/44x/akebono_defconfig @@ -4,9 +4,6 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_RD_XZ=y CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y # CONFIG_SLUB_CPU_PARTIAL is not set @@ -15,7 +12,6 @@ CONFIG_OPROFILE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set -# CONFIG_POWERNV_MSI is not set CONFIG_PPC_47x=y # CONFIG_EBONY is not set CONFIG_AKEBONO=y @@ -26,7 +22,6 @@ CONFIG_IRQ_ALL_CPUS=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="" # CONFIG_SUSPEND is not set -CONFIG_PCI_MSI=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -48,7 +43,6 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 # CONFIG_SCSI_PROC_FS is not set @@ -62,7 +56,6 @@ CONFIG_BLK_DEV_SD=y # CONFIG_NET_VENDOR_AMD is not set # CONFIG_NET_VENDOR_ARC is not set # CONFIG_NET_VENDOR_ATHEROS is not set -# CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_CHELSIO is not set diff --git a/arch/powerpc/configs/44x/arches_defconfig b/arch/powerpc/configs/44x/arches_defconfig index 44355c53cd3082..95494209c124e3 100644 --- a/arch/powerpc/configs/44x/arches_defconfig +++ b/arch/powerpc/configs/44x/arches_defconfig @@ -1,19 +1,16 @@ CONFIG_44x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_ARCHES=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_SPARSE_IRQ=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="" CONFIG_NET=y @@ -32,24 +29,16 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y CONFIG_IBM_EMAC=y CONFIG_IBM_EMAC_RXB=256 CONFIG_IBM_EMAC_TXB=256 -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -64,21 +53,15 @@ CONFIG_I2C=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_IBM_IIC=y CONFIG_SENSORS_AD7414=y -CONFIG_VIDEO_OUTPUT_CONTROL=m # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y diff --git a/arch/powerpc/configs/44x/bamboo_defconfig b/arch/powerpc/configs/44x/bamboo_defconfig index cef7d62560c48e..a046f08413fdc5 100644 --- a/arch/powerpc/configs/44x/bamboo_defconfig +++ b/arch/powerpc/configs/44x/bamboo_defconfig @@ -1,17 +1,14 @@ CONFIG_44x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_BAMBOO=y # CONFIG_EBONY is not set -CONFIG_SPARSE_IRQ=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="" CONFIG_NET=y @@ -28,12 +25,9 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y CONFIG_IBM_EMAC=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set @@ -47,23 +41,16 @@ CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set CONFIG_THERMAL=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y diff --git a/arch/powerpc/configs/44x/bluestone_defconfig b/arch/powerpc/configs/44x/bluestone_defconfig index ca7f1f32f2b2e1..a326b773ac052c 100644 --- a/arch/powerpc/configs/44x/bluestone_defconfig +++ b/arch/powerpc/configs/44x/bluestone_defconfig @@ -1,19 +1,16 @@ CONFIG_44x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y -# CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_PCI_QUIRKS is not set +# CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set CONFIG_BLUESTONE=y # CONFIG_EBONY is not set -# CONFIG_KVM_GUEST is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_SPARSE_IRQ=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="" CONFIG_NET=y @@ -27,18 +24,13 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y CONFIG_IBM_EMAC=y CONFIG_IBM_EMAC_RXB=256 CONFIG_IBM_EMAC_TXB=256 @@ -53,7 +45,6 @@ CONFIG_I2C=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_IBM_IIC=y CONFIG_SENSORS_AD7414=y -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_M41T80=y @@ -63,6 +54,5 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NLS=y diff --git a/arch/powerpc/configs/44x/canyonlands_defconfig b/arch/powerpc/configs/44x/canyonlands_defconfig index 9919a91add1201..d939e71fff7dad 100644 --- a/arch/powerpc/configs/44x/canyonlands_defconfig +++ b/arch/powerpc/configs/44x/canyonlands_defconfig @@ -1,19 +1,16 @@ CONFIG_44x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_CANYONLANDS=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_SPARSE_IRQ=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="" CONFIG_NET=y @@ -32,29 +29,18 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_NDFC=y -CONFIG_PROC_DEVICETREE=y -CONFIG_PM=y -CONFIG_SUSPEND=y -CONFIG_PPC4xx_CPM=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y CONFIG_IBM_EMAC=y CONFIG_IBM_EMAC_RXB=256 CONFIG_IBM_EMAC_TXB=256 -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -76,21 +62,15 @@ CONFIG_USB_EHCI_HCD=m CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PPC_OF_BE=y CONFIG_USB_OHCI_HCD_PPC_OF_LE=y -CONFIG_USB_LIBUSUAL=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_M41T80=y CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig index 47de682614431b..5aa312a158dd50 100644 --- a/arch/powerpc/configs/44x/currituck_defconfig +++ b/arch/powerpc/configs/44x/currituck_defconfig @@ -1,9 +1,7 @@ CONFIG_44x=y CONFIG_SMP=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_SPARSE_IRQ=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y @@ -39,12 +37,10 @@ CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_CONNECTOR=y CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 # CONFIG_SCSI_PROC_FS is not set @@ -56,7 +52,6 @@ CONFIG_SATA_SIL24=y # CONFIG_ATA_SFF is not set CONFIG_NETDEVICES=y CONFIG_E1000E=y -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -84,22 +79,18 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_NLS_DEFAULT="n" -CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_INFO=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_XMON=y CONFIG_XMON_DEFAULT=y CONFIG_PPC_EARLY_DEBUG=y CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW=0x10000000 CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH=0x200 -CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y diff --git a/arch/powerpc/configs/44x/ebony_defconfig b/arch/powerpc/configs/44x/ebony_defconfig index 31b58b0d52e263..5909e016c37dab 100644 --- a/arch/powerpc/configs/44x/ebony_defconfig +++ b/arch/powerpc/configs/44x/ebony_defconfig @@ -1,18 +1,14 @@ CONFIG_44x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y -CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -28,19 +24,14 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y CONFIG_IBM_EMAC=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set @@ -55,21 +46,15 @@ CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HWMON is not set CONFIG_THERMAL=y CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig index faccaf65f394c9..57499d25c87797 100644 --- a/arch/powerpc/configs/44x/eiger_defconfig +++ b/arch/powerpc/configs/44x/eiger_defconfig @@ -1,19 +1,15 @@ CONFIG_44x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_EIGER=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_SPARSE_IRQ=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="" CONFIG_PCIEPORTBUS=y @@ -33,20 +29,15 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y -CONFIG_MTD_CONCAT=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_NDFC=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 -# CONFIG_MISC_DEVICES is not set CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y @@ -54,13 +45,10 @@ CONFIG_FUSION=y CONFIG_FUSION_SAS=y CONFIG_I2O=y CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y CONFIG_IBM_EMAC=y CONFIG_IBM_EMAC_RXB=256 CONFIG_IBM_EMAC_TXB=256 CONFIG_E1000E=y -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -80,25 +68,18 @@ CONFIG_I2C_DEBUG_CORE=y CONFIG_I2C_DEBUG_ALGO=y CONFIG_I2C_DEBUG_BUS=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=m # CONFIG_USB_SUPPORT is not set CONFIG_DMADEVICES=y CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO=y CONFIG_CRYPTO_CRYPTD=y CONFIG_CRYPTO_AUTHENC=y CONFIG_CRYPTO_CCM=y @@ -116,7 +97,6 @@ CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y -CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_ARC4=y CONFIG_CRYPTO_BLOWFISH=y CONFIG_CRYPTO_DES=y diff --git a/arch/powerpc/configs/44x/icon_defconfig b/arch/powerpc/configs/44x/icon_defconfig index 05782c145141fb..5d52185d8f5a92 100644 --- a/arch/powerpc/configs/44x/icon_defconfig +++ b/arch/powerpc/configs/44x/icon_defconfig @@ -1,19 +1,14 @@ CONFIG_44x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_ICON=y CONFIG_HIGHMEM=y -CONFIG_SPARSE_IRQ=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="" CONFIG_PCIEPORTBUS=y @@ -34,17 +29,13 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 CONFIG_XILINX_SYSACE=y -# CONFIG_MISC_DEVICES is not set CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_SCSI_CONSTANTS=y @@ -55,11 +46,7 @@ CONFIG_FUSION_SAS=y CONFIG_FUSION_CTL=y CONFIG_FUSION_LOGGING=y CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y CONFIG_IBM_EMAC=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_WLAN is not set CONFIG_INPUT_MOUSEDEV_SCREEN_X=640 CONFIG_INPUT_MOUSEDEV_SCREEN_Y=480 @@ -79,7 +66,6 @@ CONFIG_I2C_CHARDEV=y CONFIG_I2C_IBM_IIC=y # CONFIG_HWMON is not set CONFIG_MFD_SM501=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FB_SM501=y CONFIG_FRAMEBUFFER_CONSOLE=y @@ -92,25 +78,19 @@ CONFIG_RTC_DRV_DS1307=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_850=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_ISO8859_15=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig index 49a1518a4e697a..0ad3e449526e2b 100644 --- a/arch/powerpc/configs/44x/iss476-smp_defconfig +++ b/arch/powerpc/configs/44x/iss476-smp_defconfig @@ -1,15 +1,11 @@ CONFIG_44x=y CONFIG_SMP=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_SPARSE_IRQ=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y -CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_PROFILING=y CONFIG_OPROFILE=y CONFIG_MODULES=y @@ -25,7 +21,6 @@ CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="root=/dev/issblk0" # CONFIG_PCI is not set CONFIG_ADVANCED_OPTIONS=y -CONFIG_NONSTATIC_KERNEL=y CONFIG_DYNAMIC_MEMSTART=y CONFIG_NET=y CONFIG_PACKET=y @@ -42,13 +37,10 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 # CONFIG_INPUT is not set @@ -72,15 +64,11 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y # CONFIG_NETWORK_FILESYSTEMS is not set -CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_PPC_EARLY_DEBUG=y -CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y diff --git a/arch/powerpc/configs/44x/katmai_defconfig b/arch/powerpc/configs/44x/katmai_defconfig index f1137972ed4129..a042335971da5f 100644 --- a/arch/powerpc/configs/44x/katmai_defconfig +++ b/arch/powerpc/configs/44x/katmai_defconfig @@ -1,17 +1,14 @@ CONFIG_44x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_KATMAI=y -CONFIG_SPARSE_IRQ=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="" CONFIG_NET=y @@ -30,19 +27,14 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 CONFIG_MACINTOSH_DRIVERS=y CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y CONFIG_IBM_EMAC=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set @@ -55,22 +47,15 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y diff --git a/arch/powerpc/configs/44x/rainier_defconfig b/arch/powerpc/configs/44x/rainier_defconfig index 4b91a44c4c32c6..91c2aff9bd55ac 100644 --- a/arch/powerpc/configs/44x/rainier_defconfig +++ b/arch/powerpc/configs/44x/rainier_defconfig @@ -1,10 +1,8 @@ CONFIG_44x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -12,7 +10,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_EBONY is not set CONFIG_RAINIER=y CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="" CONFIG_NET=y @@ -31,14 +28,11 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 CONFIG_MACINTOSH_DRIVERS=y @@ -55,26 +49,19 @@ CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set CONFIG_THERMAL=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_PPC_EARLY_DEBUG=y CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW=0xef600300 -CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y diff --git a/arch/powerpc/configs/44x/redwood_defconfig b/arch/powerpc/configs/44x/redwood_defconfig index b7113e114a14b3..7fddf3fe275c43 100644 --- a/arch/powerpc/configs/44x/redwood_defconfig +++ b/arch/powerpc/configs/44x/redwood_defconfig @@ -1,19 +1,15 @@ CONFIG_44x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_REDWOOD=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_SPARSE_IRQ=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="" CONFIG_PCIEPORTBUS=y @@ -33,18 +29,13 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y -CONFIG_MTD_CONCAT=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 -# CONFIG_MISC_DEVICES is not set CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y @@ -52,14 +43,11 @@ CONFIG_FUSION=y CONFIG_FUSION_SAS=y CONFIG_I2O=y CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y CONFIG_IBM_EMAC=y CONFIG_IBM_EMAC_RXB=256 CONFIG_IBM_EMAC_TXB=256 CONFIG_IBM_EMAC_DEBUG=y CONFIG_E1000E=y -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -79,25 +67,18 @@ CONFIG_I2C_DEBUG_CORE=y CONFIG_I2C_DEBUG_ALGO=y CONFIG_I2C_DEBUG_BUS=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=m # CONFIG_USB_SUPPORT is not set CONFIG_DMADEVICES=y CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO=y CONFIG_CRYPTO_CRYPTD=y CONFIG_CRYPTO_AUTHENC=y CONFIG_CRYPTO_CCM=y @@ -115,7 +96,6 @@ CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y -CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_ARC4=y CONFIG_CRYPTO_BLOWFISH=y CONFIG_CRYPTO_DES=y diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig index 9622eb2a3e37d2..6928012f3813ba 100644 --- a/arch/powerpc/configs/44x/sam440ep_defconfig +++ b/arch/powerpc/configs/44x/sam440ep_defconfig @@ -1,20 +1,19 @@ CONFIG_44x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_IKCONFIG=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_AMIGA_PARTITION=y # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_IOSCHED_CFQ is not set # CONFIG_EBONY is not set CONFIG_SAM440EP=y -CONFIG_SPARSE_IRQ=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="" CONFIG_NET=y @@ -31,11 +30,9 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 -# CONFIG_MISC_DEVICES is not set CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=y @@ -44,11 +41,7 @@ CONFIG_ATA=y # CONFIG_SATA_PMP is not set CONFIG_SATA_SIL=y CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y CONFIG_IBM_EMAC=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_INPUT_FF_MEMLESS=m CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y @@ -59,7 +52,6 @@ CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set CONFIG_I2C_IBM_IIC=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_FB=y CONFIG_FB_RADEON=y CONFIG_LCD_CLASS_DEVICE=y @@ -80,10 +72,8 @@ CONFIG_HID_MONTEREY=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_USB=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_EHCI_HCD=m CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PPC_OF_BE=y @@ -99,7 +89,6 @@ CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_REISERFS_FS=y -CONFIG_INOTIFY=y CONFIG_AUTOFS4_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y @@ -111,11 +100,7 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_AFFS_FS=m # CONFIG_NETWORK_FILESYSTEMS is not set -CONFIG_PARTITION_ADVANCED=y -CONFIG_AMIGA_PARTITION=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig index 9642d99b47f175..c294369cc39ffc 100644 --- a/arch/powerpc/configs/44x/sequoia_defconfig +++ b/arch/powerpc/configs/44x/sequoia_defconfig @@ -1,19 +1,16 @@ CONFIG_44x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_SEQUOIA=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_SPARSE_IRQ=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="" CONFIG_NET=y @@ -32,8 +29,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_INTELEXT=y @@ -41,12 +36,9 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_NDFC=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y CONFIG_IBM_EMAC=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set @@ -60,24 +52,17 @@ CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set CONFIG_THERMAL=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y diff --git a/arch/powerpc/configs/44x/taishan_defconfig b/arch/powerpc/configs/44x/taishan_defconfig index 09e3075030bfe5..e779228d6cd642 100644 --- a/arch/powerpc/configs/44x/taishan_defconfig +++ b/arch/powerpc/configs/44x/taishan_defconfig @@ -1,17 +1,14 @@ CONFIG_44x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_EBONY is not set CONFIG_TAISHAN=y -CONFIG_SPARSE_IRQ=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="" CONFIG_NET=y @@ -30,17 +27,13 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 CONFIG_MACINTOSH_DRIVERS=y CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y CONFIG_IBM_EMAC=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set @@ -54,23 +47,16 @@ CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set CONFIG_THERMAL=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y diff --git a/arch/powerpc/configs/44x/virtex5_defconfig b/arch/powerpc/configs/44x/virtex5_defconfig index 1eb3caf828a5e9..53d0300b339011 100644 --- a/arch/powerpc/configs/44x/virtex5_defconfig +++ b/arch/powerpc/configs/44x/virtex5_defconfig @@ -1,5 +1,4 @@ CONFIG_44x=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -7,7 +6,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -34,16 +32,11 @@ CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_MANGLE=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 CONFIG_XILINX_SYSACE=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -# CONFIG_NETDEV_10000 is not set -# CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set CONFIG_SERIO_XILINX_XPS_PS2=y CONFIG_SERIAL_8250=y @@ -59,15 +52,9 @@ CONFIG_GPIO_XILINX=y CONFIG_FB=y CONFIG_FB_XILINX=y CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FONTS=y -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y CONFIG_LOGO=y -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y -CONFIG_INOTIFY=y -CONFIG_AUTOFS_FS=y CONFIG_AUTOFS4_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y @@ -75,16 +62,16 @@ CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_ROMFS_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m CONFIG_CRC_CCITT=y +CONFIG_FONTS=y +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_DEBUG_KERNEL=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig index 551e50a0be5e9e..ee434375fc24a9 100644 --- a/arch/powerpc/configs/44x/warp_defconfig +++ b/arch/powerpc/configs/44x/warp_defconfig @@ -1,5 +1,4 @@ CONFIG_44x=y -CONFIG_EXPERIMENTAL=y CONFIG_LOCALVERSION="-pika" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y @@ -7,7 +6,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y @@ -16,7 +14,6 @@ CONFIG_MODULE_UNLOAD=y CONFIG_WARP=y CONFIG_PPC4xx_GPIO=y CONFIG_HZ_1000=y -CONFIG_SPARSE_IRQ=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="ip=on" # CONFIG_PCI is not set @@ -35,8 +32,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -44,21 +39,14 @@ CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_NDFC=y CONFIG_MTD_UBI=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y -CONFIG_MISC_DEVICES=y CONFIG_EEPROM_AT24=y CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_SCSI_SPI_ATTRS=y # CONFIG_SCSI_LOWLEVEL is not set CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y -CONFIG_MII=y CONFIG_IBM_EMAC=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -72,7 +60,6 @@ CONFIG_I2C_IBM_IIC=y CONFIG_GPIO_SYSFS=y CONFIG_SENSORS_AD7414=y CONFIG_THERMAL=y -CONFIG_THERMAL_HWMON=y CONFIG_WATCHDOG=y CONFIG_USB=y CONFIG_USB_MON=y @@ -83,14 +70,12 @@ CONFIG_MMC=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_GPIO=y -# CONFIG_LEDS_GPIO_PLATFORM is not set CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set -CONFIG_INOTIFY=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y @@ -99,7 +84,6 @@ CONFIG_JFFS2_FS=y CONFIG_UBIFS_FS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_850=y @@ -110,13 +94,10 @@ CONFIG_NLS_UTF8=y CONFIG_CRC_CCITT=y CONFIG_CRC_T10DIF=y CONFIG_PRINTK_TIME=y -CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_BUGVERBOSE is not set -CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/52xx/cm5200_defconfig b/arch/powerpc/configs/52xx/cm5200_defconfig index 0dc99e14103551..19fad0e0016e99 100644 --- a/arch/powerpc/configs/52xx/cm5200_defconfig +++ b/arch/powerpc/configs/52xx/cm5200_defconfig @@ -1,18 +1,15 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_EPOLL is not set # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y # CONFIG_PPC_CHRP is not set CONFIG_PPC_MPC52xx=y CONFIG_PPC_MPC5200_SIMPLE=y # CONFIG_PPC_PMAC is not set -CONFIG_SPARSE_IRQ=y CONFIG_PM=y # CONFIG_PCI is not set CONFIG_NET=y @@ -31,26 +28,20 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -# CONFIG_MISC_DEVICES is not set CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y # CONFIG_SCSI_LOWLEVEL is not set CONFIG_NETDEVICES=y -CONFIG_LXT_PHY=y -CONFIG_NET_ETHERNET=y CONFIG_FEC_MPC52xx=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set +CONFIG_LXT_PHY=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -64,7 +55,6 @@ CONFIG_I2C_MPC=y # CONFIG_HWMON is not set CONFIG_WATCHDOG=y CONFIG_USB=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PPC_OF_BE=y CONFIG_USB_STORAGE=y @@ -73,7 +63,6 @@ CONFIG_PPC_BESTCOMM=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y @@ -81,17 +70,13 @@ CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/52xx/lite5200b_defconfig b/arch/powerpc/configs/52xx/lite5200b_defconfig index 104a332e79ab96..5f40ba92a39a8e 100644 --- a/arch/powerpc/configs/52xx/lite5200b_defconfig +++ b/arch/powerpc/configs/52xx/lite5200b_defconfig @@ -1,10 +1,9 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_EPOLL is not set CONFIG_MODULES=y @@ -15,10 +14,6 @@ CONFIG_PPC_MPC52xx=y CONFIG_PPC_MPC5200_SIMPLE=y CONFIG_PPC_LITE5200=y # CONFIG_PPC_PMAC is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_SPARSE_IRQ=y -CONFIG_PM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -33,7 +28,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 @@ -42,9 +36,8 @@ CONFIG_BLK_DEV_SD=y CONFIG_ATA=y CONFIG_PATA_MPC52xx=y CONFIG_NETDEVICES=y -CONFIG_LXT_PHY=y -CONFIG_NET_ETHERNET=y CONFIG_FEC_MPC52xx=y +CONFIG_LXT_PHY=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -57,23 +50,18 @@ CONFIG_I2C=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_MPC=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_DMADEVICES=y CONFIG_PPC_BESTCOMM=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_INFO=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/52xx/motionpro_defconfig b/arch/powerpc/configs/52xx/motionpro_defconfig index c936fab9ec4a62..909e185a88d112 100644 --- a/arch/powerpc/configs/52xx/motionpro_defconfig +++ b/arch/powerpc/configs/52xx/motionpro_defconfig @@ -1,18 +1,15 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_EPOLL is not set # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y # CONFIG_PPC_CHRP is not set CONFIG_PPC_MPC52xx=y CONFIG_PPC_MPC5200_SIMPLE=y # CONFIG_PPC_PMAC is not set -CONFIG_SPARSE_IRQ=y CONFIG_PM=y # CONFIG_PCI is not set CONFIG_NET=y @@ -30,24 +27,21 @@ CONFIG_SYN_COOKIES=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_ROM=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_MISC_DEVICES=y CONFIG_EEPROM_LEGACY=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y CONFIG_ATA=y CONFIG_PATA_MPC52xx=y CONFIG_NETDEVICES=y +CONFIG_FEC_MPC52xx=y CONFIG_MARVELL_PHY=y CONFIG_DAVICOM_PHY=y CONFIG_QSEMI_PHY=y @@ -58,11 +52,6 @@ CONFIG_SMSC_PHY=y CONFIG_BROADCOM_PHY=y CONFIG_ICPLUS_PHY=y CONFIG_MDIO_BITBANG=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -CONFIG_FEC_MPC52xx=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -86,7 +75,6 @@ CONFIG_PPC_BESTCOMM=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y @@ -94,18 +82,14 @@ CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_INFO=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/52xx/pcm030_defconfig b/arch/powerpc/configs/52xx/pcm030_defconfig index 1d03c35540c768..649a01a0044d2c 100644 --- a/arch/powerpc/configs/52xx/pcm030_defconfig +++ b/arch/powerpc/configs/52xx/pcm030_defconfig @@ -1,15 +1,14 @@ -CONFIG_EXPERIMENTAL=y CONFIG_LOCALVERSION="trunk" # CONFIG_LOCALVERSION_AUTO is not set # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_VM_EVENT_COUNTERS is not set CONFIG_SLAB=y CONFIG_MODULES=y @@ -21,11 +20,8 @@ CONFIG_MODULE_UNLOAD=y CONFIG_PPC_MPC52xx=y CONFIG_PPC_MPC5200_SIMPLE=y # CONFIG_PPC_PMAC is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_HZ_100=y CONFIG_PREEMPT=y -CONFIG_SPARSE_IRQ=y # CONFIG_SECCOMP is not set CONFIG_NET=y CONFIG_PACKET=y @@ -45,40 +41,31 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_PHYSMAP=y -CONFIG_PROC_DEVICETREE=y # CONFIG_BLK_DEV is not set -# CONFIG_MISC_DEVICES is not set # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=m # CONFIG_SCSI_LOWLEVEL is not set CONFIG_ATA=m CONFIG_PATA_MPC52xx=m CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y CONFIG_FEC_MPC52xx=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_MPC52xx=y CONFIG_SERIAL_MPC52xx_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set CONFIG_HW_RANDOM=y CONFIG_I2C=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_MPC=y # CONFIG_HWMON is not set CONFIG_USB=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_OHCI_HCD=m -# CONFIG_USB_OHCI_HCD_PPC_SOC is not set CONFIG_USB_OHCI_HCD_PPC_OF_BE=y # CONFIG_USB_OHCI_HCD_PCI is not set CONFIG_USB_STORAGE=m @@ -95,8 +82,6 @@ CONFIG_FAT_DEFAULT_CODEPAGE=850 CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_850=y CONFIG_NLS_ISO8859_1=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set diff --git a/arch/powerpc/configs/52xx/tqm5200_defconfig b/arch/powerpc/configs/52xx/tqm5200_defconfig index ca83ec88b11400..efab8388ea9246 100644 --- a/arch/powerpc/configs/52xx/tqm5200_defconfig +++ b/arch/powerpc/configs/52xx/tqm5200_defconfig @@ -1,17 +1,14 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y -CONFIG_SPARSE_IRQ=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_EMBEDDED=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_KALLSYMS is not set # CONFIG_EPOLL is not set +CONFIG_EMBEDDED=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y # CONFIG_PPC_CHRP is not set CONFIG_PPC_MPC52xx=y CONFIG_PPC_MPC5200_SIMPLE=y @@ -34,17 +31,13 @@ CONFIG_SYN_COOKIES=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_ROM=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_PLATRAM=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 @@ -54,12 +47,9 @@ CONFIG_ATA=y CONFIG_PATA_MPC52xx=y CONFIG_PATA_PLATFORM=y CONFIG_NETDEVICES=y +CONFIG_FEC_MPC52xx=y CONFIG_LXT_PHY=y CONFIG_FIXED_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_FEC_MPC52xx=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_SERIAL_MPC52xx=y CONFIG_SERIAL_MPC52xx_CONSOLE=y CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200 @@ -75,7 +65,6 @@ CONFIG_FB_FOREIGN_ENDIAN=y CONFIG_FB_SM501=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_USB=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PPC_OF_BE=y @@ -95,17 +84,14 @@ CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_INFO=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -CONFIG_DEBUG_INFO=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/83xx/asp8347_defconfig b/arch/powerpc/configs/83xx/asp8347_defconfig index 985f95c7280a95..bcdfb07921fc38 100644 --- a/arch/powerpc/configs/83xx/asp8347_defconfig +++ b/arch/powerpc/configs/83xx/asp8347_defconfig @@ -1,21 +1,20 @@ CONFIG_FSL_EMB_PERFMON=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_83xx=y CONFIG_ASP834x=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -34,22 +33,16 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y CONFIG_MTD_REDBOOT_PARTS=y CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y CONFIG_GIANFAR=y -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -63,8 +56,6 @@ CONFIG_I2C_CHARDEV=y CONFIG_I2C_MPC=y CONFIG_THERMAL=y CONFIG_WATCHDOG=y -CONFIG_VIDEO_OUTPUT_CONTROL=m -# CONFIG_HID_SUPPORT is not set CONFIG_USB=y CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y @@ -74,17 +65,12 @@ CONFIG_RTC_DRV_DS1374=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_MSDOS_PARTITION is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/83xx/kmeter1_defconfig b/arch/powerpc/configs/83xx/kmeter1_defconfig index e12e60c3b9a25c..11a959283149c8 100644 --- a/arch/powerpc/configs/83xx/kmeter1_defconfig +++ b/arch/powerpc/configs/83xx/kmeter1_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -36,7 +35,6 @@ CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y @@ -45,10 +43,8 @@ CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_PHRAM=y CONFIG_MTD_UBI=y CONFIG_MTD_UBI_GLUEBI=y -CONFIG_PROC_DEVICETREE=y CONFIG_NETDEVICES=y CONFIG_DUMMY=y -CONFIG_MII=y CONFIG_TUN=y CONFIG_UCC_GETH=y CONFIG_MARVELL_PHY=y diff --git a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig index 4b4a2a9133a5eb..b47a41f77836e3 100644 --- a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig @@ -1,20 +1,18 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_83xx=y CONFIG_MPC831x_RDB=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -30,15 +28,12 @@ CONFIG_SYN_COOKIES=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_FSL_ELBC=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 @@ -51,12 +46,10 @@ CONFIG_MD_LINEAR=y CONFIG_MD_RAID0=y CONFIG_MD_RAID1=y CONFIG_NETDEVICES=y +CONFIG_GIANFAR=y +CONFIG_E100=y CONFIG_CICADA_PHY=y CONFIG_FIXED_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_NET_PCI=y -CONFIG_E100=y -CONFIG_GIANFAR=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -71,7 +64,6 @@ CONFIG_I2C_MPC=y CONFIG_SPI=y CONFIG_SPI_BITBANG=y CONFIG_WATCHDOG=y -CONFIG_VIDEO_OUTPUT_CONTROL=m # CONFIG_USB_HID is not set CONFIG_USB=y CONFIG_USB_MON=y @@ -82,7 +74,6 @@ CONFIG_USB_OHCI_HCD_PPC_OF_BE=y CONFIG_USB_UHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_USB_GADGET=y -CONFIG_USB_GADGET_NET2280=y CONFIG_USB_ETH=m CONFIG_RTC_CLASS=y CONFIG_RTC_INTF_DEV_UIE_EMUL=y @@ -90,18 +81,12 @@ CONFIG_RTC_DRV_DS1307=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig index 5871395573c517..e28c83f320c1e6 100644 --- a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig @@ -1,20 +1,18 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_83xx=y CONFIG_MPC831x_RDB=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -30,13 +28,11 @@ CONFIG_SYN_COOKIES=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_NAND=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 @@ -50,10 +46,8 @@ CONFIG_MD_LINEAR=y CONFIG_MD_RAID0=y CONFIG_MD_RAID1=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_NET_PCI=y -CONFIG_E100=y CONFIG_GIANFAR=y +CONFIG_E100=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -68,7 +62,6 @@ CONFIG_I2C_MPC=y CONFIG_SPI=y CONFIG_SPI_BITBANG=y CONFIG_WATCHDOG=y -CONFIG_VIDEO_OUTPUT_CONTROL=m # CONFIG_USB_HID is not set CONFIG_USB=y CONFIG_USB_MON=y @@ -79,7 +72,6 @@ CONFIG_USB_OHCI_HCD_PPC_OF_BE=y CONFIG_USB_UHCI_HCD=y CONFIG_USB_STORAGE=y CONFIG_USB_GADGET=y -CONFIG_USB_GADGET_NET2280=y CONFIG_USB_ETH=m CONFIG_RTC_CLASS=y CONFIG_RTC_INTF_DEV_UIE_EMUL=y @@ -87,18 +79,12 @@ CONFIG_RTC_DRV_DS1307=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/83xx/mpc832x_mds_defconfig b/arch/powerpc/configs/83xx/mpc832x_mds_defconfig index a5699a1f7d0a73..e84d35b848c085 100644 --- a/arch/powerpc/configs/83xx/mpc832x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc832x_mds_defconfig @@ -1,22 +1,21 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_83xx=y CONFIG_MPC832x_MDS=y CONFIG_QUICC_ENGINE=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -31,16 +30,13 @@ CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_SCSI=y CONFIG_NETDEVICES=y -CONFIG_DAVICOM_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y CONFIG_UCC_GETH=y +CONFIG_DAVICOM_PHY=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -53,23 +49,16 @@ CONFIG_I2C=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_MPC=y CONFIG_WATCHDOG=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1374=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_MSDOS_PARTITION is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig index 5adc4cea42d359..ae145f4105903a 100644 --- a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig @@ -1,22 +1,21 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_LDM_PARTITION=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_83xx=y CONFIG_MPC832x_RDB=y CONFIG_QUICC_ENGINE=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -31,18 +30,15 @@ CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_NETDEVICES=y -CONFIG_ICPLUS_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -CONFIG_E1000=y CONFIG_UCC_GETH=y +CONFIG_E1000=y +CONFIG_ICPLUS_PHY=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -58,7 +54,6 @@ CONFIG_I2C_MPC=y CONFIG_SPI=y CONFIG_SPI_BITBANG=y CONFIG_WATCHDOG=y -CONFIG_VIDEO_OUTPUT_CONTROL=m # CONFIG_USB_HID is not set CONFIG_USB=y CONFIG_USB_MON=y @@ -71,24 +66,18 @@ CONFIG_MMC_SPI=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_LDM_PARTITION=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_932=y CONFIG_NLS_ISO8859_8=y CONFIG_NLS_ISO8859_1=y CONFIG_CRC_T10DIF=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig index 82b6b6c88d6a70..2a5fdcbabcddb2 100644 --- a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig @@ -1,20 +1,19 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_MAC_PARTITION=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_83xx=y CONFIG_MPC834x_ITX=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -30,11 +29,9 @@ CONFIG_SYN_COOKIES=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 @@ -52,9 +49,9 @@ CONFIG_MD_LINEAR=y CONFIG_MD_RAID0=y CONFIG_MD_RAID1=y CONFIG_NETDEVICES=y +CONFIG_GIANFAR=y CONFIG_CICADA_PHY=y CONFIG_FIXED_PHY=y -CONFIG_GIANFAR=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -69,7 +66,6 @@ CONFIG_SPI=y CONFIG_SPI_BITBANG=y # CONFIG_HWMON is not set CONFIG_WATCHDOG=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_USB=y CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y @@ -82,19 +78,13 @@ CONFIG_RTC_DRV_DS1307=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAC_PARTITION=y CONFIG_CRC_T10DIF=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig index f8b228aaa03a27..9a2ff25a2e98b1 100644 --- a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig @@ -1,20 +1,19 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_MAC_PARTITION=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_83xx=y CONFIG_MPC834x_ITX=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -30,11 +29,9 @@ CONFIG_SYN_COOKIES=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 @@ -43,8 +40,8 @@ CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_SPI_ATTRS=y CONFIG_NETDEVICES=y -CONFIG_CICADA_PHY=y CONFIG_GIANFAR=y +CONFIG_CICADA_PHY=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -59,7 +56,6 @@ CONFIG_SPI=y CONFIG_SPI_BITBANG=y # CONFIG_HWMON is not set CONFIG_WATCHDOG=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_USB=y CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y @@ -72,19 +68,13 @@ CONFIG_RTC_DRV_DS1307=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAC_PARTITION=y CONFIG_CRC_T10DIF=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/83xx/mpc834x_mds_defconfig b/arch/powerpc/configs/83xx/mpc834x_mds_defconfig index 99660c06219182..e44edc57554943 100644 --- a/arch/powerpc/configs/83xx/mpc834x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc834x_mds_defconfig @@ -1,20 +1,19 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_83xx=y CONFIG_MPC834x_MDS=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -30,16 +29,13 @@ CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_NETDEVICES=y -CONFIG_MARVELL_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_NET_PCI=y -CONFIG_E100=y CONFIG_GIANFAR=y +CONFIG_E100=y +CONFIG_MARVELL_PHY=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -52,23 +48,16 @@ CONFIG_I2C=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_MPC=y CONFIG_WATCHDOG=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1374=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_MSDOS_PARTITION is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/83xx/mpc836x_mds_defconfig b/arch/powerpc/configs/83xx/mpc836x_mds_defconfig index 05710bbfd2ef3e..94a7d85f1603eb 100644 --- a/arch/powerpc/configs/83xx/mpc836x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc836x_mds_defconfig @@ -1,21 +1,20 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_83xx=y CONFIG_MPC836x_MDS=y CONFIG_QUICC_ENGINE=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -32,21 +31,17 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_SCSI=y CONFIG_NETDEVICES=y -CONFIG_MARVELL_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y CONFIG_UCC_GETH=y +CONFIG_MARVELL_PHY=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -59,23 +54,16 @@ CONFIG_I2C=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_MPC=y CONFIG_WATCHDOG=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1374=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_MSDOS_PARTITION is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig index 0540d673a05231..761ed8ea072993 100644 --- a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig +++ b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig @@ -1,20 +1,19 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_83xx=y CONFIG_MPC836x_RDK=y CONFIG_QUICC_ENGINE=y CONFIG_QE_GPIO=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -30,20 +29,17 @@ CONFIG_SYN_COOKIES=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_ADV_OPTIONS=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_NETDEVICES=y -CONFIG_BROADCOM_PHY=y CONFIG_UCC_GETH=y -# CONFIG_NETDEV_10000 is not set +CONFIG_BROADCOM_PHY=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -71,17 +67,11 @@ CONFIG_LOGO=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_MSDOS_PARTITION is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_PPC_EARLY_DEBUG=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/83xx/mpc837x_mds_defconfig b/arch/powerpc/configs/83xx/mpc837x_mds_defconfig index f367985be6f770..bcf1b48cc9e69c 100644 --- a/arch/powerpc/configs/83xx/mpc837x_mds_defconfig +++ b/arch/powerpc/configs/83xx/mpc837x_mds_defconfig @@ -1,18 +1,16 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_83xx=y CONFIG_MPC837x_MDS=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -28,7 +26,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 @@ -37,10 +34,8 @@ CONFIG_CHR_DEV_SG=y CONFIG_ATA=y CONFIG_SATA_FSL=y CONFIG_NETDEVICES=y -CONFIG_MARVELL_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y CONFIG_GIANFAR=y +CONFIG_MARVELL_PHY=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -54,21 +49,15 @@ CONFIG_I2C=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_MPC=y CONFIG_WATCHDOG=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y CONFIG_CRC_T10DIF=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig index 414eda381591dd..f0f0ebf7512513 100644 --- a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig +++ b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig @@ -1,18 +1,16 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_83xx=y CONFIG_MPC837x_RDB=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -30,7 +28,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 @@ -43,12 +40,9 @@ CONFIG_BLK_DEV_MD=y CONFIG_MD_RAID1=y CONFIG_MD_RAID456=y CONFIG_NETDEVICES=y +CONFIG_GIANFAR=y CONFIG_MARVELL_PHY=y CONFIG_FIXED_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -CONFIG_GIANFAR=y -# CONFIG_NETDEV_10000 is not set CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set @@ -63,7 +57,6 @@ CONFIG_I2C=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_MPC=y CONFIG_WATCHDOG=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y @@ -78,7 +71,6 @@ CONFIG_HID_MONTEREY=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_USB=y CONFIG_USB_MON=y @@ -87,18 +79,13 @@ CONFIG_USB_EHCI_FSL=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y CONFIG_CRC_T10DIF=y # CONFIG_ENABLE_MUST_CHECK is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/83xx/sbc834x_defconfig b/arch/powerpc/configs/83xx/sbc834x_defconfig index 4ae385894c643e..d2e4d82de14d84 100644 --- a/arch/powerpc/configs/83xx/sbc834x_defconfig +++ b/arch/powerpc/configs/83xx/sbc834x_defconfig @@ -1,8 +1,6 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set CONFIG_SLAB=y @@ -13,7 +11,6 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_PPC_PMAC is not set CONFIG_PPC_83xx=y CONFIG_SBC834x=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -30,15 +27,11 @@ CONFIG_SYN_COOKIES=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 @@ -47,11 +40,8 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set CONFIG_NETDEVICES=y -CONFIG_BROADCOM_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y CONFIG_GIANFAR=y -# CONFIG_NETDEV_10000 is not set +CONFIG_BROADCOM_PHY=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -78,15 +68,11 @@ CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig index b4c4b469e320e5..b0939dd9ad6f58 100644 --- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig +++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig @@ -1,17 +1,15 @@ CONFIG_PPC_85xx=y CONFIG_SMP=y CONFIG_NR_CPUS=2 -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y -CONFIG_SPARSE_IRQ=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_UTS_NS is not set # CONFIG_IPC_NS is not set -# CONFIG_USER_NS is not set # CONFIG_PID_NS is not set # CONFIG_NET_NS is not set CONFIG_SYSFS_DEPRECATED=y @@ -28,7 +26,6 @@ CONFIG_QUICC_ENGINE=y CONFIG_QE_GPIO=y CONFIG_CPM2=y CONFIG_HIGHMEM=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_HZ_1000=y CONFIG_PREEMPT=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set @@ -71,8 +68,6 @@ CONFIG_IPV6_TUNNEL=m CONFIG_NET_PKTGEN=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y @@ -81,13 +76,11 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_FSL_ELBC=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 -CONFIG_MISC_DEVICES=y CONFIG_DS1682=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y @@ -125,7 +118,6 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=2 CONFIG_SERIAL_8250_RUNTIME_UARTS=2 -CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_DETECT_IRQ=y CONFIG_SERIAL_8250_RSA=y @@ -141,7 +133,6 @@ CONFIG_SENSORS_LM90=y CONFIG_SENSORS_LM92=y CONFIG_WATCHDOG=y CONFIG_GEF_WDT=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_HID_DRAGONRISE=y CONFIG_HID_GYRATION=y CONFIG_HID_TWINHAN=y @@ -149,7 +140,6 @@ CONFIG_HID_ORTEK=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_HID_GREENASIA=y CONFIG_HID_SMARTJOYPLUS=y @@ -172,7 +162,6 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_RX8581=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y -# CONFIG_NET_DMA is not set CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y @@ -194,7 +183,6 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y @@ -244,10 +232,8 @@ CONFIG_CRC_CCITT=y CONFIG_CRC_T10DIF=y CONFIG_LIBCRC32C=y CONFIG_MAGIC_SYSRQ=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_DES=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/85xx/kmp204x_defconfig b/arch/powerpc/configs/85xx/kmp204x_defconfig index e362d588dfbfea..e94d3eb4a8c18e 100644 --- a/arch/powerpc/configs/85xx/kmp204x_defconfig +++ b/arch/powerpc/configs/85xx/kmp204x_defconfig @@ -94,7 +94,6 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_M25P80=y CONFIG_MTD_PHRAM=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_ECC_BCH=y @@ -111,7 +110,6 @@ CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SYM53C8XX_2=y CONFIG_NETDEVICES=y @@ -120,7 +118,6 @@ CONFIG_NETDEVICES=y # CONFIG_NET_VENDOR_ALTEON is not set # CONFIG_NET_VENDOR_AMD is not set # CONFIG_NET_VENDOR_ATHEROS is not set -# CONFIG_NET_CADENCE is not set # CONFIG_NET_VENDOR_BROADCOM is not set # CONFIG_NET_VENDOR_BROCADE is not set # CONFIG_NET_VENDOR_CHELSIO is not set diff --git a/arch/powerpc/configs/85xx/ksi8560_defconfig b/arch/powerpc/configs/85xx/ksi8560_defconfig index aee0d17a955138..3be85c5f1a2a14 100644 --- a/arch/powerpc/configs/85xx/ksi8560_defconfig +++ b/arch/powerpc/configs/85xx/ksi8560_defconfig @@ -1,17 +1,16 @@ CONFIG_PPC_85xx=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set CONFIG_KSI8560=y CONFIG_CPM2=y CONFIG_HIGHMEM=y CONFIG_BINFMT_MISC=y CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y # CONFIG_SECCOMP is not set CONFIG_NET=y CONFIG_PACKET=y @@ -27,8 +26,6 @@ CONFIG_SYN_COOKIES=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y @@ -39,12 +36,11 @@ CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_IDE=y CONFIG_NETDEVICES=y -CONFIG_MARVELL_PHY=y -CONFIG_NET_ETHERNET=y CONFIG_FS_ENET=y # CONFIG_FS_ENET_HAS_SCC is not set CONFIG_FS_ENET_MDIO_FCC=y CONFIG_GIANFAR=y +CONFIG_MARVELL_PHY=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -53,22 +49,15 @@ CONFIG_GIANFAR=y CONFIG_SERIAL_CPM=y CONFIG_SERIAL_CPM_CONSOLE=y CONFIG_GEN_RTC=y -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_MSDOS_PARTITION is not set CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_MUTEXES=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig index 11662c217ac09f..e38c373f2edf40 100644 --- a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig +++ b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig @@ -1,17 +1,16 @@ CONFIG_PPC_85xx=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set CONFIG_MPC8540_ADS=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BINFMT_MISC=y CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y # CONFIG_SECCOMP is not set CONFIG_NET=y CONFIG_PACKET=y @@ -27,13 +26,10 @@ CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y CONFIG_GIANFAR=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set @@ -44,20 +40,13 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_HW_RANDOM is not set CONFIG_GEN_RTC=y -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_MSDOS_PARTITION is not set -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_MUTEXES=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig index ebe9b30b072110..48fc8e3a7be02c 100644 --- a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig +++ b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig @@ -1,15 +1,14 @@ CONFIG_PPC_85xx=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set CONFIG_MPC8560_ADS=y CONFIG_BINFMT_MISC=y CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y # CONFIG_SECCOMP is not set CONFIG_PCI=y CONFIG_PCI_DEBUG=y @@ -30,13 +29,12 @@ CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_NETDEVICES=y -CONFIG_MARVELL_PHY=y -CONFIG_DAVICOM_PHY=y -CONFIG_NET_ETHERNET=y CONFIG_FS_ENET=y # CONFIG_FS_ENET_HAS_SCC is not set -CONFIG_E1000=y CONFIG_GIANFAR=y +CONFIG_E1000=y +CONFIG_MARVELL_PHY=y +CONFIG_DAVICOM_PHY=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -45,20 +43,13 @@ CONFIG_GIANFAR=y CONFIG_SERIAL_CPM=y CONFIG_SERIAL_CPM_CONSOLE=y CONFIG_GEN_RTC=y -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_MSDOS_PARTITION is not set -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_MUTEXES=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig index eb25229b387a1d..ecb0c3bf879606 100644 --- a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig +++ b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig @@ -1,17 +1,16 @@ CONFIG_PPC_85xx=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set CONFIG_MPC85xx_CDS=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BINFMT_MISC=y CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y # CONFIG_SECCOMP is not set CONFIG_PCI=y CONFIG_NET=y @@ -28,7 +27,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 @@ -36,10 +34,8 @@ CONFIG_IDE=y CONFIG_BLK_DEV_GENERIC=y CONFIG_BLK_DEV_VIA82CXXX=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -CONFIG_E1000=y CONFIG_GIANFAR=y +CONFIG_E1000=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -49,20 +45,13 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_HW_RANDOM is not set CONFIG_GEN_RTC=y -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_MSDOS_PARTITION is not set -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_MUTEXES=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/85xx/ppa8548_defconfig b/arch/powerpc/configs/85xx/ppa8548_defconfig index e80bb9b21eacac..190978a5b7d5a5 100644 --- a/arch/powerpc/configs/85xx/ppa8548_defconfig +++ b/arch/powerpc/configs/85xx/ppa8548_defconfig @@ -1,11 +1,16 @@ CONFIG_PPC_85xx=y -CONFIG_PPA8548=y -CONFIG_DTC=y -CONFIG_DEFAULT_UIMAGE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y -# CONFIG_PCI is not set -# CONFIG_USB_SUPPORT is not set +CONFIG_PPA8548=y +CONFIG_FSL_LBC=y +CONFIG_RAPIDIO=y +CONFIG_FSL_RIO=y +CONFIG_RAPIDIO_DMA_ENGINE=y +CONFIG_RAPIDIO_ENUM_BASIC=y +CONFIG_RAPIDIO_TSI57X=y +CONFIG_RAPIDIO_CPS_XX=y +CONFIG_RAPIDIO_TSI568=y +CONFIG_RAPIDIO_CPS_GEN2=y CONFIG_ADVANCED_OPTIONS=y CONFIG_LOWMEM_SIZE_BOOL=y CONFIG_LOWMEM_SIZE=0x40000000 @@ -14,51 +19,28 @@ CONFIG_LOWMEM_CAM_NUM=4 CONFIG_PAGE_OFFSET_BOOL=y CONFIG_PAGE_OFFSET=0xb0000000 CONFIG_KERNEL_START_BOOL=y -CONFIG_KERNEL_START=0xb0000000 -# CONFIG_PHYSICAL_START_BOOL is not set -CONFIG_PHYSICAL_START=0x00000000 -CONFIG_PHYSICAL_ALIGN=0x04000000 CONFIG_TASK_SIZE_BOOL=y CONFIG_TASK_SIZE=0xb0000000 - -CONFIG_FSL_LBC=y -CONFIG_FSL_DMA=y -CONFIG_FSL_RIO=y - -CONFIG_RAPIDIO=y -CONFIG_RAPIDIO_DMA_ENGINE=y -CONFIG_RAPIDIO_TSI57X=y -CONFIG_RAPIDIO_TSI568=y -CONFIG_RAPIDIO_CPS_XX=y -CONFIG_RAPIDIO_CPS_GEN2=y -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_PROC_DEVICETREE=y - +CONFIG_NET=y +CONFIG_INET=y +CONFIG_IP_PNP=y CONFIG_MTD=y -CONFIG_MTD_BLKDEVS=y +CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_CFI_INTELEXT=y -CONFIG_MTD_CHAR=y -CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CONCAT=y +CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y - +CONFIG_NETDEVICES=y +CONFIG_GIANFAR=y +CONFIG_MARVELL_PHY=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y CONFIG_I2C=y CONFIG_I2C_MPC=y -CONFIG_I2C_CHARDEV +# CONFIG_USB_SUPPORT is not set CONFIG_RTC_CLASS=y -CONFIG_RTC_HCTOSYS=y CONFIG_RTC_DRV_ISL1208=y - -CONFIG_NET=y -CONFIG_INET=y -CONFIG_IP_PNP=y -CONFIG_NETDEVICES=y -CONFIG_MII=y -CONFIG_GIANFAR=y -CONFIG_MARVELL_PHY=y +CONFIG_FSL_DMA=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y diff --git a/arch/powerpc/configs/85xx/sbc8548_defconfig b/arch/powerpc/configs/85xx/sbc8548_defconfig index 008a7a47b89b38..72b7ccfbe2c230 100644 --- a/arch/powerpc/configs/85xx/sbc8548_defconfig +++ b/arch/powerpc/configs/85xx/sbc8548_defconfig @@ -1,16 +1,13 @@ CONFIG_PPC_85xx=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_SLAB=y # CONFIG_BLK_DEV_BSG is not set CONFIG_SBC8548=y CONFIG_BINFMT_MISC=y CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y # CONFIG_SECCOMP is not set CONFIG_PCI=y CONFIG_NET=y @@ -27,14 +24,19 @@ CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set -CONFIG_PROC_DEVICETREE=y +CONFIG_MTD=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_CFI=y +CONFIG_MTD_CFI_ADV_OPTIONS=y +CONFIG_MTD_CFI_GEOMETRY=y +CONFIG_MTD_CFI_I4=y +CONFIG_MTD_CFI_INTELEXT=y +CONFIG_MTD_PHYSMAP_OF=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_NETDEVICES=y -CONFIG_BROADCOM_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y CONFIG_GIANFAR=y +CONFIG_BROADCOM_PHY=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -44,33 +46,9 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_HW_RANDOM is not set CONFIG_GEN_RTC=y -CONFIG_VIDEO_OUTPUT_CONTROL=y -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_MTD=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y -CONFIG_MTD_BLKDEVS=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_GEN_PROBE=y -CONFIG_MTD_CFI_ADV_OPTIONS=y -CONFIG_MTD_CFI_NOSWAP=y -CONFIG_MTD_CFI_GEOMETRY=y -CONFIG_MTD_MAP_BANK_WIDTH_1=y -CONFIG_MTD_MAP_BANK_WIDTH_2=y -CONFIG_MTD_MAP_BANK_WIDTH_4=y -CONFIG_MTD_CFI_I1=y -CONFIG_MTD_CFI_I2=y -CONFIG_MTD_CFI_I4=y -CONFIG_MTD_CFI_INTELEXT=y -CONFIG_MTD_CFI_UTIL=y -CONFIG_MTD_PHYSMAP_OF=y diff --git a/arch/powerpc/configs/85xx/socrates_defconfig b/arch/powerpc/configs/85xx/socrates_defconfig index 435fd408eef192..0ad7bd5ee6b665 100644 --- a/arch/powerpc/configs/85xx/socrates_defconfig +++ b/arch/powerpc/configs/85xx/socrates_defconfig @@ -1,20 +1,17 @@ CONFIG_PPC_85xx=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=16 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_EPOLL is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y CONFIG_SOCRATES=y CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -28,13 +25,8 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_CAN=y -CONFIG_CAN_RAW=y -CONFIG_CAN_BCM=y CONFIG_MTD=y -CONFIG_MTD_CONCAT=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y @@ -49,11 +41,8 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set CONFIG_NETDEVICES=y -CONFIG_MARVELL_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y CONFIG_GIANFAR=y -# CONFIG_NETDEV_10000 is not set +CONFIG_MARVELL_PHY=y CONFIG_INPUT_MOUSEDEV_SCREEN_X=800 CONFIG_INPUT_MOUSEDEV_SCREEN_Y=480 CONFIG_INPUT_EVDEV=y @@ -65,7 +54,6 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=2 CONFIG_SERIAL_8250_RUNTIME_UARTS=2 -CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_DETECT_IRQ=y CONFIG_SERIAL_8250_RSA=y @@ -82,8 +70,6 @@ CONFIG_FB_MB862XX=y CONFIG_FB_MB862XX_LIME=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FONTS=y -CONFIG_FONT_8x16=y CONFIG_USB=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y CONFIG_USB_MON=y @@ -95,15 +81,11 @@ CONFIG_RTC_CLASS=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_FONTS=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/85xx/stx_gp3_defconfig b/arch/powerpc/configs/85xx/stx_gp3_defconfig index 5d4db154bf597f..f66d16ba8c5893 100644 --- a/arch/powerpc/configs/85xx/stx_gp3_defconfig +++ b/arch/powerpc/configs/85xx/stx_gp3_defconfig @@ -1,9 +1,7 @@ CONFIG_PPC_85xx=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODVERSIONS=y @@ -12,7 +10,6 @@ CONFIG_STX_GP3=y CONFIG_HIGHMEM=y CONFIG_BINFMT_MISC=m CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -28,7 +25,6 @@ CONFIG_IP_NF_FILTER=m CONFIG_NET_PKTGEN=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set -CONFIG_PROC_DEVICETREE=y CONFIG_PARPORT=m CONFIG_PARPORT_PC=m CONFIG_BLK_DEV_LOOP=m @@ -42,12 +38,10 @@ CONFIG_BLK_DEV_SD=m CONFIG_CHR_DEV_ST=m CONFIG_BLK_DEV_SR=m CONFIG_CHR_DEV_SG=m -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y CONFIG_NETDEVICES=y -CONFIG_MARVELL_PHY=y -CONFIG_NET_ETHERNET=y CONFIG_GIANFAR=y +CONFIG_MARVELL_PHY=y CONFIG_INPUT_MOUSEDEV_SCREEN_X=1280 CONFIG_INPUT_MOUSEDEV_SCREEN_Y=1024 CONFIG_INPUT_JOYDEV=m @@ -63,8 +57,6 @@ CONFIG_SOUND=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y -CONFIG_AUTOFS_FS=m CONFIG_AUTOFS4_FS=y CONFIG_ISO9660_FS=m CONFIG_UDF_FS=m @@ -73,16 +65,11 @@ CONFIG_VFAT_FS=m CONFIG_TMPFS=y CONFIG_CRAMFS=m CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_SMB_FS=m CONFIG_NLS=y CONFIG_CRC_CCITT=y CONFIG_CRC_T10DIF=m -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BDI_SWITCH=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/85xx/tqm8540_defconfig b/arch/powerpc/configs/85xx/tqm8540_defconfig index 5a800e6e38e313..4daaf2943b44e8 100644 --- a/arch/powerpc/configs/85xx/tqm8540_defconfig +++ b/arch/powerpc/configs/85xx/tqm8540_defconfig @@ -1,17 +1,15 @@ CONFIG_PPC_85xx=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_EPOLL is not set # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set CONFIG_TQM8540=y CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -25,9 +23,7 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -38,10 +34,8 @@ CONFIG_IDE=y CONFIG_BLK_DEV_GENERIC=y CONFIG_BLK_DEV_VIA82CXXX=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_NET_PCI=y -CONFIG_E100=y CONFIG_GIANFAR=y +CONFIG_E100=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -58,15 +52,10 @@ CONFIG_SENSORS_LM75=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_MSDOS_PARTITION is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/85xx/tqm8541_defconfig b/arch/powerpc/configs/85xx/tqm8541_defconfig index 2d936697d69ed7..bb402b3cf78624 100644 --- a/arch/powerpc/configs/85xx/tqm8541_defconfig +++ b/arch/powerpc/configs/85xx/tqm8541_defconfig @@ -1,17 +1,15 @@ CONFIG_PPC_85xx=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_EPOLL is not set # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set CONFIG_TQM8541=y CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -25,9 +23,7 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -38,10 +34,8 @@ CONFIG_IDE=y CONFIG_BLK_DEV_GENERIC=y CONFIG_BLK_DEV_VIA82CXXX=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_NET_PCI=y -CONFIG_E100=y CONFIG_GIANFAR=y +CONFIG_E100=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -60,15 +54,10 @@ CONFIG_SENSORS_LM75=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_MSDOS_PARTITION is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/85xx/tqm8548_defconfig b/arch/powerpc/configs/85xx/tqm8548_defconfig index ce8a67e894737e..685d0fb132d673 100644 --- a/arch/powerpc/configs/85xx/tqm8548_defconfig +++ b/arch/powerpc/configs/85xx/tqm8548_defconfig @@ -1,20 +1,19 @@ CONFIG_PPC_85xx=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set CONFIG_TQM8548=y CONFIG_HIGHMEM=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BINFMT_MISC=y CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y # CONFIG_SECCOMP is not set CONFIG_PCI=y CONFIG_PCIEPORTBUS=y @@ -34,22 +33,16 @@ CONFIG_SYN_COOKIES=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y -CONFIG_MTD_BLKDEVS=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_NAND_ECC_SMC=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_FSL_UPM=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y CONFIG_GIANFAR=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set @@ -63,22 +56,15 @@ CONFIG_I2C=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_MPC=y CONFIG_SENSORS_LM75=y -CONFIG_VIDEO_OUTPUT_CONTROL=y # CONFIG_USB_SUPPORT is not set CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_MSDOS_PARTITION is not set -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_MUTEXES=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/85xx/tqm8555_defconfig b/arch/powerpc/configs/85xx/tqm8555_defconfig index a4e12971ccac8c..02a931d4e954ba 100644 --- a/arch/powerpc/configs/85xx/tqm8555_defconfig +++ b/arch/powerpc/configs/85xx/tqm8555_defconfig @@ -1,17 +1,15 @@ CONFIG_PPC_85xx=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_EPOLL is not set # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set CONFIG_TQM8555=y CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -25,9 +23,7 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -38,10 +34,8 @@ CONFIG_IDE=y CONFIG_BLK_DEV_GENERIC=y CONFIG_BLK_DEV_VIA82CXXX=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_NET_PCI=y -CONFIG_E100=y CONFIG_GIANFAR=y +CONFIG_E100=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -60,15 +54,10 @@ CONFIG_SENSORS_LM75=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_MSDOS_PARTITION is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/85xx/tqm8560_defconfig b/arch/powerpc/configs/85xx/tqm8560_defconfig index 341abe18a74dea..633d5b759a36bb 100644 --- a/arch/powerpc/configs/85xx/tqm8560_defconfig +++ b/arch/powerpc/configs/85xx/tqm8560_defconfig @@ -1,17 +1,15 @@ CONFIG_PPC_85xx=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set # CONFIG_EPOLL is not set # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +# CONFIG_MSDOS_PARTITION is not set CONFIG_TQM8560=y CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -25,9 +23,7 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -38,10 +34,8 @@ CONFIG_IDE=y CONFIG_BLK_DEV_GENERIC=y CONFIG_BLK_DEV_VIA82CXXX=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_NET_PCI=y -CONFIG_E100=y CONFIG_GIANFAR=y +CONFIG_E100=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -60,15 +54,10 @@ CONFIG_SENSORS_LM75=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_MSDOS_PARTITION is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig index 72df8ab8449e4c..34f3ea1729e0b0 100644 --- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig +++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig @@ -1,29 +1,25 @@ CONFIG_PPC_85xx=y CONFIG_SMP=y CONFIG_NR_CPUS=2 -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_BSD_PROCESS_ACCT=y CONFIG_AUDIT=y +CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y -CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y CONFIG_XES_MPC85xx=y -CONFIG_GPIO_MPC8XXX=y CONFIG_HIGHMEM=y CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_PCIEPORTBUS=y # CONFIG_PCIEASPM is not set @@ -52,12 +48,9 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NET_IPIP=y -CONFIG_NET_IPGRE=y -CONFIG_NET_IPGRE_BROADCAST=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y -CONFIG_ARPD=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set @@ -67,8 +60,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y CONFIG_MTD_REDBOOT_PARTS=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y @@ -79,7 +70,6 @@ CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_MTD_NAND_FSL_UPM=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=y CONFIG_BLK_DEV_RAM=y @@ -88,19 +78,15 @@ CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_LOGGING=y CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_PATA_ALI=y CONFIG_NETDEVICES=y CONFIG_DUMMY=y -CONFIG_BROADCOM_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -CONFIG_E1000=y CONFIG_GIANFAR=y -# CONFIG_NETDEV_10000 is not set +CONFIG_E1000=y +CONFIG_BROADCOM_PHY=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -109,7 +95,6 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=2 CONFIG_SERIAL_8250_RUNTIME_UARTS=2 -CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_DETECT_IRQ=y CONFIG_SERIAL_8250_RSA=y @@ -118,25 +103,19 @@ CONFIG_NVRAM=y CONFIG_I2C=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_MPC=y -CONFIG_GPIO_SYSFS=y -CONFIG_GPIO_PCA953X=y CONFIG_SENSORS_DS1621=y CONFIG_SENSORS_LM90=y CONFIG_WATCHDOG=y -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_USB=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_ISP1760_HCD=y CONFIG_USB_STORAGE=y CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y -CONFIG_LEDS_GPIO=y CONFIG_LEDS_PCA955X=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y -CONFIG_LEDS_TRIGGER_GPIO=y CONFIG_EDAC=y CONFIG_EDAC_MM_EDAC=y CONFIG_RTC_CLASS=y @@ -147,7 +126,6 @@ CONFIG_FSL_DMA=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_ZISOFS=y @@ -159,17 +137,13 @@ CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_JFFS2_SUMMARY=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_CRC_T10DIF=y -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/86xx/gef_ppc9a_defconfig b/arch/powerpc/configs/86xx/gef_ppc9a_defconfig index 7cb9719abf3dd0..9792a2cb9b207b 100644 --- a/arch/powerpc/configs/86xx/gef_ppc9a_defconfig +++ b/arch/powerpc/configs/86xx/gef_ppc9a_defconfig @@ -1,8 +1,8 @@ CONFIG_SMP=y CONFIG_NR_CPUS=2 -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_IKCONFIG=y @@ -10,7 +10,6 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -21,11 +20,9 @@ CONFIG_MODULE_UNLOAD=y CONFIG_PPC_86xx=y CONFIG_GEF_PPC9A=y CONFIG_HIGHMEM=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_HZ_1000=y CONFIG_PREEMPT=y CONFIG_BINFMT_MISC=m -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_PCIEPORTBUS=y # CONFIG_PCIEASPM is not set @@ -52,8 +49,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y @@ -69,9 +64,6 @@ CONFIG_IPV6_TUNNEL=m CONFIG_NET_PKTGEN=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y @@ -83,7 +75,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 -CONFIG_MISC_DEVICES=y CONFIG_DS1682=y CONFIG_IDE=y CONFIG_BLK_DEV_IDECS=y @@ -93,35 +84,32 @@ CONFIG_BLK_DEV_SR=y CONFIG_ATA=y CONFIG_SATA_SIL=y CONFIG_NETDEVICES=y -CONFIG_DUMMY=m CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_NETCONSOLE=y CONFIG_TUN=m -CONFIG_NET_ETHERNET=y -CONFIG_MII=y CONFIG_GIANFAR=y -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m CONFIG_PPP_FILTER=y +CONFIG_PPP_MULTILINK=y +CONFIG_PPPOE=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPPOE=m CONFIG_SLIP=m CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP_SMART=y CONFIG_SLIP_MODE_SLIP6=y -CONFIG_NETCONSOLE=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_SERIAL_8250_PCI is not set CONFIG_SERIAL_8250_NR_UARTS=2 CONFIG_SERIAL_8250_RUNTIME_UARTS=2 -# CONFIG_LEGACY_PTYS is not set CONFIG_HW_RANDOM=y CONFIG_NVRAM=y CONFIG_I2C=y @@ -133,7 +121,6 @@ CONFIG_SENSORS_LM90=y CONFIG_SENSORS_LM92=y CONFIG_WATCHDOG=y CONFIG_GEF_WDT=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y @@ -148,10 +135,8 @@ CONFIG_HID_MONTEREY=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_USB=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_HCD_PPC_OF is not set CONFIG_USB_OHCI_HCD=y @@ -160,7 +145,6 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_INTF_PROC is not set CONFIG_RTC_DRV_RX8581=y CONFIG_STAGING=y -# CONFIG_STAGING_EXCLUDE_BUILD is not set CONFIG_VME_BUS=y CONFIG_VME_TSI148=y CONFIG_EXT2_FS=y @@ -169,7 +153,6 @@ CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_INOTIFY=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_ZISOFS=y @@ -182,7 +165,6 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_CIFS=m @@ -230,7 +212,5 @@ CONFIG_CRC_CCITT=y CONFIG_CRC_T10DIF=y CONFIG_LIBCRC32C=y CONFIG_MAGIC_SYSRQ=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set diff --git a/arch/powerpc/configs/86xx/gef_sbc310_defconfig b/arch/powerpc/configs/86xx/gef_sbc310_defconfig index ecabf625d2497d..cadc36682bb4f2 100644 --- a/arch/powerpc/configs/86xx/gef_sbc310_defconfig +++ b/arch/powerpc/configs/86xx/gef_sbc310_defconfig @@ -1,8 +1,8 @@ CONFIG_SMP=y CONFIG_NR_CPUS=2 -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_IKCONFIG=y @@ -10,7 +10,6 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -21,11 +20,9 @@ CONFIG_MODULE_UNLOAD=y CONFIG_PPC_86xx=y CONFIG_GEF_SBC310=y CONFIG_HIGHMEM=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_HZ_1000=y CONFIG_PREEMPT=y CONFIG_BINFMT_MISC=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_PCIEPORTBUS=y # CONFIG_PCIEASPM is not set @@ -52,8 +49,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y @@ -69,9 +64,6 @@ CONFIG_IPV6_TUNNEL=m CONFIG_NET_PKTGEN=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y @@ -83,7 +75,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 -CONFIG_MISC_DEVICES=y CONFIG_DS1682=y CONFIG_IDE=y CONFIG_BLK_DEV_IDECS=y @@ -94,35 +85,32 @@ CONFIG_ATA=y CONFIG_SATA_SIL24=y # CONFIG_ATA_SFF is not set CONFIG_NETDEVICES=y -CONFIG_DUMMY=m CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_NETCONSOLE=y CONFIG_TUN=m -CONFIG_NET_ETHERNET=y -CONFIG_MII=y CONFIG_GIANFAR=y -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m CONFIG_PPP_FILTER=y +CONFIG_PPP_MULTILINK=y +CONFIG_PPPOE=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPPOE=m CONFIG_SLIP=m CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP_SMART=y CONFIG_SLIP_MODE_SLIP6=y -CONFIG_NETCONSOLE=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_SERIAL_8250_PCI is not set CONFIG_SERIAL_8250_NR_UARTS=2 CONFIG_SERIAL_8250_RUNTIME_UARTS=2 -# CONFIG_LEGACY_PTYS is not set CONFIG_HW_RANDOM=y CONFIG_NVRAM=y CONFIG_I2C=y @@ -134,7 +122,6 @@ CONFIG_SENSORS_LM90=y CONFIG_SENSORS_LM92=y CONFIG_WATCHDOG=y CONFIG_GEF_WDT=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y @@ -149,10 +136,8 @@ CONFIG_HID_MONTEREY=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_USB=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_HCD_PPC_OF is not set CONFIG_USB_OHCI_HCD=y @@ -166,7 +151,6 @@ CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_INOTIFY=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_ZISOFS=y @@ -179,7 +163,6 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_CIFS=m @@ -227,7 +210,5 @@ CONFIG_CRC_CCITT=y CONFIG_CRC_T10DIF=y CONFIG_LIBCRC32C=y CONFIG_MAGIC_SYSRQ=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set diff --git a/arch/powerpc/configs/86xx/gef_sbc610_defconfig b/arch/powerpc/configs/86xx/gef_sbc610_defconfig index 4a4a86fb0d3d24..2aa7d9737e43cb 100644 --- a/arch/powerpc/configs/86xx/gef_sbc610_defconfig +++ b/arch/powerpc/configs/86xx/gef_sbc610_defconfig @@ -1,8 +1,8 @@ CONFIG_SMP=y CONFIG_NR_CPUS=2 -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_IKCONFIG=y @@ -10,7 +10,6 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -21,11 +20,9 @@ CONFIG_MODULE_UNLOAD=y CONFIG_PPC_86xx=y CONFIG_GEF_SBC610=y CONFIG_HIGHMEM=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_HZ_1000=y CONFIG_PREEMPT=y CONFIG_BINFMT_MISC=m -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_PCIEPORTBUS=y # CONFIG_PCIEASPM is not set @@ -46,8 +43,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y @@ -62,22 +57,17 @@ CONFIG_INET6_IPCOMP=m CONFIG_IPV6_TUNNEL=m CONFIG_NETFILTER=y # CONFIG_NETFILTER_XT_MATCH_SCTP is not set -CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_MATCH_ECN=m CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_IP6_NF_QUEUE=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_EUI64=m CONFIG_IP6_NF_MATCH_FRAG=m @@ -85,7 +75,6 @@ CONFIG_IP6_NF_MATCH_OPTS=m CONFIG_IP6_NF_MATCH_HL=m CONFIG_IP6_NF_MATCH_IPV6HEADER=m CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_LOG=m CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_MANGLE=m CONFIG_IP6_NF_RAW=m @@ -98,7 +87,6 @@ CONFIG_ATM_MPOA=m CONFIG_ATM_BR2684=m CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m -CONFIG_WAN_ROUTER=m CONFIG_NET_SCHED=y CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_HTB=m @@ -122,9 +110,6 @@ CONFIG_NET_PKTGEN=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y @@ -136,7 +121,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 -CONFIG_MISC_DEVICES=y CONFIG_DS1682=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y @@ -144,37 +128,34 @@ CONFIG_BLK_DEV_SR=y CONFIG_ATA=y CONFIG_SATA_SIL=y CONFIG_NETDEVICES=y -CONFIG_DUMMY=m CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_NETCONSOLE=y CONFIG_TUN=m -CONFIG_NET_ETHERNET=y -CONFIG_MII=y CONFIG_GIANFAR=y -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m CONFIG_PPP_FILTER=y +CONFIG_PPP_MULTILINK=y +CONFIG_PPPOATM=m +CONFIG_PPPOE=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPPOE=m -CONFIG_PPPOATM=m CONFIG_SLIP=m CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP_SMART=y CONFIG_SLIP_MODE_SLIP6=y -CONFIG_NETCONSOLE=y CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_SERIAL_8250_PCI is not set CONFIG_SERIAL_8250_NR_UARTS=2 CONFIG_SERIAL_8250_RUNTIME_UARTS=2 -# CONFIG_LEGACY_PTYS is not set CONFIG_HW_RANDOM=y CONFIG_NVRAM=y CONFIG_I2C=y @@ -186,7 +167,6 @@ CONFIG_SENSORS_LM90=y CONFIG_SENSORS_LM92=y CONFIG_WATCHDOG=y CONFIG_GEF_WDT=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y @@ -201,10 +181,8 @@ CONFIG_HID_MONTEREY=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_USB=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_EHCI_HCD=y # CONFIG_USB_EHCI_HCD_PPC_OF is not set CONFIG_USB_OHCI_HCD=y @@ -213,7 +191,6 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_INTF_PROC is not set CONFIG_RTC_DRV_RX8581=y CONFIG_STAGING=y -# CONFIG_STAGING_EXCLUDE_BUILD is not set CONFIG_VME_BUS=y CONFIG_VME_TSI148=y CONFIG_EXT2_FS=y @@ -222,14 +199,12 @@ CONFIG_EXT2_FS_POSIX_ACL=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_POSIX_ACL=y -CONFIG_INOTIFY=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_CIFS=m @@ -273,28 +248,20 @@ CONFIG_NLS_ISO8859_15=m CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_NLS_UTF8=m +CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m diff --git a/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig b/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig index 9b192bb6bd3d39..e32207de2b77d9 100644 --- a/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig +++ b/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig @@ -1,27 +1,25 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y -CONFIG_KALLSYMS_EXTRA_PASS=y # CONFIG_ELF_CORE is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_LDM_PARTITION=y # CONFIG_IOSCHED_CFQ is not set # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_86xx=y CONFIG_MPC8610_HPCD=y CONFIG_HIGHMEM=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_HZ_1000=y -CONFIG_SPARSE_IRQ=y CONFIG_FORCE_MAX_ZONEORDER=12 # CONFIG_SECCOMP is not set CONFIG_PCI=y @@ -42,14 +40,12 @@ CONFIG_IPV6=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_FSL_ELBC=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 @@ -61,15 +57,13 @@ CONFIG_SATA_AHCI=y CONFIG_PATA_ALI=y CONFIG_NETDEVICES=y CONFIG_DUMMY=y -CONFIG_NET_ETHERNET=y CONFIG_NET_TULIP=y CONFIG_ULI526X=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set CONFIG_SERIO_LIBPS2=y +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_NR_UARTS=2 @@ -79,19 +73,14 @@ CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_8250_DETECT_IRQ=y CONFIG_SERIAL_8250_RSA=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_I2C=y CONFIG_I2C_MPC=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_FB=y CONFIG_FB_FSL_DIU=y CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FONTS=y -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_MIXER_OSS=y @@ -108,17 +97,14 @@ CONFIG_EXT3_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_LDM_PARTITION=y CONFIG_NLS=y CONFIG_CRC_T10DIF=y -CONFIG_DEBUG_KERNEL=y +CONFIG_FONTS=y +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y +CONFIG_DEBUG_INFO=y CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig index 76f43df3dec73e..a36e11ddaebd5d 100644 --- a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig +++ b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig @@ -1,33 +1,31 @@ CONFIG_SMP=y CONFIG_NR_CPUS=2 -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_BSD_PROCESS_ACCT=y CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y -CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_MAC_PARTITION=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_86xx=y CONFIG_MPC8641_HPCN=y CONFIG_HIGHMEM=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_HZ_1000=y CONFIG_BINFMT_MISC=m -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -45,12 +43,9 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NET_IPIP=y -CONFIG_NET_IPGRE=y -CONFIG_NET_IPGRE_BROADCAST=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y -CONFIG_ARPD=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set @@ -58,28 +53,23 @@ CONFIG_ARPD=y CONFIG_IPV6=y CONFIG_IP_SCTP=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 -CONFIG_MISC_DEVICES=y CONFIG_EEPROM_LEGACY=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_LOGGING=y CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_PATA_ALI=y CONFIG_NETDEVICES=y CONFIG_DUMMY=y -CONFIG_VITESSE_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y CONFIG_GIANFAR=y +CONFIG_VITESSE_PHY=y CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set @@ -99,7 +89,6 @@ CONFIG_NVRAM=y CONFIG_I2C=y CONFIG_I2C_MPC=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_MIXER_OSS=y @@ -120,7 +109,6 @@ CONFIG_HID_MONTEREY=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_USB=y CONFIG_USB_MON=y @@ -134,7 +122,6 @@ CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y CONFIG_ZISOFS=y @@ -158,18 +145,12 @@ CONFIG_QNX4FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAC_PARTITION=y CONFIG_CRC_T10DIF=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_DETECT_HUNG_TASK=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/86xx/sbc8641d_defconfig b/arch/powerpc/configs/86xx/sbc8641d_defconfig index 99ea8746bbafcb..db79bdee844b9a 100644 --- a/arch/powerpc/configs/86xx/sbc8641d_defconfig +++ b/arch/powerpc/configs/86xx/sbc8641d_defconfig @@ -1,8 +1,8 @@ CONFIG_SMP=y CONFIG_NR_CPUS=2 -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_IKCONFIG=y @@ -10,7 +10,6 @@ CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y @@ -20,10 +19,8 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_PPC_PMAC is not set CONFIG_PPC_86xx=y CONFIG_SBC8641D=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_PREEMPT=y CONFIG_BINFMT_MISC=m -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_PCIEPORTBUS=y # CONFIG_PCIEASPM is not set @@ -43,8 +40,6 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y @@ -59,22 +54,17 @@ CONFIG_INET6_IPCOMP=m CONFIG_IPV6_TUNNEL=m CONFIG_NETFILTER=y # CONFIG_NETFILTER_XT_MATCH_SCTP is not set -CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_MATCH_ECN=m CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_RAW=m CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_IP6_NF_QUEUE=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_EUI64=m CONFIG_IP6_NF_MATCH_FRAG=m @@ -82,7 +72,6 @@ CONFIG_IP6_NF_MATCH_OPTS=m CONFIG_IP6_NF_MATCH_HL=m CONFIG_IP6_NF_MATCH_IPV6HEADER=m CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_LOG=m CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_MANGLE=m CONFIG_IP6_NF_RAW=m @@ -95,7 +84,6 @@ CONFIG_ATM_MPOA=m CONFIG_ATM_BR2684=m CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m -CONFIG_WAN_ROUTER=m CONFIG_NET_SCHED=y CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_HTB=m @@ -119,8 +107,6 @@ CONFIG_NET_PKTGEN=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_ADV_OPTIONS=y @@ -145,28 +131,25 @@ CONFIG_DM_SNAPSHOT=y CONFIG_DM_MIRROR=y CONFIG_DM_ZERO=y CONFIG_NETDEVICES=y -CONFIG_DUMMY=m CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_NETCONSOLE=y CONFIG_TUN=m -CONFIG_BROADCOM_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y CONFIG_GIANFAR=y -# CONFIG_NETDEV_10000 is not set +CONFIG_BROADCOM_PHY=y CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m CONFIG_PPP_FILTER=y +CONFIG_PPP_MULTILINK=y +CONFIG_PPPOATM=m +CONFIG_PPPOE=m CONFIG_PPP_ASYNC=m CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPPOE=m -CONFIG_PPPOATM=m CONFIG_SLIP=m CONFIG_SLIP_COMPRESSED=y CONFIG_SLIP_SMART=y CONFIG_SLIP_MODE_SLIP6=y -CONFIG_NETCONSOLE=y # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO is not set @@ -180,7 +163,6 @@ CONFIG_I2C_CHARDEV=y CONFIG_I2C_MPC=y CONFIG_WATCHDOG=y CONFIG_SOFT_WATCHDOG=m -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y @@ -190,20 +172,14 @@ CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_REISERFS_FS=m CONFIG_REISERFS_FS_XATTR=y CONFIG_REISERFS_FS_POSIX_ACL=y -CONFIG_OCFS2_FS=m -CONFIG_INOTIFY=y -CONFIG_AUTOFS_FS=m CONFIG_AUTOFS4_FS=m CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_MINIX_FS=m CONFIG_ROMFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_SMB_FS=m -CONFIG_SMB_NLS_DEFAULT=y CONFIG_CIFS=m CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y @@ -245,29 +221,21 @@ CONFIG_NLS_ISO8859_15=m CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set -CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig index 15b1ff5d96e763..d89ff40d39b730 100644 --- a/arch/powerpc/configs/adder875_defconfig +++ b/arch/powerpc/configs/adder875_defconfig @@ -1,21 +1,18 @@ CONFIG_PPC_8xx=y -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_BASE_FULL is not set # CONFIG_FUTEX is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y # CONFIG_IOSCHED_CFQ is not set CONFIG_PPC_ADDER875=y CONFIG_8xx_COPYBACK=y CONFIG_HZ_1000=y -CONFIG_SPARSE_IRQ=y # CONFIG_SECCOMP is not set CONFIG_NET=y CONFIG_PACKET=y @@ -32,42 +29,30 @@ CONFIG_SYN_COOKIES=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y # CONFIG_BLK_DEV is not set -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_DAVICOM_PHY=y -CONFIG_NET_ETHERNET=y CONFIG_FS_ENET=y # CONFIG_FS_ENET_HAS_SCC is not set -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set +CONFIG_DAVICOM_PHY=y # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_CPM=y CONFIG_SERIAL_CPM_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set CONFIG_GEN_RTC=y # CONFIG_HWMON is not set CONFIG_THERMAL=y -CONFIG_VIDEO_OUTPUT_CONTROL=y -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set # CONFIG_DNOTIFY is not set CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_CRC32 is not set -CONFIG_MAGIC_SYSRQ=y +CONFIG_CRC32_SLICEBY4=y +CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_CRC32_SLICEBY4=y diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig index 8c66b13e59fcff..84f1b41405798d 100644 --- a/arch/powerpc/configs/amigaone_defconfig +++ b/arch/powerpc/configs/amigaone_defconfig @@ -1,8 +1,9 @@ CONFIG_ALTIVEC=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 @@ -12,14 +13,13 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_AMIGA_PARTITION=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_AMIGAONE=y CONFIG_HIGHMEM=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BINFMT_MISC=y -# CONFIG_MIGRATION is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -37,11 +37,9 @@ CONFIG_NETFILTER=y # CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set # CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set # CONFIG_NETFILTER_XT_MATCH_STATE is not set -# CONFIG_IP_NF_TARGET_ULOG is not set # CONFIG_IP_NF_MANGLE is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_STANDALONE is not set -CONFIG_PROC_DEVICETREE=y CONFIG_PARPORT=y CONFIG_PARPORT_PC=y CONFIG_PARPORT_PC_FIFO=y @@ -65,24 +63,19 @@ CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 # CONFIG_SCSI_SYM53C8XX_MMIO is not set CONFIG_NETDEVICES=y -CONFIG_PHYLIB=y -CONFIG_NET_ETHERNET=y -CONFIG_NET_VENDOR_3COM=y CONFIG_VORTEX=y -CONFIG_NET_PCI=y CONFIG_8139CP=y CONFIG_8139TOO=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set +CONFIG_PHYLIB=y CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_FILTER=y CONFIG_PPP_MPPE=m +CONFIG_PPP_MULTILINK=y CONFIG_PPPOE=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m CONFIG_INPUT_EVDEV=y CONFIG_INPUT_MISC=y CONFIG_INPUT_PCSPKR=y @@ -96,7 +89,6 @@ CONFIG_FIRMWARE_EDID=y CONFIG_FB_TILEBLITTING=y CONFIG_FB_RADEON=y CONFIG_FB_3DFX=y -CONFIG_DISPLAY_SUPPORT=m CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_HID_GYRATION=y @@ -104,7 +96,6 @@ CONFIG_HID_NTRIG=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_HID_TOPSEED=y CONFIG_USB=y @@ -117,27 +108,20 @@ CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_EXT4_FS=y -CONFIG_INOTIFY=y CONFIG_ISO9660_FS=y CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_AFFS_FS=m -CONFIG_PARTITION_ADVANCED=y -CONFIG_AMIGA_PARTITION=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=m CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_XMON=y CONFIG_XMON_DEFAULT=y -CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig index 8a08d6dcb0b473..91862292cd5586 100644 --- a/arch/powerpc/configs/c2k_defconfig +++ b/arch/powerpc/configs/c2k_defconfig @@ -1,17 +1,23 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_BSD_PROCESS_ACCT=y CONFIG_AUDIT=y +CONFIG_BSD_PROCESS_ACCT=y CONFIG_BLK_DEV_INITRD=y -CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_PROFILING=y CONFIG_OPROFILE=m CONFIG_KPROBES=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y -# CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_MAC_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_EMBEDDED6xx=y @@ -24,7 +30,6 @@ CONFIG_CPU_FREQ_GOV_ONDEMAND=m CONFIG_HIGHMEM=y CONFIG_PREEMPT_VOLUNTARY=y CONFIG_BINFMT_MISC=y -CONFIG_SPARSE_IRQ=y CONFIG_PM=y CONFIG_PCI_MSI=y CONFIG_HOTPLUG_PCI=y @@ -43,8 +48,6 @@ CONFIG_IP_ROUTE_VERBOSE=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y @@ -52,22 +55,17 @@ CONFIG_SYN_COOKIES=y CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_IPCOMP=m -CONFIG_IPV6_PRIVACY=y CONFIG_INET6_AH=m CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_TUNNEL=m CONFIG_NETFILTER=y # CONFIG_NETFILTER_XT_MATCH_SCTP is not set -CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_MATCH_ECN=m CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_RAW=m @@ -81,7 +79,6 @@ CONFIG_IP6_NF_MATCH_OPTS=m CONFIG_IP6_NF_MATCH_HL=m CONFIG_IP6_NF_MATCH_IPV6HEADER=m CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_LOG=m CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_MANGLE=m CONFIG_IP6_NF_RAW=m @@ -133,8 +130,6 @@ CONFIG_NET_CLS_RSVP=m CONFIG_NET_CLS_RSVP6=m CONFIG_NET_CLS_IND=y CONFIG_BT=m -CONFIG_BT_L2CAP=y -CONFIG_BT_SCO=y CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_BNEP=m @@ -149,9 +144,6 @@ CONFIG_BT_HCIBFUSB=m CONFIG_BT_HCIVHCI=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_CONCAT=m -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=m CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -162,7 +154,6 @@ CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=16384 -# CONFIG_MISC_DEVICES is not set CONFIG_SCSI=m CONFIG_BLK_DEV_SD=m CONFIG_CHR_DEV_ST=m @@ -182,7 +173,6 @@ CONFIG_AIC7XXX_CMDS_PER_DEVICE=4 CONFIG_AIC7XXX_RESET_DELAY_MS=15000 # CONFIG_AIC7XXX_DEBUG_ENABLE is not set # CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set -CONFIG_SCSI_AIC7XXX_OLD=m CONFIG_SCSI_AIC79XX=m CONFIG_AIC79XX_CMDS_PER_DEVICE=4 CONFIG_AIC79XX_RESET_DELAY_MS=15000 @@ -199,18 +189,14 @@ CONFIG_SCSI_IPS=m CONFIG_SCSI_INITIO=m CONFIG_SCSI_SYM53C8XX_2=m CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_LPFC=m CONFIG_NETDEVICES=y -CONFIG_DUMMY=m CONFIG_BONDING=m +CONFIG_DUMMY=m +CONFIG_NETCONSOLE=m CONFIG_TUN=m -CONFIG_VITESSE_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -CONFIG_MV643XX_ETH=y -# CONFIG_NETDEV_10000 is not set # CONFIG_ATM_DRIVERS is not set -CONFIG_NETCONSOLE=m +CONFIG_MV643XX_ETH=y +CONFIG_VITESSE_PHY=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_KEYBOARD is not set @@ -218,10 +204,10 @@ CONFIG_INPUT_EVDEV=y CONFIG_INPUT_MISC=y CONFIG_INPUT_UINPUT=m # CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_NONSTANDARD=y CONFIG_SERIAL_MPSC=y CONFIG_SERIAL_MPSC_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set CONFIG_NVRAM=m CONFIG_GEN_RTC=m CONFIG_RAW_DRIVER=y @@ -236,7 +222,7 @@ CONFIG_SENSORS_ADM1026=m CONFIG_SENSORS_ADM1031=m CONFIG_SENSORS_DS1621=m CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_IT87=m +CONFIG_SENSORS_MAX1619=m CONFIG_SENSORS_LM75=m CONFIG_SENSORS_LM77=m CONFIG_SENSORS_LM78=m @@ -245,23 +231,17 @@ CONFIG_SENSORS_LM83=m CONFIG_SENSORS_LM85=m CONFIG_SENSORS_LM87=m CONFIG_SENSORS_LM90=m -CONFIG_SENSORS_MAX1619=m CONFIG_SENSORS_PCF8591=m -CONFIG_SENSORS_SMSC47M1=m -CONFIG_SENSORS_SMSC47B397=m CONFIG_SENSORS_VIA686A=m CONFIG_SENSORS_W83781D=m CONFIG_SENSORS_W83L785TS=m -CONFIG_SENSORS_W83627HF=m CONFIG_WATCHDOG=y CONFIG_SOFT_WATCHDOG=m CONFIG_PCIPCWATCHDOG=m CONFIG_WDTPCI=m CONFIG_USBPCWATCHDOG=m # CONFIG_VGA_CONSOLE is not set -# CONFIG_HID_SUPPORT is not set CONFIG_USB=m -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=m CONFIG_USB_EHCI_HCD=m CONFIG_USB_EHCI_ROOT_HUB_TT=y @@ -350,24 +330,12 @@ CONFIG_JFFS2_FS=y CONFIG_CRAMFS=m CONFIG_VXFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_RPCSEC_GSS_SPKM3=m CONFIG_CIFS=m CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_OSF_PARTITION=y -CONFIG_MAC_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -CONFIG_SGI_PARTITION=y -CONFIG_SUN_PARTITION=y -CONFIG_EFI_PARTITION=y CONFIG_NLS=y CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y @@ -409,21 +377,16 @@ CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_CRC_CCITT=m CONFIG_CRC_T10DIF=m +CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y -CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_SPINLOCK=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y +CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_HIGHMEM=y -CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_DEBUG_STACK_USAGE=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEBUG_SPINLOCK=y CONFIG_BOOTX_TEXT=y CONFIG_PPC_EARLY_DEBUG=y -CONFIG_PPC_EARLY_DEBUG_BOOTX=y -CONFIG_KEYS=y CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y @@ -431,16 +394,11 @@ CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_CRYPTO_NULL=m -CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA1=y -CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_KHAZAD=m diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index 7a7b3c879f96f8..9788b3c2d56343 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -3,9 +3,10 @@ CONFIG_TUNE_CELL=y CONFIG_ALTIVEC=y CONFIG_SMP=y CONFIG_NR_CPUS=4 -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_FHANDLE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 @@ -17,7 +18,7 @@ CONFIG_PROFILING=y CONFIG_OPROFILE=m CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y -# CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y # CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set @@ -34,8 +35,6 @@ CONFIG_CPU_FREQ_GOV_POWERSAVE=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BINFMT_MISC=m CONFIG_IRQ_ALL_CPUS=y CONFIG_NUMA=y @@ -63,7 +62,6 @@ CONFIG_INET6_IPCOMP=m # CONFIG_IPV6_SIT is not set CONFIG_IPV6_TUNNEL=m CONFIG_NETFILTER=y -CONFIG_NETFILTER_NETLINK_QUEUE=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m CONFIG_NETFILTER_XT_TARGET_DSCP=m CONFIG_NETFILTER_XT_TARGET_MARK=m @@ -94,16 +92,12 @@ CONFIG_NETFILTER_XT_MATCH_STRING=m CONFIG_NETFILTER_XT_MATCH_TCPMSS=m CONFIG_NETFILTER_XT_MATCH_TIME=m CONFIG_NETFILTER_XT_MATCH_U32=m -CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m @@ -112,7 +106,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 @@ -144,26 +137,24 @@ CONFIG_NETDEVICES=y CONFIG_BONDING=m CONFIG_MACVLAN=m CONFIG_TUN=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y +CONFIG_TIGON3=y CONFIG_E1000=m CONFIG_SKGE=m CONFIG_SKY2=m -CONFIG_TIGON3=y -CONFIG_SPIDER_NET=y CONFIG_GELIC_NET=m CONFIG_GELIC_WIRELESS=y +CONFIG_SPIDER_NET=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO_I8042 is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_NONSTANDARD=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_TXX9_NR_UARTS=2 CONFIG_SERIAL_TXX9_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y -# CONFIG_LEGACY_PTYS is not set CONFIG_HVC_RTAS=y CONFIG_HVC_BEAT=y CONFIG_IPMI_HANDLER=m @@ -174,9 +165,7 @@ CONFIG_IPMI_POWEROFF=m # CONFIG_HW_RANDOM is not set CONFIG_GEN_RTC=y CONFIG_I2C=y -# CONFIG_HWMON is not set CONFIG_WATCHDOG=y -CONFIG_VIDEO_OUTPUT_CONTROL=m # CONFIG_VGA_CONSOLE is not set CONFIG_HID=m # CONFIG_USB_HID is not set @@ -199,7 +188,6 @@ CONFIG_EDAC_CELL=y CONFIG_UIO=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -CONFIG_INOTIFY=y CONFIG_AUTOFS4_FS=m CONFIG_ISO9660_FS=m CONFIG_JOLIET=y @@ -210,11 +198,8 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_EFI_PARTITION=y CONFIG_NLS_ISO8859_1=m CONFIG_NLS_ISO8859_2=m CONFIG_NLS_ISO8859_3=m @@ -227,11 +212,10 @@ CONFIG_NLS_ISO8859_13=m CONFIG_NLS_ISO8859_14=m CONFIG_NLS_ISO8859_15=m # CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y CONFIG_XMON=y CONFIG_XMON_DEFAULT=y CONFIG_CRYPTO_ECB=m diff --git a/arch/powerpc/configs/celleb_defconfig b/arch/powerpc/configs/celleb_defconfig index acccbfde8a5094..ff454dcd2dd3b0 100644 --- a/arch/powerpc/configs/celleb_defconfig +++ b/arch/powerpc/configs/celleb_defconfig @@ -3,9 +3,10 @@ CONFIG_TUNE_CELL=y CONFIG_ALTIVEC=y CONFIG_SMP=y CONFIG_NR_CPUS=4 -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_FHANDLE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 @@ -15,6 +16,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_PARTITION_ADVANCED=y # CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set @@ -23,8 +25,6 @@ CONFIG_SPU_FS=y # CONFIG_CBE_THERM is not set CONFIG_UDBG_RTAS_CONSOLE=y # CONFIG_RTAS_PROC is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BINFMT_MISC=m CONFIG_KEXEC=y CONFIG_NUMA=y @@ -40,9 +40,7 @@ CONFIG_INET6_ESP=m CONFIG_INET6_IPCOMP=m CONFIG_IPV6_TUNNEL=m CONFIG_NETFILTER=y -CONFIG_IP_NF_QUEUE=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 @@ -55,7 +53,6 @@ CONFIG_SCSI=m CONFIG_BLK_DEV_SD=m CONFIG_BLK_DEV_SR=m CONFIG_CHR_DEV_SG=m -CONFIG_SCSI_MULTI_LUN=y CONFIG_MD=y CONFIG_BLK_DEV_MD=m CONFIG_MD_LINEAR=m @@ -68,17 +65,15 @@ CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y CONFIG_SPIDER_NET=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_SERIO_I8042 is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_NONSTANDARD=y CONFIG_SERIAL_TXX9_NR_UARTS=3 CONFIG_SERIAL_TXX9_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set CONFIG_HVC_RTAS=y CONFIG_HVC_BEAT=y # CONFIG_HW_RANDOM is not set @@ -89,7 +84,6 @@ CONFIG_WATCHDOG=y # CONFIG_VGA_CONSOLE is not set CONFIG_USB_HIDDEV=y CONFIG_USB=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=m # CONFIG_USB_EHCI_HCD_PPC_OF is not set @@ -103,7 +97,6 @@ CONFIG_EXT2_FS_XIP=y CONFIG_EXT3_FS=y CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT3_FS_SECURITY=y -CONFIG_INOTIFY=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y CONFIG_UDF_FS=m @@ -113,13 +106,10 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y CONFIG_NFS_FS=m -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFSD=m CONFIG_NFSD_V3=y CONFIG_NFSD_V3_ACL=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_EFI_PARTITION=y CONFIG_NLS_ISO8859_1=m CONFIG_NLS_ISO8859_2=m CONFIG_NLS_ISO8859_3=m @@ -132,11 +122,10 @@ CONFIG_NLS_ISO8859_13=m CONFIG_NLS_ISO8859_14=m CONFIG_NLS_ISO8859_15=m CONFIG_LIBCRC32C=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y CONFIG_XMON=y CONFIG_XMON_DEFAULT=y CONFIG_CRYPTO_NULL=m diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig index db5b30857e1c12..253a9f200097fb 100644 --- a/arch/powerpc/configs/chrp32_defconfig +++ b/arch/powerpc/configs/chrp32_defconfig @@ -1,25 +1,24 @@ CONFIG_SMP=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set # CONFIG_COMPAT_BRK is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_MAC_PARTITION=y # CONFIG_PPC_PMAC is not set CONFIG_HIGHMEM=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BINFMT_MISC=y CONFIG_IRQ_ALL_CPUS=y -# CONFIG_MIGRATION is not set CONFIG_ISA=y CONFIG_NET=y CONFIG_PACKET=y @@ -37,11 +36,9 @@ CONFIG_NETFILTER=y # CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set # CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set # CONFIG_NETFILTER_XT_MATCH_STATE is not set -# CONFIG_IP_NF_TARGET_ULOG is not set # CONFIG_IP_NF_MANGLE is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_STANDALONE is not set -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_FD=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y @@ -60,31 +57,28 @@ CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y +CONFIG_PCNET32=y CONFIG_NET_TULIP=y CONFIG_DE4X5=y -CONFIG_NET_PCI=y -CONFIG_PCNET32=y +CONFIG_MV643XX_ETH=y CONFIG_8139CP=y CONFIG_8139TOO=y # CONFIG_8139TOO_PIO is not set CONFIG_VIA_RHINE=y -CONFIG_MV643XX_ETH=y CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_FILTER=y CONFIG_PPP_MPPE=m +CONFIG_PPP_MULTILINK=y CONFIG_PPPOE=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m CONFIG_INPUT_EVDEV=y CONFIG_INPUT_MISC=y CONFIG_INPUT_UINPUT=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_BRIQ_PANEL=m # CONFIG_HW_RANDOM is not set CONFIG_NVRAM=y CONFIG_GEN_RTC=y @@ -101,14 +95,12 @@ CONFIG_FB_ATY=y CONFIG_FB_ATY_CT=y CONFIG_FB_ATY_GX=y CONFIG_FB_3DFX=y -CONFIG_DISPLAY_SUPPORT=m CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_HID_GYRATION=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_USB=y CONFIG_USB_MON=y @@ -120,26 +112,19 @@ CONFIG_USB_STORAGE=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_EXT4_FS=y -CONFIG_INOTIFY=y CONFIG_ISO9660_FS=y CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_TMPFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAC_PARTITION=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=m CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_XMON=y CONFIG_XMON_DEFAULT=y -CONFIG_CRYPTO=y CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig index 611efe99faebe9..2826bb4ff56058 100644 --- a/arch/powerpc/configs/corenet32_smp_defconfig +++ b/arch/powerpc/configs/corenet32_smp_defconfig @@ -1,13 +1,12 @@ CONFIG_PPC_85xx=y CONFIG_SMP=y CONFIG_NR_CPUS=8 -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_BSD_PROCESS_ACCT=y CONFIG_AUDIT=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 @@ -57,7 +56,6 @@ CONFIG_NET_IPIP=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y -CONFIG_ARPD=y CONFIG_INET_AH=y CONFIG_INET_ESP=y CONFIG_INET_IPCOMP=y @@ -69,17 +67,14 @@ CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_MTD_NAND_FSL_IFC=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 @@ -87,7 +82,6 @@ CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SYM53C8XX_2=y CONFIG_ATA=y @@ -100,8 +94,8 @@ CONFIG_NETDEVICES=y CONFIG_FSL_PQ_MDIO=y CONFIG_E1000=y CONFIG_E1000E=y -CONFIG_VITESSE_PHY=y CONFIG_AT803X_PHY=y +CONFIG_VITESSE_PHY=y CONFIG_FIXED_PHY=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set @@ -123,7 +117,6 @@ CONFIG_SPI_GPIO=y CONFIG_SPI_FSL_SPI=y CONFIG_SPI_FSL_ESPI=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_USB_HID=m CONFIG_USB=y CONFIG_USB_MON=y @@ -143,10 +136,10 @@ CONFIG_RTC_DRV_DS1307=y CONFIG_RTC_DRV_DS1374=y CONFIG_RTC_DRV_DS3232=y CONFIG_UIO=y -CONFIG_STAGING=y -CONFIG_MEMORY=y CONFIG_VIRT_DRIVERS=y CONFIG_FSL_HV_MANAGER=y +CONFIG_STAGING=y +CONFIG_FSL_CORENET_CF=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set @@ -170,17 +163,15 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_850=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=m +CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_INFO=y CONFIG_RCU_TRACE=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y -CONFIG_CRYPTO_AES=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DEV_FSL_CAAM=y -CONFIG_FSL_CORENET_CF=y diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig index be24a18c0d96c5..6db97e4414b2ea 100644 --- a/arch/powerpc/configs/corenet64_smp_defconfig +++ b/arch/powerpc/configs/corenet64_smp_defconfig @@ -118,7 +118,6 @@ CONFIG_FSL_DMA=y CONFIG_VIRT_DRIVERS=y CONFIG_FSL_HV_MANAGER=y CONFIG_FSL_CORENET_CF=y -CONFIG_MEMORY=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y CONFIG_ISO9660_FS=m diff --git a/arch/powerpc/configs/ep8248e_defconfig b/arch/powerpc/configs/ep8248e_defconfig index fceffb3cffbee4..7c137041f1d6f1 100644 --- a/arch/powerpc/configs/ep8248e_defconfig +++ b/arch/powerpc/configs/ep8248e_defconfig @@ -5,13 +5,13 @@ CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y CONFIG_SLAB=y +CONFIG_PARTITION_ADVANCED=y # CONFIG_IOSCHED_CFQ is not set # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_82xx=y CONFIG_EP8248E=y CONFIG_BINFMT_MISC=y -CONFIG_SPARSE_IRQ=y # CONFIG_SECCOMP is not set CONFIG_PCI=y CONFIG_NET=y @@ -28,7 +28,6 @@ CONFIG_NETFILTER=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_ADV_OPTIONS=y @@ -38,14 +37,11 @@ CONFIG_MTD_CFI_GEOMETRY=y # CONFIG_MTD_CFI_I1 is not set CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_DAVICOM_PHY=y -CONFIG_NET_ETHERNET=y CONFIG_FS_ENET=y # CONFIG_FS_ENET_HAS_SCC is not set +CONFIG_DAVICOM_PHY=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -57,27 +53,20 @@ CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set -CONFIG_INOTIFY=y CONFIG_AUTOFS4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y CONFIG_NLS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -# CONFIG_CRC32 is not set +CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y # CONFIG_SCHED_DEBUG is not set -CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BDI_SWITCH=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig index b8a79d7ee89fae..ee96be889dac97 100644 --- a/arch/powerpc/configs/ep88xc_defconfig +++ b/arch/powerpc/configs/ep88xc_defconfig @@ -1,24 +1,20 @@ CONFIG_PPC_8xx=y -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_BASE_FULL is not set # CONFIG_FUTEX is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y # CONFIG_IOSCHED_CFQ is not set CONFIG_PPC_EP88XC=y CONFIG_8xx_COPYBACK=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_HZ_100=y -CONFIG_8XX_MINIMAL_FPEMU=y -CONFIG_SPARSE_IRQ=y # CONFIG_SECCOMP is not set CONFIG_NET=y CONFIG_PACKET=y @@ -35,27 +31,21 @@ CONFIG_SYN_COOKIES=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y # CONFIG_BLK_DEV is not set -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_LXT_PHY=y -CONFIG_NET_ETHERNET=y CONFIG_FS_ENET=y # CONFIG_FS_ENET_HAS_SCC is not set -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set +CONFIG_LXT_PHY=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_CPM=y CONFIG_SERIAL_CPM_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set CONFIG_GEN_RTC=y # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set @@ -63,13 +53,8 @@ CONFIG_GEN_RTC=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_CRC32 is not set +CONFIG_CRC32_SLICEBY4=y +CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_CRC32_SLICEBY4=y diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 6fab06f7f4117b..5e0eed26137ed6 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -2,10 +2,11 @@ CONFIG_PPC64=y CONFIG_ALTIVEC=y CONFIG_SMP=y CONFIG_NR_CPUS=4 -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_FHANDLE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_BLK_DEV_INITRD=y @@ -16,17 +17,15 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_PARTITION_ADVANCED=y # CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_PMAC64=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_KEXEC=y CONFIG_IRQ_ALL_CPUS=y -# CONFIG_MIGRATION is not set CONFIG_PCI_MSI=y CONFIG_NET=y CONFIG_PACKET=y @@ -52,9 +51,7 @@ CONFIG_NF_CONNTRACK_IRC=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_IP_NF_QUEUE=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y @@ -69,7 +66,6 @@ CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SPI_ATTRS=y CONFIG_ATA=y @@ -87,33 +83,29 @@ CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m -CONFIG_IEEE1394=y -CONFIG_IEEE1394_OHCI1394=y -CONFIG_IEEE1394_SBP2=m -CONFIG_IEEE1394_ETH1394=m -CONFIG_IEEE1394_RAWIO=y -CONFIG_IEEE1394_VIDEO1394=m -CONFIG_IEEE1394_DV1394=m CONFIG_ADB_PMU=y CONFIG_PMAC_SMU=y CONFIG_MAC_EMUMOUSEBTN=y -CONFIG_THERM_PM72=y CONFIG_WINDFARM=y CONFIG_WINDFARM_PM81=y CONFIG_WINDFARM_PM91=y CONFIG_WINDFARM_PM112=y CONFIG_WINDFARM_PM121=y CONFIG_NETDEVICES=y -CONFIG_DUMMY=m CONFIG_BONDING=m +CONFIG_DUMMY=m CONFIG_TUN=m -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -CONFIG_SUNGEM=y CONFIG_ACENIC=m CONFIG_ACENIC_OMIT_TIGON_I=y -CONFIG_E1000=y CONFIG_TIGON3=y +CONFIG_E1000=y +CONFIG_SUNGEM=y +CONFIG_PPP=m +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPPOE=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m CONFIG_USB_CATC=m CONFIG_USB_KAWETH=m CONFIG_USB_PEGASUS=m @@ -123,12 +115,6 @@ CONFIG_USB_USBNET=m # CONFIG_USB_NET_NET1080 is not set # CONFIG_USB_NET_CDC_SUBSET is not set # CONFIG_USB_NET_ZAURUS is not set -CONFIG_PPP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_BSDCOMP=m -CONFIG_PPPOE=m # CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_JOYDEV=m CONFIG_INPUT_EVDEV=y @@ -140,10 +126,8 @@ CONFIG_INPUT_EVDEV=y CONFIG_GEN_RTC=y CONFIG_RAW_DRIVER=y CONFIG_I2C_CHARDEV=y -# CONFIG_HWMON is not set CONFIG_AGP=m CONFIG_AGP_UNINORTH=m -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_TILEBLITTING=y @@ -167,15 +151,14 @@ CONFIG_SND_AOA_ONYX=m CONFIG_SND_AOA_TAS=m CONFIG_SND_AOA_TOONIE=m CONFIG_SND_USB_AUDIO=m -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y CONFIG_HID_GYRATION=y CONFIG_LOGITECH_FF=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y CONFIG_USB=y CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=y @@ -245,8 +228,6 @@ CONFIG_REISERFS_FS_POSIX_ACL=y CONFIG_REISERFS_FS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_POSIX_ACL=y -CONFIG_INOTIFY=y -CONFIG_AUTOFS_FS=m CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_ZISOFS=y @@ -260,14 +241,12 @@ CONFIG_HFS_FS=m CONFIG_HFSPLUS_FS=m CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_NFSD=y CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y CONFIG_CIFS=m -CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_1250=y CONFIG_NLS_CODEPAGE_1251=y @@ -276,30 +255,21 @@ CONFIG_NLS_ISO8859_1=y CONFIG_NLS_ISO8859_15=y CONFIG_NLS_UTF8=y CONFIG_CRC_T10DIF=y -CONFIG_LIBCRC32C=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MUTEXES=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_LATENCYTOP=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BOOTX_TEXT=y CONFIG_PPC_EARLY_DEBUG=y -CONFIG_PPC_EARLY_DEBUG_BOOTX=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m -CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_WP512=m -CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST5=m CONFIG_CRYPTO_CAST6=m diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig index 9ef2cc13e1b48c..6c6c60f1aba4ef 100644 --- a/arch/powerpc/configs/gamecube_defconfig +++ b/arch/powerpc/configs/gamecube_defconfig @@ -1,11 +1,9 @@ -CONFIG_EXPERIMENTAL=y CONFIG_LOCALVERSION="-gcn" CONFIG_SYSVIPC=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set CONFIG_PERF_EVENTS=y @@ -22,7 +20,6 @@ CONFIG_GAMECUBE=y CONFIG_PREEMPT=y CONFIG_BINFMT_MISC=m CONFIG_KEXEC=y -# CONFIG_MIGRATION is not set # CONFIG_SECCOMP is not set CONFIG_ADVANCED_OPTIONS=y CONFIG_NET=y @@ -42,15 +39,11 @@ CONFIG_IP_PNP_RARP=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_STANDALONE is not set # CONFIG_FW_LOADER is not set -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=2 CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_WLAN is not set CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_MOUSEDEV is not set @@ -61,8 +54,8 @@ CONFIG_INPUT_EVDEV=y CONFIG_INPUT_JOYSTICK=y # CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set -# CONFIG_DEVKMEM is not set CONFIG_LEGACY_PTY_COUNT=64 +# CONFIG_DEVKMEM is not set # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set CONFIG_FB=y @@ -86,7 +79,6 @@ CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set -CONFIG_INOTIFY=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_MSDOS_FS=y @@ -95,20 +87,15 @@ CONFIG_PROC_KCORE=y # CONFIG_PROC_PAGE_MONITOR is not set CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_CIFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_LATENCYTOP=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_SCHED_TRACER=y CONFIG_DMA_API_DEBUG=y CONFIG_PPC_EARLY_DEBUG=y diff --git a/arch/powerpc/configs/holly_defconfig b/arch/powerpc/configs/holly_defconfig index 94ebfee188db27..5e0f2551e5c707 100644 --- a/arch/powerpc/configs/holly_defconfig +++ b/arch/powerpc/configs/holly_defconfig @@ -1,17 +1,16 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_EMBEDDED6xx=y CONFIG_PPC_HOLLY=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BINFMT_MISC=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0,115200" @@ -30,18 +29,15 @@ CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 CONFIG_BLK_DEV_SD=y CONFIG_ATA=y CONFIG_NETDEVICES=y -CONFIG_PHYLIB=y -CONFIG_NET_ETHERNET=y -CONFIG_NET_VENDOR_3COM=y CONFIG_VORTEX=y CONFIG_TSI108_ETH=y +CONFIG_PHYLIB=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -57,14 +53,11 @@ CONFIG_SERIAL_OF_PLATFORM=y CONFIG_GEN_RTC=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_XMON=y diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig index b5e684640fdfb6..62ae92956d0552 100644 --- a/arch/powerpc/configs/linkstation_defconfig +++ b/arch/powerpc/configs/linkstation_defconfig @@ -1,6 +1,7 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 @@ -13,10 +14,7 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_PPC_PMAC is not set CONFIG_EMBEDDED6xx=y CONFIG_LINKSTATION=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_HZ_100=y -CONFIG_SPARSE_IRQ=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -43,12 +41,8 @@ CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m CONFIG_NETFILTER_XT_MATCH_STATE=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m @@ -58,10 +52,7 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y -CONFIG_MTD_CONCAT=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y @@ -72,27 +63,23 @@ CONFIG_MTD_CFI_GEOMETRY=y # CONFIG_MTD_CFI_I2 is not set CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=2 CONFIG_BLK_DEV_RAM_SIZE=8192 -CONFIG_MISC_DEVICES=y CONFIG_EEPROM_LEGACY=m CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_ATA=y CONFIG_PATA_IT821X=y CONFIG_PATA_SIL680=y CONFIG_NETDEVICES=y +CONFIG_NETCONSOLE=y CONFIG_TUN=m -CONFIG_NET_ETHERNET=y CONFIG_NET_TULIP=y CONFIG_TULIP=y CONFIG_TULIP_MMIO=y CONFIG_R8169=y -CONFIG_NETCONSOLE=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_EVDEV=m # CONFIG_INPUT_KEYBOARD is not set @@ -106,7 +93,6 @@ CONFIG_HW_RANDOM=y CONFIG_I2C=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_MPC=y -CONFIG_VIDEO_OUTPUT_CONTROL=m # CONFIG_VGA_CONSOLE is not set CONFIG_HID=m # CONFIG_USB_HID is not set @@ -126,7 +112,6 @@ CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_XFS_FS=m -CONFIG_INOTIFY=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y CONFIG_ZISOFS=y @@ -137,7 +122,6 @@ CONFIG_NTFS_FS=m CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y @@ -153,15 +137,9 @@ CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_SHA1=m -CONFIG_CRYPTO_AES=m -CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_SERPENT=m CONFIG_CRYPTO_TWOFISH=m diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig index fbd9e4163311c0..ac9666f8abf171 100644 --- a/arch/powerpc/configs/maple_defconfig +++ b/arch/powerpc/configs/maple_defconfig @@ -1,10 +1,11 @@ CONFIG_PPC64=y CONFIG_SMP=y CONFIG_NR_CPUS=4 -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_FHANDLE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_COMPAT_BRK is not set @@ -16,16 +17,15 @@ CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_MAC_PARTITION=y # CONFIG_PPC_POWERNV is not set # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_MAPLE=y CONFIG_UDBG_RTAS_CONSOLE=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_KEXEC=y CONFIG_IRQ_ALL_CPUS=y -# CONFIG_MIGRATION is not set CONFIG_PCI_MSI=y CONFIG_NET=y CONFIG_PACKET=y @@ -38,7 +38,6 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=8192 CONFIG_IDE=y @@ -52,11 +51,9 @@ CONFIG_CHR_DEV_SG=y CONFIG_SCSI_IPR=y CONFIG_ATA=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_NET_PCI=y CONFIG_AMD8111_ETH=y -CONFIG_E1000=y CONFIG_TIGON3=y +CONFIG_E1000=y CONFIG_USB_PEGASUS=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_MOUSEDEV_SCREEN_X=1600 @@ -72,13 +69,11 @@ CONFIG_GEN_RTC=y CONFIG_I2C=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_AMD8111=y -# CONFIG_HWMON is not set # CONFIG_VGA_CONSOLE is not set CONFIG_HID_GYRATION=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_USB=y CONFIG_USB_MON=y @@ -111,7 +106,6 @@ CONFIG_EXT2_FS_XIP=y CONFIG_EXT3_FS=y # CONFIG_EXT3_FS_XATTR is not set CONFIG_EXT4_FS=y -CONFIG_INOTIFY=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y @@ -119,28 +113,22 @@ CONFIG_TMPFS=y CONFIG_HUGETLBFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAC_PARTITION=y CONFIG_NLS_DEFAULT="utf-8" CONFIG_NLS_UTF8=y CONFIG_CRC_CCITT=y CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_LATENCYTOP=y -CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACK_USAGE=y +CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_LATENCYTOP=y CONFIG_XMON=y CONFIG_XMON_DEFAULT=y CONFIG_BOOTX_TEXT=y CONFIG_PPC_EARLY_DEBUG=y -CONFIG_PPC_EARLY_DEBUG_BOOTX=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/mgcoge_defconfig b/arch/powerpc/configs/mgcoge_defconfig index 8fa84f156ef3f2..666922c5b57229 100644 --- a/arch/powerpc/configs/mgcoge_defconfig +++ b/arch/powerpc/configs/mgcoge_defconfig @@ -1,7 +1,7 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 @@ -16,7 +16,6 @@ CONFIG_PARTITION_ADVANCED=y # CONFIG_PPC_PMAC is not set CONFIG_PPC_82xx=y CONFIG_MGCOGE=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BINFMT_MISC=y # CONFIG_SECCOMP is not set CONFIG_NET=y @@ -36,8 +35,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y -CONFIG_MTD_BLKDEVS=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_ADV_OPTIONS=y CONFIG_MTD_CFI_GEOMETRY=y @@ -45,7 +42,6 @@ CONFIG_MTD_CFI_GEOMETRY=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_NETDEVICES=y @@ -68,7 +64,6 @@ CONFIG_USB_GADGET=y CONFIG_USB_FSL_USB2=y CONFIG_USB_G_SERIAL=y CONFIG_UIO=y -CONFIG_UIO_PDRV=y CONFIG_EXT2_FS=y CONFIG_AUTOFS4_FS=y CONFIG_PROC_KCORE=y @@ -77,16 +72,15 @@ CONFIG_JFFS2_FS=y CONFIG_CRAMFS=y CONFIG_SQUASHFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y # CONFIG_SCHED_DEBUG is not set -CONFIG_DEBUG_INFO=y CONFIG_BDI_SWITCH=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig index ee853a1b1b2c15..59b85cb95259be 100644 --- a/arch/powerpc/configs/mpc512x_defconfig +++ b/arch/powerpc/configs/mpc512x_defconfig @@ -50,7 +50,6 @@ CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_MPC5121_NFC=y CONFIG_MTD_UBI=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=1 CONFIG_BLK_DEV_RAM_SIZE=8192 @@ -81,7 +80,6 @@ CONFIG_MDIO_BITBANG=y # CONFIG_WLAN is not set # CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_EVDEV=y -CONFIG_VT_HW_CONSOLE_BINDING=y # CONFIG_DEVKMEM is not set CONFIG_SERIAL_MPC52xx=y CONFIG_SERIAL_MPC52xx_CONSOLE=y diff --git a/arch/powerpc/configs/mpc5200_defconfig b/arch/powerpc/configs/mpc5200_defconfig index 69fd8adf9f5e9d..9fd041bfd778b4 100644 --- a/arch/powerpc/configs/mpc5200_defconfig +++ b/arch/powerpc/configs/mpc5200_defconfig @@ -1,6 +1,6 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y -CONFIG_SPARSE_IRQ=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y CONFIG_MODULES=y @@ -16,8 +16,6 @@ CONFIG_PPC_MPC5200_BUGFIX=y CONFIG_PPC_MPC5200_LPBFIFO=m # CONFIG_PPC_PMAC is not set CONFIG_SIMPLE_GPIO=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -33,8 +31,6 @@ CONFIG_SYN_COOKIES=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -42,7 +38,6 @@ CONFIG_MTD_ROM=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_PLATRAM=y CONFIG_MTD_UBI=m -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 @@ -77,7 +72,6 @@ CONFIG_SENSORS_LM87=m CONFIG_WATCHDOG=y CONFIG_MFD_SM501=m CONFIG_DRM=y -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_FB=y CONFIG_FB_FOREIGN_ENDIAN=y CONFIG_FB_RADEON=y @@ -94,9 +88,6 @@ CONFIG_SND=y # CONFIG_SND_SPI is not set # CONFIG_SND_USB is not set CONFIG_SND_SOC=y -CONFIG_SND_SOC_MPC5200_I2S=y -CONFIG_SND_MPC52xx_SOC_PCM030=y -CONFIG_SND_MPC52xx_SOC_EFIKA=y CONFIG_HID_DRAGONRISE=y CONFIG_HID_GYRATION=y CONFIG_HID_TWINHAN=y @@ -104,7 +95,6 @@ CONFIG_HID_ORTEK=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_HID_GREENASIA=y CONFIG_HID_SMARTJOYPLUS=y @@ -112,7 +102,6 @@ CONFIG_HID_TOPSEED=y CONFIG_HID_THRUSTMASTER=y CONFIG_HID_ZEROPLUS=y CONFIG_USB=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PPC_OF_BE=y @@ -135,13 +124,12 @@ CONFIG_JFFS2_FS=y CONFIG_UBIFS_FS=m CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_INFO=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_INFO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/mpc7448_hpc2_defconfig b/arch/powerpc/configs/mpc7448_hpc2_defconfig index 75f0bbf0f6e86f..e2647d5bb6051e 100644 --- a/arch/powerpc/configs/mpc7448_hpc2_defconfig +++ b/arch/powerpc/configs/mpc7448_hpc2_defconfig @@ -1,19 +1,17 @@ CONFIG_ALTIVEC=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_EMBEDDED6xx=y CONFIG_MPC7448HPC2=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BINFMT_MISC=y -CONFIG_SPARSE_IRQ=y # CONFIG_SECCOMP is not set CONFIG_NET=y CONFIG_PACKET=y @@ -29,7 +27,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 @@ -37,13 +34,11 @@ CONFIG_BLK_DEV_SD=y CONFIG_ATA=y CONFIG_SATA_MV=y CONFIG_NETDEVICES=y -CONFIG_PHYLIB=y -CONFIG_NET_ETHERNET=y -CONFIG_NET_PCI=y CONFIG_E100=y CONFIG_8139TOO=y # CONFIG_8139TOO_PIO is not set CONFIG_TSI108_ETH=y +CONFIG_PHYLIB=y # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set @@ -53,17 +48,12 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_HW_RANDOM is not set CONFIG_GEN_RTC=y -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y CONFIG_CRC_T10DIF=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/mpc8272_ads_defconfig b/arch/powerpc/configs/mpc8272_ads_defconfig index 6a22400f73c199..825b052176af6b 100644 --- a/arch/powerpc/configs/mpc8272_ads_defconfig +++ b/arch/powerpc/configs/mpc8272_ads_defconfig @@ -1,17 +1,17 @@ CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y +CONFIG_PARTITION_ADVANCED=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_82xx=y CONFIG_MPC8272_ADS=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BINFMT_MISC=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -27,7 +27,6 @@ CONFIG_NETFILTER=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_ADV_OPTIONS=y @@ -39,52 +38,41 @@ CONFIG_MTD_CFI_GEOMETRY=y CONFIG_MTD_CFI_I4=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y CONFIG_TUN=y -CONFIG_DAVICOM_PHY=y -CONFIG_NET_ETHERNET=y CONFIG_FS_ENET=y # CONFIG_FS_ENET_HAS_SCC is not set CONFIG_FS_ENET_MDIO_FCC=y +CONFIG_DAVICOM_PHY=y CONFIG_PPP=y +CONFIG_PPP_DEFLATE=y CONFIG_PPP_ASYNC=y CONFIG_PPP_SYNC_TTY=y -CONFIG_PPP_DEFLATE=y CONFIG_INPUT_EVDEV=y -# CONFIG_SERIO_I8042 is not set # CONFIG_VT is not set CONFIG_SERIAL_CPM=y CONFIG_SERIAL_CPM_CONSOLE=y # CONFIG_HWMON is not set -# CONFIG_HID_SUPPORT is not set # CONFIG_USB_SUPPORT is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_AUTOFS4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y CONFIG_NLS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y +CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BDI_SWITCH=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig index 23fec79964cf8a..671e220a9a9834 100644 --- a/arch/powerpc/configs/mpc83xx_defconfig +++ b/arch/powerpc/configs/mpc83xx_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y @@ -45,14 +44,12 @@ CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_FSL_ELBC=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=32768 @@ -62,7 +59,6 @@ CONFIG_ATA=y CONFIG_SATA_FSL=y CONFIG_SATA_SIL=y CONFIG_NETDEVICES=y -CONFIG_MII=y CONFIG_UCC_GETH=y CONFIG_GIANFAR=y CONFIG_MARVELL_PHY=y @@ -82,7 +78,6 @@ CONFIG_I2C=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_MPC=y CONFIG_WATCHDOG=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_HID_A4TECH=y CONFIG_HID_APPLE=y CONFIG_HID_BELKIN=y @@ -97,7 +92,6 @@ CONFIG_HID_MONTEREY=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_USB=y CONFIG_USB_MON=y @@ -119,6 +113,5 @@ CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y -CONFIG_CRYPTO_AES=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DEV_TALITOS=y diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig index 02395fab19bd6c..cfae862d634742 100644 --- a/arch/powerpc/configs/mpc85xx_defconfig +++ b/arch/powerpc/configs/mpc85xx_defconfig @@ -20,6 +20,7 @@ CONFIG_MODVERSIONS=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_MAC_PARTITION=y +CONFIG_C293_PCIE=y CONFIG_MPC8540_ADS=y CONFIG_MPC8560_ADS=y CONFIG_MPC85xx_CDS=y @@ -27,7 +28,6 @@ CONFIG_MPC85xx_MDS=y CONFIG_MPC8536_DS=y CONFIG_MPC85xx_DS=y CONFIG_MPC85xx_RDB=y -CONFIG_C293_PCIE=y CONFIG_P1010_RDB=y CONFIG_P1022_DS=y CONFIG_P1022_RDK=y @@ -70,7 +70,6 @@ CONFIG_NET_IPIP=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y -CONFIG_ARPD=y CONFIG_INET_ESP=y # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_LRO is not set @@ -80,33 +79,17 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_MTD=y -CONFIG_MTD_OF_PARTS=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y -CONFIG_MTD_BLKDEVS=y CONFIG_MTD_BLOCK=y CONFIG_FTL=y CONFIG_MTD_CFI=y -CONFIG_MTD_GEN_PROBE=y -CONFIG_MTD_MAP_BANK_WIDTH_1=y -CONFIG_MTD_MAP_BANK_WIDTH_2=y -CONFIG_MTD_MAP_BANK_WIDTH_4=y -CONFIG_MTD_CFI_I1=y -CONFIG_MTD_CFI_I2=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_M25P80=y -CONFIG_MTD_CFI_UTIL=y -CONFIG_MTD_NAND_ECC=y CONFIG_MTD_NAND=y -CONFIG_MTD_NAND_IDS=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_MTD_NAND_FSL_IFC=y CONFIG_MTD_UBI=y -CONFIG_MTD_UBI_WL_THRESHOLD=4096 -CONFIG_MTD_UBI_BEB_RESERVE=1 -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=y CONFIG_BLK_DEV_RAM=y @@ -117,7 +100,6 @@ CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_LOGGING=y CONFIG_ATA=y CONFIG_SATA_AHCI=y @@ -133,11 +115,11 @@ CONFIG_GIANFAR=y CONFIG_E1000=y CONFIG_E1000E=y CONFIG_IGB=y +CONFIG_AT803X_PHY=y CONFIG_MARVELL_PHY=y CONFIG_DAVICOM_PHY=y CONFIG_CICADA_PHY=y CONFIG_VITESSE_PHY=y -CONFIG_AT803X_PHY=y CONFIG_FIXED_PHY=y CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_MOUSEDEV is not set @@ -161,14 +143,10 @@ CONFIG_SPI_FSL_SPI=y CONFIG_SPI_FSL_ESPI=y CONFIG_GPIO_MPC8XXX=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_FB=y CONFIG_FB_FSL_DIU=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FONTS=y -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y CONFIG_SOUND=y CONFIG_SND=y # CONFIG_SND_SUPPORT_OLD_API is not set @@ -192,7 +170,6 @@ CONFIG_HID_MONTEREY=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_USB=y CONFIG_USB_MON=y @@ -215,8 +192,6 @@ CONFIG_RTC_DRV_DS3232=y CONFIG_RTC_DRV_CMOS=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y -CONFIG_MEMORY=y -# CONFIG_NET_DMA is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set @@ -227,21 +202,9 @@ CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_CODEPAGE_850=y -CONFIG_NLS_ISO8859_1=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y -CONFIG_JFFS2_FS=y -CONFIG_JFFS2_FS_DEBUG=1 -CONFIG_JFFS2_FS_WRITEBUFFER=y -CONFIG_JFFS2_ZLIB=y -CONFIG_JFFS2_RTIME=y -CONFIG_UBIFS_FS=y -CONFIG_UBIFS_FS_XATTR=y -CONFIG_UBIFS_FS_LZO=y -CONFIG_UBIFS_FS_ZLIB=y CONFIG_ADFS_FS=m CONFIG_AFFS_FS=m CONFIG_HFS_FS=m @@ -249,6 +212,9 @@ CONFIG_HFSPLUS_FS=m CONFIG_BEFS_FS=m CONFIG_BFS_FS=m CONFIG_EFS_FS=m +CONFIG_JFFS2_FS=y +CONFIG_JFFS2_FS_DEBUG=1 +CONFIG_UBIFS_FS=y CONFIG_CRAMFS=y CONFIG_VXFS_FS=m CONFIG_HPFS_FS=m @@ -259,16 +225,16 @@ CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_ISO8859_1=y CONFIG_CRC_T10DIF=y -CONFIG_CRC16=y -CONFIG_ZLIB_DEFLATE=y -CONFIG_LZO_COMPRESS=y -CONFIG_LZO_DECOMPRESS=y -CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRYPTO_LZO=y +CONFIG_FONTS=y +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y +CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_INFO=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig index b5d1b82a1b4352..5b79adb64ce2e5 100644 --- a/arch/powerpc/configs/mpc85xx_smp_defconfig +++ b/arch/powerpc/configs/mpc85xx_smp_defconfig @@ -2,14 +2,13 @@ CONFIG_PPC_85xx=y CONFIG_PHYS_64BIT=y CONFIG_SMP=y CONFIG_NR_CPUS=8 -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_BSD_PROCESS_ACCT=y CONFIG_AUDIT=y CONFIG_IRQ_DOMAIN_DEBUG=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 @@ -23,6 +22,7 @@ CONFIG_MODVERSIONS=y # CONFIG_BLK_DEV_BSG is not set CONFIG_PARTITION_ADVANCED=y CONFIG_MAC_PARTITION=y +CONFIG_C293_PCIE=y CONFIG_MPC8540_ADS=y CONFIG_MPC8560_ADS=y CONFIG_MPC85xx_CDS=y @@ -30,7 +30,6 @@ CONFIG_MPC85xx_MDS=y CONFIG_MPC8536_DS=y CONFIG_MPC85xx_DS=y CONFIG_MPC85xx_RDB=y -CONFIG_C293_PCIE=y CONFIG_P1010_RDB=y CONFIG_P1022_DS=y CONFIG_P1022_RDK=y @@ -73,7 +72,6 @@ CONFIG_NET_IPIP=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y -CONFIG_ARPD=y CONFIG_INET_ESP=y # CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_LRO is not set @@ -83,33 +81,17 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_MTD=y -CONFIG_MTD_OF_PARTS=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y -CONFIG_MTD_BLKDEVS=y CONFIG_MTD_BLOCK=y CONFIG_FTL=y CONFIG_MTD_CFI=y -CONFIG_MTD_GEN_PROBE=y -CONFIG_MTD_MAP_BANK_WIDTH_1=y -CONFIG_MTD_MAP_BANK_WIDTH_2=y -CONFIG_MTD_MAP_BANK_WIDTH_4=y -CONFIG_MTD_CFI_I1=y -CONFIG_MTD_CFI_I2=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_M25P80=y -CONFIG_MTD_CFI_UTIL=y -CONFIG_MTD_NAND_ECC=y CONFIG_MTD_NAND=y -CONFIG_MTD_NAND_IDS=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_MTD_NAND_FSL_IFC=y CONFIG_MTD_UBI=y -CONFIG_MTD_UBI_WL_THRESHOLD=4096 -CONFIG_MTD_UBI_BEB_RESERVE=1 -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=y CONFIG_BLK_DEV_RAM=y @@ -120,7 +102,6 @@ CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_LOGGING=y CONFIG_ATA=y CONFIG_SATA_AHCI=y @@ -133,11 +114,11 @@ CONFIG_FS_ENET=y CONFIG_UCC_GETH=y CONFIG_GIANFAR=y CONFIG_E1000E=y +CONFIG_AT803X_PHY=y CONFIG_MARVELL_PHY=y CONFIG_DAVICOM_PHY=y CONFIG_CICADA_PHY=y CONFIG_VITESSE_PHY=y -CONFIG_AT803X_PHY=y CONFIG_FIXED_PHY=y CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_MOUSEDEV is not set @@ -162,14 +143,10 @@ CONFIG_SPI_FSL_SPI=y CONFIG_SPI_FSL_ESPI=y CONFIG_GPIO_MPC8XXX=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_FB=y CONFIG_FB_FSL_DIU=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FONTS=y -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y CONFIG_SOUND=y CONFIG_SND=y # CONFIG_SND_SUPPORT_OLD_API is not set @@ -193,7 +170,6 @@ CONFIG_HID_MONTEREY=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_USB=y CONFIG_USB_MON=y @@ -216,8 +192,6 @@ CONFIG_RTC_DRV_DS3232=y CONFIG_RTC_DRV_CMOS=y CONFIG_DMADEVICES=y CONFIG_FSL_DMA=y -CONFIG_MEMORY=y -# CONFIG_NET_DMA is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set @@ -228,21 +202,9 @@ CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_CODEPAGE_850=y -CONFIG_NLS_ISO8859_1=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_HUGETLBFS=y -CONFIG_JFFS2_FS=y -CONFIG_JFFS2_FS_DEBUG=1 -CONFIG_JFFS2_FS_WRITEBUFFER=y -CONFIG_JFFS2_ZLIB=y -CONFIG_JFFS2_RTIME=y -CONFIG_UBIFS_FS=y -CONFIG_UBIFS_FS_XATTR=y -CONFIG_UBIFS_FS_LZO=y -CONFIG_UBIFS_FS_ZLIB=y CONFIG_ADFS_FS=m CONFIG_AFFS_FS=m CONFIG_HFS_FS=m @@ -250,6 +212,9 @@ CONFIG_HFSPLUS_FS=m CONFIG_BEFS_FS=m CONFIG_BFS_FS=m CONFIG_EFS_FS=m +CONFIG_JFFS2_FS=y +CONFIG_JFFS2_FS_DEBUG=1 +CONFIG_UBIFS_FS=y CONFIG_CRAMFS=y CONFIG_VXFS_FS=m CONFIG_HPFS_FS=m @@ -260,20 +225,19 @@ CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_ISO8859_1=y CONFIG_CRC_T10DIF=y -CONFIG_CRC16=y -CONFIG_ZLIB_DEFLATE=y -CONFIG_LZO_COMPRESS=y -CONFIG_LZO_DECOMPRESS=y -CONFIG_CRYPTO_DEFLATE=y -CONFIG_CRYPTO_LZO=y +CONFIG_FONTS=y +CONFIG_FONT_8x8=y +CONFIG_FONT_8x16=y +CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_INFO=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y -CONFIG_CRYPTO_AES=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DEV_FSL_CAAM=y CONFIG_CRYPTO_DEV_TALITOS=y diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig index d954e80c286a92..321412c5dae496 100644 --- a/arch/powerpc/configs/mpc866_ads_defconfig +++ b/arch/powerpc/configs/mpc866_ads_defconfig @@ -1,25 +1,21 @@ CONFIG_PPC_8xx=y -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y -# CONFIG_SYSCTL_SYSCALL is not set -# CONFIG_HOTPLUG is not set # CONFIG_BUG is not set # CONFIG_BASE_FULL is not set # CONFIG_EPOLL is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y CONFIG_MPC86XADS=y CONFIG_8xx_COPYBACK=y CONFIG_8xx_CPU6=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_HZ_1000=y CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y # CONFIG_SECCOMP is not set CONFIG_NET=y CONFIG_PACKET=y @@ -32,27 +28,21 @@ CONFIG_SYN_COOKIES=y # CONFIG_IPV6 is not set CONFIG_BLK_DEV_LOOP=y CONFIG_NETDEVICES=y -CONFIG_FIXED_PHY=y -CONFIG_NET_ETHERNET=y CONFIG_FS_ENET=y +CONFIG_FIXED_PHY=y # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_CPM=y CONFIG_SERIAL_CPM_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set CONFIG_GEN_RTC=y -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_EXT2_FS=y CONFIG_EXT2_FS_XATTR=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y CONFIG_CRC_CCITT=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -# CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRC32_SLICEBY4=y +# CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/mpc86xx_defconfig b/arch/powerpc/configs/mpc86xx_defconfig index fc58aa8a89e498..a4572563681c7d 100644 --- a/arch/powerpc/configs/mpc86xx_defconfig +++ b/arch/powerpc/configs/mpc86xx_defconfig @@ -1,23 +1,24 @@ CONFIG_SMP=y CONFIG_NR_CPUS=2 -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y -CONFIG_BSD_PROCESS_ACCT=y CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y -CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_MAC_PARTITION=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_86xx=y @@ -26,11 +27,8 @@ CONFIG_SBC8641D=y CONFIG_MPC8610_HPCD=y CONFIG_GEF_SBC610=y CONFIG_HIGHMEM=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_HZ_1000=y CONFIG_BINFMT_MISC=m -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y CONFIG_NET=y CONFIG_PACKET=y @@ -48,12 +46,9 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NET_IPIP=y -CONFIG_NET_IPGRE=y -CONFIG_NET_IPGRE_BROADCAST=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y -CONFIG_ARPD=y # CONFIG_INET_XFRM_MODE_TRANSPORT is not set # CONFIG_INET_XFRM_MODE_TUNNEL is not set # CONFIG_INET_XFRM_MODE_BEET is not set @@ -61,28 +56,23 @@ CONFIG_ARPD=y CONFIG_IPV6=y CONFIG_IP_SCTP=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 -CONFIG_MISC_DEVICES=y CONFIG_EEPROM_LEGACY=y CONFIG_BLK_DEV_SD=y CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_LOGGING=y CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_PATA_ALI=y CONFIG_NETDEVICES=y CONFIG_DUMMY=y -CONFIG_VITESSE_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y CONFIG_GIANFAR=y +CONFIG_VITESSE_PHY=y CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set @@ -102,7 +92,6 @@ CONFIG_NVRAM=y CONFIG_I2C=y CONFIG_I2C_MPC=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_MIXER_OSS=y @@ -123,7 +112,6 @@ CONFIG_HID_MONTEREY=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_USB=y CONFIG_USB_MON=y @@ -137,7 +125,6 @@ CONFIG_RTC_DRV_CMOS=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_ISO9660_FS=m CONFIG_JOLIET=y CONFIG_ZISOFS=y @@ -145,9 +132,6 @@ CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_CODEPAGE_850=y -CONFIG_NLS_ISO8859_1=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_ADFS_FS=m @@ -164,18 +148,15 @@ CONFIG_QNX4FS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAC_PARTITION=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_ISO8859_1=y CONFIG_CRC_T10DIF=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DETECT_HUNG_TASK=y CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_DETECT_HUNG_TASK=y CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_HMAC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index 3f47d00a10c0d9..2a10f98d4ee50b 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -1,23 +1,19 @@ CONFIG_PPC_8xx=y -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_BASE_FULL is not set # CONFIG_FUTEX is not set # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y # CONFIG_IOSCHED_CFQ is not set CONFIG_8xx_COPYBACK=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_HZ_100=y -CONFIG_8XX_MINIMAL_FPEMU=y -CONFIG_SPARSE_IRQ=y # CONFIG_SECCOMP is not set CONFIG_NET=y CONFIG_PACKET=y @@ -34,7 +30,6 @@ CONFIG_SYN_COOKIES=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_ADV_OPTIONS=y @@ -46,22 +41,17 @@ CONFIG_MTD_CFI_GEOMETRY=y CONFIG_MTD_CFI_I4=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y # CONFIG_BLK_DEV is not set -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y -CONFIG_DAVICOM_PHY=y -CONFIG_NET_ETHERNET=y CONFIG_FS_ENET=y # CONFIG_FS_ENET_HAS_SCC is not set -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set +CONFIG_DAVICOM_PHY=y # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_CPM=y CONFIG_SERIAL_CPM_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set CONFIG_GEN_RTC=y # CONFIG_HWMON is not set # CONFIG_USB_SUPPORT is not set @@ -69,13 +59,8 @@ CONFIG_GEN_RTC=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_CRC32 is not set +CONFIG_CRC32_SLICEBY4=y +CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_CRC32_SLICEBY4=y diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig index 93c7752e2dbbdf..525a2cb500a734 100644 --- a/arch/powerpc/configs/mvme5100_defconfig +++ b/arch/powerpc/configs/mvme5100_defconfig @@ -61,7 +61,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_LAPB=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=2 @@ -88,13 +87,10 @@ CONFIG_I2C=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_MPC=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=m # CONFIG_VGA_CONSOLE is not set # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set # CONFIG_IOMMU_SUPPORT is not set -CONFIG_VME_BUS=m -CONFIG_VME_CA91CX42=m CONFIG_EXT2_FS=m CONFIG_EXT3_FS=m # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set @@ -122,11 +118,6 @@ CONFIG_NLS_UTF8=m CONFIG_CRC_CCITT=m CONFIG_CRC_T10DIF=y CONFIG_XZ_DEC=y -CONFIG_XZ_DEC_X86=y -CONFIG_XZ_DEC_IA64=y -CONFIG_XZ_DEC_ARM=y -CONFIG_XZ_DEC_ARMTHUMB=y -CONFIG_XZ_DEC_SPARC=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig index 3e72c8c06a0dc7..8f94782eb90713 100644 --- a/arch/powerpc/configs/pasemi_defconfig +++ b/arch/powerpc/configs/pasemi_defconfig @@ -54,7 +54,6 @@ CONFIG_MTD_SLRAM=y CONFIG_MTD_PHRAM=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_PASEMI=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=16384 @@ -69,7 +68,6 @@ CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_CHR_DEV_SCH=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_ATA=y @@ -134,7 +132,6 @@ CONFIG_HID_NTRIG=y CONFIG_HID_PANTHERLORD=y CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y -CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y CONFIG_HID_GREENASIA=y CONFIG_HID_SMARTJOYPLUS=y diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 0351b5ffdfef7d..ea8705ffcd7616 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -1,19 +1,20 @@ CONFIG_ALTIVEC=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_PARTITION_ADVANCED=y # CONFIG_PPC_CHRP is not set CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y @@ -21,13 +22,9 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_PMAC=y CONFIG_PPC601_SYNC_FIX=y CONFIG_HIGHMEM=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BINFMT_MISC=m -# CONFIG_MIGRATION is not set -CONFIG_PM=y -CONFIG_PM_DEBUG=y CONFIG_HIBERNATION=y +CONFIG_PM_DEBUG=y CONFIG_APM_EMULATION=y CONFIG_PCCARD=m CONFIG_YENTA=m @@ -46,7 +43,6 @@ CONFIG_INET_ESP=y # CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_NETFILTER=y -CONFIG_NETFILTER_NETLINK_QUEUE=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_IRC=m @@ -84,18 +80,11 @@ CONFIG_NETFILTER_XT_MATCH_TIME=m CONFIG_NETFILTER_XT_MATCH_U32=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_REDIRECT=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m @@ -112,8 +101,6 @@ CONFIG_IRDA_CACHE_LAST_LSAP=y CONFIG_IRDA_FAST_RR=y CONFIG_IRTTY_SIR=m CONFIG_BT=m -CONFIG_BT_L2CAP=y -CONFIG_BT_SCO=y CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_BNEP=m @@ -128,10 +115,8 @@ CONFIG_MAC80211_LEDS=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_STANDALONE is not set CONFIG_CONNECTOR=y -CONFIG_PROC_DEVICETREE=y CONFIG_MAC_FLOPPY=m CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_UB=m CONFIG_BLK_DEV_RAM=y CONFIG_IDE=y CONFIG_BLK_DEV_IDECS=m @@ -152,7 +137,6 @@ CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_AIC7XXX=m CONFIG_AIC7XXX_CMDS_PER_DEVICE=253 CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -CONFIG_SCSI_AIC7XXX_OLD=m CONFIG_SCSI_SYM53C8XX_2=y CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0 CONFIG_SCSI_MESH=y @@ -170,12 +154,6 @@ CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m -CONFIG_IEEE1394=m -CONFIG_IEEE1394_OHCI1394=m -CONFIG_IEEE1394_SBP2=m -CONFIG_IEEE1394_RAWIO=m -CONFIG_IEEE1394_VIDEO1394=m -CONFIG_IEEE1394_DV1394=m CONFIG_ADB=y CONFIG_ADB_CUDA=y CONFIG_ADB_PMU=y @@ -193,27 +171,22 @@ CONFIG_PMAC_RACKMETER=m CONFIG_NETDEVICES=y CONFIG_DUMMY=m CONFIG_TUN=m -CONFIG_NET_ETHERNET=y +CONFIG_PCNET32=y CONFIG_MACE=y CONFIG_BMAC=y CONFIG_SUNGEM=y -CONFIG_NET_PCI=y -CONFIG_PCNET32=y -CONFIG_PRISM54=m -CONFIG_B43=m -CONFIG_B43LEGACY=m -CONFIG_HERMES=m -CONFIG_APPLE_AIRPORT=m -CONFIG_PCMCIA_HERMES=m -CONFIG_P54_COMMON=m -CONFIG_USB_USBNET=m -# CONFIG_USB_NET_CDC_SUBSET is not set CONFIG_PPP=y +CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=y CONFIG_PPP_MULTILINK=y CONFIG_PPP_ASYNC=y CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=y -CONFIG_PPP_BSDCOMP=m +CONFIG_USB_USBNET=m +# CONFIG_USB_NET_CDC_SUBSET is not set +CONFIG_PRISM54=m +CONFIG_B43=m +CONFIG_B43LEGACY=m +CONFIG_P54_COMMON=m CONFIG_INPUT_EVDEV=y # CONFIG_KEYBOARD_ATKBD is not set # CONFIG_MOUSE_PS2 is not set @@ -226,7 +199,6 @@ CONFIG_SERIAL_PMACZILOG_TTYS=y CONFIG_NVRAM=y CONFIG_GEN_RTC=y CONFIG_I2C_CHARDEV=m -CONFIG_POWER_SUPPLY=y CONFIG_APM_POWER=y CONFIG_BATTERY_PMU=y CONFIG_HWMON=m @@ -253,7 +225,6 @@ CONFIG_FB_ATY=y CONFIG_FB_ATY_CT=y CONFIG_FB_ATY_GX=y CONFIG_FB_3DFX=y -CONFIG_DISPLAY_SUPPORT=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y @@ -314,7 +285,6 @@ CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_EXT3_FS_POSIX_ACL=y CONFIG_EXT4_FS=y -CONFIG_INOTIFY=y CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=m CONFIG_ISO9660_FS=y @@ -328,34 +298,27 @@ CONFIG_TMPFS=y CONFIG_HFS_FS=m CONFIG_HFSPLUS_FS=m CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_NFSD=m CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y -CONFIG_SMB_FS=m -CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_ISO8859_1=m CONFIG_CRC_T10DIF=y CONFIG_LIBCRC32C=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_LATENCYTOP=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_XMON=y CONFIG_XMON_DEFAULT=y CONFIG_BOOTX_TEXT=y CONFIG_PPC_EARLY_DEBUG=y -CONFIG_PPC_EARLY_DEBUG_BOOTX=y CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MD4=m -CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig index 52908c7897d922..a43bf6ea10fbfe 100644 --- a/arch/powerpc/configs/ppc40x_defconfig +++ b/arch/powerpc/configs/ppc40x_defconfig @@ -1,13 +1,10 @@ CONFIG_40x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y -CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set @@ -18,7 +15,6 @@ CONFIG_HOTFOOT=y CONFIG_KILAUEA=y CONFIG_MAKALU=y CONFIG_XILINX_VIRTEX_GENERIC_BOARD=y -CONFIG_SPARSE_IRQ=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -34,8 +30,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=m CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y @@ -43,13 +37,10 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_UBI=m CONFIG_MTD_UBI_GLUEBI=m -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 CONFIG_XILINX_SYSACE=m CONFIG_NETDEVICES=y -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y CONFIG_IBM_EMAC=y # CONFIG_INPUT is not set CONFIG_SERIO=m @@ -73,13 +64,11 @@ CONFIG_I2C_IBM_IIC=m CONFIG_GPIO_XILINX=y # CONFIG_HWMON is not set CONFIG_THERMAL=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=m CONFIG_FB_XILINX=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=m # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_TMPFS=y @@ -87,16 +76,12 @@ CONFIG_JFFS2_FS=m CONFIG_UBIFS_FS=m CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_ISO8859_1=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index 924e10df18444b..bbc7f76d52c89a 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ b/arch/powerpc/configs/ppc44x_defconfig @@ -1,13 +1,10 @@ CONFIG_44x=y -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y -CONFIG_KALLSYMS_EXTRA_PASS=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set @@ -28,7 +25,6 @@ CONFIG_YOSEMITE=y CONFIG_XILINX_VIRTEX440_GENERIC_BOARD=y CONFIG_PPC4xx_GPIO=y CONFIG_MATH_EMULATION=y -CONFIG_SPARSE_IRQ=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -44,8 +40,6 @@ CONFIG_BRIDGE=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_CONNECTOR=y CONFIG_MTD=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y @@ -55,7 +49,6 @@ CONFIG_MTD_NAND=m CONFIG_MTD_NAND_NDFC=m CONFIG_MTD_UBI=m CONFIG_MTD_UBI_GLUEBI=m -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=35000 CONFIG_XILINX_SYSACE=m @@ -64,8 +57,6 @@ CONFIG_BLK_DEV_SD=m # CONFIG_SCSI_LOWLEVEL is not set CONFIG_NETDEVICES=y CONFIG_TUN=m -CONFIG_ETHERNET=y -CONFIG_NET_VENDOR_IBM=y CONFIG_IBM_EMAC=y # CONFIG_INPUT is not set CONFIG_SERIO=m @@ -105,23 +96,18 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_UBIFS_FS=m -CONFIG_UBIFS_FS_XATTR=y CONFIG_LOGFS=m CONFIG_CRAMFS=y CONFIG_SQUASHFS=m CONFIG_SQUASHFS_XATTR=y CONFIG_SQUASHFS_LZO=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_ISO8859_1=m CONFIG_CRC_T10DIF=m CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 5830d735c5c3a5..7481e4ff87e4e0 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -1,6 +1,4 @@ CONFIG_PPC64=y -CONFIG_ALTIVEC=y -CONFIG_VSX=y CONFIG_SMP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -42,9 +40,6 @@ CONFIG_PPC_CELLEB=y CONFIG_PPC_CELL_QPACE=y CONFIG_RTAS_FLASH=m CONFIG_IBMEBUS=y -CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_GOV_POWERSAVE=y -CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_PMAC64=y CONFIG_HZ_100=y CONFIG_BINFMT_MISC=m @@ -82,7 +77,6 @@ CONFIG_BPF_JIT=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_FD=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=m @@ -101,7 +95,6 @@ CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_CXGB3_ISCSI=m @@ -145,7 +138,6 @@ CONFIG_DM_MULTIPATH_ST=m CONFIG_DM_UEVENT=y CONFIG_ADB_PMU=y CONFIG_PMAC_SMU=y -CONFIG_THERM_PM72=y CONFIG_WINDFARM=y CONFIG_WINDFARM_PM81=y CONFIG_WINDFARM_PM91=y @@ -209,7 +201,6 @@ CONFIG_RAW_DRIVER=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_AMD8111=y CONFIG_I2C_PASEMI=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_OF=y @@ -326,7 +317,6 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y @@ -362,4 +352,4 @@ CONFIG_CRYPTO_DEV_NX=y CONFIG_CRYPTO_DEV_NX_ENCRYPT=m CONFIG_VIRTUALIZATION=y CONFIG_KVM_BOOK3S_64=m -CONFIG_KVM_BOOK3S_64_HV=y +CONFIG_KVM_BOOK3S_64_HV=m diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 67885b2d70aae6..ddf9773458cf2e 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -22,7 +22,6 @@ CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_PARTITION_ADVANCED=y CONFIG_MAC_PARTITION=y -CONFIG_EFI_PARTITION=y CONFIG_CORENET_GENERIC=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y @@ -55,7 +54,6 @@ CONFIG_BRIDGE=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_FD=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=m @@ -70,7 +68,6 @@ CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_SRP_ATTRS=y @@ -131,7 +128,6 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_RAW_DRIVER=y CONFIG_I2C_CHARDEV=y CONFIG_I2C_AMD8111=y -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FIRMWARE_EDID=y CONFIG_FB_OF=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index ad6d6b5af7d7b2..e5d2c3dc07f1db 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -1,26 +1,22 @@ CONFIG_FSL_EMB_PERFMON=y -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_AUDIT=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y -CONFIG_AUDIT=y CONFIG_CGROUPS=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y -CONFIG_RESOURCE_COUNTERS=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y -CONFIG_UTS_NS=y -CONFIG_IPC_NS=y CONFIG_USER_NS=y -CONFIG_PID_NS=y CONFIG_BLK_DEV_INITRD=y -CONFIG_KALLSYMS_EXTRA_PASS=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m @@ -29,10 +25,19 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_BLK_DEV_INTEGRITY=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_OSF_PARTITION=y +CONFIG_AMIGA_PARTITION=y +CONFIG_BSD_DISKLABEL=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_SGI_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_KARMA_PARTITION=y CONFIG_PPC_MPC52xx=y CONFIG_PPC_EFIKA=y CONFIG_PPC_MPC5200_BUGFIX=y -CONFIG_PPC_MPC5200_GPIO=y CONFIG_PPC_82xx=y CONFIG_MPC8272_ADS=y CONFIG_PQ2FADS=y @@ -56,7 +61,6 @@ CONFIG_SBC8641D=y CONFIG_MPC8610_HPCD=y CONFIG_GEF_SBC610=y CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_DEBUG=y CONFIG_CPU_FREQ_STAT=m CONFIG_CPU_FREQ_STAT_DETAILS=y CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y @@ -69,19 +73,13 @@ CONFIG_TAU=y CONFIG_TAU_AVERAGE=y CONFIG_QUICC_ENGINE=y CONFIG_QE_GPIO=y -CONFIG_PPC_BESTCOMM=y -CONFIG_GPIO_MPC8XXX=y CONFIG_MCU_MPC8349EMITX=y CONFIG_HIGHMEM=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_HZ_1000=y CONFIG_PREEMPT_VOLUNTARY=y CONFIG_BINFMT_MISC=y -# CONFIG_MIGRATION is not set -CONFIG_PM=y -CONFIG_PM_DEBUG=y CONFIG_HIBERNATION=y +CONFIG_PM_DEBUG=y CONFIG_ISA=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_MSI=y @@ -106,8 +104,6 @@ CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_ROUTE_MULTIPATH=y CONFIG_IP_ROUTE_VERBOSE=y CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y CONFIG_IP_MROUTE=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y @@ -128,7 +124,6 @@ CONFIG_TCP_CONG_VENO=m CONFIG_TCP_CONG_YEAH=m CONFIG_TCP_CONG_ILLINOIS=m CONFIG_TCP_MD5SIG=y -CONFIG_IPV6_PRIVACY=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y CONFIG_IPV6_OPTIMISTIC_DAD=y @@ -144,7 +139,6 @@ CONFIG_IPV6_MROUTE=y CONFIG_IPV6_PIMSM_V2=y CONFIG_NETLABEL=y CONFIG_NETFILTER=y -CONFIG_NETFILTER_NETLINK_QUEUE=m CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_EVENTS=y @@ -159,7 +153,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m -CONFIG_NETFILTER_TPROXY=m CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m CONFIG_NETFILTER_XT_TARGET_CONNMARK=m CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m @@ -204,21 +197,12 @@ CONFIG_NETFILTER_XT_MATCH_TIME=m CONFIG_NETFILTER_XT_MATCH_U32=m CONFIG_NF_CONNTRACK_IPV4=m # CONFIG_NF_CONNTRACK_PROC_COMPAT is not set -CONFIG_IP_NF_QUEUE=m CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_ADDRTYPE=m CONFIG_IP_NF_MATCH_AH=m CONFIG_IP_NF_MATCH_ECN=m CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_NF_NAT=m -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_NETMAP=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_NF_NAT_SNMP_BASIC=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_TARGET_ECN=m @@ -229,7 +213,6 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_IP6_NF_QUEUE=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -240,7 +223,6 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m CONFIG_IP6_NF_MATCH_MH=m CONFIG_IP6_NF_MATCH_RT=m CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_TARGET_LOG=m CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP6_NF_MANGLE=m @@ -266,7 +248,6 @@ CONFIG_BRIDGE_EBT_MARK_T=m CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_SNAT=m CONFIG_BRIDGE_EBT_LOG=m -CONFIG_BRIDGE_EBT_ULOG=m CONFIG_BRIDGE_EBT_NFLOG=m CONFIG_IP_DCCP=m CONFIG_NET_DCCPPROBE=m @@ -284,8 +265,6 @@ CONFIG_ATALK=m CONFIG_DEV_APPLETALK=m CONFIG_IPDDP=m CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP_DECAP=y -CONFIG_WAN_ROUTER=m CONFIG_NET_SCHED=y CONFIG_NET_SCH_CBQ=m CONFIG_NET_SCH_HTB=m @@ -349,8 +328,6 @@ CONFIG_VLSI_FIR=m CONFIG_VIA_FIR=m CONFIG_MCS_FIR=m CONFIG_BT=m -CONFIG_BT_L2CAP=y -CONFIG_BT_SCO=y CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_BNEP=m @@ -380,7 +357,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_DEBUG_DEVRES=y CONFIG_CONNECTOR=y -CONFIG_PROC_DEVICETREE=y CONFIG_PARPORT=m CONFIG_PARPORT_PC=m CONFIG_PARPORT_SERIAL=m @@ -396,7 +372,6 @@ CONFIG_BLK_DEV_RAM_SIZE=16384 CONFIG_CDROM_PKTCDVD=m CONFIG_VIRTIO_BLK=m CONFIG_BLK_DEV_HD=y -CONFIG_MISC_DEVICES=y CONFIG_ENCLOSURE_SERVICES=m CONFIG_SENSORS_TSL2550=m CONFIG_EEPROM_AT24=m @@ -419,7 +394,6 @@ CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_CHR_DEV_SCH=m CONFIG_SCSI_ENCLOSURE=m -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SCAN_ASYNC=y @@ -436,8 +410,8 @@ CONFIG_ATA=y # CONFIG_SATA_PMP is not set CONFIG_SATA_FSL=m CONFIG_PDC_ADMA=m -CONFIG_PATA_MPC52xx=m CONFIG_ATA_PIIX=m +CONFIG_PATA_MPC52xx=m CONFIG_PATA_OPTIDMA=m CONFIG_PATA_SCH=m CONFIG_PATA_VIA=m @@ -479,37 +453,44 @@ CONFIG_THERM_WINDTUNNEL=m CONFIG_THERM_ADT746X=m CONFIG_WINDFARM=y CONFIG_PMAC_RACKMETER=m +CONFIG_SENSORS_AMS=m CONFIG_NETDEVICES=y -CONFIG_IFB=m -CONFIG_DUMMY=m CONFIG_BONDING=m -CONFIG_MACVLAN=m +CONFIG_DUMMY=m CONFIG_EQUALIZER=m +CONFIG_NET_FC=y +CONFIG_IFB=m +CONFIG_MACVLAN=m +CONFIG_NETCONSOLE=m CONFIG_TUN=m CONFIG_VETH=m -CONFIG_NET_SB1000=m -CONFIG_MARVELL_PHY=m -CONFIG_DAVICOM_PHY=m -CONFIG_QSEMI_PHY=m -CONFIG_LXT_PHY=m -CONFIG_CICADA_PHY=m -CONFIG_VITESSE_PHY=m -CONFIG_SMSC_PHY=m -CONFIG_BROADCOM_PHY=m -CONFIG_ICPLUS_PHY=m -CONFIG_REALTEK_PHY=m -CONFIG_NET_ETHERNET=y -CONFIG_MACE=m -CONFIG_BMAC=m -CONFIG_HAPPYMEAL=m -CONFIG_SUNGEM=m -CONFIG_CASSINI=m -CONFIG_NET_VENDOR_3COM=y +CONFIG_VIRTIO_NET=m +CONFIG_ATM_TCP=m +CONFIG_ATM_LANAI=m +CONFIG_ATM_ENI=m +CONFIG_ATM_NICSTAR=m +CONFIG_ATM_IDT77252=m +CONFIG_ATM_HE=m CONFIG_EL3=m +CONFIG_PCMCIA_3C574=m +CONFIG_PCMCIA_3C589=m CONFIG_VORTEX=m CONFIG_TYPHOON=m -CONFIG_NET_VENDOR_SMC=y -CONFIG_ULTRA=m +CONFIG_ADAPTEC_STARFIRE=m +CONFIG_ACENIC=m +CONFIG_AMD8111_ETH=m +CONFIG_PCNET32=m +CONFIG_PCMCIA_NMCLAN=m +CONFIG_MACE=m +CONFIG_BMAC=m +CONFIG_ATL1=m +CONFIG_B44=m +CONFIG_BNX2=m +CONFIG_TIGON3=m +CONFIG_BNX2X=m +CONFIG_CHELSIO_T1=m +CONFIG_CHELSIO_T1_1G=y +CONFIG_CHELSIO_T3=m CONFIG_NET_TULIP=y CONFIG_DE2104X=m CONFIG_TULIP=m @@ -519,69 +500,84 @@ CONFIG_WINBOND_840=m CONFIG_DM9102=m CONFIG_ULI526X=m CONFIG_PCMCIA_XIRCOM=m -CONFIG_NET_ISA=y -CONFIG_EWRK3=m -CONFIG_NE2000=m -CONFIG_NET_PCI=y -CONFIG_PCNET32=m -CONFIG_AMD8111_ETH=m -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_B44=m -CONFIG_FORCEDETH=m +CONFIG_DL2K=m +CONFIG_SUNDANCE=m +CONFIG_S2IO=m +CONFIG_FEC_MPC52xx=m +CONFIG_GIANFAR=m +CONFIG_PCMCIA_FMVJ18X=m CONFIG_E100=m +CONFIG_E1000=m +CONFIG_E1000E=m +CONFIG_IGB=m +CONFIG_IXGB=m +CONFIG_IXGBE=m +CONFIG_IP1000=m +CONFIG_MV643XX_ETH=m +CONFIG_SKGE=m +CONFIG_SKY2=m +CONFIG_MYRI10GE=m CONFIG_FEALNX=m CONFIG_NATSEMI=m +CONFIG_NS83820=m +CONFIG_PCMCIA_AXNET=m +CONFIG_NE2000=m CONFIG_NE2K_PCI=m +CONFIG_PCMCIA_PCNET=m +CONFIG_ULTRA=m +CONFIG_FORCEDETH=m +CONFIG_HAMACHI=m +CONFIG_YELLOWFIN=m +CONFIG_QLA3XXX=m +CONFIG_QLGE=m +CONFIG_NETXEN_NIC=m CONFIG_8139CP=m CONFIG_8139TOO=m # CONFIG_8139TOO_PIO is not set CONFIG_8139TOO_8129=y +CONFIG_R8169=m CONFIG_R6040=m +CONFIG_SC92031=m CONFIG_SIS900=m +CONFIG_SIS190=m +CONFIG_SFC=m +CONFIG_PCMCIA_SMC91C92=m CONFIG_EPIC100=m -CONFIG_SUNDANCE=m +CONFIG_HAPPYMEAL=m +CONFIG_SUNGEM=m +CONFIG_CASSINI=m +CONFIG_NIU=m +CONFIG_TEHUTI=m CONFIG_TLAN=m CONFIG_VIA_RHINE=m CONFIG_VIA_RHINE_MMIO=y -CONFIG_SC92031=m -CONFIG_NET_POCKET=y -CONFIG_DE600=m -CONFIG_DE620=m -CONFIG_FEC_MPC52xx=m -CONFIG_ACENIC=m -CONFIG_DL2K=m -CONFIG_E1000=m -CONFIG_E1000E=m -CONFIG_IP1000=m -CONFIG_IGB=m -CONFIG_NS83820=m -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_R8169=m -CONFIG_R8169_VLAN=y -CONFIG_SIS190=m -CONFIG_SKGE=m -CONFIG_SKY2=m CONFIG_VIA_VELOCITY=m -CONFIG_TIGON3=m -CONFIG_BNX2=m -CONFIG_GIANFAR=m -CONFIG_MV643XX_ETH=m -CONFIG_QLA3XXX=m -CONFIG_ATL1=m -CONFIG_CHELSIO_T1=m -CONFIG_CHELSIO_T1_1G=y -CONFIG_CHELSIO_T3=m -CONFIG_IXGBE=m -CONFIG_IXGB=m -CONFIG_S2IO=m -CONFIG_MYRI10GE=m -CONFIG_NETXEN_NIC=m -CONFIG_NIU=m -CONFIG_TEHUTI=m -CONFIG_BNX2X=m -CONFIG_QLGE=m -CONFIG_SFC=m +CONFIG_PCMCIA_XIRC2PS=m +CONFIG_FDDI=y +CONFIG_SKFP=m +CONFIG_NET_SB1000=m +CONFIG_MARVELL_PHY=m +CONFIG_DAVICOM_PHY=m +CONFIG_QSEMI_PHY=m +CONFIG_LXT_PHY=m +CONFIG_CICADA_PHY=m +CONFIG_VITESSE_PHY=m +CONFIG_SMSC_PHY=m +CONFIG_BROADCOM_PHY=m +CONFIG_ICPLUS_PHY=m +CONFIG_REALTEK_PHY=m +CONFIG_PLIP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_FILTER=y +CONFIG_PPP_MPPE=m +CONFIG_PPP_MULTILINK=y +CONFIG_PPPOATM=m +CONFIG_PPPOE=m +CONFIG_PPP_ASYNC=m +CONFIG_PPP_SYNC_TTY=m +CONFIG_SLIP=m +CONFIG_SLIP_COMPRESSED=y +CONFIG_SLIP_SMART=y CONFIG_USB_CATC=m CONFIG_USB_KAWETH=m CONFIG_USB_PEGASUS=m @@ -597,39 +593,6 @@ CONFIG_USB_ALI_M5632=y CONFIG_USB_AN2720=y CONFIG_USB_EPSON2888=y CONFIG_USB_KC2190=y -CONFIG_NET_PCMCIA=y -CONFIG_PCMCIA_3C589=m -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_PCMCIA_PCNET=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_PCMCIA_SMC91C92=m -CONFIG_PCMCIA_XIRC2PS=m -CONFIG_PCMCIA_AXNET=m -CONFIG_ATM_TCP=m -CONFIG_ATM_LANAI=m -CONFIG_ATM_ENI=m -CONFIG_ATM_NICSTAR=m -CONFIG_ATM_IDT77252=m -CONFIG_ATM_HE=m -CONFIG_FDDI=y -CONFIG_SKFP=m -CONFIG_PLIP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_MPPE=m -CONFIG_PPPOE=m -CONFIG_PPPOATM=m -CONFIG_SLIP=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_NET_FC=y -CONFIG_NETCONSOLE=m -CONFIG_NETCONSOLE_DYNAMIC=y -CONFIG_VIRTIO_NET=m # CONFIG_INPUT_MOUSEDEV_PSAUX is not set CONFIG_INPUT_JOYDEV=m CONFIG_INPUT_EVDEV=y @@ -670,10 +633,8 @@ CONFIG_TABLET_USB_ACECAD=m CONFIG_TABLET_USB_AIPTEK=m CONFIG_TABLET_USB_GTCO=m CONFIG_TABLET_USB_KBTAB=m -CONFIG_TABLET_USB_WACOM=m CONFIG_INPUT_MISC=y CONFIG_INPUT_PCSPKR=m -CONFIG_INPUT_ATI_REMOTE=m CONFIG_INPUT_ATI_REMOTE2=m CONFIG_INPUT_KEYSPAN_REMOTE=m CONFIG_INPUT_POWERMATE=m @@ -685,16 +646,16 @@ CONFIG_GAMEPORT_NS558=m CONFIG_GAMEPORT_L4=m CONFIG_GAMEPORT_EMU10K1=m CONFIG_GAMEPORT_FM801=m -CONFIG_VT_HW_CONSOLE_BINDING=y -# CONFIG_DEVKMEM is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_NONSTANDARD=y CONFIG_ROCKETPORT=m CONFIG_CYCLADES=m CONFIG_SYNCLINK=m CONFIG_SYNCLINKMP=m CONFIG_SYNCLINK_GT=m -CONFIG_N_HDLC=m CONFIG_NOZOMI=m +CONFIG_N_HDLC=m +# CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_CS=m @@ -711,8 +672,6 @@ CONFIG_SERIAL_MPC52xx_CONSOLE=y CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200 CONFIG_SERIAL_JSM=m CONFIG_SERIAL_OF_PLATFORM=y -# CONFIG_LEGACY_PTYS is not set -CONFIG_BRIQ_PANEL=m CONFIG_PRINTER=m CONFIG_LP_CONSOLE=y CONFIG_PPDEV=m @@ -735,6 +694,7 @@ CONFIG_I2C_TINY_USB=m CONFIG_I2C_PCA_ISA=m CONFIG_I2C_STUB=m CONFIG_GPIO_SYSFS=y +CONFIG_GPIO_MPC8XXX=y CONFIG_W1=m CONFIG_W1_MASTER_DS2490=m CONFIG_W1_MASTER_DS2482=m @@ -754,15 +714,13 @@ CONFIG_SENSORS_ADM1029=m CONFIG_SENSORS_ADM1031=m CONFIG_SENSORS_ADM9240=m CONFIG_SENSORS_ADT7470=m -CONFIG_SENSORS_AMS=m CONFIG_SENSORS_ATXP1=m CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_F71805F=m -CONFIG_SENSORS_F71882FG=m CONFIG_SENSORS_F75375S=m CONFIG_SENSORS_GL518SM=m CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_IT87=m +CONFIG_SENSORS_MAX1619=m +CONFIG_SENSORS_MAX6650=m CONFIG_SENSORS_LM63=m CONFIG_SENSORS_LM75=m CONFIG_SENSORS_LM77=m @@ -774,20 +732,12 @@ CONFIG_SENSORS_LM87=m CONFIG_SENSORS_LM90=m CONFIG_SENSORS_LM92=m CONFIG_SENSORS_LM93=m -CONFIG_SENSORS_MAX1619=m -CONFIG_SENSORS_MAX6650=m -CONFIG_SENSORS_PC87360=m -CONFIG_SENSORS_PC87427=m CONFIG_SENSORS_PCF8591=m CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_DME1737=m -CONFIG_SENSORS_SMSC47M1=m CONFIG_SENSORS_SMSC47M192=m -CONFIG_SENSORS_SMSC47B397=m CONFIG_SENSORS_ADS7828=m CONFIG_SENSORS_THMC50=m CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_VT1211=m CONFIG_SENSORS_VT8231=m CONFIG_SENSORS_W83781D=m CONFIG_SENSORS_W83791D=m @@ -795,8 +745,6 @@ CONFIG_SENSORS_W83792D=m CONFIG_SENSORS_W83793=m CONFIG_SENSORS_W83L785TS=m CONFIG_SENSORS_W83L786NG=m -CONFIG_SENSORS_W83627HF=m -CONFIG_SENSORS_W83627EHF=m CONFIG_THERMAL=y CONFIG_WATCHDOG=y CONFIG_SOFT_WATCHDOG=m @@ -815,7 +763,6 @@ CONFIG_DRM_MGA=m CONFIG_DRM_SIS=m CONFIG_DRM_VIA=m CONFIG_DRM_SAVAGE=m -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FB_CIRRUS=m CONFIG_FB_OF=y @@ -850,10 +797,8 @@ CONFIG_FB_TRIDENT=m CONFIG_FB_SM501=m CONFIG_FB_IBM_GXT4500=y CONFIG_LCD_PLATFORM=m -CONFIG_DISPLAY_SUPPORT=m CONFIG_VGACON_SOFT_SCROLLBACK=y CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set @@ -897,7 +842,6 @@ CONFIG_SND_CMIPCI=m CONFIG_SND_OXYGEN=m CONFIG_SND_CS4281=m CONFIG_SND_CS46XX=m -CONFIG_SND_CS5530=m CONFIG_SND_DARLA20=m CONFIG_SND_GINA20=m CONFIG_SND_LAYLA20=m @@ -919,7 +863,6 @@ CONFIG_SND_ES1968=m CONFIG_SND_FM801=m CONFIG_SND_HDSP=m CONFIG_SND_HDSPM=m -CONFIG_SND_HIFIER=m CONFIG_SND_ICE1712=m CONFIG_SND_ICE1724=m CONFIG_SND_KORG1212=m @@ -949,8 +892,6 @@ CONFIG_SND_USB_CAIAQ=m CONFIG_SND_USB_CAIAQ_INPUT=y # CONFIG_SND_PCMCIA is not set CONFIG_HIDRAW=y -CONFIG_HID_PID=y -CONFIG_USB_HIDDEV=y CONFIG_HID_GYRATION=y CONFIG_LOGITECH_FF=y CONFIG_LOGIRUMBLEPAD2_FF=y @@ -960,12 +901,12 @@ CONFIG_HID_PETALYNX=y CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SUNPLUS=y +CONFIG_HID_PID=y +CONFIG_USB_HIDDEV=y CONFIG_USB=y CONFIG_USB_ANNOUNCE_NEW_DEVICES=y -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=y CONFIG_USB_EHCI_HCD=m -CONFIG_USB_EHCI_TT_NEWSCHED=y CONFIG_USB_EHCI_FSL=y CONFIG_USB_OHCI_HCD=m CONFIG_USB_OHCI_HCD_PPC_OF_BE=y @@ -1001,7 +942,6 @@ CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m CONFIG_USB_SERIAL_CYPRESS_M8=m CONFIG_USB_SERIAL_EMPEG=m CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_FUNSOFT=m CONFIG_USB_SERIAL_VISOR=m CONFIG_USB_SERIAL_IPAQ=m CONFIG_USB_SERIAL_IR=m @@ -1016,12 +956,10 @@ CONFIG_USB_SERIAL_KOBIL_SCT=m CONFIG_USB_SERIAL_MCT_U232=m CONFIG_USB_SERIAL_MOS7720=m CONFIG_USB_SERIAL_MOS7840=m -CONFIG_USB_SERIAL_MOTOROLA=m CONFIG_USB_SERIAL_NAVMAN=m CONFIG_USB_SERIAL_PL2303=m CONFIG_USB_SERIAL_OTI6858=m CONFIG_USB_SERIAL_SPCP8X5=m -CONFIG_USB_SERIAL_HP4X=m CONFIG_USB_SERIAL_SAFE=m CONFIG_USB_SERIAL_SAFE_PADDED=y CONFIG_USB_SERIAL_SIERRAWIRELESS=m @@ -1083,12 +1021,14 @@ CONFIG_RTC_DRV_M48T35=m CONFIG_RTC_DRV_M48T59=m CONFIG_RTC_DRV_V3020=m CONFIG_DMADEVICES=y +CONFIG_PPC_BESTCOMM=y CONFIG_AUXDISPLAY=y CONFIG_KS0108=m CONFIG_UIO=m CONFIG_UIO_CIF=m -CONFIG_UIO_PDRV=m CONFIG_UIO_PDRV_GENIRQ=m +CONFIG_VIRTIO_PCI=m +CONFIG_VIRTIO_BALLOON=m CONFIG_EXT2_FS=m CONFIG_EXT2_FS_XATTR=y CONFIG_EXT2_FS_POSIX_ACL=y @@ -1111,12 +1051,7 @@ CONFIG_XFS_FS=m CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y CONFIG_GFS2_FS=m -CONFIG_OCFS2_FS=m -# CONFIG_OCFS2_DEBUG_MASKLOG is not set CONFIG_QUOTA_NETLINK_INTERFACE=y -# CONFIG_PRINT_QUOTA_WARNING is not set -CONFIG_QFMT_V2=y -CONFIG_AUTOFS_FS=m CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=m CONFIG_ISO9660_FS=y @@ -1145,20 +1080,17 @@ CONFIG_ROMFS_FS=m CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=m -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=y +CONFIG_NFS_V4=m CONFIG_NFSD=m CONFIG_NFSD_V3_ACL=y CONFIG_NFSD_V4=y -CONFIG_RPCSEC_GSS_SPKM3=m CONFIG_CIFS=m CONFIG_CIFS_WEAK_PW_HASH=y CONFIG_CIFS_UPCALL=y CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y CONFIG_CIFS_DFS_UPCALL=y -CONFIG_CIFS_EXPERIMENTAL=y CONFIG_NCP_FS=m CONFIG_NCPFS_PACKET_SIGNING=y CONFIG_NCPFS_IOCTL_LOCKING=y @@ -1170,17 +1102,6 @@ CONFIG_NCPFS_NLS=y CONFIG_NCPFS_EXTRAS=y CONFIG_CODA_FS=m CONFIG_9P_FS=m -CONFIG_PARTITION_ADVANCED=y -CONFIG_OSF_PARTITION=y -CONFIG_AMIGA_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -CONFIG_SGI_PARTITION=y -CONFIG_SUN_PARTITION=y -CONFIG_KARMA_PARTITION=y -CONFIG_EFI_PARTITION=y CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_737=m @@ -1219,29 +1140,27 @@ CONFIG_NLS_ISO8859_14=m CONFIG_NLS_ISO8859_15=m CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m -CONFIG_DLM=m -CONFIG_DLM_DEBUG=y +CONFIG_DEBUG_INFO=y # CONFIG_ENABLE_WARN_DEPRECATED is not set -CONFIG_MAGIC_SYSRQ=y CONFIG_UNUSED_SYMBOLS=y CONFIG_HEADERS_CHECK=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_SHIRQ=y -CONFIG_TIMER_STATS=y CONFIG_DEBUG_OBJECTS=y CONFIG_DEBUG_OBJECTS_FREE=y CONFIG_DEBUG_OBJECTS_TIMERS=y CONFIG_SLUB_DEBUG_ON=y +CONFIG_DEBUG_STACK_USAGE=y +CONFIG_DEBUG_VM=y +CONFIG_DEBUG_HIGHMEM=y +CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_DEBUG_SHIRQ=y +CONFIG_TIMER_STATS=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y -CONFIG_DEBUG_HIGHMEM=y -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_VM=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_FAULT_INJECTION=y CONFIG_FAILSLAB=y CONFIG_FAIL_PAGE_ALLOC=y @@ -1250,17 +1169,12 @@ CONFIG_FAIL_IO_TIMEOUT=y CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LATENCYTOP=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_SCHED_TRACER=y CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y -CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_DEBUG_STACK_USAGE=y CONFIG_XMON=y CONFIG_BOOTX_TEXT=y CONFIG_PPC_EARLY_DEBUG=y -CONFIG_PPC_EARLY_DEBUG_BOOTX=y -CONFIG_KEYS=y CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y @@ -1268,9 +1182,7 @@ CONFIG_SECURITY_NETWORK_XFRM=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y -CONFIG_CRYPTO_NULL=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_LRW=m @@ -1278,14 +1190,12 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_XTS=m CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD128=m CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA1=y -CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m @@ -1306,5 +1216,3 @@ CONFIG_CRYPTO_DEV_HIFN_795X=m CONFIG_CRYPTO_DEV_HIFN_795X_RNG=y CONFIG_CRYPTO_DEV_TALITOS=m CONFIG_VIRTUALIZATION=y -CONFIG_VIRTIO_PCI=m -CONFIG_VIRTIO_BALLOON=m diff --git a/arch/powerpc/configs/pq2fads_defconfig b/arch/powerpc/configs/pq2fads_defconfig index baad8db21b6132..3e336ee8bb4a56 100644 --- a/arch/powerpc/configs/pq2fads_defconfig +++ b/arch/powerpc/configs/pq2fads_defconfig @@ -1,20 +1,19 @@ CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y CONFIG_KALLSYMS_ALL=y +CONFIG_PARTITION_ADVANCED=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_82xx=y CONFIG_PQ2FADS=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BINFMT_MISC=y -CONFIG_SPARSE_IRQ=y CONFIG_PCI=y -# CONFIG_8260_PCI9 is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -29,7 +28,6 @@ CONFIG_NETFILTER=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_ADV_OPTIONS=y @@ -41,55 +39,43 @@ CONFIG_MTD_CFI_GEOMETRY=y CONFIG_MTD_CFI_I4=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_IDE=y CONFIG_NETDEVICES=y CONFIG_TUN=y -CONFIG_DAVICOM_PHY=y -CONFIG_NET_ETHERNET=y CONFIG_FS_ENET=y # CONFIG_FS_ENET_HAS_SCC is not set CONFIG_FS_ENET_MDIO_FCC=y +CONFIG_DAVICOM_PHY=y CONFIG_PPP=y +CONFIG_PPP_DEFLATE=y CONFIG_PPP_ASYNC=y CONFIG_PPP_SYNC_TTY=y -CONFIG_PPP_DEFLATE=y CONFIG_INPUT_EVDEV=y -# CONFIG_SERIO_I8042 is not set # CONFIG_VT is not set CONFIG_SERIAL_CPM=y CONFIG_SERIAL_CPM_CONSOLE=y # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=y -# CONFIG_HID_SUPPORT is not set CONFIG_USB_GADGET=y -CONFIG_USB_GADGET_M66592=y +CONFIG_USB_ETH=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_AUTOFS4_FS=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -CONFIG_NLS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y +CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set -CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_BDI_SWITCH=y CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 879de5efb073e3..1078851aa4dc5a 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -8,7 +8,6 @@ CONFIG_POSIX_MQUEUE=y CONFIG_FHANDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y -CONFIG_RD_LZMA=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_EMBEDDED=y # CONFIG_PERF_EVENTS is not set @@ -64,11 +63,9 @@ CONFIG_BT_HCIBTUSB=m CONFIG_CFG80211=m CONFIG_CFG80211_WEXT=y CONFIG_MAC80211=m -CONFIG_MAC80211_RC_PID=y # CONFIG_MAC80211_RC_MINSTREL is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FIRMWARE_IN_KERNEL is not set -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65535 @@ -76,7 +73,6 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y CONFIG_CHR_DEV_SG=m -CONFIG_SCSI_MULTI_LUN=y # CONFIG_SCSI_LOWLEVEL is not set CONFIG_MD=y CONFIG_BLK_DEV_DM=m @@ -107,7 +103,6 @@ CONFIG_INPUT_EVDEV=m # CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set -CONFIG_VIDEO_OUTPUT_CONTROL=m CONFIG_FB=y CONFIG_FB_PS3=y # CONFIG_VGA_CONSOLE is not set @@ -130,8 +125,6 @@ CONFIG_HID_TWINHAN=m CONFIG_HID_LOGITECH=m CONFIG_HID_LOGITECH_DJ=m CONFIG_HID_MICROSOFT=m -CONFIG_HID_PS3REMOTE=m -CONFIG_HID_SONY=m CONFIG_HID_SUNPLUS=m CONFIG_HID_SMARTJOYPLUS=m CONFIG_USB_HIDDEV=y @@ -169,18 +162,17 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_CRC_CCITT=m CONFIG_CRC_T10DIF=y -CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_DETECT_HUNG_TASK=y CONFIG_PROVE_LOCKING=y CONFIG_DEBUG_LOCKDEP=y -CONFIG_DEBUG_INFO=y -CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEBUG_LIST=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 # CONFIG_FTRACE is not set -CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_MICHAEL_MIC=m diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 1f97364017c747..145e5b0f7b2453 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -1,6 +1,4 @@ CONFIG_PPC64=y -CONFIG_ALTIVEC=y -CONFIG_VSX=y CONFIG_SMP=y CONFIG_NR_CPUS=2048 CONFIG_SYSVIPC=y @@ -40,6 +38,7 @@ CONFIG_DTL=y # CONFIG_PPC_PMAC is not set CONFIG_RTAS_FLASH=m CONFIG_IBMEBUS=y +CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_HZ_100=y CONFIG_BINFMT_MISC=m CONFIG_PPC_TRANSACTIONAL_MEM=y @@ -47,6 +46,7 @@ CONFIG_KEXEC=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y +CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_PPC_64K_PAGES=y CONFIG_PPC_SUBPAGE_PROT=y CONFIG_SCHED_SMT=y @@ -72,7 +72,6 @@ CONFIG_BRIDGE=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -CONFIG_PROC_DEVICETREE=y CONFIG_PARPORT=m CONFIG_PARPORT_PC=m CONFIG_BLK_DEV_FD=m @@ -90,7 +89,6 @@ CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_CXGB3_ISCSI=m @@ -270,7 +268,6 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y @@ -304,7 +301,4 @@ CONFIG_CRYPTO_DEV_NX=y CONFIG_CRYPTO_DEV_NX_ENCRYPT=m CONFIG_VIRTUALIZATION=y CONFIG_KVM_BOOK3S_64=m -CONFIG_KVM_BOOK3S_64_HV=y -CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y -CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +CONFIG_KVM_BOOK3S_64_HV=m diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig index ac7ca585282795..4ff694c5c855c0 100644 --- a/arch/powerpc/configs/pseries_le_defconfig +++ b/arch/powerpc/configs/pseries_le_defconfig @@ -1,6 +1,4 @@ CONFIG_PPC64=y -CONFIG_ALTIVEC=y -CONFIG_VSX=y CONFIG_SMP=y CONFIG_NR_CPUS=2048 CONFIG_CPU_LITTLE_ENDIAN=y @@ -41,6 +39,7 @@ CONFIG_DTL=y # CONFIG_PPC_PMAC is not set CONFIG_RTAS_FLASH=m CONFIG_IBMEBUS=y +CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_HZ_100=y CONFIG_BINFMT_MISC=m CONFIG_PPC_TRANSACTIONAL_MEM=y @@ -48,6 +47,7 @@ CONFIG_KEXEC=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y +CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_PPC_64K_PAGES=y CONFIG_PPC_SUBPAGE_PROT=y CONFIG_SCHED_SMT=y @@ -73,7 +73,6 @@ CONFIG_BRIDGE=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -CONFIG_PROC_DEVICETREE=y CONFIG_PARPORT=m CONFIG_PARPORT_PC=m CONFIG_BLK_DEV_FD=m @@ -91,7 +90,6 @@ CONFIG_CHR_DEV_ST=y CONFIG_BLK_DEV_SR=y CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_FC_ATTRS=y CONFIG_SCSI_CXGB3_ISCSI=m @@ -271,7 +269,6 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_CRC_T10DIF=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y @@ -300,11 +297,6 @@ CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m # CONFIG_CRYPTO_ANSI_CPRNG is not set -CONFIG_CRYPTO_DEV_NX=y -CONFIG_CRYPTO_DEV_NX_ENCRYPT=m CONFIG_VIRTUALIZATION=y CONFIG_KVM_BOOK3S_64=m -CONFIG_KVM_BOOK3S_64_HV=y -CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y -CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y +CONFIG_KVM_BOOK3S_64_HV=m diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig index 60ad2c08caa60d..b5db7dffe86d4c 100644 --- a/arch/powerpc/configs/storcenter_defconfig +++ b/arch/powerpc/configs/storcenter_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y @@ -6,13 +5,13 @@ CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y # CONFIG_PPC_CHRP is not set # CONFIG_PPC_PMAC is not set CONFIG_EMBEDDED6xx=y CONFIG_STORCENTER=y CONFIG_HZ_100=y CONFIG_BINFMT_MISC=y -CONFIG_SPARSE_IRQ=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0,115200" # CONFIG_SECCOMP is not set @@ -29,11 +28,8 @@ CONFIG_IP_PNP_DHCP=y # CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" -# CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_FTL=y CONFIG_NFTL=y @@ -41,7 +37,6 @@ CONFIG_NFTL_RW=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP=y -CONFIG_PROC_DEVICETREE=y CONFIG_IDE=y CONFIG_BLK_DEV_VIA82CXXX=y CONFIG_SCSI=y @@ -57,7 +52,6 @@ CONFIG_MD_RAID456=y CONFIG_NETDEVICES=y CONFIG_DUMMY=m CONFIG_R8169=y -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set @@ -81,12 +75,10 @@ CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set CONFIG_XFS_FS=m -CONFIG_INOTIFY=y CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y # CONFIG_NETWORK_FILESYSTEMS is not set -CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_DEFAULT="utf8" CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y @@ -94,5 +86,3 @@ CONFIG_NLS_UTF8=y CONFIG_CRC_T10DIF=y # CONFIG_ENABLE_WARN_DEPRECATED is not set # CONFIG_ENABLE_MUST_CHECK is not set -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_SYSCTL_SYSCALL_CHECK=y diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig index 7fe277a7b42212..4c973c5321c60c 100644 --- a/arch/powerpc/configs/tqm8xx_defconfig +++ b/arch/powerpc/configs/tqm8xx_defconfig @@ -1,12 +1,10 @@ CONFIG_PPC_8xx=y -CONFIG_EXPERIMENTAL=y # CONFIG_SWAP is not set CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSFS_DEPRECATED_V2=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_ELF_CORE is not set # CONFIG_BASE_FULL is not set # CONFIG_FUTEX is not set @@ -15,15 +13,12 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_SRCVERSION_ALL=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y # CONFIG_IOSCHED_CFQ is not set CONFIG_TQM8XX=y CONFIG_8xx_COPYBACK=y # CONFIG_8xx_CPU15 is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_HZ_100=y -CONFIG_8XX_MINIMAL_FPEMU=y -CONFIG_SPARSE_IRQ=y # CONFIG_SECCOMP is not set CONFIG_NET=y CONFIG_PACKET=y @@ -40,32 +35,24 @@ CONFIG_SYN_COOKIES=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_OF_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_PROC_DEVICETREE=y # CONFIG_BLK_DEV is not set -# CONFIG_MISC_DEVICES is not set CONFIG_NETDEVICES=y +CONFIG_FS_ENET=y CONFIG_DAVICOM_PHY=y CONFIG_FIXED_PHY=y -CONFIG_NET_ETHERNET=y -CONFIG_FS_ENET=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_WLAN is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_CPM=y CONFIG_SERIAL_CPM_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set CONFIG_HW_RANDOM=y CONFIG_GEN_RTC=y # CONFIG_HWMON is not set @@ -74,13 +61,8 @@ CONFIG_GEN_RTC=y CONFIG_TMPFS=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y -# CONFIG_CRC32 is not set +CONFIG_CRC32_SLICEBY4=y +CONFIG_DEBUG_INFO=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y -CONFIG_DEBUG_INFO=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set -CONFIG_CRC32_SLICEBY4=y diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig index 1e2b7d062aa42c..34eaf528fa87f4 100644 --- a/arch/powerpc/configs/wii_defconfig +++ b/arch/powerpc/configs/wii_defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y CONFIG_LOCALVERSION="-wii" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y @@ -6,7 +5,6 @@ CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_BLK_DEV_INITRD=y -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set CONFIG_PERF_EVENTS=y @@ -22,7 +20,6 @@ CONFIG_WII=y CONFIG_PREEMPT=y CONFIG_BINFMT_MISC=m CONFIG_KEXEC=y -# CONFIG_MIGRATION is not set # CONFIG_SECCOMP is not set CONFIG_ADVANCED_OPTIONS=y CONFIG_NET=y @@ -39,7 +36,6 @@ CONFIG_IP_PNP_RARP=y # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set CONFIG_BT=y -CONFIG_BT_L2CAP=y CONFIG_BT_RFCOMM=y CONFIG_BT_BNEP=y CONFIG_BT_BNEP_MC_FILTER=y @@ -49,18 +45,12 @@ CONFIG_MAC80211=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_STANDALONE is not set # CONFIG_FIRMWARE_IN_KERNEL is not set -CONFIG_PROC_DEVICETREE=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_COUNT=2 CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_B43=y CONFIG_B43_SDIO=y # CONFIG_B43_PHY_LP is not set @@ -78,8 +68,8 @@ CONFIG_INPUT_MISC=y CONFIG_INPUT_UINPUT=y # CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set -# CONFIG_DEVKMEM is not set CONFIG_LEGACY_PTY_COUNT=64 +# CONFIG_DEVKMEM is not set # CONFIG_HW_RANDOM is not set CONFIG_NVRAM=y CONFIG_I2C=y @@ -109,7 +99,6 @@ CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set -CONFIG_INOTIFY=y CONFIG_FUSE_FS=m CONFIG_ISO9660_FS=y CONFIG_JOLIET=y @@ -119,7 +108,6 @@ CONFIG_PROC_KCORE=y # CONFIG_PROC_PAGE_MONITOR is not set CONFIG_TMPFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_CIFS=m CONFIG_NLS_CODEPAGE_437=y @@ -127,13 +115,9 @@ CONFIG_NLS_ISO8859_1=y CONFIG_CRC_CCITT=y CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y -CONFIG_DEBUG_SPINLOCK_SLEEP=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_LATENCYTOP=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_SCHED_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_DMA_API_DEBUG=y From fb892bd0fdcb2e5eac9c105cf68def90396ed8cc Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Mon, 19 Jan 2015 15:15:36 +0000 Subject: [PATCH 158/601] ARM: kprobes: Eliminate test code's use of BX instruction on ARMv4 CPUs Non-T variants of ARMv4 CPUs don't support the BX instruction so eliminate its use. Signed-off-by: Jon Medhurst --- arch/arm/probes/kprobes/test-arm.c | 3 +++ arch/arm/probes/kprobes/test-core.c | 10 +++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/arm/probes/kprobes/test-arm.c b/arch/arm/probes/kprobes/test-arm.c index e72b07e8cd9acc..8866aedfdea2c9 100644 --- a/arch/arm/probes/kprobes/test-arm.c +++ b/arch/arm/probes/kprobes/test-arm.c @@ -215,9 +215,12 @@ void kprobe_arm_test_cases(void) TEST_UNSUPPORTED("msr cpsr_f, lr") TEST_UNSUPPORTED("msr spsr, r0") +#if __LINUX_ARM_ARCH__ >= 5 || \ + (__LINUX_ARM_ARCH__ == 4 && !defined(CONFIG_CPU_32v4)) TEST_BF_R("bx r",0,2f,"") TEST_BB_R("bx r",7,2f,"") TEST_BF_R("bxeq r",14,2f,"") +#endif #if __LINUX_ARM_ARCH__ >= 5 TEST_R("clz r0, r",0, 0x0,"") diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index e495127d757105..9775de22e2ffa3 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -236,6 +236,8 @@ static int tests_failed; #ifndef CONFIG_THUMB2_KERNEL +#define RET(reg) "mov pc, "#reg + long arm_func(long r0, long r1); static void __used __naked __arm_kprobes_test_func(void) @@ -245,7 +247,7 @@ static void __used __naked __arm_kprobes_test_func(void) ".type arm_func, %%function \n\t" "arm_func: \n\t" "adds r0, r0, r1 \n\t" - "bx lr \n\t" + "mov pc, lr \n\t" ".code "NORMAL_ISA /* Back to Thumb if necessary */ : : : "r0", "r1", "cc" ); @@ -253,6 +255,8 @@ static void __used __naked __arm_kprobes_test_func(void) #else /* CONFIG_THUMB2_KERNEL */ +#define RET(reg) "bx "#reg + long thumb16_func(long r0, long r1); long thumb32even_func(long r0, long r1); long thumb32odd_func(long r0, long r1); @@ -494,7 +498,7 @@ static void __naked benchmark_nop(void) { __asm__ __volatile__ ( "nop \n\t" - "bx lr" + RET(lr)" \n\t" ); } @@ -977,7 +981,7 @@ void __naked __kprobes_test_case_start(void) "bic r0, lr, #1 @ r0 = inline data \n\t" "mov r1, sp \n\t" "bl kprobes_test_case_start \n\t" - "bx r0 \n\t" + RET(r0)" \n\t" ); } From 2fded7f44b8fcf79e274c3f0cfbd0298f95308f3 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Tue, 23 Dec 2014 16:39:54 -0500 Subject: [PATCH 159/601] audit: remove vestiges of vers_ops Should have been removed with commit 18900909 ("audit: remove the old depricated kernel interface"). Signed-off-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/linux/audit.h | 1 - kernel/auditfilter.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/include/linux/audit.h b/include/linux/audit.h index 93331929d64310..b481779a8dee5b 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -46,7 +46,6 @@ struct audit_tree; struct sk_buff; struct audit_krule { - int vers_ops; u32 pflags; u32 flags; u32 listnr; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 103586e239a23c..81c94d739e3f62 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -425,7 +425,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, goto exit_nofree; bufp = data->buf; - entry->rule.vers_ops = 2; for (i = 0; i < data->field_count; i++) { struct audit_field *f = &entry->rule.fields[i]; @@ -758,7 +757,6 @@ struct audit_entry *audit_dupe_rule(struct audit_krule *old) return ERR_PTR(-ENOMEM); new = &entry->rule; - new->vers_ops = old->vers_ops; new->flags = old->flags; new->pflags = old->pflags; new->listnr = old->listnr; From 743410a03bbf110da8942a715cf1358344ecc281 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 20 Jan 2015 12:03:35 +0200 Subject: [PATCH 160/601] tpm: fix format string error in tpm-chip.c dev_set_name() takes three arguments where the second argument is a format string. This patch fixes the call accordingly in tpm-chip.c Reported-by: kbuild test robot Fixes: 313d21eeab92 ("tpm: device class for tpm") Signed-off-by: Jarkko Sakkinen Signed-off-by: Peter Huewe --- drivers/char/tpm/tpm-chip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 6459af7c164604..1d278ccd751f07 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -125,7 +125,7 @@ struct tpm_chip *tpmm_chip_alloc(struct device *dev, else chip->dev.devt = MKDEV(MAJOR(tpm_devt), chip->dev_num); - dev_set_name(&chip->dev, chip->devname); + dev_set_name(&chip->dev, "%s", chip->devname); device_initialize(&chip->dev); From 5e7270a6dd14fa6e3bb10128f200305b4a75f350 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Fri, 12 Dec 2014 17:19:19 -0800 Subject: [PATCH 161/601] Smack: Rework file hooks This is one of those cases where you look at code you did years ago and wonder what you might have been thinking. There are a number of LSM hooks that work off of file pointers, and most of them really want the security data from the inode. Some, however, really want the security context that the process had when the file was opened. The difference went undetected in Smack until it started getting used in a real system with real testing. At that point it was clear that something was amiss. This patch corrects the misuse of the f_security value in several of the hooks. The behavior will not usually be any different, as the process had to be able to open the file in the first place, and the old check almost always succeeded, as will the new, but for different reasons. Thanks to the Samsung Tizen development team that identified this. Signed-off-by: Casey Schaufler --- security/smack/smack_lsm.c | 40 ++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 654345de62e7e6..1fa72317bbecff 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -160,7 +160,7 @@ static int smk_bu_file(struct file *file, int mode, int rc) { struct task_smack *tsp = current_security(); struct smack_known *sskp = tsp->smk_task; - struct inode *inode = file->f_inode; + struct inode *inode = file_inode(file); char acc[SMK_NUM_ACCESS_TYPE + 1]; if (rc <= 0) @@ -168,7 +168,7 @@ static int smk_bu_file(struct file *file, int mode, int rc) smk_bu_mode(mode, acc); pr_info("Smack Bringup: (%s %s %s) file=(%s %ld %pD) %s\n", - sskp->smk_known, (char *)file->f_security, acc, + sskp->smk_known, smk_of_inode(inode)->smk_known, acc, inode->i_sb->s_id, inode->i_ino, file, current->comm); return 0; @@ -1347,6 +1347,9 @@ static int smack_file_permission(struct file *file, int mask) * The security blob for a file is a pointer to the master * label list, so no allocation is done. * + * f_security is the owner security information. It + * isn't used on file access checks, it's for send_sigio. + * * Returns 0 */ static int smack_file_alloc_security(struct file *file) @@ -1384,17 +1387,18 @@ static int smack_file_ioctl(struct file *file, unsigned int cmd, { int rc = 0; struct smk_audit_info ad; + struct inode *inode = file_inode(file); smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path(&ad, file->f_path); if (_IOC_DIR(cmd) & _IOC_WRITE) { - rc = smk_curacc(file->f_security, MAY_WRITE, &ad); + rc = smk_curacc(smk_of_inode(inode), MAY_WRITE, &ad); rc = smk_bu_file(file, MAY_WRITE, rc); } if (rc == 0 && (_IOC_DIR(cmd) & _IOC_READ)) { - rc = smk_curacc(file->f_security, MAY_READ, &ad); + rc = smk_curacc(smk_of_inode(inode), MAY_READ, &ad); rc = smk_bu_file(file, MAY_READ, rc); } @@ -1412,10 +1416,11 @@ static int smack_file_lock(struct file *file, unsigned int cmd) { struct smk_audit_info ad; int rc; + struct inode *inode = file_inode(file); smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path(&ad, file->f_path); - rc = smk_curacc(file->f_security, MAY_LOCK, &ad); + rc = smk_curacc(smk_of_inode(inode), MAY_LOCK, &ad); rc = smk_bu_file(file, MAY_LOCK, rc); return rc; } @@ -1437,7 +1442,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, { struct smk_audit_info ad; int rc = 0; - + struct inode *inode = file_inode(file); switch (cmd) { case F_GETLK: @@ -1446,14 +1451,14 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, case F_SETLKW: smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path(&ad, file->f_path); - rc = smk_curacc(file->f_security, MAY_LOCK, &ad); + rc = smk_curacc(smk_of_inode(inode), MAY_LOCK, &ad); rc = smk_bu_file(file, MAY_LOCK, rc); break; case F_SETOWN: case F_SETSIG: smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path(&ad, file->f_path); - rc = smk_curacc(file->f_security, MAY_WRITE, &ad); + rc = smk_curacc(smk_of_inode(inode), MAY_WRITE, &ad); rc = smk_bu_file(file, MAY_WRITE, rc); break; default: @@ -1571,14 +1576,10 @@ static int smack_mmap_file(struct file *file, * smack_file_set_fowner - set the file security blob value * @file: object in question * - * Returns 0 - * Further research may be required on this one. */ static void smack_file_set_fowner(struct file *file) { - struct smack_known *skp = smk_of_current(); - - file->f_security = skp; + file->f_security = smk_of_current(); } /** @@ -1630,6 +1631,7 @@ static int smack_file_receive(struct file *file) int rc; int may = 0; struct smk_audit_info ad; + struct inode *inode = file_inode(file); smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path(&ad, file->f_path); @@ -1641,7 +1643,7 @@ static int smack_file_receive(struct file *file) if (file->f_mode & FMODE_WRITE) may |= MAY_WRITE; - rc = smk_curacc(file->f_security, may, &ad); + rc = smk_curacc(smk_of_inode(inode), may, &ad); rc = smk_bu_file(file, may, rc); return rc; } @@ -1661,21 +1663,17 @@ static int smack_file_receive(struct file *file) static int smack_file_open(struct file *file, const struct cred *cred) { struct task_smack *tsp = cred->security; - struct inode_smack *isp = file_inode(file)->i_security; + struct inode *inode = file_inode(file); struct smk_audit_info ad; int rc; - if (smack_privileged(CAP_MAC_OVERRIDE)) { - file->f_security = isp->smk_inode; + if (smack_privileged(CAP_MAC_OVERRIDE)) return 0; - } smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH); smk_ad_setfield_u_fs_path(&ad, file->f_path); - rc = smk_access(tsp->smk_task, isp->smk_inode, MAY_READ, &ad); + rc = smk_access(tsp->smk_task, smk_of_inode(inode), MAY_READ, &ad); rc = smk_bu_credfile(cred, file, MAY_READ, rc); - if (rc == 0) - file->f_security = isp->smk_inode; return rc; } From 69f287ae6fc8357e0bc561353a2d585b89ee8cdc Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Fri, 12 Dec 2014 17:08:40 -0800 Subject: [PATCH 162/601] Smack: secmark support for netfilter Smack uses CIPSO to label internet packets and thus provide for access control on delivery of packets. The netfilter facility was not used to allow for Smack to work properly without netfilter configuration. Smack does not need netfilter, however there are cases where it would be handy. As a side effect, the labeling of local IPv4 packets can be optimized and the handling of local IPv6 packets is just all out better. The best part is that the netfilter tools use "contexts" that are just strings, and they work just as well for Smack as they do for SELinux. All of the conditional compilation for IPv6 was implemented by Rafal Krypa Signed-off-by: Casey Schaufler --- security/smack/Kconfig | 12 ++++ security/smack/Makefile | 1 + security/smack/smack.h | 1 + security/smack/smack_lsm.c | 94 ++++++++++++++++++++++++++++--- security/smack/smack_netfilter.c | 96 ++++++++++++++++++++++++++++++++ 5 files changed, 196 insertions(+), 8 deletions(-) create mode 100644 security/smack/smack_netfilter.c diff --git a/security/smack/Kconfig b/security/smack/Kconfig index b065f978941855..271adae81796c2 100644 --- a/security/smack/Kconfig +++ b/security/smack/Kconfig @@ -28,3 +28,15 @@ config SECURITY_SMACK_BRINGUP access rule set once the behavior is well understood. This is a superior mechanism to the oft abused "permissive" mode of other systems. + If you are unsure how to answer this question, answer N. + +config SECURITY_SMACK_NETFILTER + bool "Packet marking using secmarks for netfilter" + depends on SECURITY_SMACK + depends on NETWORK_SECMARK + depends on NETFILTER + default n + help + This enables security marking of network packets using + Smack labels. + If you are unsure how to answer this question, answer N. diff --git a/security/smack/Makefile b/security/smack/Makefile index 67a63aaec827be..616cf93b368ed7 100644 --- a/security/smack/Makefile +++ b/security/smack/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_SECURITY_SMACK) := smack.o smack-y := smack_lsm.o smack_access.o smackfs.o +smack-$(CONFIG_NETFILTER) += smack_netfilter.o diff --git a/security/smack/smack.h b/security/smack/smack.h index b828a379377c9c..7629eaeb1fb281 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -248,6 +248,7 @@ struct smack_known *smk_find_entry(const char *); /* * Shared data. */ +extern int smack_enabled; extern int smack_cipso_direct; extern int smack_cipso_mapped; extern struct smack_known *smack_net_ambient; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 1fa72317bbecff..81b30e32c52674 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -52,8 +52,11 @@ #define SMK_RECEIVING 1 #define SMK_SENDING 2 +#if IS_ENABLED(CONFIG_IPV6) && !defined(CONFIG_SECURITY_SMACK_NETFILTER) LIST_HEAD(smk_ipv6_port_list); +#endif /* CONFIG_IPV6 && !CONFIG_SECURITY_SMACK_NETFILTER */ static struct kmem_cache *smack_inode_cache; +int smack_enabled; #ifdef CONFIG_SECURITY_SMACK_BRINGUP static void smk_bu_mode(int mode, char *s) @@ -2213,6 +2216,7 @@ static int smack_netlabel_send(struct sock *sk, struct sockaddr_in *sap) return smack_netlabel(sk, sk_lbl); } +#if IS_ENABLED(CONFIG_IPV6) && !defined(CONFIG_SECURITY_SMACK_NETFILTER) /** * smk_ipv6_port_label - Smack port access table management * @sock: socket @@ -2362,6 +2366,7 @@ static int smk_ipv6_port_check(struct sock *sk, struct sockaddr_in6 *address, rc = smk_bu_note("IPv6 port check", skp, object, MAY_WRITE, rc); return rc; } +#endif /* CONFIG_IPV6 && !CONFIG_SECURITY_SMACK_NETFILTER */ /** * smack_inode_setsecurity - set smack xattrs @@ -2422,8 +2427,10 @@ static int smack_inode_setsecurity(struct inode *inode, const char *name, } else return -EOPNOTSUPP; +#if IS_ENABLED(CONFIG_IPV6) && !defined(CONFIG_SECURITY_SMACK_NETFILTER) if (sock->sk->sk_family == PF_INET6) smk_ipv6_port_label(sock, NULL); +#endif /* CONFIG_IPV6 && !CONFIG_SECURITY_SMACK_NETFILTER */ return 0; } @@ -2451,6 +2458,7 @@ static int smack_socket_post_create(struct socket *sock, int family, return smack_netlabel(sock->sk, SMACK_CIPSO_SOCKET); } +#ifndef CONFIG_SECURITY_SMACK_NETFILTER /** * smack_socket_bind - record port binding information. * @sock: the socket @@ -2464,11 +2472,14 @@ static int smack_socket_post_create(struct socket *sock, int family, static int smack_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) { +#if IS_ENABLED(CONFIG_IPV6) if (sock->sk != NULL && sock->sk->sk_family == PF_INET6) smk_ipv6_port_label(sock, address); +#endif return 0; } +#endif /* !CONFIG_SECURITY_SMACK_NETFILTER */ /** * smack_socket_connect - connect access check @@ -2497,8 +2508,10 @@ static int smack_socket_connect(struct socket *sock, struct sockaddr *sap, case PF_INET6: if (addrlen < sizeof(struct sockaddr_in6)) return -EINVAL; +#if IS_ENABLED(CONFIG_IPV6) && !defined(CONFIG_SECURITY_SMACK_NETFILTER) rc = smk_ipv6_port_check(sock->sk, (struct sockaddr_in6 *)sap, SMK_CONNECTING); +#endif /* CONFIG_IPV6 && !CONFIG_SECURITY_SMACK_NETFILTER */ break; } return rc; @@ -3381,7 +3394,9 @@ static int smack_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) { struct sockaddr_in *sip = (struct sockaddr_in *) msg->msg_name; +#if IS_ENABLED(CONFIG_IPV6) && !defined(CONFIG_SECURITY_SMACK_NETFILTER) struct sockaddr_in6 *sap = (struct sockaddr_in6 *) msg->msg_name; +#endif /* CONFIG_IPV6 && !CONFIG_SECURITY_SMACK_NETFILTER */ int rc = 0; /* @@ -3395,7 +3410,9 @@ static int smack_socket_sendmsg(struct socket *sock, struct msghdr *msg, rc = smack_netlabel_send(sock->sk, sip); break; case AF_INET6: +#if IS_ENABLED(CONFIG_IPV6) && !defined(CONFIG_SECURITY_SMACK_NETFILTER) rc = smk_ipv6_port_check(sock->sk, sap, SMK_SENDING); +#endif /* CONFIG_IPV6 && !CONFIG_SECURITY_SMACK_NETFILTER */ break; } return rc; @@ -3486,6 +3503,7 @@ static struct smack_known *smack_from_secattr(struct netlbl_lsm_secattr *sap, return smack_net_ambient; } +#if IS_ENABLED(CONFIG_IPV6) static int smk_skb_to_addr_ipv6(struct sk_buff *skb, struct sockaddr_in6 *sip) { u8 nexthdr; @@ -3532,6 +3550,7 @@ static int smk_skb_to_addr_ipv6(struct sk_buff *skb, struct sockaddr_in6 *sip) } return proto; } +#endif /* CONFIG_IPV6 */ /** * smack_socket_sock_rcv_skb - Smack packet delivery access check @@ -3544,15 +3563,30 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) { struct netlbl_lsm_secattr secattr; struct socket_smack *ssp = sk->sk_security; - struct smack_known *skp; - struct sockaddr_in6 sadd; + struct smack_known *skp = NULL; int rc = 0; struct smk_audit_info ad; #ifdef CONFIG_AUDIT struct lsm_network_audit net; #endif +#if IS_ENABLED(CONFIG_IPV6) + struct sockaddr_in6 sadd; + int proto; +#endif /* CONFIG_IPV6 */ + switch (sk->sk_family) { case PF_INET: +#ifdef CONFIG_SECURITY_SMACK_NETFILTER + /* + * If there is a secmark use it rather than the CIPSO label. + * If there is no secmark fall back to CIPSO. + * The secmark is assumed to reflect policy better. + */ + if (skb && skb->secmark != 0) { + skp = smack_from_secid(skb->secmark); + goto access_check; + } +#endif /* CONFIG_SECURITY_SMACK_NETFILTER */ /* * Translate what netlabel gave us. */ @@ -3566,6 +3600,9 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) netlbl_secattr_destroy(&secattr); +#ifdef CONFIG_SECURITY_SMACK_NETFILTER +access_check: +#endif #ifdef CONFIG_AUDIT smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net); ad.a.u.net->family = sk->sk_family; @@ -3584,14 +3621,32 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) if (rc != 0) netlbl_skbuff_err(skb, rc, 0); break; +#if IS_ENABLED(CONFIG_IPV6) case PF_INET6: - rc = smk_skb_to_addr_ipv6(skb, &sadd); - if (rc == IPPROTO_UDP || rc == IPPROTO_TCP) - rc = smk_ipv6_port_check(sk, &sadd, SMK_RECEIVING); + proto = smk_skb_to_addr_ipv6(skb, &sadd); + if (proto != IPPROTO_UDP && proto != IPPROTO_TCP) + break; +#ifdef CONFIG_SECURITY_SMACK_NETFILTER + if (skb && skb->secmark != 0) + skp = smack_from_secid(skb->secmark); else - rc = 0; + skp = smack_net_ambient; +#ifdef CONFIG_AUDIT + smk_ad_init_net(&ad, __func__, LSM_AUDIT_DATA_NET, &net); + ad.a.u.net->family = sk->sk_family; + ad.a.u.net->netif = skb->skb_iif; + ipv6_skb_to_auditdata(skb, &ad.a, NULL); +#endif /* CONFIG_AUDIT */ + rc = smk_access(skp, ssp->smk_in, MAY_WRITE, &ad); + rc = smk_bu_note("IPv6 delivery", skp, ssp->smk_in, + MAY_WRITE, rc); +#else /* CONFIG_SECURITY_SMACK_NETFILTER */ + rc = smk_ipv6_port_check(sk, &sadd, SMK_RECEIVING); +#endif /* CONFIG_SECURITY_SMACK_NETFILTER */ break; +#endif /* CONFIG_IPV6 */ } + return rc; } @@ -3653,16 +3708,25 @@ static int smack_socket_getpeersec_dgram(struct socket *sock, if (skb != NULL) { if (skb->protocol == htons(ETH_P_IP)) family = PF_INET; +#if IS_ENABLED(CONFIG_IPV6) else if (skb->protocol == htons(ETH_P_IPV6)) family = PF_INET6; +#endif /* CONFIG_IPV6 */ } if (family == PF_UNSPEC && sock != NULL) family = sock->sk->sk_family; - if (family == PF_UNIX) { + switch (family) { + case PF_UNIX: ssp = sock->sk->sk_security; s = ssp->smk_out->smk_secid; - } else if (family == PF_INET || family == PF_INET6) { + break; + case PF_INET: +#ifdef CONFIG_SECURITY_SMACK_NETFILTER + s = skb->secmark; + if (s != 0) + break; +#endif /* * Translate what netlabel gave us. */ @@ -3675,6 +3739,14 @@ static int smack_socket_getpeersec_dgram(struct socket *sock, s = skp->smk_secid; } netlbl_secattr_destroy(&secattr); + break; +#if IS_ENABLED(CONFIG_IPV6) + case PF_INET6: +#ifdef CONFIG_SECURITY_SMACK_NETFILTER + s = skb->secmark; +#endif /* CONFIG_SECURITY_SMACK_NETFILTER */ + break; +#endif /* CONFIG_IPV6 */ } *secid = s; if (s == 0) @@ -3730,6 +3802,7 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb, struct lsm_network_audit net; #endif +#if IS_ENABLED(CONFIG_IPV6) if (family == PF_INET6) { /* * Handle mapped IPv4 packets arriving @@ -3741,6 +3814,7 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb, else return 0; } +#endif /* CONFIG_IPV6 */ netlbl_secattr_init(&secattr); rc = netlbl_skbuff_getattr(skb, family, &secattr); @@ -4199,7 +4273,9 @@ struct security_operations smack_ops = { .unix_may_send = smack_unix_may_send, .socket_post_create = smack_socket_post_create, +#ifndef CONFIG_SECURITY_SMACK_NETFILTER .socket_bind = smack_socket_bind, +#endif /* CONFIG_SECURITY_SMACK_NETFILTER */ .socket_connect = smack_socket_connect, .socket_sendmsg = smack_socket_sendmsg, .socket_sock_rcv_skb = smack_socket_sock_rcv_skb, @@ -4280,6 +4356,8 @@ static __init int smack_init(void) if (!security_module_enable(&smack_ops)) return 0; + smack_enabled = 1; + smack_inode_cache = KMEM_CACHE(inode_smack, 0); if (!smack_inode_cache) return -ENOMEM; diff --git a/security/smack/smack_netfilter.c b/security/smack/smack_netfilter.c new file mode 100644 index 00000000000000..c952632afb0d4a --- /dev/null +++ b/security/smack/smack_netfilter.c @@ -0,0 +1,96 @@ +/* + * Simplified MAC Kernel (smack) security module + * + * This file contains the Smack netfilter implementation + * + * Author: + * Casey Schaufler + * + * Copyright (C) 2014 Casey Schaufler + * Copyright (C) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, + * as published by the Free Software Foundation. + */ + +#include +#include +#include +#include "smack.h" + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + +static unsigned int smack_ipv6_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct socket_smack *ssp; + struct smack_known *skp; + + if (skb && skb->sk && skb->sk->sk_security) { + ssp = skb->sk->sk_security; + skp = ssp->smk_out; + skb->secmark = skp->smk_secid; + } + + return NF_ACCEPT; +} +#endif /* IPV6 */ + +static unsigned int smack_ipv4_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct socket_smack *ssp; + struct smack_known *skp; + + if (skb && skb->sk && skb->sk->sk_security) { + ssp = skb->sk->sk_security; + skp = ssp->smk_out; + skb->secmark = skp->smk_secid; + } + + return NF_ACCEPT; +} + +static struct nf_hook_ops smack_nf_ops[] = { + { + .hook = smack_ipv4_output, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_SELINUX_FIRST, + }, +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + { + .hook = smack_ipv6_output, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP6_PRI_SELINUX_FIRST, + }, +#endif /* IPV6 */ +}; + +static int __init smack_nf_ip_init(void) +{ + int err; + + if (smack_enabled == 0) + return 0; + + printk(KERN_DEBUG "Smack: Registering netfilter hooks\n"); + + err = nf_register_hooks(smack_nf_ops, ARRAY_SIZE(smack_nf_ops)); + if (err) + pr_info("Smack: nf_register_hooks: error %d\n", err); + + return 0; +} + +__initcall(smack_nf_ip_init); From 138a868f009bfca8633032cdb91e2b02e292658b Mon Sep 17 00:00:00 2001 From: Rafal Krypa Date: Thu, 8 Jan 2015 18:52:45 +0100 Subject: [PATCH 163/601] smack: Add missing logging in bidirectional UDS connect check During UDS connection check, both sides are checked for write access to the other side. But only the first check is performed with audit support. The second one didn't produce any audit logs. This simple patch fixes that. Signed-off-by: Rafal Krypa --- security/smack/smack_lsm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 81b30e32c52674..f60ded3a8da178 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -3333,7 +3333,7 @@ static int smack_unix_stream_connect(struct sock *sock, if (rc == 0) { okp = osp->smk_out; skp = ssp->smk_in; - rc = smk_access(okp, skp, MAY_WRITE, NULL); + rc = smk_access(okp, skp, MAY_WRITE, &ad); rc = smk_bu_note("UDS connect", okp, skp, MAY_WRITE, rc); } From e91e992588331a2150049df8cc172971d519e846 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 20 Jan 2015 10:24:20 +1100 Subject: [PATCH 164/601] powerpc: Enable on demand governor on ppc64_defconfig This was enabled on the pseries defconfigs recently, but missed the ppc64 one. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/configs/ppc64_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 7481e4ff87e4e0..8622dfe464211b 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -41,6 +41,7 @@ CONFIG_PPC_CELL_QPACE=y CONFIG_RTAS_FLASH=m CONFIG_IBMEBUS=y CONFIG_CPU_FREQ_PMAC64=y +CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_HZ_100=y CONFIG_BINFMT_MISC=m CONFIG_PPC_TRANSACTIONAL_MEM=y From afda939f0ae63723250734e1365e4ead97f8a2b1 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 20 Jan 2015 10:24:21 +1100 Subject: [PATCH 165/601] powerpc: Enable CONFIG_SATA_AHCI on pseries and ppc64 defconfigs We are starting to see ppc64 boxes with SATA AHCI adapters in it, so enable it in our defconfigs. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/configs/ppc64_defconfig | 1 + arch/powerpc/configs/pseries_defconfig | 1 + arch/powerpc/configs/pseries_le_defconfig | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 8622dfe464211b..0fd22dfe00d130 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -116,6 +116,7 @@ CONFIG_SCSI_DH=m CONFIG_SCSI_DH_RDAC=m CONFIG_SCSI_DH_ALUA=m CONFIG_ATA=y +CONFIG_SATA_AHCI=y CONFIG_SATA_SIL24=y CONFIG_SATA_MV=y CONFIG_SATA_SVW=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 145e5b0f7b2453..ffd4bbd483f9d6 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -109,6 +109,7 @@ CONFIG_SCSI_DH=m CONFIG_SCSI_DH_RDAC=m CONFIG_SCSI_DH_ALUA=m CONFIG_ATA=y +CONFIG_SATA_AHCI=y # CONFIG_ATA_SFF is not set CONFIG_MD=y CONFIG_BLK_DEV_MD=y diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig index 4ff694c5c855c0..66db64ad95f2a4 100644 --- a/arch/powerpc/configs/pseries_le_defconfig +++ b/arch/powerpc/configs/pseries_le_defconfig @@ -110,6 +110,7 @@ CONFIG_SCSI_DH=m CONFIG_SCSI_DH_RDAC=m CONFIG_SCSI_DH_ALUA=m CONFIG_ATA=y +CONFIG_SATA_AHCI=y # CONFIG_ATA_SFF is not set CONFIG_MD=y CONFIG_BLK_DEV_MD=y From 36c7bfc4d0f046887d6d73eba430ac6551383849 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 20 Jan 2015 10:24:22 +1100 Subject: [PATCH 166/601] powerpc: Enable KSM on pseries and ppc64 defconfigs KSM will only be used on areas marked for merging via madvise, and it is showing nice improvements on KVM workloads, so enable it by default. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/configs/ppc64_defconfig | 1 + arch/powerpc/configs/pseries_defconfig | 1 + arch/powerpc/configs/pseries_le_defconfig | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 0fd22dfe00d130..897869ce22aa2b 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -49,6 +49,7 @@ CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTREMOVE=y +CONFIG_KSM=y CONFIG_SCHED_SMT=y CONFIG_PCCARD=y CONFIG_ELECTRA_CF=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index ffd4bbd483f9d6..7677850e2b0766 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -46,6 +46,7 @@ CONFIG_KEXEC=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y +CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_PPC_64K_PAGES=y CONFIG_PPC_SUBPAGE_PROT=y diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig index 66db64ad95f2a4..acb611a2683658 100644 --- a/arch/powerpc/configs/pseries_le_defconfig +++ b/arch/powerpc/configs/pseries_le_defconfig @@ -47,6 +47,7 @@ CONFIG_KEXEC=y CONFIG_IRQ_ALL_CPUS=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y +CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_PPC_64K_PAGES=y CONFIG_PPC_SUBPAGE_PROT=y From eaf31601a17ff33dc40026be3705d8fdeb6f4c46 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 20 Jan 2015 10:24:23 +1100 Subject: [PATCH 167/601] powerpc: Enable overlayfs on pseries and ppc64 defconfigs Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/configs/ppc64_defconfig | 1 + arch/powerpc/configs/pseries_defconfig | 1 + arch/powerpc/configs/pseries_le_defconfig | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 897869ce22aa2b..3315c9f0828a62 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -290,6 +290,7 @@ CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_NILFS2_FS=m CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=m +CONFIG_OVERLAY_FS=m CONFIG_ISO9660_FS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=y diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 7677850e2b0766..dbcef637641df2 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -242,6 +242,7 @@ CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_NILFS2_FS=m CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=m +CONFIG_OVERLAY_FS=m CONFIG_ISO9660_FS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=y diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig index acb611a2683658..14a7a7ab5056c8 100644 --- a/arch/powerpc/configs/pseries_le_defconfig +++ b/arch/powerpc/configs/pseries_le_defconfig @@ -243,6 +243,7 @@ CONFIG_BTRFS_FS_POSIX_ACL=y CONFIG_NILFS2_FS=m CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=m +CONFIG_OVERLAY_FS=m CONFIG_ISO9660_FS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=y From 7c62cf2416a81a9a06b5791f9a5c58aac158c448 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 20 Jan 2015 10:24:24 +1100 Subject: [PATCH 168/601] powerpc: Enable various container features on pseries defconfigs. Enable config options required by lxc and docker. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/configs/pseries_defconfig | 12 ++++++++++++ arch/powerpc/configs/pseries_le_defconfig | 12 ++++++++++++ 2 files changed, 24 insertions(+) diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index dbcef637641df2..21c284e32bbe72 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -20,6 +20,11 @@ CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_SCHED=y +CONFIG_USER_NS=y CONFIG_BLK_DEV_INITRD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y @@ -70,6 +75,7 @@ CONFIG_INET_IPCOMP=m CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -124,6 +130,7 @@ CONFIG_MD_FAULTY=m CONFIG_BLK_DEV_DM=y CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m +CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m @@ -132,8 +139,12 @@ CONFIG_DM_MULTIPATH_ST=m CONFIG_DM_UEVENT=y CONFIG_BONDING=m CONFIG_DUMMY=m +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_VXLAN=m CONFIG_NETCONSOLE=y CONFIG_TUN=m +CONFIG_VETH=m CONFIG_VIRTIO_NET=m CONFIG_VHOST_NET=m CONFIG_VORTEX=y @@ -166,6 +177,7 @@ CONFIG_INPUT_EVDEV=m CONFIG_INPUT_MISC=y CONFIG_INPUT_PCSPKR=m # CONFIG_SERIO_SERPORT is not set +CONFIG_DEVPTS_MULTIPLE_INSTANCES=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_ICOM=m diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig index 14a7a7ab5056c8..3805ade3292b04 100644 --- a/arch/powerpc/configs/pseries_le_defconfig +++ b/arch/powerpc/configs/pseries_le_defconfig @@ -21,6 +21,11 @@ CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y +CONFIG_MEMCG=y +CONFIG_MEMCG_SWAP=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_SCHED=y +CONFIG_USER_NS=y CONFIG_BLK_DEV_INITRD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y @@ -71,6 +76,7 @@ CONFIG_INET_IPCOMP=m CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set CONFIG_BRIDGE=m +CONFIG_VLAN_8021Q=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y @@ -125,6 +131,7 @@ CONFIG_MD_FAULTY=m CONFIG_BLK_DEV_DM=y CONFIG_DM_CRYPT=m CONFIG_DM_SNAPSHOT=m +CONFIG_DM_THIN_PROVISIONING=m CONFIG_DM_MIRROR=m CONFIG_DM_ZERO=m CONFIG_DM_MULTIPATH=m @@ -133,8 +140,12 @@ CONFIG_DM_MULTIPATH_ST=m CONFIG_DM_UEVENT=y CONFIG_BONDING=m CONFIG_DUMMY=m +CONFIG_MACVLAN=m +CONFIG_MACVTAP=m +CONFIG_VXLAN=m CONFIG_NETCONSOLE=y CONFIG_TUN=m +CONFIG_VETH=m CONFIG_VIRTIO_NET=m CONFIG_VHOST_NET=m CONFIG_VORTEX=y @@ -167,6 +178,7 @@ CONFIG_INPUT_EVDEV=m CONFIG_INPUT_MISC=y CONFIG_INPUT_PCSPKR=m # CONFIG_SERIO_SERPORT is not set +CONFIG_DEVPTS_MULTIPLE_INSTANCES=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_ICOM=m From 9654f95a081a6eb33f8a9cda496ee1a8857987f5 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 21 Oct 2014 16:32:07 +1100 Subject: [PATCH 169/601] powerpc: Enable NUMA balancing in pseries[_le]_defconfig Distros are enabling NUMA balancing (eg Ubuntu), so it would be good to get some more test coverage with it. Signed-off-by: Michael Neuling Signed-off-by: Michael Ellerman --- arch/powerpc/configs/pseries_defconfig | 2 ++ arch/powerpc/configs/pseries_le_defconfig | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 21c284e32bbe72..c2e39f66b182a6 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -15,6 +15,8 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_NUMA_BALANCING=y +CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y CONFIG_CGROUPS=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig index 3805ade3292b04..09bc96e792cdff 100644 --- a/arch/powerpc/configs/pseries_le_defconfig +++ b/arch/powerpc/configs/pseries_le_defconfig @@ -16,6 +16,8 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_NUMA_BALANCING=y +CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y CONFIG_CGROUPS=y CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y From ecba152356438fe28494d4d36d6d5755c32bf2ae Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Fri, 9 Jan 2015 18:33:05 +0100 Subject: [PATCH 170/601] ARM: 8267/1: cnx3xxx: Remove spurious default for DEBUG_CNS3xxx The default value for DEBUG_CNS3xxx appears twice. This patch removes the one with the wrong sort order. Signed-off-by: Daniel Thompson Signed-off-by: Russell King --- arch/arm/Kconfig.debug | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 5ddd4906f7a745..c5648cc1466dea 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -1269,7 +1269,6 @@ config DEBUG_UART_PHYS DEBUG_S3C2410_UART2) default 0x78000000 if DEBUG_CNS3XXX default 0x7c0003f8 if FOOTBRIDGE - default 0x78000000 if DEBUG_CNS3XXX default 0x80010000 if DEBUG_ASM9260_UART default 0x80070000 if DEBUG_IMX23_UART default 0x80074000 if DEBUG_IMX28_UART From 34c64a5d830ac3905065ff1cc52091af15cb2f7e Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Fri, 9 Jan 2015 18:33:16 +0100 Subject: [PATCH 171/601] ARM: 8268/1: configs: Enable DEBUG_LL_UART_8250 where needed All defconfigs touched by this patch already enable DEBUG_LL and by default DEBUG_LL_UART_NONE will be selected. This causes no issues today because due to some back compatibility magic we eventually need to remove it is not actually honoured. Nevertheless DEBUG_LL_UART_8250 is the right value for these platforms and should be set in the config files. Signed-off-by: Daniel Thompson Signed-off-by: Russell King --- arch/arm/configs/iop32x_defconfig | 1 + arch/arm/configs/iop33x_defconfig | 1 + arch/arm/configs/ixp4xx_defconfig | 1 + arch/arm/configs/lpc32xx_defconfig | 1 + arch/arm/configs/mv78xx0_defconfig | 1 + arch/arm/configs/orion5x_defconfig | 1 + arch/arm/configs/rpc_defconfig | 1 + 7 files changed, 7 insertions(+) diff --git a/arch/arm/configs/iop32x_defconfig b/arch/arm/configs/iop32x_defconfig index 4f2ec3ac138e1e..c3058da631da76 100644 --- a/arch/arm/configs/iop32x_defconfig +++ b/arch/arm/configs/iop32x_defconfig @@ -106,6 +106,7 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_USER=y CONFIG_DEBUG_LL=y +CONFIG_DEBUG_LL_UART_8250=y CONFIG_KEYS=y CONFIG_KEYS_DEBUG_PROC_KEYS=y CONFIG_CRYPTO_NULL=y diff --git a/arch/arm/configs/iop33x_defconfig b/arch/arm/configs/iop33x_defconfig index aa36128abca243..713faeee8cf4f3 100644 --- a/arch/arm/configs/iop33x_defconfig +++ b/arch/arm/configs/iop33x_defconfig @@ -87,5 +87,6 @@ CONFIG_DEBUG_KERNEL=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_DEBUG_USER=y CONFIG_DEBUG_LL=y +CONFIG_DEBUG_LL_UART_8250=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRC32 is not set diff --git a/arch/arm/configs/ixp4xx_defconfig b/arch/arm/configs/ixp4xx_defconfig index 1af665e847d1cc..24636cfdf6dfbb 100644 --- a/arch/arm/configs/ixp4xx_defconfig +++ b/arch/arm/configs/ixp4xx_defconfig @@ -202,3 +202,4 @@ CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_ERRORS=y CONFIG_DEBUG_LL=y +CONFIG_DEBUG_LL_UART_8250=y diff --git a/arch/arm/configs/lpc32xx_defconfig b/arch/arm/configs/lpc32xx_defconfig index 9f56ca3985ae15..c100b7df544111 100644 --- a/arch/arm/configs/lpc32xx_defconfig +++ b/arch/arm/configs/lpc32xx_defconfig @@ -204,6 +204,7 @@ CONFIG_DEBUG_INFO=y # CONFIG_FTRACE is not set # CONFIG_ARM_UNWIND is not set CONFIG_DEBUG_LL=y +CONFIG_DEBUG_LL_UART_8250=y CONFIG_EARLY_PRINTK=y CONFIG_CRYPTO_ANSI_CPRNG=y # CONFIG_CRYPTO_HW is not set diff --git a/arch/arm/configs/mv78xx0_defconfig b/arch/arm/configs/mv78xx0_defconfig index 0dae1c1f007a5f..85d10d2e3d660f 100644 --- a/arch/arm/configs/mv78xx0_defconfig +++ b/arch/arm/configs/mv78xx0_defconfig @@ -132,6 +132,7 @@ CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_DEBUG_USER=y CONFIG_DEBUG_ERRORS=y CONFIG_DEBUG_LL=y +CONFIG_DEBUG_LL_UART_8250=y CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m diff --git a/arch/arm/configs/orion5x_defconfig b/arch/arm/configs/orion5x_defconfig index 952430d9e2d9cb..855143fac6bd42 100644 --- a/arch/arm/configs/orion5x_defconfig +++ b/arch/arm/configs/orion5x_defconfig @@ -156,6 +156,7 @@ CONFIG_LATENCYTOP=y # CONFIG_FTRACE is not set CONFIG_DEBUG_USER=y CONFIG_DEBUG_LL=y +CONFIG_DEBUG_LL_UART_8250=y CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m diff --git a/arch/arm/configs/rpc_defconfig b/arch/arm/configs/rpc_defconfig index 00515ef9782de6..89631795a91523 100644 --- a/arch/arm/configs/rpc_defconfig +++ b/arch/arm/configs/rpc_defconfig @@ -131,3 +131,4 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_USER=y CONFIG_DEBUG_ERRORS=y CONFIG_DEBUG_LL=y +CONFIG_DEBUG_LL_UART_8250=y From 6f5194553c84c3eb412c0cc59554b85be128f43d Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Fri, 9 Jan 2015 18:33:28 +0100 Subject: [PATCH 172/601] ARM: 8269/1: Remove DEBUG_LL_UART_NONE Only a very small handful of platforms support DEBUG_LL_UART_NONE but it lurks in the menus of every single platform config ready to break the build. This is an especial problem for defconfig/oldconfig since it is often selected by default. This patch solves the problem by removing this option. Any platforms still depending upon this option must be migrated. Signed-off-by: Daniel Thompson Signed-off-by: Russell King --- arch/arm/Kconfig.debug | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index c5648cc1466dea..6ee9b0d361eea4 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -1031,15 +1031,6 @@ choice This option selects UART0 on VIA/Wondermedia System-on-a-chip devices, including VT8500, WM8505, WM8650 and WM8850. - config DEBUG_LL_UART_NONE - bool "No low-level debugging UART" - depends on !ARCH_MULTIPLATFORM - help - Say Y here if your platform doesn't provide a UART option - above. This relies on your platform choosing the right UART - definition internally in order for low-level debugging to - work. - config DEBUG_ICEDCC bool "Kernel low-level debugging via EmbeddedICE DCC channel" help From abbfb21efcd704e8b858e94e327bb014386ba0dd Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Fri, 9 Jan 2015 18:33:36 +0100 Subject: [PATCH 173/601] ARM: 8270/1: ks8695: Migrate debug_ll macros to shared directory As part of the migration a couple of uart definitions have been copied from of the platform specific header files. Signed-off-by: Daniel Thompson Cc: Greg Ungerer Cc: Arnd Bergmann Signed-off-by: Russell King --- arch/arm/Kconfig.debug | 8 ++++++++ .../mach/debug-macro.S => include/debug/ks8695.S} | 10 +++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) rename arch/arm/{mach-ks8695/include/mach/debug-macro.S => include/debug/ks8695.S} (80%) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 6ee9b0d361eea4..eba36e35bad2f5 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -397,6 +397,13 @@ choice Say Y here if you want the debug print routines to direct their output to UART1 serial port on KEYSTONE2 devices. + config DEBUG_KS8695_UART + bool "KS8695 Debug UART" + depends on ARCH_KS8695 + help + Say Y here if you want kernel low-level debugging support + on KS8695. + config DEBUG_MESON_UARTAO bool "Kernel low-level debugging via Meson6 UARTAO" depends on ARCH_MESON @@ -1174,6 +1181,7 @@ config DEBUG_LL_INCLUDE DEBUG_IMX6Q_UART || \ DEBUG_IMX6SL_UART || \ DEBUG_IMX6SX_UART + default "debug/ks8695.S" if DEBUG_KS8695_UART default "debug/msm.S" if DEBUG_MSM_UART || DEBUG_QCOM_UARTDM default "debug/omap2plus.S" if DEBUG_OMAP2PLUS_UART default "debug/renesas-scif.S" if DEBUG_R7S72100_SCIF2 diff --git a/arch/arm/mach-ks8695/include/mach/debug-macro.S b/arch/arm/include/debug/ks8695.S similarity index 80% rename from arch/arm/mach-ks8695/include/mach/debug-macro.S rename to arch/arm/include/debug/ks8695.S index a79e48981202af..961da1f32ab38d 100644 --- a/arch/arm/mach-ks8695/include/mach/debug-macro.S +++ b/arch/arm/include/debug/ks8695.S @@ -1,5 +1,5 @@ /* - * arch/arm/mach-ks8695/include/mach/debug-macro.S + * arch/arm/include/debug/ks8695.S * * Copyright (C) 2006 Ben Dooks * Copyright (C) 2006 Simtec Electronics @@ -11,8 +11,12 @@ * published by the Free Software Foundation. */ -#include -#include +#define KS8695_UART_PA 0x03ffe000 +#define KS8695_UART_VA 0xf00fe000 +#define KS8695_URTH (0x04) +#define KS8695_URLS (0x14) +#define URLS_URTE (1 << 6) +#define URLS_URTHRE (1 << 5) .macro addruart, rp, rv, tmp ldr \rp, =KS8695_UART_PA @ physical base address From c26b999353339c235b9e166739926d0199eeecbf Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Fri, 9 Jan 2015 18:33:50 +0100 Subject: [PATCH 174/601] ARM: 8271/1: omap1: Migrate debug_ll macros to use 8250.S The omap1's debug-macro.S is similar to the generic 8250 code. Compared to the 8520 code the omap1 macro automatically determines what UART to use based on breadcrumbs left by the bootloader and automatically copes with the eccentric register layout on OMAP7XX. This patch drops both these features and relies instead on the generic 8250 macros: 1. Dropping support for the bootloader breadcrumbs is identical to the way the migration was handled for OMAP2 (see 808b7e07464d...). 2. Support for OMAP7XX still exists but it must be configured by hand (DEBUG_OMAP7XXUART1/2/3) rather than handled at runtime. Signed-off-by: Daniel Thompson Cc: Arnd Bergmann Cc: linux-omap@vger.kernel.org Tested-by: Aaro Koskinen Acked-by: Tony Lindgren Signed-off-by: Russell King --- arch/arm/Kconfig.debug | 57 +++++++++- .../arm/mach-omap1/include/mach/debug-macro.S | 101 ------------------ 2 files changed, 56 insertions(+), 102 deletions(-) delete mode 100644 arch/arm/mach-omap1/include/mach/debug-macro.S diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index eba36e35bad2f5..ec25d746b4dd5a 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -527,6 +527,30 @@ choice Say Y here if you want kernel low-level debugging support on TI-NSPIRE CX models. + config DEBUG_OMAP1UART1 + bool "Kernel low-level debugging via OMAP1 UART1" + depends on ARCH_OMAP1 + select DEBUG_UART_8250 + help + Say Y here if you want kernel low-level debugging support + on OMAP1 based platforms (except OMAP730) on the UART1. + + config DEBUG_OMAP1UART2 + bool "Kernel low-level debugging via OMAP1 UART2" + depends on ARCH_OMAP1 + select DEBUG_UART_8250 + help + Say Y here if you want kernel low-level debugging support + on OMAP1 based platforms (except OMAP730) on the UART2. + + config DEBUG_OMAP1UART3 + bool "Kernel low-level debugging via OMAP1 UART3" + depends on ARCH_OMAP1 + select DEBUG_UART_8250 + help + Say Y here if you want kernel low-level debugging support + on OMAP1 based platforms (except OMAP730) on the UART3. + config DEBUG_OMAP2UART1 bool "OMAP2/3/4 UART1 (omap2/3 sdp boards and some omap3 boards)" depends on ARCH_OMAP2PLUS @@ -569,6 +593,30 @@ choice depends on ARCH_OMAP2PLUS select DEBUG_OMAP2PLUS_UART + config DEBUG_OMAP7XXUART1 + bool "Kernel low-level debugging via OMAP730 UART1" + depends on ARCH_OMAP730 + select DEBUG_UART_8250 + help + Say Y here if you want kernel low-level debugging support + on OMAP730 based platforms on the UART1. + + config DEBUG_OMAP7XXUART2 + bool "Kernel low-level debugging via OMAP730 UART2" + depends on ARCH_OMAP730 + select DEBUG_UART_8250 + help + Say Y here if you want kernel low-level debugging support + on OMAP730 based platforms on the UART2. + + config DEBUG_OMAP7XXUART3 + bool "Kernel low-level debugging via OMAP730 UART3" + depends on ARCH_OMAP730 + select DEBUG_UART_8250 + help + Say Y here if you want kernel low-level debugging support + on OMAP730 based platforms on the UART3. + config DEBUG_TI81XXUART1 bool "Kernel low-level debugging messages via TI81XX UART1 (ti8148evm)" depends on ARCH_OMAP2PLUS @@ -1308,6 +1356,9 @@ config DEBUG_UART_PHYS default 0xffe40000 if DEBUG_RCAR_GEN1_SCIF0 default 0xffe42000 if DEBUG_RCAR_GEN1_SCIF2 default 0xfff36000 if DEBUG_HIGHBANK_UART + default 0xfffb0000 if DEBUG_OMAP1UART1 || DEBUG_OMAP7XXUART1 + default 0xfffb0800 if DEBUG_OMAP1UART2 || DEBUG_OMAP7XXUART2 + default 0xfffb9800 if DEBUG_OMAP1UART3 || DEBUG_OMAP7XXUART3 default 0xfffe8600 if DEBUG_UART_BCM63XX default 0xfffff700 if ARCH_IOP33X depends on DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \ @@ -1390,6 +1441,9 @@ config DEBUG_UART_VIRT default 0xfef00000 if ARCH_IXP4XX && !CPU_BIG_ENDIAN default 0xfef00003 if ARCH_IXP4XX && CPU_BIG_ENDIAN default 0xfef36000 if DEBUG_HIGHBANK_UART + default 0xfefb0000 if DEBUG_OMAP1UART1 || DEBUG_OMAP7XXUART1 + default 0xfefb0800 if DEBUG_OMAP1UART2 || DEBUG_OMAP7XXUART2 + default 0xfefb9800 if DEBUG_OMAP1UART3 || DEBUG_OMAP7XXUART3 default 0xfefff700 if ARCH_IOP33X default 0xff003000 if DEBUG_U300_UART default DEBUG_UART_PHYS if !MMU @@ -1401,7 +1455,8 @@ config DEBUG_UART_VIRT config DEBUG_UART_8250_SHIFT int "Register offset shift for the 8250 debug UART" depends on DEBUG_LL_UART_8250 || DEBUG_UART_8250 - default 0 if FOOTBRIDGE || ARCH_IOP32X || DEBUG_BCM_5301X + default 0 if FOOTBRIDGE || ARCH_IOP32X || DEBUG_BCM_5301X || \ + DEBUG_OMAP7XXUART1 || DEBUG_OMAP7XXUART2 || DEBUG_OMAP7XXUART3 default 2 config DEBUG_UART_8250_WORD diff --git a/arch/arm/mach-omap1/include/mach/debug-macro.S b/arch/arm/mach-omap1/include/mach/debug-macro.S deleted file mode 100644 index 5c1a26c9f49059..00000000000000 --- a/arch/arm/mach-omap1/include/mach/debug-macro.S +++ /dev/null @@ -1,101 +0,0 @@ -/* arch/arm/mach-omap1/include/mach/debug-macro.S - * - * Debugging macro include header - * - * Copyright (C) 1994-1999 Russell King - * Moved from linux/arch/arm/kernel/debug.S by Ben Dooks - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * -*/ - -#include - -#include "serial.h" - - .pushsection .data -omap_uart_phys: .word 0x0 -omap_uart_virt: .word 0x0 - .popsection - - /* - * Note that this code won't work if the bootloader passes - * a wrong machine ID number in r1. To debug, just hardcode - * the desired UART phys and virt addresses temporarily into - * the omap_uart_phys and omap_uart_virt above. - */ - .macro addruart, rp, rv, tmp - - /* Use omap_uart_phys/virt if already configured */ -9: adr \rp, 99f @ get effective addr of 99f - ldr \rv, [\rp] @ get absolute addr of 99f - sub \rv, \rv, \rp @ offset between the two - ldr \rp, [\rp, #4] @ abs addr of omap_uart_phys - sub \tmp, \rp, \rv @ make it effective - ldr \rp, [\tmp, #0] @ omap_uart_phys - ldr \rv, [\tmp, #4] @ omap_uart_virt - cmp \rp, #0 @ is port configured? - cmpne \rv, #0 - bne 100f @ already configured - - /* Check the debug UART configuration set in uncompress.h */ - and \rp, pc, #0xff000000 - ldr \rv, =OMAP_UART_INFO_OFS - ldr \rp, [\rp, \rv] - - /* Select the UART to use based on the UART1 scratchpad value */ -10: cmp \rp, #0 @ no port configured? - beq 11f @ if none, try to use UART1 - cmp \rp, #OMAP1UART1 - beq 11f @ configure OMAP1UART1 - cmp \rp, #OMAP1UART2 - beq 12f @ configure OMAP1UART2 - cmp \rp, #OMAP1UART3 - beq 13f @ configure OMAP2UART3 - - /* Configure the UART offset from the phys/virt base */ -11: mov \rp, #0x00fb0000 @ OMAP1UART1 - b 98f -12: mov \rp, #0x00fb0000 @ OMAP1UART1 - orr \rp, \rp, #0x00000800 @ OMAP1UART2 - b 98f -13: mov \rp, #0x00fb0000 @ OMAP1UART1 - orr \rp, \rp, #0x00000800 @ OMAP1UART2 - orr \rp, \rp, #0x00009000 @ OMAP1UART3 - - /* Store both phys and virt address for the uart */ -98: add \rp, \rp, #0xff000000 @ phys base - str \rp, [\tmp, #0] @ omap_uart_phys - sub \rp, \rp, #0xff000000 @ phys base - add \rp, \rp, #0xfe000000 @ virt base - str \rp, [\tmp, #4] @ omap_uart_virt - b 9b - - .align -99: .word . - .word omap_uart_phys - .ltorg - -100: - .endm - - .macro senduart,rd,rx - strb \rd, [\rx] - .endm - - .macro busyuart,rd,rx -1001: ldrb \rd, [\rx, #(UART_LSR << OMAP_PORT_SHIFT)] - and \rd, \rd, #(UART_LSR_TEMT | UART_LSR_THRE) - teq \rd, #(UART_LSR_TEMT | UART_LSR_THRE) - beq 1002f - ldrb \rd, [\rx, #(UART_LSR << OMAP7XX_PORT_SHIFT)] - and \rd, \rd, #(UART_LSR_TEMT | UART_LSR_THRE) - teq \rd, #(UART_LSR_TEMT | UART_LSR_THRE) - bne 1001b -1002: - .endm - - .macro waituart,rd,rx - .endm From 4d31e66412955b20081d6966c271a71d4abcefd7 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Fri, 9 Jan 2015 18:34:03 +0100 Subject: [PATCH 175/601] ARM: 8272/1: netx: Migrate DEBUG_LL macros to shared directory As part of the migration we introduce DEBUG_UART_PHYS/DEBUG_UART_VIRT which default to UART1 but allow a user to configure UART2 or UART3. We also introduce symbolic names for the registers and flags. Signed-off-by: Daniel Thompson Acked-by: Arnd Bergmann Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Russell King --- arch/arm/Kconfig.debug | 16 ++++++++++++-- .../debug-macro.S => include/debug/netx.S} | 22 +++++++++---------- 2 files changed, 25 insertions(+), 13 deletions(-) rename arch/arm/{mach-netx/include/mach/debug-macro.S => include/debug/netx.S} (62%) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index ec25d746b4dd5a..ff43ea38fe1ff2 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -503,6 +503,13 @@ choice Say Y here if you want kernel low-level debugging support on Vybrid based platforms. + config DEBUG_NETX_UART + bool "Kernel low-level debugging messages via NetX UART" + depends on ARCH_NETX + help + Say Y here if you want kernel low-level debugging support + on Hilscher NetX based platforms. + config DEBUG_NOMADIK_UART bool "Kernel low-level debugging messages via NOMADIK UART" depends on ARCH_NOMADIK @@ -1231,6 +1238,7 @@ config DEBUG_LL_INCLUDE DEBUG_IMX6SX_UART default "debug/ks8695.S" if DEBUG_KS8695_UART default "debug/msm.S" if DEBUG_MSM_UART || DEBUG_QCOM_UARTDM + default "debug/netx.S" if DEBUG_NETX_UART default "debug/omap2plus.S" if DEBUG_OMAP2PLUS_UART default "debug/renesas-scif.S" if DEBUG_R7S72100_SCIF2 default "debug/renesas-scif.S" if DEBUG_RCAR_GEN1_SCIF0 @@ -1276,6 +1284,7 @@ config DEBUG_UART_BCM63XX config DEBUG_UART_PHYS hex "Physical base address of debug UART" + default 0x00100a00 if DEBUG_NETX_UART default 0x01c20000 if DEBUG_DAVINCI_DMx_UART0 default 0x01c28000 if DEBUG_SUNXI_UART0 default 0x01c28400 if DEBUG_SUNXI_UART1 @@ -1364,7 +1373,8 @@ config DEBUG_UART_PHYS depends on DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \ DEBUG_LL_UART_EFM32 || \ DEBUG_UART_8250 || DEBUG_UART_PL01X || DEBUG_MESON_UARTAO || \ - DEBUG_MSM_UART || DEBUG_QCOM_UARTDM || DEBUG_R7S72100_SCIF2 || \ + DEBUG_MSM_UART || DEBUG_NETX_UART || \ + DEBUG_QCOM_UARTDM || DEBUG_R7S72100_SCIF2 || \ DEBUG_RCAR_GEN1_SCIF0 || DEBUG_RCAR_GEN1_SCIF2 || \ DEBUG_RCAR_GEN2_SCIF0 || DEBUG_RCAR_GEN2_SCIF2 || \ DEBUG_RMOBILE_SCIFA0 || DEBUG_RMOBILE_SCIFA1 || \ @@ -1373,6 +1383,7 @@ config DEBUG_UART_PHYS config DEBUG_UART_VIRT hex "Virtual base address of debug UART" + default 0xe0000a00 if DEBUG_NETX_UART default 0xe0010fe0 if ARCH_RPC default 0xe1000000 if DEBUG_MSM_UART default 0xf0000be0 if ARCH_EBSA110 @@ -1449,7 +1460,8 @@ config DEBUG_UART_VIRT default DEBUG_UART_PHYS if !MMU depends on DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \ DEBUG_UART_8250 || DEBUG_UART_PL01X || DEBUG_MESON_UARTAO || \ - DEBUG_MSM_UART || DEBUG_QCOM_UARTDM || DEBUG_S3C24XX_UART || \ + DEBUG_MSM_UART || DEBUG_NETX_UART || \ + DEBUG_QCOM_UARTDM || DEBUG_S3C24XX_UART || \ DEBUG_UART_BCM63XX || DEBUG_ASM9260_UART config DEBUG_UART_8250_SHIFT diff --git a/arch/arm/mach-netx/include/mach/debug-macro.S b/arch/arm/include/debug/netx.S similarity index 62% rename from arch/arm/mach-netx/include/mach/debug-macro.S rename to arch/arm/include/debug/netx.S index 247781e096e296..81e1b2af70f7ba 100644 --- a/arch/arm/mach-netx/include/mach/debug-macro.S +++ b/arch/arm/include/debug/netx.S @@ -1,5 +1,4 @@ -/* arch/arm/mach-netx/include/mach/debug-macro.S - * +/* * Debugging macro include header * * Copyright (C) 1994-1999 Russell King @@ -11,26 +10,27 @@ * */ -#include "hardware.h" +#define UART_DATA 0 +#define UART_FLAG 0x18 +#define UART_FLAG_BUSY (1 << 3) .macro addruart, rp, rv, tmp - mov \rp, #0x00000a00 - orr \rv, \rp, #io_p2v(0x00100000) @ virtual - orr \rp, \rp, #0x00100000 @ physical + ldr \rp, =CONFIG_DEBUG_UART_PHYS + ldr \rv, =CONFIG_DEBUG_UART_VIRT .endm .macro senduart,rd,rx - str \rd, [\rx, #0] + str \rd, [\rx, #UART_DATA] .endm .macro busyuart,rd,rx -1002: ldr \rd, [\rx, #0x18] - tst \rd, #(1 << 3) +1002: ldr \rd, [\rx, #UART_FLAG] + tst \rd, #UART_FLAG_BUSY bne 1002b .endm .macro waituart,rd,rx -1001: ldr \rd, [\rx, #0x18] - tst \rd, #(1 << 3) +1001: ldr \rd, [\rx, #UART_FLAG] + tst \rd, #UART_FLAG_BUSY bne 1001b .endm From d02fde7fc06a6ff88a9b4495712e31d54be01a4b Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Fri, 9 Jan 2015 18:34:12 +0100 Subject: [PATCH 176/601] ARM: 8273/1: Seperate DEBUG_UART_PHYS from DEBUG_LL on EP93XX On EP93XX uncompress.h uses CONFIG_DEBUG_UART_PHYS instead of a hard coded serial port. This means the build breaks when DEBUG_LL (and DEBUG_LL_UART_PL01X) is not enabled. This is fixed by adding a new dependency. Signed-off-by: Daniel Thompson Acked-by: Arnd Bergmann Signed-off-by: Russell King --- arch/arm/Kconfig.debug | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index ff43ea38fe1ff2..e250505b9ae1d2 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -1370,7 +1370,8 @@ config DEBUG_UART_PHYS default 0xfffb9800 if DEBUG_OMAP1UART3 || DEBUG_OMAP7XXUART3 default 0xfffe8600 if DEBUG_UART_BCM63XX default 0xfffff700 if ARCH_IOP33X - depends on DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \ + depends on ARCH_EP93XX || \ + DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \ DEBUG_LL_UART_EFM32 || \ DEBUG_UART_8250 || DEBUG_UART_PL01X || DEBUG_MESON_UARTAO || \ DEBUG_MSM_UART || DEBUG_NETX_UART || \ From a61cbf51f0a63e40ebda56f1961cf5570dc067b3 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Fri, 9 Jan 2015 18:34:22 +0100 Subject: [PATCH 177/601] ARM: 8274/1: Fix DEBUG_LL for multi-platform kernels (without PL01X) When building a multi_v7_defconfig kernel it is not possible to configure DEBUG_LL to use any serial device except a ARM Primecell PL01X, or more accurately and worse, it is possible to configure a different serial device but KConfig does not honour this request. In fact this also overrides the user selection for some of the single platform kernels, for example I don't think DEBUG_LL can be targeted at ICE or semihosted supervisor for ARCH_VERSATILE. This happens because DEBUG_UART_PL01X is automatically enabled by some architectures and this means user decisions made regarding the DEBUG_LL backend will be overridden. Problem is fixed by removing the automatic enabling of this option. Signed-off-by: Daniel Thompson Acked-by: Arnd Bergmann Signed-off-by: Russell King --- arch/arm/Kconfig.debug | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index e250505b9ae1d2..a324ecdfeb2141 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -1263,12 +1263,7 @@ config DEBUG_LL_INCLUDE # Compatibility options for PL01x config DEBUG_UART_PL01X - def_bool ARCH_EP93XX || \ - ARCH_INTEGRATOR || \ - ARCH_SPEAR3XX || \ - ARCH_SPEAR6XX || \ - ARCH_SPEAR13XX || \ - ARCH_VERSATILE + bool # Compatibility options for 8250 config DEBUG_UART_8250 From 35997a231021d603ddc73e1abfbe4f76d63f23b2 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Sat, 13 Dec 2014 04:06:22 +0100 Subject: [PATCH 178/601] ARM: 8248/1: pm: remove outdated comment As of commit abda1bd5f4e04054ce083c298fcd68a743e9df03 __cpu_suspend() takes only 2 arguments, and those arguments are passed by the platform code. This comment thus makes no sense, as cpu_suspend() is not actually hiding any arguments. Signed-off-by: Brian Norris Signed-off-by: Russell King --- arch/arm/kernel/suspend.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index 2835d35234ca45..9a2f882a0a2d1c 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c @@ -14,10 +14,6 @@ extern int __cpu_suspend(unsigned long, int (*)(unsigned long), u32 cpuid); extern void cpu_resume_mmu(void); #ifdef CONFIG_MMU -/* - * Hide the first two arguments to __cpu_suspend - these are an implementation - * detail which platform code shouldn't have to know about. - */ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) { struct mm_struct *mm = current->active_mm; From 99a468d779f6851a31053e6a33bf91391034010b Mon Sep 17 00:00:00 2001 From: "George G. Davis" Date: Fri, 16 Jan 2015 11:21:05 +0100 Subject: [PATCH 179/601] ARM: 8286/1: mm: Fix dma_contiguous_reserve comment DMA contiguous allocations have been able to use highmem since commit "95b0e65 ARM: mm: don't limit default CMA region only to low memory" but a comment still notes the earlier "low memory" limitation. Update the comment to remove the low memory limitation and fix the s/contigouos/contiguous/ typo while we're at it. Signed-off-by: George G. Davis Acked-by: Marek Szyprowski Signed-off-by: Russell King --- arch/arm/mm/init.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 98ad9c79ea0e6a..d538dc73fcb0ec 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -319,10 +319,7 @@ void __init arm_memblock_init(const struct machine_desc *mdesc) early_init_fdt_scan_reserved_mem(); - /* - * reserve memory for DMA contigouos allocations, - * must come from DMA area inside low memory - */ + /* reserve memory for DMA contiguous allocations */ dma_contiguous_reserve(arm_dma_limit); arm_memblock_steal_permitted = false; From 7d57909bf69f213b4a9f278d78271e7c9a05f62f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 20 Jan 2015 03:44:26 +0100 Subject: [PATCH 180/601] ARM: 8290/1: decompressor: fix a wrong comment This comment does not correspond to the actual code. When zImage is loaded at a lower *OR* higher address of the destination of Image, it won't overwrite itself. Signed-off-by: Masahiro Yamada Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/boot/compressed/head.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 68be9017593df1..9dff61479dff8d 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -178,7 +178,7 @@ not_angel: /* * Set up a page table only if it won't overwrite ourself. - * That means r4 < pc && r4 - 16k page directory > &_end. + * That means r4 < pc || r4 - 16k page directory > &_end. * Given that r4 > &_end is most unfrequent, we add a rough * additional 1MB of room for a possible appended DTB. */ From 7a06192834414f02465cb38f4ceb88a6f02392f6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 20 Jan 2015 03:49:35 +0100 Subject: [PATCH 181/601] ARM: 8291/1: replace magic number with PAGE_SHIFT macro in fixup_pv code This line converts PHYS_OFFSET into PHYS_PFN_OFFSET. It is better to use PAGE_SHIFT rather than the magic number 12. Signed-off-by: Masahiro Yamada Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/head.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 664eee8c4a26ce..ecb7be8b085498 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -586,7 +586,7 @@ __fixup_pv_table: add r5, r5, r3 @ adjust table end address add r6, r6, r3 @ adjust __pv_phys_pfn_offset address add r7, r7, r3 @ adjust __pv_offset address - mov r0, r8, lsr #12 @ convert to PFN + mov r0, r8, lsr #PAGE_SHIFT @ convert to PFN str r0, [r6] @ save computed PHYS_OFFSET to __pv_phys_pfn_offset strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits mov r6, r3, lsr #24 @ constant for add/sub instructions From 6d1cff2a885850b78b40c34777b46cf5da5d1050 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 13 Jan 2015 18:52:40 +0300 Subject: [PATCH 182/601] smack: fix possible use after frees in task_security() callers We hit use after free on dereferncing pointer to task_smack struct in smk_of_task() called from smack_task_to_inode(). task_security() macro uses task_cred_xxx() to get pointer to the task_smack. task_cred_xxx() could be used only for non-pointer members of task's credentials. It cannot be used for pointer members since what they point to may disapper after dropping RCU read lock. Mainly task_security() used this way: smk_of_task(task_security(p)) Intead of this introduce function smk_of_task_struct() which takes task_struct as argument and returns pointer to smk_known struct and do this under RCU read lock. Bogus task_security() macro is not used anymore, so remove it. KASan's report for this: AddressSanitizer: use after free in smack_task_to_inode+0x50/0x70 at addr c4635600 ============================================================================= BUG kmalloc-64 (Tainted: PO): kasan error ----------------------------------------------------------------------------- Disabling lock debugging due to kernel taint INFO: Allocated in new_task_smack+0x44/0xd8 age=39 cpu=0 pid=1866 kmem_cache_alloc_trace+0x88/0x1bc new_task_smack+0x44/0xd8 smack_cred_prepare+0x48/0x21c security_prepare_creds+0x44/0x4c prepare_creds+0xdc/0x110 smack_setprocattr+0x104/0x150 security_setprocattr+0x4c/0x54 proc_pid_attr_write+0x12c/0x194 vfs_write+0x1b0/0x370 SyS_write+0x5c/0x94 ret_fast_syscall+0x0/0x48 INFO: Freed in smack_cred_free+0xc4/0xd0 age=27 cpu=0 pid=1564 kfree+0x270/0x290 smack_cred_free+0xc4/0xd0 security_cred_free+0x34/0x3c put_cred_rcu+0x58/0xcc rcu_process_callbacks+0x738/0x998 __do_softirq+0x264/0x4cc do_softirq+0x94/0xf4 irq_exit+0xbc/0x120 handle_IRQ+0x104/0x134 gic_handle_irq+0x70/0xac __irq_svc+0x44/0x78 _raw_spin_unlock+0x18/0x48 sync_inodes_sb+0x17c/0x1d8 sync_filesystem+0xac/0xfc vdfs_file_fsync+0x90/0xc0 vfs_fsync_range+0x74/0x7c INFO: Slab 0xd3b23f50 objects=32 used=31 fp=0xc4635600 flags=0x4080 INFO: Object 0xc4635600 @offset=5632 fp=0x (null) Bytes b4 c46355f0: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ Object c4635600: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk Object c4635610: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk Object c4635620: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk Object c4635630: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk. Redzone c4635640: bb bb bb bb .... Padding c46356e8: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZZZZZZZZZ Padding c46356f8: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ CPU: 5 PID: 834 Comm: launchpad_prelo Tainted: PBO 3.10.30 #1 Backtrace: [] (dump_backtrace+0x0/0x158) from [] (show_stack+0x20/0x24) r7:c4634010 r6:d3b23f50 r5:c4635600 r4:d1002140 [] (show_stack+0x0/0x24) from [] (dump_stack+0x20/0x28) [] (dump_stack+0x0/0x28) from [] (print_trailer+0x124/0x144) [] (print_trailer+0x0/0x144) from [] (object_err+0x3c/0x44) r7:c4635600 r6:d1002140 r5:d3b23f50 r4:c4635600 [] (object_err+0x0/0x44) from [] (kasan_report_error+0x2b8/0x538) r6:d1002140 r5:d3b23f50 r4:c6429cf8 r3:c09e1aa7 [] (kasan_report_error+0x0/0x538) from [] (__asan_load4+0xd4/0xf8) [] (__asan_load4+0x0/0xf8) from [] (smack_task_to_inode+0x50/0x70) r5:c4635600 r4:ca9da000 [] (smack_task_to_inode+0x0/0x70) from [] (security_task_to_inode+0x3c/0x44) r5:cca25e80 r4:c0ba9780 [] (security_task_to_inode+0x0/0x44) from [] (pid_revalidate+0x124/0x178) r6:00000000 r5:cca25e80 r4:cbabe3c0 r3:00008124 [] (pid_revalidate+0x0/0x178) from [] (lookup_fast+0x35c/0x43y4) r9:c6429efc r8:00000101 r7:c079d940 r6:c6429e90 r5:c6429ed8 r4:c83c4148 [] (lookup_fast+0x0/0x434) from [] (do_last.isra.24+0x1c0/0x1108) [] (do_last.isra.24+0x0/0x1108) from [] (path_openat.isra.25+0xf4/0x648) [] (path_openat.isra.25+0x0/0x648) from [] (do_filp_open+0x3c/0x88) [] (do_filp_open+0x0/0x88) from [] (do_sys_open+0xf0/0x198) r7:00000001 r6:c0ea2180 r5:0000000b r4:00000000 [] (do_sys_open+0x0/0x198) from [] (SyS_open+0x30/0x34) [] (SyS_open+0x0/0x34) from [] (ret_fast_syscall+0x0/0x48) Read of size 4 by thread T834: Memory state around the buggy address: c4635380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc c4635400: 00 00 00 00 00 00 00 00 fc fc fc fc fc fc fc fc c4635480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc c4635500: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc c4635580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >c4635600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ c4635680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb c4635700: 00 00 00 00 04 fc fc fc fc fc fc fc fc fc fc fc c4635780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc c4635800: 00 00 00 00 00 00 04 fc fc fc fc fc fc fc fc fc c4635880: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ================================================================== Signed-off-by: Andrey Ryabinin Cc: --- security/smack/smack.h | 10 ++++++++++ security/smack/smack_lsm.c | 24 +++++++++++++----------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/security/smack/smack.h b/security/smack/smack.h index 7629eaeb1fb281..67ccb7b2b89bdd 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -299,6 +299,16 @@ static inline struct smack_known *smk_of_task(const struct task_smack *tsp) return tsp->smk_task; } +static inline struct smack_known *smk_of_task_struct(const struct task_struct *t) +{ + struct smack_known *skp; + + rcu_read_lock(); + skp = smk_of_task(__task_cred(t)->security); + rcu_read_unlock(); + return skp; +} + /* * Present a pointer to the forked smack label entry in an task blob. */ diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index f60ded3a8da178..a0ccce4e46f8b9 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -43,8 +43,6 @@ #include #include "smack.h" -#define task_security(task) (task_cred_xxx((task), security)) - #define TRANS_TRUE "TRUE" #define TRANS_TRUE_SIZE 4 @@ -123,7 +121,7 @@ static int smk_bu_current(char *note, struct smack_known *oskp, static int smk_bu_task(struct task_struct *otp, int mode, int rc) { struct task_smack *tsp = current_security(); - struct task_smack *otsp = task_security(otp); + struct smack_known *smk_task = smk_of_task_struct(otp); char acc[SMK_NUM_ACCESS_TYPE + 1]; if (rc <= 0) @@ -131,7 +129,7 @@ static int smk_bu_task(struct task_struct *otp, int mode, int rc) smk_bu_mode(mode, acc); pr_info("Smack Bringup: (%s %s %s) %s to %s\n", - tsp->smk_task->smk_known, otsp->smk_task->smk_known, acc, + tsp->smk_task->smk_known, smk_task->smk_known, acc, current->comm, otp->comm); return 0; } @@ -352,7 +350,8 @@ static int smk_ptrace_rule_check(struct task_struct *tracer, saip = &ad; } - tsp = task_security(tracer); + rcu_read_lock(); + tsp = __task_cred(tracer)->security; tracer_known = smk_of_task(tsp); if ((mode & PTRACE_MODE_ATTACH) && @@ -372,11 +371,14 @@ static int smk_ptrace_rule_check(struct task_struct *tracer, tracee_known->smk_known, 0, rc, saip); + rcu_read_unlock(); return rc; } /* In case of rule==SMACK_PTRACE_DEFAULT or mode==PTRACE_MODE_READ */ rc = smk_tskacc(tsp, tracee_known, smk_ptrace_mode(mode), saip); + + rcu_read_unlock(); return rc; } @@ -403,7 +405,7 @@ static int smack_ptrace_access_check(struct task_struct *ctp, unsigned int mode) if (rc != 0) return rc; - skp = smk_of_task(task_security(ctp)); + skp = smk_of_task_struct(ctp); rc = smk_ptrace_rule_check(current, skp, mode, __func__); return rc; @@ -1830,7 +1832,7 @@ static int smk_curacc_on_task(struct task_struct *p, int access, const char *caller) { struct smk_audit_info ad; - struct smack_known *skp = smk_of_task(task_security(p)); + struct smack_known *skp = smk_of_task_struct(p); int rc; smk_ad_init(&ad, caller, LSM_AUDIT_DATA_TASK); @@ -1883,7 +1885,7 @@ static int smack_task_getsid(struct task_struct *p) */ static void smack_task_getsecid(struct task_struct *p, u32 *secid) { - struct smack_known *skp = smk_of_task(task_security(p)); + struct smack_known *skp = smk_of_task_struct(p); *secid = skp->smk_secid; } @@ -1990,7 +1992,7 @@ static int smack_task_kill(struct task_struct *p, struct siginfo *info, { struct smk_audit_info ad; struct smack_known *skp; - struct smack_known *tkp = smk_of_task(task_security(p)); + struct smack_known *tkp = smk_of_task_struct(p); int rc; smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); @@ -2044,7 +2046,7 @@ static int smack_task_wait(struct task_struct *p) static void smack_task_to_inode(struct task_struct *p, struct inode *inode) { struct inode_smack *isp = inode->i_security; - struct smack_known *skp = smk_of_task(task_security(p)); + struct smack_known *skp = smk_of_task_struct(p); isp->smk_inode = skp; } @@ -3226,7 +3228,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode) */ static int smack_getprocattr(struct task_struct *p, char *name, char **value) { - struct smack_known *skp = smk_of_task(task_security(p)); + struct smack_known *skp = smk_of_task_struct(p); char *cp; int slen; From 961be7ef6963806cb978ccd6acf6bf84b0c63346 Mon Sep 17 00:00:00 2001 From: Peter Huewe Date: Thu, 22 Jan 2015 00:42:50 +0100 Subject: [PATCH 183/601] tpm/tpm_tis: Add missing ifdef CONFIG_ACPI for pnp_acpi_device This fixes a build failure if CONFIG_PNP is set but CONFIG_ACPI is not: drivers/char/tpm/tpm_tis.c: In function ?tpm_tis_pnp_init?: drivers/char/tpm/tpm_tis.c:912:45: error: invalid type argument of ?->? (have ?int?) acpi_dev_handle = pnp_acpi_device(pnp_dev)->handle; If CONFIG_PNPACPI is not set pnp_acpi_device is defined as 0 and thus accesing the handle is not possible. Fixes: 0dc553652102 ("tpm: fix raciness of PPI interface lookup") Reported-by: Jim Davis Signed-off-by: Peter Huewe Signed-off-by: James Morris --- drivers/char/tpm/tpm_tis.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index 20a61bc98db873..6725bef7cb9691 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -908,8 +908,10 @@ static int tpm_tis_pnp_init(struct pnp_dev *pnp_dev, if (is_itpm(pnp_dev)) itpm = true; +#ifdef CONFIG_ACPI if (pnp_acpi_device(pnp_dev)) acpi_dev_handle = pnp_acpi_device(pnp_dev)->handle; +#endif return tpm_tis_init(&pnp_dev->dev, acpi_dev_handle, start, len, irq); } From 601fad35d21c1fe9acb09f8bc380a805ab1763ff Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 22 Jan 2015 16:49:03 +1100 Subject: [PATCH 184/601] powerpc: Update G5 defconfig Add CGROUPS and DEVTMPFS, which allows booting with newer userspaces. Signed-off-by: Michael Ellerman --- arch/powerpc/configs/g5_defconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig index 5e0eed26137ed6..1d9ad8500909e2 100644 --- a/arch/powerpc/configs/g5_defconfig +++ b/arch/powerpc/configs/g5_defconfig @@ -9,6 +9,7 @@ CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_CGROUPS=y CONFIG_BLK_DEV_INITRD=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y @@ -52,6 +53,8 @@ CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_CT_NETLINK=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=y From c02d35067307dc65ebc52d1f5855102da184146b Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Tue, 13 Jan 2015 01:00:20 +0000 Subject: [PATCH 185/601] powerpc/ps3: Add empty repository highmem routines To avoid the need for preprocessor conditionals in C source files add a set of empty inline repository highmem write routines to platform.h that are used when CONFIG_PS3_REPOSITORY_WRITE is not defined. Signed-off-by: Geoff Levand Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/ps3/platform.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/platforms/ps3/platform.h b/arch/powerpc/platforms/ps3/platform.h index d71329a8e325af..1809cfc562ee66 100644 --- a/arch/powerpc/platforms/ps3/platform.h +++ b/arch/powerpc/platforms/ps3/platform.h @@ -196,6 +196,7 @@ int ps3_repository_read_highmem_size(unsigned int region_index, int ps3_repository_read_highmem_info(unsigned int region_index, u64 *highmem_base, u64 *highmem_size); +#if defined (CONFIG_PS3_REPOSITORY_WRITE) int ps3_repository_write_highmem_region_count(unsigned int region_count); int ps3_repository_write_highmem_base(unsigned int region_index, u64 highmem_base); @@ -204,6 +205,18 @@ int ps3_repository_write_highmem_size(unsigned int region_index, int ps3_repository_write_highmem_info(unsigned int region_index, u64 highmem_base, u64 highmem_size); int ps3_repository_delete_highmem_info(unsigned int region_index); +#else +static inline int ps3_repository_write_highmem_region_count( + unsigned int region_count) {return 0;} +static inline int ps3_repository_write_highmem_base(unsigned int region_index, + u64 highmem_base) {return 0;} +static inline int ps3_repository_write_highmem_size(unsigned int region_index, + u64 highmem_size) {return 0;} +static inline int ps3_repository_write_highmem_info(unsigned int region_index, + u64 highmem_base, u64 highmem_size) {return 0;} +static inline int ps3_repository_delete_highmem_info(unsigned int region_index) + {return 0;} +#endif /* repository pme info */ From d4b18bd675b850fbf395304433952130034bdd89 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Tue, 13 Jan 2015 01:00:20 +0000 Subject: [PATCH 186/601] powerpc/ps3: Add ps3_mm_set_repository_highmem Add the new routine ps3_mm_set_repository_highmem() that saves highmem info to the LV1 hypervisor registry so that the info will be available to second stage OS's loaded by petitboot/kexec. FreeBSD and some Linux derivatives use this feature. Also, move the existing ps3_mm_get_repository_highmem() routine up in the source file. This implementation of ps3_mm_set_repository_highmem() assumes the repository will have a single highmem region entry (at index 0). Signed-off-by: Geoff Levand Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/ps3/mm.c | 68 ++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 30 deletions(-) diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 0c9f643d9e2ac0..04c1b938d7624d 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -223,6 +223,44 @@ void ps3_mm_vas_destroy(void) } } +static int ps3_mm_get_repository_highmem(struct mem_region *r) +{ + int result; + + /* Assume a single highmem region. */ + + result = ps3_repository_read_highmem_info(0, &r->base, &r->size); + + if (result) + goto zero_region; + + if (!r->base || !r->size) { + result = -1; + goto zero_region; + } + + r->offset = r->base - map.rm.size; + + DBG("%s:%d: Found high region in repository: %llxh %llxh\n", + __func__, __LINE__, r->base, r->size); + + return 0; + +zero_region: + DBG("%s:%d: No high region in repository.\n", __func__, __LINE__); + + r->size = r->base = r->offset = 0; + return result; +} + +static int ps3_mm_set_repository_highmem(const struct mem_region *r) +{ + /* Assume a single highmem region. */ + + return r ? ps3_repository_write_highmem_info(0, r->base, r->size) : + ps3_repository_write_highmem_info(0, 0, 0); +} + /** * ps3_mm_region_create - create a memory region in the vas * @r: pointer to a struct mem_region to accept initialized values @@ -293,36 +331,6 @@ static void ps3_mm_region_destroy(struct mem_region *r) } } -static int ps3_mm_get_repository_highmem(struct mem_region *r) -{ - int result; - - /* Assume a single highmem region. */ - - result = ps3_repository_read_highmem_info(0, &r->base, &r->size); - - if (result) - goto zero_region; - - if (!r->base || !r->size) { - result = -1; - goto zero_region; - } - - r->offset = r->base - map.rm.size; - - DBG("%s:%d: Found high region in repository: %llxh %llxh\n", - __func__, __LINE__, r->base, r->size); - - return 0; - -zero_region: - DBG("%s:%d: No high region in repository.\n", __func__, __LINE__); - - r->size = r->base = r->offset = 0; - return result; -} - /*============================================================================*/ /* dma routines */ /*============================================================================*/ From 5ae74630ee4fa98bbba7a7cd8ee6919fd9f38561 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Tue, 13 Jan 2015 01:00:20 +0000 Subject: [PATCH 187/601] powerpc/ps3: Write highmem info to repository Add calls to the ps3_mm_set_repository_highmem() routine when the ps3 r1 highmem region is either created or destroyed. Signed-off-by: Geoff Levand Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/ps3/mm.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 04c1b938d7624d..b0f34663b1aecf 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -329,6 +329,7 @@ static void ps3_mm_region_destroy(struct mem_region *r) r->size = r->base = r->offset = 0; map.total = map.rm.size; } + ps3_mm_set_repository_highmem(NULL); } /*============================================================================*/ @@ -1218,8 +1219,12 @@ void __init ps3_mm_init(void) /* Check if we got the highmem region from an earlier boot step */ - if (ps3_mm_get_repository_highmem(&map.r1)) - ps3_mm_region_create(&map.r1, map.total - map.rm.size); + if (ps3_mm_get_repository_highmem(&map.r1)) { + result = ps3_mm_region_create(&map.r1, map.total - map.rm.size); + + if (!result) + ps3_mm_set_repository_highmem(&map.r1); + } /* correct map.total for the real total amount of memory we use */ map.total = map.rm.size + map.r1.size; From 6baa5ecd6e9631a71423578a4a95dc8b179df386 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Tue, 13 Jan 2015 01:00:20 +0000 Subject: [PATCH 188/601] powerpc/ps3: Fix vuart sparse warnings Fix sparse warnings like these: drivers/ps3/ps3-vuart.c: warning: symbol 'ps3_vuart_disable_interrupt_tx' was not declared. Should it be static? Signed-off-by: Geoff Levand Signed-off-by: Michael Ellerman --- drivers/ps3/ps3-vuart.c | 5 ----- drivers/ps3/vuart.h | 16 ++++++++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/ps3/ps3-vuart.c b/drivers/ps3/ps3-vuart.c index bc1e5139ba2957..d6db822bef8483 100644 --- a/drivers/ps3/ps3-vuart.c +++ b/drivers/ps3/ps3-vuart.c @@ -151,11 +151,6 @@ static void __maybe_unused _dump_port_params(unsigned int port_number, #endif } -struct vuart_triggers { - unsigned long rx; - unsigned long tx; -}; - int ps3_vuart_get_triggers(struct ps3_system_bus_device *dev, struct vuart_triggers *trig) { diff --git a/drivers/ps3/vuart.h b/drivers/ps3/vuart.h index eb7f6d94a8908d..23358b7193190c 100644 --- a/drivers/ps3/vuart.h +++ b/drivers/ps3/vuart.h @@ -82,4 +82,20 @@ void ps3_vuart_cancel_async(struct ps3_system_bus_device *dev); void ps3_vuart_clear_rx_bytes(struct ps3_system_bus_device *dev, unsigned int bytes); +struct vuart_triggers { + unsigned long rx; + unsigned long tx; +}; + +int ps3_vuart_get_triggers(struct ps3_system_bus_device *dev, + struct vuart_triggers *trig); +int ps3_vuart_set_triggers(struct ps3_system_bus_device *dev, unsigned int tx, + unsigned int rx); +int ps3_vuart_enable_interrupt_tx(struct ps3_system_bus_device *dev); +int ps3_vuart_disable_interrupt_tx(struct ps3_system_bus_device *dev); +int ps3_vuart_enable_interrupt_rx(struct ps3_system_bus_device *dev); +int ps3_vuart_disable_interrupt_rx(struct ps3_system_bus_device *dev); +int ps3_vuart_enable_interrupt_disconnect(struct ps3_system_bus_device *dev); +int ps3_vuart_disable_interrupt_disconnect(struct ps3_system_bus_device *dev); + #endif From fcc480759a91769e0a9a1086cb9cb4f618277d3d Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Tue, 13 Jan 2015 01:00:20 +0000 Subject: [PATCH 189/601] powerpc/ps3: Fix sys-manager-core sparse warnings Fixes warnings like these: drivers/ps3/sys-manager-core.c: error: symbol 'ps3_sys_manager_power_off' redeclared with different type Signed-off-by: Geoff Levand Signed-off-by: Michael Ellerman --- drivers/ps3/sys-manager-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/ps3/sys-manager-core.c b/drivers/ps3/sys-manager-core.c index 0e41737ea835a6..c429ffca1ab763 100644 --- a/drivers/ps3/sys-manager-core.c +++ b/drivers/ps3/sys-manager-core.c @@ -47,7 +47,7 @@ void ps3_sys_manager_register_ops(const struct ps3_sys_manager_ops *ops) } EXPORT_SYMBOL_GPL(ps3_sys_manager_register_ops); -void ps3_sys_manager_power_off(void) +void __noreturn ps3_sys_manager_power_off(void) { if (ps3_sys_manager_ops.power_off) ps3_sys_manager_ops.power_off(ps3_sys_manager_ops.dev); @@ -55,7 +55,7 @@ void ps3_sys_manager_power_off(void) ps3_sys_manager_halt(); } -void ps3_sys_manager_restart(void) +void __noreturn ps3_sys_manager_restart(void) { if (ps3_sys_manager_ops.restart) ps3_sys_manager_ops.restart(ps3_sys_manager_ops.dev); @@ -63,7 +63,7 @@ void ps3_sys_manager_restart(void) ps3_sys_manager_halt(); } -void ps3_sys_manager_halt(void) +void __noreturn ps3_sys_manager_halt(void) { pr_emerg("System Halted, OK to turn off power\n"); local_irq_disable(); From d83fb87b2e02ca34abf281c181682cbdfae23a80 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 20 Jan 2015 18:08:36 +1100 Subject: [PATCH 190/601] powerpc/ps3: Enable CONFIG_PS3_REPOSITORY_WRITE in ps3_defconfig Signed-off-by: Michael Ellerman --- arch/powerpc/configs/ps3_defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 1078851aa4dc5a..adc14e813a4954 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -21,6 +21,8 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_PPC_PSERIES is not set # CONFIG_PPC_PMAC is not set CONFIG_PPC_PS3=y +CONFIG_PS3_ADVANCED=y +CONFIG_PS3_REPOSITORY_WRITE=y CONFIG_PS3_DISK=y CONFIG_PS3_ROM=y CONFIG_PS3_FLASH=y From d3383aaae9800b9e13e25b71f70dff3814d10373 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 8 Jan 2015 22:36:47 +0000 Subject: [PATCH 191/601] cxl: remove redundant increment of hwirq hwirq has not been initialized, however it is being incremented and also not being referenced in a loop. This error was detected with cppcheck: [drivers/misc/cxl/irq.c:439]: (error) Uninitialized variable: hwirq Commit 80fa93fce37d ("cxl: Name interrupts in /proc/interrupt") introduced this error. This is a simple fix that removes the redundant increment. Signed-off-by: Colin Ian King Acked-By: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index c294925f73ee4d..68ab608ecbe731 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -436,7 +436,7 @@ int afu_register_irqs(struct cxl_context *ctx, u32 count) */ INIT_LIST_HEAD(&ctx->irq_names); for (r = 1; r < CXL_IRQ_RANGES; r++) { - for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { + for (i = 0; i < ctx->irqs.range[r]; i++) { irq_name = kmalloc(sizeof(struct cxl_irq_name), GFP_KERNEL); if (!irq_name) From 9bcf28cdb28e6a793c4e59f0a42c66fe241993a8 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Fri, 9 Jan 2015 20:34:36 +1100 Subject: [PATCH 192/601] cxl: Add tracepoints This patch adds tracepoints throughout the cxl driver, which can provide insight into: - Context lifetimes - Commands sent to the PSL and AFU and their completion status - Segment and page table misses and their resolution - PSL and AFU interrupts - slbia calls from the powerpc copro_fault code These tracepoints are mostly intended to aid in debugging (particularly for new AFU designs), and may be useful standalone or in conjunction with hardware traces collected by the PSL (read out via the trace interface in debugfs) and AFUs. Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/Makefile | 5 +- drivers/misc/cxl/fault.c | 5 + drivers/misc/cxl/file.c | 3 + drivers/misc/cxl/irq.c | 4 + drivers/misc/cxl/main.c | 2 + drivers/misc/cxl/native.c | 38 +++- drivers/misc/cxl/trace.c | 13 ++ drivers/misc/cxl/trace.h | 459 ++++++++++++++++++++++++++++++++++++++ 8 files changed, 520 insertions(+), 9 deletions(-) create mode 100644 drivers/misc/cxl/trace.c create mode 100644 drivers/misc/cxl/trace.h diff --git a/drivers/misc/cxl/Makefile b/drivers/misc/cxl/Makefile index 165e98fef2c2d3..edb494d3ff271a 100644 --- a/drivers/misc/cxl/Makefile +++ b/drivers/misc/cxl/Makefile @@ -1,3 +1,6 @@ -cxl-y += main.o file.o irq.o fault.o native.o context.o sysfs.o debugfs.o pci.o +cxl-y += main.o file.o irq.o fault.o native.o context.o sysfs.o debugfs.o pci.o trace.o obj-$(CONFIG_CXL) += cxl.o obj-$(CONFIG_CXL_BASE) += base.o + +# For tracepoints to include our trace.h from tracepoint infrastructure: +CFLAGS_trace.o := -I$(src) diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index e010302a192bc3..5286b8b704f559 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -20,6 +20,7 @@ #include #include "cxl.h" +#include "trace.h" static bool sste_matches(struct cxl_sste *sste, struct copro_slb *slb) { @@ -75,6 +76,7 @@ static void cxl_load_segment(struct cxl_context *ctx, struct copro_slb *slb) pr_devel("CXL Populating SST[%li]: %#llx %#llx\n", sste - ctx->sstp, slb->vsid, slb->esid); + trace_cxl_ste_write(ctx, sste - ctx->sstp, slb->esid, slb->vsid); sste->vsid_data = cpu_to_be64(slb->vsid); sste->esid_data = cpu_to_be64(slb->esid); @@ -116,6 +118,7 @@ static int cxl_handle_segment_miss(struct cxl_context *ctx, int rc; pr_devel("CXL interrupt: Segment fault pe: %i ea: %#llx\n", ctx->pe, ea); + trace_cxl_ste_miss(ctx, ea); if ((rc = cxl_fault_segment(ctx, mm, ea))) cxl_ack_ae(ctx); @@ -135,6 +138,8 @@ static void cxl_handle_page_fault(struct cxl_context *ctx, int result; unsigned long access, flags, inv_flags = 0; + trace_cxl_pte_miss(ctx, dsisr, dar); + if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) { pr_devel("copro_handle_mm_fault failed: %#x\n", result); return cxl_ack_ae(ctx); diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index b09be4462294e0..8953de6fde2d10 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -23,6 +23,7 @@ #include #include "cxl.h" +#include "trace.h" #define CXL_NUM_MINORS 256 /* Total to reserve */ #define CXL_DEV_MINORS 13 /* 1 control + 4 AFUs * 3 (dedicated/master/shared) */ @@ -184,6 +185,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, */ ctx->pid = get_pid(get_task_pid(current, PIDTYPE_PID)); + trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr); + if ((rc = cxl_attach_process(ctx, false, work.work_element_descriptor, amr))) { afu_release_irqs(ctx); diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index 68ab608ecbe731..f0836472403b8f 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -17,6 +17,7 @@ #include #include "cxl.h" +#include "trace.h" /* XXX: This is implementation specific */ static irqreturn_t handle_psl_slice_error(struct cxl_context *ctx, u64 dsisr, u64 errstat) @@ -100,6 +101,8 @@ static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info) dsisr = irq_info->dsisr; dar = irq_info->dar; + trace_cxl_psl_irq(ctx, irq, dsisr, dar); + pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar); if (dsisr & CXL_PSL_DSISR_An_DS) { @@ -237,6 +240,7 @@ static irqreturn_t cxl_irq_afu(int irq, void *data) return IRQ_HANDLED; } + trace_cxl_afu_irq(ctx, afu_irq, irq, hwirq); pr_devel("Received AFU interrupt %i for pe: %i (virq %i hwirq %lx)\n", afu_irq, ctx->pe, irq, hwirq); diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 4cde9b661642a5..8ccddceead6671 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -23,6 +23,7 @@ #include #include "cxl.h" +#include "trace.h" static DEFINE_SPINLOCK(adapter_idr_lock); static DEFINE_IDR(cxl_adapter_idr); @@ -48,6 +49,7 @@ static inline void _cxl_slbia(struct cxl_context *ctx, struct mm_struct *mm) ctx->afu->adapter->adapter_num, ctx->afu->slice, ctx->pe); spin_lock_irqsave(&ctx->sste_lock, flags); + trace_cxl_slbia(ctx); memset(ctx->sstp, 0, ctx->sst_size); spin_unlock_irqrestore(&ctx->sste_lock, flags); mb(); diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 0f24fa5b0de38f..29185fc6127670 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -18,24 +18,28 @@ #include #include "cxl.h" +#include "trace.h" static int afu_control(struct cxl_afu *afu, u64 command, u64 result, u64 mask, bool enabled) { u64 AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + int rc = 0; spin_lock(&afu->afu_cntl_lock); pr_devel("AFU command starting: %llx\n", command); + trace_cxl_afu_ctrl(afu, command); + cxl_p2n_write(afu, CXL_AFU_Cntl_An, AFU_Cntl | command); AFU_Cntl = cxl_p2n_read(afu, CXL_AFU_Cntl_An); while ((AFU_Cntl & mask) != result) { if (time_after_eq(jiffies, timeout)) { dev_warn(&afu->dev, "WARNING: AFU control timed out!\n"); - spin_unlock(&afu->afu_cntl_lock); - return -EBUSY; + rc = -EBUSY; + goto out; } pr_devel_ratelimited("AFU control... (0x%.16llx)\n", AFU_Cntl | command); @@ -44,9 +48,11 @@ static int afu_control(struct cxl_afu *afu, u64 command, }; pr_devel("AFU command complete: %llx\n", command); afu->enabled = enabled; +out: + trace_cxl_afu_ctrl_done(afu, command, rc); spin_unlock(&afu->afu_cntl_lock); - return 0; + return rc; } static int afu_enable(struct cxl_afu *afu) @@ -91,6 +97,9 @@ int cxl_psl_purge(struct cxl_afu *afu) u64 dsisr, dar; u64 start, end; unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + int rc = 0; + + trace_cxl_psl_ctrl(afu, CXL_PSL_SCNTL_An_Pc); pr_devel("PSL purge request\n"); @@ -107,7 +116,8 @@ int cxl_psl_purge(struct cxl_afu *afu) == CXL_PSL_SCNTL_An_Ps_Pending) { if (time_after_eq(jiffies, timeout)) { dev_warn(&afu->dev, "WARNING: PSL Purge timed out!\n"); - return -EBUSY; + rc = -EBUSY; + goto out; } dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); pr_devel_ratelimited("PSL purging... PSL_CNTL: 0x%.16llx PSL_DSISR: 0x%.16llx\n", PSL_CNTL, dsisr); @@ -128,7 +138,9 @@ int cxl_psl_purge(struct cxl_afu *afu) cxl_p1n_write(afu, CXL_PSL_SCNTL_An, PSL_CNTL & ~CXL_PSL_SCNTL_An_Pc); - return 0; +out: + trace_cxl_psl_ctrl_done(afu, CXL_PSL_SCNTL_An_Pc, rc); + return rc; } static int spa_max_procs(int spa_size) @@ -279,6 +291,9 @@ static int do_process_element_cmd(struct cxl_context *ctx, { u64 state; unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT); + int rc = 0; + + trace_cxl_llcmd(ctx, cmd); WARN_ON(!ctx->afu->enabled); @@ -290,12 +305,14 @@ static int do_process_element_cmd(struct cxl_context *ctx, while (1) { if (time_after_eq(jiffies, timeout)) { dev_warn(&ctx->afu->dev, "WARNING: Process Element Command timed out!\n"); - return -EBUSY; + rc = -EBUSY; + goto out; } state = be64_to_cpup(ctx->afu->sw_command_status); if (state == ~0ULL) { pr_err("cxl: Error adding process element to AFU\n"); - return -1; + rc = -1; + goto out; } if ((state & (CXL_SPA_SW_CMD_MASK | CXL_SPA_SW_STATE_MASK | CXL_SPA_SW_LINK_MASK)) == (cmd | (cmd >> 16) | ctx->pe)) @@ -310,7 +327,9 @@ static int do_process_element_cmd(struct cxl_context *ctx, schedule(); } - return 0; +out: + trace_cxl_llcmd_done(ctx, cmd, rc); + return rc; } static int add_process_element(struct cxl_context *ctx) @@ -630,6 +649,8 @@ static inline int detach_process_native_afu_directed(struct cxl_context *ctx) int cxl_detach_process(struct cxl_context *ctx) { + trace_cxl_detach(ctx); + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) return detach_process_native_dedicated(ctx); @@ -668,6 +689,7 @@ static void recover_psl_err(struct cxl_afu *afu, u64 errstat) int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask) { + trace_cxl_psl_irq_ack(ctx, tfc); if (tfc) cxl_p2n_write(ctx->afu, CXL_PSL_TFC_An, tfc); if (psl_reset_mask) diff --git a/drivers/misc/cxl/trace.c b/drivers/misc/cxl/trace.c new file mode 100644 index 00000000000000..c2b06d319e6e70 --- /dev/null +++ b/drivers/misc/cxl/trace.c @@ -0,0 +1,13 @@ +/* + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "trace.h" +#endif diff --git a/drivers/misc/cxl/trace.h b/drivers/misc/cxl/trace.h new file mode 100644 index 00000000000000..ae434d87887e86 --- /dev/null +++ b/drivers/misc/cxl/trace.h @@ -0,0 +1,459 @@ +/* + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cxl + +#if !defined(_CXL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _CXL_TRACE_H + +#include + +#include "cxl.h" + +#define DSISR_FLAGS \ + { CXL_PSL_DSISR_An_DS, "DS" }, \ + { CXL_PSL_DSISR_An_DM, "DM" }, \ + { CXL_PSL_DSISR_An_ST, "ST" }, \ + { CXL_PSL_DSISR_An_UR, "UR" }, \ + { CXL_PSL_DSISR_An_PE, "PE" }, \ + { CXL_PSL_DSISR_An_AE, "AE" }, \ + { CXL_PSL_DSISR_An_OC, "OC" }, \ + { CXL_PSL_DSISR_An_M, "M" }, \ + { CXL_PSL_DSISR_An_P, "P" }, \ + { CXL_PSL_DSISR_An_A, "A" }, \ + { CXL_PSL_DSISR_An_S, "S" }, \ + { CXL_PSL_DSISR_An_K, "K" } + +#define TFC_FLAGS \ + { CXL_PSL_TFC_An_A, "A" }, \ + { CXL_PSL_TFC_An_C, "C" }, \ + { CXL_PSL_TFC_An_AE, "AE" }, \ + { CXL_PSL_TFC_An_R, "R" } + +#define LLCMD_NAMES \ + { CXL_SPA_SW_CMD_TERMINATE, "TERMINATE" }, \ + { CXL_SPA_SW_CMD_REMOVE, "REMOVE" }, \ + { CXL_SPA_SW_CMD_SUSPEND, "SUSPEND" }, \ + { CXL_SPA_SW_CMD_RESUME, "RESUME" }, \ + { CXL_SPA_SW_CMD_ADD, "ADD" }, \ + { CXL_SPA_SW_CMD_UPDATE, "UPDATE" } + +#define AFU_COMMANDS \ + { 0, "DISABLE" }, \ + { CXL_AFU_Cntl_An_E, "ENABLE" }, \ + { CXL_AFU_Cntl_An_RA, "RESET" } + +#define PSL_COMMANDS \ + { CXL_PSL_SCNTL_An_Pc, "PURGE" }, \ + { CXL_PSL_SCNTL_An_Sc, "SUSPEND" } + + +DECLARE_EVENT_CLASS(cxl_pe_class, + TP_PROTO(struct cxl_context *ctx), + + TP_ARGS(ctx), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + ), + + TP_printk("afu%i.%i pe=%i", + __entry->card, + __entry->afu, + __entry->pe + ) +); + + +TRACE_EVENT(cxl_attach, + TP_PROTO(struct cxl_context *ctx, u64 wed, s16 num_interrupts, u64 amr), + + TP_ARGS(ctx, wed, num_interrupts, amr), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(pid_t, pid) + __field(u64, wed) + __field(u64, amr) + __field(s16, num_interrupts) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->pid = pid_nr(ctx->pid); + __entry->wed = wed; + __entry->amr = amr; + __entry->num_interrupts = num_interrupts; + ), + + TP_printk("afu%i.%i pid=%i pe=%i wed=0x%.16llx irqs=%i amr=0x%llx", + __entry->card, + __entry->afu, + __entry->pid, + __entry->pe, + __entry->wed, + __entry->num_interrupts, + __entry->amr + ) +); + +DEFINE_EVENT(cxl_pe_class, cxl_detach, + TP_PROTO(struct cxl_context *ctx), + TP_ARGS(ctx) +); + +TRACE_EVENT(cxl_afu_irq, + TP_PROTO(struct cxl_context *ctx, int afu_irq, int virq, irq_hw_number_t hwirq), + + TP_ARGS(ctx, afu_irq, virq, hwirq), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u16, afu_irq) + __field(int, virq) + __field(irq_hw_number_t, hwirq) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->afu_irq = afu_irq; + __entry->virq = virq; + __entry->hwirq = hwirq; + ), + + TP_printk("afu%i.%i pe=%i afu_irq=%i virq=%i hwirq=0x%lx", + __entry->card, + __entry->afu, + __entry->pe, + __entry->afu_irq, + __entry->virq, + __entry->hwirq + ) +); + +TRACE_EVENT(cxl_psl_irq, + TP_PROTO(struct cxl_context *ctx, int irq, u64 dsisr, u64 dar), + + TP_ARGS(ctx, irq, dsisr, dar), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(int, irq) + __field(u64, dsisr) + __field(u64, dar) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->irq = irq; + __entry->dsisr = dsisr; + __entry->dar = dar; + ), + + TP_printk("afu%i.%i pe=%i irq=%i dsisr=%s dar=0x%.16llx", + __entry->card, + __entry->afu, + __entry->pe, + __entry->irq, + __print_flags(__entry->dsisr, "|", DSISR_FLAGS), + __entry->dar + ) +); + +TRACE_EVENT(cxl_psl_irq_ack, + TP_PROTO(struct cxl_context *ctx, u64 tfc), + + TP_ARGS(ctx, tfc), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, tfc) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->tfc = tfc; + ), + + TP_printk("afu%i.%i pe=%i tfc=%s", + __entry->card, + __entry->afu, + __entry->pe, + __print_flags(__entry->tfc, "|", TFC_FLAGS) + ) +); + +TRACE_EVENT(cxl_ste_miss, + TP_PROTO(struct cxl_context *ctx, u64 dar), + + TP_ARGS(ctx, dar), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, dar) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->dar = dar; + ), + + TP_printk("afu%i.%i pe=%i dar=0x%.16llx", + __entry->card, + __entry->afu, + __entry->pe, + __entry->dar + ) +); + +TRACE_EVENT(cxl_ste_write, + TP_PROTO(struct cxl_context *ctx, unsigned int idx, u64 e, u64 v), + + TP_ARGS(ctx, idx, e, v), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(unsigned int, idx) + __field(u64, e) + __field(u64, v) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->idx = idx; + __entry->e = e; + __entry->v = v; + ), + + TP_printk("afu%i.%i pe=%i SSTE[%i] E=0x%.16llx V=0x%.16llx", + __entry->card, + __entry->afu, + __entry->pe, + __entry->idx, + __entry->e, + __entry->v + ) +); + +TRACE_EVENT(cxl_pte_miss, + TP_PROTO(struct cxl_context *ctx, u64 dsisr, u64 dar), + + TP_ARGS(ctx, dsisr, dar), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, dsisr) + __field(u64, dar) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->dsisr = dsisr; + __entry->dar = dar; + ), + + TP_printk("afu%i.%i pe=%i dsisr=%s dar=0x%.16llx", + __entry->card, + __entry->afu, + __entry->pe, + __print_flags(__entry->dsisr, "|", DSISR_FLAGS), + __entry->dar + ) +); + +TRACE_EVENT(cxl_llcmd, + TP_PROTO(struct cxl_context *ctx, u64 cmd), + + TP_ARGS(ctx, cmd), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, cmd) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->cmd = cmd; + ), + + TP_printk("afu%i.%i pe=%i cmd=%s", + __entry->card, + __entry->afu, + __entry->pe, + __print_symbolic_u64(__entry->cmd, LLCMD_NAMES) + ) +); + +TRACE_EVENT(cxl_llcmd_done, + TP_PROTO(struct cxl_context *ctx, u64 cmd, int rc), + + TP_ARGS(ctx, cmd, rc), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u16, pe) + __field(u64, cmd) + __field(int, rc) + ), + + TP_fast_assign( + __entry->card = ctx->afu->adapter->adapter_num; + __entry->afu = ctx->afu->slice; + __entry->pe = ctx->pe; + __entry->rc = rc; + __entry->cmd = cmd; + ), + + TP_printk("afu%i.%i pe=%i cmd=%s rc=%i", + __entry->card, + __entry->afu, + __entry->pe, + __print_symbolic_u64(__entry->cmd, LLCMD_NAMES), + __entry->rc + ) +); + +DECLARE_EVENT_CLASS(cxl_afu_psl_ctrl, + TP_PROTO(struct cxl_afu *afu, u64 cmd), + + TP_ARGS(afu, cmd), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u64, cmd) + ), + + TP_fast_assign( + __entry->card = afu->adapter->adapter_num; + __entry->afu = afu->slice; + __entry->cmd = cmd; + ), + + TP_printk("afu%i.%i cmd=%s", + __entry->card, + __entry->afu, + __print_symbolic_u64(__entry->cmd, AFU_COMMANDS) + ) +); + +DECLARE_EVENT_CLASS(cxl_afu_psl_ctrl_done, + TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc), + + TP_ARGS(afu, cmd, rc), + + TP_STRUCT__entry( + __field(u8, card) + __field(u8, afu) + __field(u64, cmd) + __field(int, rc) + ), + + TP_fast_assign( + __entry->card = afu->adapter->adapter_num; + __entry->afu = afu->slice; + __entry->rc = rc; + __entry->cmd = cmd; + ), + + TP_printk("afu%i.%i cmd=%s rc=%i", + __entry->card, + __entry->afu, + __print_symbolic_u64(__entry->cmd, AFU_COMMANDS), + __entry->rc + ) +); + +DEFINE_EVENT(cxl_afu_psl_ctrl, cxl_afu_ctrl, + TP_PROTO(struct cxl_afu *afu, u64 cmd), + TP_ARGS(afu, cmd) +); + +DEFINE_EVENT(cxl_afu_psl_ctrl_done, cxl_afu_ctrl_done, + TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc), + TP_ARGS(afu, cmd, rc) +); + +DEFINE_EVENT_PRINT(cxl_afu_psl_ctrl, cxl_psl_ctrl, + TP_PROTO(struct cxl_afu *afu, u64 cmd), + TP_ARGS(afu, cmd), + + TP_printk("psl%i.%i cmd=%s", + __entry->card, + __entry->afu, + __print_symbolic_u64(__entry->cmd, PSL_COMMANDS) + ) +); + +DEFINE_EVENT_PRINT(cxl_afu_psl_ctrl_done, cxl_psl_ctrl_done, + TP_PROTO(struct cxl_afu *afu, u64 cmd, int rc), + TP_ARGS(afu, cmd, rc), + + TP_printk("psl%i.%i cmd=%s rc=%i", + __entry->card, + __entry->afu, + __print_symbolic_u64(__entry->cmd, PSL_COMMANDS), + __entry->rc + ) +); + +DEFINE_EVENT(cxl_pe_class, cxl_slbia, + TP_PROTO(struct cxl_context *ctx), + TP_ARGS(ctx) +); + +#endif /* _CXL_TRACE_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace +#include From 49fd644c3b0014fb80c2b1db7d8c86a75c4c6b1f Mon Sep 17 00:00:00 2001 From: Philippe Bergheaud Date: Fri, 12 Dec 2014 11:28:53 +0100 Subject: [PATCH 193/601] cxl: Update CXL ABI documentation This fixes two typos and explains where shared attributes are stored. Signed-off-by: Philippe Bergheaud Acked-by: Michael Neuling Signed-off-by: Michael Ellerman --- Documentation/ABI/testing/sysfs-class-cxl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl index 554405ec1955e9..2ab97527e18696 100644 --- a/Documentation/ABI/testing/sysfs-class-cxl +++ b/Documentation/ABI/testing/sysfs-class-cxl @@ -1,3 +1,9 @@ +Note: Attributes that are shared between devices are stored in the directory +pointed to by the symlink device/. +Example: The real path of the attribute /sys/class/cxl/afu0.0s/irqs_max is +/sys/class/cxl/afu0.0s/device/irqs_max, i.e. /sys/class/cxl/afu0.0/irqs_max. + + Slave contexts (eg. /sys/class/cxl/afu0.0s): What: /sys/class/cxl//irqs_max @@ -67,7 +73,7 @@ Contact: linuxppc-dev@lists.ozlabs.org Description: read only Decimal value of the current version of the kernel/user API. -What: /sys/class/cxl//api_version_com +What: /sys/class/cxl//api_version_compatible Date: September 2014 Contact: linuxppc-dev@lists.ozlabs.org Description: read only @@ -75,7 +81,6 @@ Description: read only this this kernel supports. - Master contexts (eg. /sys/class/cxl/afu0.0m) What: /sys/class/cxl/m/mmio_size @@ -106,7 +111,7 @@ Contact: linuxppc-dev@lists.ozlabs.org Description: read only Identifies the CAIA Version the card implements. -What: /sys/class/cxl//psl_version +What: /sys/class/cxl//psl_revision Date: September 2014 Contact: linuxppc-dev@lists.ozlabs.org Description: read only From 4beb5421babee1204757b877622830c6aa31be6d Mon Sep 17 00:00:00 2001 From: Ryan Grimm Date: Mon, 19 Jan 2015 11:52:48 -0600 Subject: [PATCH 194/601] cxl: Use image state defaults for reloading FPGA Select defaults such that a PERST causes flash image reload. Select which image based on what the card is set up to load. CXL_VSEC_PERST_LOADS_IMAGE selects whether PERST assertion causes flash image load. CXL_VSEC_PERST_SELECT_USER selects which image is loaded on the next PERST. cxl_update_image_control writes these bits into the VSEC. Signed-off-by: Ryan Grimm Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/cxl.h | 1 + drivers/misc/cxl/pci.c | 42 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 0df04380bfd63b..518c4c6e6151e3 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -488,6 +488,7 @@ void cxl_release_one_irq(struct cxl *adapter, int hwirq); int cxl_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsigned int num); void cxl_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter); int cxl_setup_irq(struct cxl *adapter, unsigned int hwirq, unsigned int virq); +int cxl_update_image_control(struct cxl *adapter); /* common == phyp + powernv */ struct cxl_process_element_common { diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 2ccd0a91d486e1..014f4c928e4c5c 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -361,6 +361,41 @@ int cxl_setup_irq(struct cxl *adapter, unsigned int hwirq, return pnv_cxl_ioda_msi_setup(dev, hwirq, virq); } +int cxl_update_image_control(struct cxl *adapter) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + int rc; + int vsec; + u8 image_state; + + if (!(vsec = find_cxl_vsec(dev))) { + dev_err(&dev->dev, "ABORTING: CXL VSEC not found!\n"); + return -ENODEV; + } + + if ((rc = CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state))) { + dev_err(&dev->dev, "failed to read image state: %i\n", rc); + return rc; + } + + if (adapter->perst_loads_image) + image_state |= CXL_VSEC_PERST_LOADS_IMAGE; + else + image_state &= ~CXL_VSEC_PERST_LOADS_IMAGE; + + if (adapter->perst_select_user) + image_state |= CXL_VSEC_PERST_SELECT_USER; + else + image_state &= ~CXL_VSEC_PERST_SELECT_USER; + + if ((rc = CXL_WRITE_VSEC_IMAGE_STATE(dev, vsec, image_state))) { + dev_err(&dev->dev, "failed to update image control: %i\n", rc); + return rc; + } + + return 0; +} + int cxl_alloc_one_irq(struct cxl *adapter) { struct pci_dev *dev = to_pci_dev(adapter->dev.parent); @@ -770,8 +805,8 @@ static int cxl_read_vsec(struct cxl *adapter, struct pci_dev *dev) CXL_READ_VSEC_BASE_IMAGE(dev, vsec, &adapter->base_image); CXL_READ_VSEC_IMAGE_STATE(dev, vsec, &image_state); adapter->user_image_loaded = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED); - adapter->perst_loads_image = !!(image_state & CXL_VSEC_PERST_LOADS_IMAGE); - adapter->perst_select_user = !!(image_state & CXL_VSEC_PERST_SELECT_USER); + adapter->perst_loads_image = true; + adapter->perst_select_user = !!(image_state & CXL_VSEC_USER_IMAGE_LOADED); CXL_READ_VSEC_NAFUS(dev, vsec, &adapter->slices); CXL_READ_VSEC_AFU_DESC_OFF(dev, vsec, &afu_desc_off); @@ -879,6 +914,9 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev) if ((rc = cxl_vsec_looks_ok(adapter, dev))) goto err2; + if ((rc = cxl_update_image_control(adapter))) + goto err2; + if ((rc = cxl_map_adapter_regs(adapter, dev))) goto err2; From 95bc11bcd1428afdb48400ec84dc6d5a83926138 Mon Sep 17 00:00:00 2001 From: Ryan Grimm Date: Mon, 19 Jan 2015 11:52:49 -0600 Subject: [PATCH 195/601] cxl: Add image control to sysfs load_image_on_perst identifies whether a PERST will cause the image to be flashed to the card. And if so, which image. Valid entries are: "none", "user" and "factory". A value of "none" means PERST will not cause the image to be flashed. A power cycle to the pcie slot is required to load the image. "user" loads the user provided image and "factory" loads the factory image upon PERST. sysfs updates the cxl struct in the driver then calls cxl_update_image_control to write the vals in the VSEC. Signed-off-by: Ryan Grimm Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- Documentation/ABI/testing/sysfs-class-cxl | 14 ++++++++ drivers/misc/cxl/sysfs.c | 39 +++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl index 2ab97527e18696..5941ff38d4a316 100644 --- a/Documentation/ABI/testing/sysfs-class-cxl +++ b/Documentation/ABI/testing/sysfs-class-cxl @@ -132,3 +132,17 @@ Contact: linuxppc-dev@lists.ozlabs.org Description: read only Will return "user" or "factory" depending on the image loaded onto the card. + +What: /sys/class/cxl//load_image_on_perst +Date: December 2014 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read/write + Valid entries are "none", "user", and "factory". + "none" means PERST will not cause image to be loaded to the + card. A power cycle is required to load the image. + "none" could be useful for debugging because the trace arrays + are preserved. + "user" and "factory" means PERST will cause either the user or + user or factory image to be loaded. + Default is to reload on PERST whichever image the card has + loaded. diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c index 461bdbd5d48317..ed4ad461143c1a 100644 --- a/drivers/misc/cxl/sysfs.c +++ b/drivers/misc/cxl/sysfs.c @@ -56,11 +56,50 @@ static ssize_t image_loaded_show(struct device *device, return scnprintf(buf, PAGE_SIZE, "factory\n"); } +static ssize_t load_image_on_perst_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + if (!adapter->perst_loads_image) + return scnprintf(buf, PAGE_SIZE, "none\n"); + + if (adapter->perst_select_user) + return scnprintf(buf, PAGE_SIZE, "user\n"); + return scnprintf(buf, PAGE_SIZE, "factory\n"); +} + +static ssize_t load_image_on_perst_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl *adapter = to_cxl_adapter(device); + int rc; + + if (!strncmp(buf, "none", 4)) + adapter->perst_loads_image = false; + else if (!strncmp(buf, "user", 4)) { + adapter->perst_select_user = true; + adapter->perst_loads_image = true; + } else if (!strncmp(buf, "factory", 7)) { + adapter->perst_select_user = false; + adapter->perst_loads_image = true; + } else + return -EINVAL; + + if ((rc = cxl_update_image_control(adapter))) + return rc; + + return count; +} + static struct device_attribute adapter_attrs[] = { __ATTR_RO(caia_version), __ATTR_RO(psl_revision), __ATTR_RO(base_image), __ATTR_RO(image_loaded), + __ATTR_RW(load_image_on_perst), }; From 1212aa1c8c9ca34642f7737e1edaa96c9ce3d7dd Mon Sep 17 00:00:00 2001 From: Ryan Grimm Date: Mon, 19 Jan 2015 11:52:50 -0600 Subject: [PATCH 196/601] cxl: Enable CAPP recovery Turning snoops on is the last step in CAPP recovery. Sapphire is expected to have reinitialized the PHB and done the previous recovery steps. Add mode argument to opal call to do this. Driver can turn snoops off although it does not currently. Signed-off-by: Ryan Grimm Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal.h | 8 ++++++++ arch/powerpc/include/asm/pnv-pci.h | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 6 +++--- drivers/misc/cxl/pci.c | 8 +++++++- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index eb95b675109b64..2baf8a5925ca96 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -595,6 +595,14 @@ enum { OPAL_PHB3_NUM_PEST_REGS = 256 }; +/* CAPI modes for PHB */ +enum { + OPAL_PHB_CAPI_MODE_PCIE = 0, + OPAL_PHB_CAPI_MODE_CAPI = 1, + OPAL_PHB_CAPI_MODE_SNOOP_OFF = 2, + OPAL_PHB_CAPI_MODE_SNOOP_ON = 3, +}; + struct OpalIoPhbErrorCommon { __be32 version; __be32 ioType; diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index f09a22fa1bd794..3c00d648336d78 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -13,7 +13,7 @@ #include #include -int pnv_phb_to_cxl(struct pci_dev *dev); +int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode); int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, unsigned int virq); int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num); diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index fac88ed8a915eb..5d52d6f274f81d 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1468,7 +1468,7 @@ struct device_node *pnv_pci_to_phb_node(struct pci_dev *dev) } EXPORT_SYMBOL(pnv_pci_to_phb_node); -int pnv_phb_to_cxl(struct pci_dev *dev) +int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode) { struct pci_controller *hose = pci_bus_to_host(dev->bus); struct pnv_phb *phb = hose->private_data; @@ -1481,13 +1481,13 @@ int pnv_phb_to_cxl(struct pci_dev *dev) pe_info(pe, "Switching PHB to CXL\n"); - rc = opal_pci_set_phb_cxl_mode(phb->opal_id, 1, pe->pe_number); + rc = opal_pci_set_phb_cxl_mode(phb->opal_id, mode, pe->pe_number); if (rc) dev_err(&dev->dev, "opal_pci_set_phb_cxl_mode failed: %i\n", rc); return rc; } -EXPORT_SYMBOL(pnv_phb_to_cxl); +EXPORT_SYMBOL(pnv_phb_to_cxl_mode); /* Find PHB for cxl dev and allocate MSI hwirqs? * Returns the absolute hardware IRQ number diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 014f4c928e4c5c..a4a4e0217eed20 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -926,9 +926,15 @@ static struct cxl *cxl_init_adapter(struct pci_dev *dev) if ((rc = init_implementation_adapter_regs(adapter, dev))) goto err3; - if ((rc = pnv_phb_to_cxl(dev))) + if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_CAPI))) goto err3; + /* If recovery happened, the last step is to turn on snooping. + * In the non-recovery case this has no effect */ + if ((rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON))) { + goto err3; + } + if ((rc = cxl_register_psl_err_irq(adapter))) goto err3; From 62fa19d4b4fd781ad37c9155c6332f28a9e97a2c Mon Sep 17 00:00:00 2001 From: Ryan Grimm Date: Mon, 19 Jan 2015 11:52:51 -0600 Subject: [PATCH 197/601] cxl: Add ability to reset the card Adds reset to sysfs which will PERST the card. If load_image_on_perst is set to "user" or "factory", the PERST will cause that image to be loaded. load_image_on_perst is set to "user" for production. "none" could be used for debugging. The PSL trace arrays are preserved which then can be read through debugfs. PERST also triggers CAPP recovery. An HMI comes in, which is handled by EEH. EEH unbinds the driver, calls into Sapphire to reinitialize the PHB, then rebinds the driver. Signed-off-by: Ryan Grimm Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- Documentation/ABI/testing/sysfs-class-cxl | 7 +++++ drivers/misc/cxl/cxl.h | 1 + drivers/misc/cxl/pci.c | 37 +++++++++++++++++++++++ drivers/misc/cxl/sysfs.c | 18 +++++++++++ 4 files changed, 63 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl index 5941ff38d4a316..9ea01068a16c15 100644 --- a/Documentation/ABI/testing/sysfs-class-cxl +++ b/Documentation/ABI/testing/sysfs-class-cxl @@ -146,3 +146,10 @@ Description: read/write user or factory image to be loaded. Default is to reload on PERST whichever image the card has loaded. + +What: /sys/class/cxl//reset +Date: October 2014 +Contact: linuxppc-dev@lists.ozlabs.org +Description: write only + Writing 1 will issue a PERST to card which may cause the card + to reload the FPGA depending on load_image_on_perst. diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 518c4c6e6151e3..6a6a487464c58e 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -489,6 +489,7 @@ int cxl_alloc_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter, unsig void cxl_release_irq_ranges(struct cxl_irq_ranges *irqs, struct cxl *adapter); int cxl_setup_irq(struct cxl *adapter, unsigned int hwirq, unsigned int virq); int cxl_update_image_control(struct cxl *adapter); +int cxl_reset(struct cxl *adapter); /* common == phyp + powernv */ struct cxl_process_element_common { diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index a4a4e0217eed20..428ea8ba25fcea 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -21,6 +21,7 @@ #include #include /* for struct pci_controller */ #include +#include #include "cxl.h" @@ -741,6 +742,42 @@ static void cxl_remove_afu(struct cxl_afu *afu) device_unregister(&afu->dev); } +int cxl_reset(struct cxl *adapter) +{ + struct pci_dev *dev = to_pci_dev(adapter->dev.parent); + int rc; + int i; + u32 val; + + dev_info(&dev->dev, "CXL reset\n"); + + for (i = 0; i < adapter->slices; i++) + cxl_remove_afu(adapter->afu[i]); + + /* pcie_warm_reset requests a fundamental pci reset which includes a + * PERST assert/deassert. PERST triggers a loading of the image + * if "user" or "factory" is selected in sysfs */ + if ((rc = pci_set_pcie_reset_state(dev, pcie_warm_reset))) { + dev_err(&dev->dev, "cxl: pcie_warm_reset failed\n"); + return rc; + } + + /* the PERST done above fences the PHB. So, reset depends on EEH + * to unbind the driver, tell Sapphire to reinit the PHB, and rebind + * the driver. Do an mmio read explictly to ensure EEH notices the + * fenced PHB. Retry for a few seconds before giving up. */ + i = 0; + while (((val = mmio_read32be(adapter->p1_mmio)) != 0xffffffff) && + (i < 5)) { + msleep(500); + i++; + } + + if (val != 0xffffffff) + dev_err(&dev->dev, "cxl: PERST failed to trigger EEH\n"); + + return rc; +} static int cxl_map_adapter_regs(struct cxl *adapter, struct pci_dev *dev) { diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c index ed4ad461143c1a..adf1f6d849131e 100644 --- a/drivers/misc/cxl/sysfs.c +++ b/drivers/misc/cxl/sysfs.c @@ -56,6 +56,23 @@ static ssize_t image_loaded_show(struct device *device, return scnprintf(buf, PAGE_SIZE, "factory\n"); } +static ssize_t reset_adapter_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl *adapter = to_cxl_adapter(device); + int rc; + int val; + + rc = sscanf(buf, "%i", &val); + if ((rc != 1) || (val != 1)) + return -EINVAL; + + if ((rc = cxl_reset(adapter))) + return rc; + return count; +} + static ssize_t load_image_on_perst_show(struct device *device, struct device_attribute *attr, char *buf) @@ -100,6 +117,7 @@ static struct device_attribute adapter_attrs[] = { __ATTR_RO(base_image), __ATTR_RO(image_loaded), __ATTR_RW(load_image_on_perst), + __ATTR(reset, S_IWUSR, NULL, reset_adapter_store), }; From f8b2dcbd9e6d1479b9b5a9e9e78bbaf783bde819 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 14 Jan 2015 17:50:19 +0100 Subject: [PATCH 198/601] s390: add z13 code generation support Allow to generate code that only runs on z13 machines. Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 18 ++++++++++++++++++ arch/s390/Makefile | 2 ++ arch/s390/kernel/head.S | 4 +++- arch/s390/kernel/setup.c | 3 +++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e79c3eab40b9e8..06c6d9ab5a8da7 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -185,6 +185,10 @@ config HAVE_MARCH_ZEC12_FEATURES def_bool n select HAVE_MARCH_Z196_FEATURES +config HAVE_MARCH_Z13_FEATURES + def_bool n + select HAVE_MARCH_ZEC12_FEATURES + choice prompt "Processor type" default MARCH_G5 @@ -244,6 +248,14 @@ config MARCH_ZEC12 2827 series). The kernel will be slightly faster but will not work on older machines. +config MARCH_Z13 + bool "IBM z13" + select HAVE_MARCH_Z13_FEATURES if 64BIT + help + Select this to enable optimizations for IBM z13 (2964 series). + The kernel will be slightly faster but will not work on older + machines. + endchoice config MARCH_G5_TUNE @@ -267,6 +279,9 @@ config MARCH_Z196_TUNE config MARCH_ZEC12_TUNE def_bool TUNE_ZEC12 || MARCH_ZEC12 && TUNE_DEFAULT +config MARCH_Z13_TUNE + def_bool TUNE_Z13 || MARCH_Z13 && TUNE_DEFAULT + choice prompt "Tune code generation" default TUNE_DEFAULT @@ -305,6 +320,9 @@ config TUNE_Z196 config TUNE_ZEC12 bool "IBM zBC12 and zEC12" +config TUNE_Z13 + bool "IBM z13" + endchoice config 64BIT diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 878e67973151b5..e742ec5de9a7e5 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -42,6 +42,7 @@ mflags-$(CONFIG_MARCH_Z9_109) := -march=z9-109 mflags-$(CONFIG_MARCH_Z10) := -march=z10 mflags-$(CONFIG_MARCH_Z196) := -march=z196 mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12 +mflags-$(CONFIG_MARCH_Z13) := -march=z13 aflags-y += $(mflags-y) cflags-y += $(mflags-y) @@ -53,6 +54,7 @@ cflags-$(CONFIG_MARCH_Z9_109_TUNE) += -mtune=z9-109 cflags-$(CONFIG_MARCH_Z10_TUNE) += -mtune=z10 cflags-$(CONFIG_MARCH_Z196_TUNE) += -mtune=z196 cflags-$(CONFIG_MARCH_ZEC12_TUNE) += -mtune=zEC12 +cflags-$(CONFIG_MARCH_Z13_TUNE) += -mtune=z13 #KBUILD_IMAGE is necessary for make rpm KBUILD_IMAGE :=arch/s390/boot/image diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index d62eee11f0b542..132f4c9ade6030 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -436,7 +436,9 @@ ENTRY(startup_kdump) # followed by the facility words. #if defined(CONFIG_64BIT) -#if defined(CONFIG_MARCH_ZEC12) +#if defined(CONFIG_MARCH_Z13) + .long 3, 0xc100eff2, 0xf46ce800, 0x00400000 +#elif defined(CONFIG_MARCH_ZEC12) .long 3, 0xc100eff2, 0xf46ce800, 0x00400000 #elif defined(CONFIG_MARCH_Z196) .long 2, 0xc100eff2, 0xf46c0000 diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 4e532c67832fdd..bfac77ada4f281 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -810,6 +810,9 @@ static void __init setup_hwcaps(void) case 0x2828: strcpy(elf_platform, "zEC12"); break; + case 0x2964: + strcpy(elf_platform, "z13"); + break; } } From 1f6b83e5e4d3aed46eac1d219322fba9c7341cd8 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 14 Jan 2015 17:51:17 +0100 Subject: [PATCH 199/601] s390: avoid z13 cache aliasing Avoid cache aliasing on z13 by aligning shared objects to multiples of 512K. The virtual addresses of a page from a shared file needs to have identical bits in the range 2^12 to 2^18. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/elf.h | 8 +- arch/s390/include/asm/pgtable.h | 4 + arch/s390/kernel/process.c | 10 --- arch/s390/mm/init.c | 9 +- arch/s390/mm/mmap.c | 142 +++++++++++++++++++++++++++++++- 5 files changed, 155 insertions(+), 18 deletions(-) diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index f6e43d39e3d82e..c9df40b5c0ac19 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -163,8 +163,8 @@ extern unsigned int vdso_enabled; the loader. We need to make sure that it is out of the way of the program that it will "exec", and that there is sufficient room for the brk. */ -extern unsigned long randomize_et_dyn(unsigned long base); -#define ELF_ET_DYN_BASE (randomize_et_dyn(STACK_TOP / 3 * 2)) +extern unsigned long randomize_et_dyn(void); +#define ELF_ET_DYN_BASE randomize_et_dyn() /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. */ @@ -209,7 +209,9 @@ do { \ } while (0) #endif /* CONFIG_COMPAT */ -#define STACK_RND_MASK 0x7ffUL +extern unsigned long mmap_rnd_mask; + +#define STACK_RND_MASK (mmap_rnd_mask) #define ARCH_DLINFO \ do { \ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 5e102422c9abd4..b8641b41e19c37 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1779,6 +1779,10 @@ extern int s390_enable_sie(void); extern int s390_enable_skey(void); extern void s390_reset_cmma(struct mm_struct *mm); +/* s390 has a private copy of get unmapped area to deal with cache synonyms */ +#define HAVE_ARCH_UNMAPPED_AREA +#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + /* * No page table caches to initialise */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index aa7a83948c7b13..2c1eb4f3aaf5ec 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -243,13 +243,3 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) ret = PAGE_ALIGN(mm->brk + brk_rnd()); return (ret > mm->brk) ? ret : mm->brk; } - -unsigned long randomize_et_dyn(unsigned long base) -{ - unsigned long ret; - - if (!(current->flags & PF_RANDOMIZE)) - return base; - ret = PAGE_ALIGN(base + brk_rnd()); - return (ret > base) ? ret : base; -} diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index c7235e01fd6749..d35b15113b17fd 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -71,13 +71,16 @@ static void __init setup_zero_pages(void) break; case 0x2827: /* zEC12 */ case 0x2828: /* zEC12 */ - default: order = 5; break; + case 0x2964: /* z13 */ + default: + order = 7; + break; } /* Limit number of empty zero pages for small memory sizes */ - if (order > 2 && totalram_pages <= 16384) - order = 2; + while (order > 2 && (totalram_pages >> 10) < (1UL << order)) + order--; empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order); if (!empty_zero_page) diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 9b436c21195ec6..d008f638b2cd27 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -28,8 +28,12 @@ #include #include #include +#include #include +unsigned long mmap_rnd_mask; +unsigned long mmap_align_mask; + static unsigned long stack_maxrandom_size(void) { if (!(current->flags & PF_RANDOMIZE)) @@ -60,8 +64,10 @@ static unsigned long mmap_rnd(void) { if (!(current->flags & PF_RANDOMIZE)) return 0; - /* 8MB randomization for mmap_base */ - return (get_random_int() & 0x7ffUL) << PAGE_SHIFT; + if (is_32bit_task()) + return (get_random_int() & 0x7ff) << PAGE_SHIFT; + else + return (get_random_int() & mmap_rnd_mask) << PAGE_SHIFT; } static unsigned long mmap_base_legacy(void) @@ -81,6 +87,106 @@ static inline unsigned long mmap_base(void) return STACK_TOP - stack_maxrandom_size() - mmap_rnd() - gap; } +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct vm_unmapped_area_info info; + int do_color_align; + + if (len > TASK_SIZE - mmap_min_addr) + return -ENOMEM; + + if (flags & MAP_FIXED) + return addr; + + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + + do_color_align = 0; + if (filp || (flags & MAP_SHARED)) + do_color_align = !is_32bit_task(); + + info.flags = 0; + info.length = len; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = do_color_align ? (mmap_align_mask << PAGE_SHIFT) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + return vm_unmapped_area(&info); +} + +unsigned long +arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, + const unsigned long len, const unsigned long pgoff, + const unsigned long flags) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long addr = addr0; + struct vm_unmapped_area_info info; + int do_color_align; + + /* requested length too big for entire address space */ + if (len > TASK_SIZE - mmap_min_addr) + return -ENOMEM; + + if (flags & MAP_FIXED) + return addr; + + /* requesting a specific address */ + if (addr) { + addr = PAGE_ALIGN(addr); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + + do_color_align = 0; + if (filp || (flags & MAP_SHARED)) + do_color_align = !is_32bit_task(); + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = max(PAGE_SIZE, mmap_min_addr); + info.high_limit = mm->mmap_base; + info.align_mask = do_color_align ? (mmap_align_mask << PAGE_SHIFT) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + addr = vm_unmapped_area(&info); + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + addr = vm_unmapped_area(&info); + } + + return addr; +} + +unsigned long randomize_et_dyn(void) +{ + unsigned long base; + + base = (STACK_TOP / 3 * 2) & (~mmap_align_mask << PAGE_SHIFT); + return base + mmap_rnd(); +} + #ifndef CONFIG_64BIT /* @@ -177,4 +283,36 @@ void arch_pick_mmap_layout(struct mm_struct *mm) } } +static int __init setup_mmap_rnd(void) +{ + struct cpuid cpu_id; + + get_cpu_id(&cpu_id); + switch (cpu_id.machine) { + case 0x9672: + case 0x2064: + case 0x2066: + case 0x2084: + case 0x2086: + case 0x2094: + case 0x2096: + case 0x2097: + case 0x2098: + case 0x2817: + case 0x2818: + case 0x2827: + case 0x2828: + mmap_rnd_mask = 0x7ffUL; + mmap_align_mask = 0UL; + break; + case 0x2964: /* z13 */ + default: + mmap_rnd_mask = 0x3ff80UL; + mmap_align_mask = 0x7fUL; + break; + } + return 0; +} +early_initcall(setup_mmap_rnd); + #endif From 10ad34bc76dfbc49bda327a13012f6754c0c72e0 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 14 Jan 2015 17:52:10 +0100 Subject: [PATCH 200/601] s390: add SMT support The multi-threading facility is introduced with the z13 processor family. This patch adds code to detect the multi-threading facility. With the facility enabled each core will surface multiple hardware threads to the system. Each hardware threads looks like a normal CPU to the operating system with all its registers and properties. The SCLP interface reports the SMT topology indirectly via the maximum thread id. Each reported CPU in the result of a read-scp-information is a core representing a number of hardware threads. To reflect the reduced CPU capacity if two hardware threads run on a single core the MT utilization counter set is used to normalize the raw cputime obtained by the CPU timer deltas. This scaled cputime is reported via the taskstats interface. The normal /proc/stat numbers are based on the raw cputime and are not affected by the normalization. Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 15 +- arch/s390/include/asm/cpu_mf.h | 14 ++ arch/s390/include/asm/reset.h | 3 +- arch/s390/include/asm/sclp.h | 5 +- arch/s390/include/asm/sigp.h | 1 + arch/s390/include/asm/smp.h | 4 + arch/s390/include/asm/sysinfo.h | 20 ++- arch/s390/include/asm/topology.h | 4 + arch/s390/kernel/base.S | 3 +- arch/s390/kernel/dis.c | 8 +- arch/s390/kernel/ipl.c | 11 +- arch/s390/kernel/machine_kexec.c | 19 +-- arch/s390/kernel/smp.c | 261 ++++++++++++++++++++++++------- arch/s390/kernel/sysinfo.c | 8 + arch/s390/kernel/topology.c | 63 +++++--- arch/s390/kernel/vtime.c | 58 ++++++- drivers/s390/char/sclp_early.c | 49 ++++-- drivers/s390/cio/cio.c | 2 +- 18 files changed, 430 insertions(+), 118 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 06c6d9ab5a8da7..7eba5b5723e986 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -396,17 +396,26 @@ config HOTPLUG_CPU can be controlled through /sys/devices/system/cpu/cpu#. Say N if you want to disable CPU hotplug. +config SCHED_SMT + def_bool n + config SCHED_MC def_bool n config SCHED_BOOK + def_bool n + +config SCHED_TOPOLOGY def_bool y - prompt "Book scheduler support" + prompt "Topology scheduler support" depends on SMP + select SCHED_SMT select SCHED_MC + select SCHED_BOOK help - Book scheduler support improves the CPU scheduler's decision making - when dealing with machines that have several books. + Topology scheduler support improves the CPU scheduler's decision + making when dealing with machines that have multi-threading, + multiple cores or multiple books. source kernel/Kconfig.preempt diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index cb700d54bd832a..5243a8679a1dcb 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -189,6 +189,20 @@ static inline int ecctr(u64 ctr, u64 *val) return cc; } +/* Store CPU counter multiple for the MT utilization counter set */ +static inline int stcctm5(u64 num, u64 *val) +{ + typedef struct { u64 _[num]; } addrtype; + int cc; + + asm volatile ( + " .insn rsy,0xeb0000000017,%2,5,%1\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc), "=Q" (*(addrtype *) val) : "d" (num) : "cc"); + return cc; +} + /* Query sampling information */ static inline int qsi(struct hws_qsi_info_block *info) { diff --git a/arch/s390/include/asm/reset.h b/arch/s390/include/asm/reset.h index 804578587a7a87..72786067b30052 100644 --- a/arch/s390/include/asm/reset.h +++ b/arch/s390/include/asm/reset.h @@ -15,5 +15,6 @@ struct reset_call { extern void register_reset_call(struct reset_call *reset); extern void unregister_reset_call(struct reset_call *reset); -extern void s390_reset_system(void (*func)(void *), void *data); +extern void s390_reset_system(void (*fn_pre)(void), + void (*fn_post)(void *), void *data); #endif /* _ASM_S390_RESET_H */ diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index b6f8066789c1cc..edb453cfc2c635 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -27,7 +27,7 @@ struct sclp_ipl_info { }; struct sclp_cpu_entry { - u8 address; + u8 core_id; u8 reserved0[2]; u8 : 3; u8 siif : 1; @@ -51,6 +51,9 @@ int sclp_cpu_deconfigure(u8 cpu); unsigned long long sclp_get_rnmax(void); unsigned long long sclp_get_rzm(void); unsigned int sclp_get_max_cpu(void); +unsigned int sclp_get_mtid(u8 cpu_type); +unsigned int sclp_get_mtid_max(void); +unsigned int sclp_get_mtid_prev(void); int sclp_sdias_blk_count(void); int sclp_sdias_copy(void *dest, int blk_num, int nr_blks); int sclp_chp_configure(struct chp_id chpid); diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index 49576115dbb76d..c49d9c0483a83f 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -15,6 +15,7 @@ #define SIGP_SET_ARCHITECTURE 18 #define SIGP_COND_EMERGENCY_SIGNAL 19 #define SIGP_SENSE_RUNNING 21 +#define SIGP_SET_MULTI_THREADING 22 #define SIGP_STORE_ADDITIONAL_STATUS 23 /* SIGP condition codes */ diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 762d4f88af5ae3..b3bd0282dd9870 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -16,6 +16,8 @@ #define raw_smp_processor_id() (S390_lowcore.cpu_nr) extern struct mutex smp_cpu_state_mutex; +extern unsigned int smp_cpu_mt_shift; +extern unsigned int smp_cpu_mtid; extern int __cpu_up(unsigned int cpu, struct task_struct *tidle); @@ -35,6 +37,8 @@ extern void smp_fill_possible_mask(void); #else /* CONFIG_SMP */ +#define smp_cpu_mtid 0 + static inline void smp_call_ipl_cpu(void (*func)(void *), void *data) { func(data); diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index f92428e459f8b8..73f12d21af4dc4 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -90,7 +90,11 @@ struct sysinfo_2_2_2 { unsigned short cpus_reserved; char name[8]; unsigned int caf; - char reserved_2[16]; + char reserved_2[8]; + unsigned char mt_installed; + unsigned char mt_general; + unsigned char mt_psmtid; + char reserved_3[5]; unsigned short cpus_dedicated; unsigned short cpus_shared; }; @@ -120,26 +124,28 @@ struct sysinfo_3_2_2 { extern int topology_max_mnest; -#define TOPOLOGY_CPU_BITS 64 +#define TOPOLOGY_CORE_BITS 64 #define TOPOLOGY_NR_MAG 6 -struct topology_cpu { - unsigned char reserved0[4]; +struct topology_core { + unsigned char nl; + unsigned char reserved0[3]; unsigned char :6; unsigned char pp:2; unsigned char reserved1; unsigned short origin; - unsigned long mask[TOPOLOGY_CPU_BITS / BITS_PER_LONG]; + unsigned long mask[TOPOLOGY_CORE_BITS / BITS_PER_LONG]; }; struct topology_container { - unsigned char reserved[7]; + unsigned char nl; + unsigned char reserved[6]; unsigned char id; }; union topology_entry { unsigned char nl; - struct topology_cpu cpu; + struct topology_core cpu; struct topology_container container; }; diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 56af53093d24d3..c4fbb9527c5ca2 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -9,9 +9,11 @@ struct cpu; #ifdef CONFIG_SCHED_BOOK struct cpu_topology_s390 { + unsigned short thread_id; unsigned short core_id; unsigned short socket_id; unsigned short book_id; + cpumask_t thread_mask; cpumask_t core_mask; cpumask_t book_mask; }; @@ -19,6 +21,8 @@ struct cpu_topology_s390 { extern struct cpu_topology_s390 cpu_topology[NR_CPUS]; #define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id) +#define topology_thread_id(cpu) (cpu_topology[cpu].thread_id) +#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_mask) #define topology_core_id(cpu) (cpu_topology[cpu].core_id) #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_mask) #define topology_book_id(cpu) (cpu_topology[cpu].book_id) diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index 797a823a22755d..f74a53d339b0b7 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -97,7 +97,8 @@ ENTRY(diag308_reset) lg %r4,0(%r4) # Save PSW sturg %r4,%r3 # Use sturg, because of large pages lghi %r1,1 - diag %r1,%r1,0x308 + lghi %r0,0 + diag %r0,%r1,0x308 .Lrestart_part2: lhi %r0,0 # Load r0 with zero lhi %r1,2 # Use mode 2 = ESAME (dump) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index d46d0b0b2cdab0..533430307da8f5 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -137,7 +137,7 @@ enum { INSTR_RSI_RRP, INSTR_RSL_LRDFU, INSTR_RSL_R0RD, INSTR_RSY_AARD, INSTR_RSY_CCRD, INSTR_RSY_RRRD, INSTR_RSY_RURD, - INSTR_RSY_RDRM, + INSTR_RSY_RDRM, INSTR_RSY_RMRD, INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD, INSTR_RS_RURD, INSTR_RXE_FRRD, INSTR_RXE_RRRD, INSTR_RXE_RRRDM, @@ -307,6 +307,7 @@ static const unsigned char formats[][7] = { [INSTR_RSY_AARD] = { 0xff, A_8,A_12,D20_20,B_16,0,0 }, [INSTR_RSY_CCRD] = { 0xff, C_8,C_12,D20_20,B_16,0,0 }, [INSTR_RSY_RDRM] = { 0xff, R_8,D20_20,B_16,U4_12,0,0 }, + [INSTR_RSY_RMRD] = { 0xff, R_8,U4_12,D20_20,B_16,0,0 }, [INSTR_RSY_RRRD] = { 0xff, R_8,R_12,D20_20,B_16,0,0 }, [INSTR_RSY_RURD] = { 0xff, R_8,U4_12,D20_20,B_16,0,0 }, [INSTR_RS_AARD] = { 0xff, A_8,A_12,D_20,B_16,0,0 }, @@ -450,7 +451,8 @@ enum { LONG_INSN_VERLLV, LONG_INSN_VESRAV, LONG_INSN_VESRLV, - LONG_INSN_VSBCBI + LONG_INSN_VSBCBI, + LONG_INSN_STCCTM }; static char *long_insn_name[] = { @@ -530,6 +532,7 @@ static char *long_insn_name[] = { [LONG_INSN_VESRAV] = "vesrav", [LONG_INSN_VESRLV] = "vesrlv", [LONG_INSN_VSBCBI] = "vsbcbi", + [LONG_INSN_STCCTM] = "stcctm", }; static struct s390_insn opcode[] = { @@ -1655,6 +1658,7 @@ static struct s390_insn opcode_eb[] = { { "lric", 0x60, INSTR_RSY_RDRM }, { "stric", 0x61, INSTR_RSY_RDRM }, { "mric", 0x62, INSTR_RSY_RDRM }, + { { 0, LONG_INSN_STCCTM }, 0x17, INSTR_RSY_RMRD }, #endif { "rll", 0x1d, INSTR_RSY_RRRD }, { "mvclu", 0x8e, INSTR_RSY_RRRD }, diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 39badb9ca0b30c..5c8651f3650937 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2074,7 +2074,8 @@ static void do_reset_calls(void) u32 dump_prefix_page; -void s390_reset_system(void (*func)(void *), void *data) +void s390_reset_system(void (*fn_pre)(void), + void (*fn_post)(void *), void *data) { struct _lowcore *lc; @@ -2112,7 +2113,11 @@ void s390_reset_system(void (*func)(void *), void *data) /* Store status at absolute zero */ store_status(); + /* Call function before reset */ + if (fn_pre) + fn_pre(); do_reset_calls(); - if (func) - func(data); + /* Call function after reset */ + if (fn_post) + fn_post(data); } diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 4685337fa7c6bf..fb0901ec4306b8 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -103,21 +103,18 @@ static int __init machine_kdump_pm_init(void) return 0; } arch_initcall(machine_kdump_pm_init); -#endif /* * Start kdump: We expect here that a store status has been done on our CPU */ static void __do_machine_kdump(void *image) { -#ifdef CONFIG_CRASH_DUMP int (*start_kdump)(int) = (void *)((struct kimage *) image)->start; - setup_regs(); __load_psw_mask(PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA); start_kdump(1); -#endif } +#endif /* * Check if kdump checksums are valid: We call purgatory with parameter "0" @@ -249,18 +246,18 @@ static void __do_machine_kexec(void *data) */ static void __machine_kexec(void *data) { - struct kimage *image = data; - __arch_local_irq_stosm(0x04); /* enable DAT */ pfault_fini(); tracing_off(); debug_locks_off(); - if (image->type == KEXEC_TYPE_CRASH) { +#ifdef CONFIG_CRASH_DUMP + if (((struct kimage *) data)->type == KEXEC_TYPE_CRASH) { + lgr_info_log(); - s390_reset_system(__do_machine_kdump, data); - } else { - s390_reset_system(__do_machine_kexec, data); - } + s390_reset_system(setup_regs, __do_machine_kdump, data); + } else +#endif + s390_reset_system(NULL, __do_machine_kexec, data); disabled_wait((unsigned long) __builtin_return_address(0)); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 0b499f5cbe19c1..370ff3a092a395 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -71,9 +71,30 @@ struct pcpu { }; static u8 boot_cpu_type; -static u16 boot_cpu_address; static struct pcpu pcpu_devices[NR_CPUS]; +unsigned int smp_cpu_mt_shift; +EXPORT_SYMBOL(smp_cpu_mt_shift); + +unsigned int smp_cpu_mtid; +EXPORT_SYMBOL(smp_cpu_mtid); + +static unsigned int smp_max_threads __initdata = -1U; + +static int __init early_nosmt(char *s) +{ + smp_max_threads = 1; + return 0; +} +early_param("nosmt", early_nosmt); + +static int __init early_smt(char *s) +{ + get_option(&s, &smp_max_threads); + return 0; +} +early_param("smt", early_smt); + /* * The smp_cpu_state_mutex must be held when changing the state or polarization * member of a pcpu data structure within the pcpu_devices arreay. @@ -132,7 +153,7 @@ static inline int pcpu_running(struct pcpu *pcpu) /* * Find struct pcpu by cpu address. */ -static struct pcpu *pcpu_find_address(const struct cpumask *mask, int address) +static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address) { int cpu; @@ -298,6 +319,32 @@ static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *), for (;;) ; } +/* + * Enable additional logical cpus for multi-threading. + */ +static int pcpu_set_smt(unsigned int mtid) +{ + register unsigned long reg1 asm ("1") = (unsigned long) mtid; + int cc; + + if (smp_cpu_mtid == mtid) + return 0; + asm volatile( + " sigp %1,0,%2 # sigp set multi-threading\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) : "d" (reg1), "K" (SIGP_SET_MULTI_THREADING) + : "cc"); + if (cc == 0) { + smp_cpu_mtid = mtid; + smp_cpu_mt_shift = 0; + while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift)) + smp_cpu_mt_shift++; + pcpu_devices[0].address = stap(); + } + return cc; +} + /* * Call function on an online CPU. */ @@ -512,22 +559,17 @@ EXPORT_SYMBOL(smp_ctl_clear_bit); #ifdef CONFIG_CRASH_DUMP -static void __init smp_get_save_area(int cpu, u16 address) +static inline void __smp_store_cpu_state(int cpu, u16 address, int is_boot_cpu) { void *lc = pcpu_devices[0].lowcore; struct save_area_ext *sa_ext; unsigned long vx_sa; - if (is_kdump_kernel()) - return; - if (!OLDMEM_BASE && (address == boot_cpu_address || - ipl_info.type != IPL_TYPE_FCP_DUMP)) - return; sa_ext = dump_save_area_create(cpu); if (!sa_ext) panic("could not allocate memory for save area\n"); - if (address == boot_cpu_address) { - /* Copy the registers of the boot cpu. */ + if (is_boot_cpu) { + /* Copy the registers of the boot CPU. */ copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa), SAVE_AREA_BASE - PAGE_SIZE, 0); if (MACHINE_HAS_VX) @@ -548,6 +590,64 @@ static void __init smp_get_save_area(int cpu, u16 address) free_page(vx_sa); } +/* + * Collect CPU state of the previous, crashed system. + * There are four cases: + * 1) standard zfcp dump + * condition: OLDMEM_BASE == NULL && ipl_info.type == IPL_TYPE_FCP_DUMP + * The state for all CPUs except the boot CPU needs to be collected + * with sigp stop-and-store-status. The boot CPU state is located in + * the absolute lowcore of the memory stored in the HSA. The zcore code + * will allocate the save area and copy the boot CPU state from the HSA. + * 2) stand-alone kdump for SCSI (zfcp dump with swapped memory) + * condition: OLDMEM_BASE != NULL && ipl_info.type == IPL_TYPE_FCP_DUMP + * The state for all CPUs except the boot CPU needs to be collected + * with sigp stop-and-store-status. The firmware or the boot-loader + * stored the registers of the boot CPU in the absolute lowcore in the + * memory of the old system. + * 3) kdump and the old kernel did not store the CPU state, + * or stand-alone kdump for DASD + * condition: OLDMEM_BASE != NULL && !is_kdump_kernel() + * The state for all CPUs except the boot CPU needs to be collected + * with sigp stop-and-store-status. The kexec code or the boot-loader + * stored the registers of the boot CPU in the memory of the old system. + * 4) kdump and the old kernel stored the CPU state + * condition: OLDMEM_BASE != NULL && is_kdump_kernel() + * The state of all CPUs is stored in ELF sections in the memory of the + * old system. The ELF sections are picked up by the crash_dump code + * via elfcorehdr_addr. + */ +static void __init smp_store_cpu_states(struct sclp_cpu_info *info) +{ + unsigned int cpu, address, i, j; + int is_boot_cpu; + + if (is_kdump_kernel()) + /* Previous system stored the CPU states. Nothing to do. */ + return; + if (!(OLDMEM_BASE || ipl_info.type == IPL_TYPE_FCP_DUMP)) + /* No previous system present, normal boot. */ + return; + /* Set multi-threading state to the previous system. */ + pcpu_set_smt(sclp_get_mtid_prev()); + /* Collect CPU states. */ + cpu = 0; + for (i = 0; i < info->configured; i++) { + /* Skip CPUs with different CPU type. */ + if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type) + continue; + for (j = 0; j <= smp_cpu_mtid; j++, cpu++) { + address = (info->cpu[i].core_id << smp_cpu_mt_shift) + j; + is_boot_cpu = (address == pcpu_devices[0].address); + if (is_boot_cpu && !OLDMEM_BASE) + /* Skip boot CPU for standard zfcp dump. */ + continue; + /* Get state for this CPu. */ + __smp_store_cpu_state(cpu, address, is_boot_cpu); + } + } +} + int smp_store_status(int cpu) { unsigned long vx_sa; @@ -565,10 +665,6 @@ int smp_store_status(int cpu) return 0; } -#else /* CONFIG_CRASH_DUMP */ - -static inline void smp_get_save_area(int cpu, u16 address) { } - #endif /* CONFIG_CRASH_DUMP */ void smp_cpu_set_polarization(int cpu, int val) @@ -590,11 +686,13 @@ static struct sclp_cpu_info *smp_get_cpu_info(void) info = kzalloc(sizeof(*info), GFP_KERNEL); if (info && (use_sigp_detection || sclp_get_cpu_info(info))) { use_sigp_detection = 1; - for (address = 0; address <= MAX_CPU_ADDRESS; address++) { + for (address = 0; address <= MAX_CPU_ADDRESS; + address += (1U << smp_cpu_mt_shift)) { if (__pcpu_sigp_relax(address, SIGP_SENSE, 0, NULL) == SIGP_CC_NOT_OPERATIONAL) continue; - info->cpu[info->configured].address = address; + info->cpu[info->configured].core_id = + address >> smp_cpu_mt_shift; info->configured++; } info->combined = info->configured; @@ -608,7 +706,8 @@ static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add) { struct pcpu *pcpu; cpumask_t avail; - int cpu, nr, i; + int cpu, nr, i, j; + u16 address; nr = 0; cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); @@ -616,51 +715,76 @@ static int __smp_rescan_cpus(struct sclp_cpu_info *info, int sysfs_add) for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { if (info->has_cpu_type && info->cpu[i].type != boot_cpu_type) continue; - if (pcpu_find_address(cpu_present_mask, info->cpu[i].address)) - continue; - pcpu = pcpu_devices + cpu; - pcpu->address = info->cpu[i].address; - pcpu->state = (i >= info->configured) ? - CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); - set_cpu_present(cpu, true); - if (sysfs_add && smp_add_present_cpu(cpu) != 0) - set_cpu_present(cpu, false); - else - nr++; - cpu = cpumask_next(cpu, &avail); + address = info->cpu[i].core_id << smp_cpu_mt_shift; + for (j = 0; j <= smp_cpu_mtid; j++) { + if (pcpu_find_address(cpu_present_mask, address + j)) + continue; + pcpu = pcpu_devices + cpu; + pcpu->address = address + j; + pcpu->state = + (cpu >= info->configured*(smp_cpu_mtid + 1)) ? + CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + set_cpu_present(cpu, true); + if (sysfs_add && smp_add_present_cpu(cpu) != 0) + set_cpu_present(cpu, false); + else + nr++; + cpu = cpumask_next(cpu, &avail); + if (cpu >= nr_cpu_ids) + break; + } } return nr; } static void __init smp_detect_cpus(void) { - unsigned int cpu, c_cpus, s_cpus; + unsigned int cpu, mtid, c_cpus, s_cpus; struct sclp_cpu_info *info; + u16 address; + /* Get CPU information */ info = smp_get_cpu_info(); if (!info) panic("smp_detect_cpus failed to allocate memory\n"); + + /* Find boot CPU type */ if (info->has_cpu_type) { - for (cpu = 0; cpu < info->combined; cpu++) { - if (info->cpu[cpu].address != boot_cpu_address) - continue; - /* The boot cpu dictates the cpu type. */ - boot_cpu_type = info->cpu[cpu].type; - break; - } + address = stap(); + for (cpu = 0; cpu < info->combined; cpu++) + if (info->cpu[cpu].core_id == address) { + /* The boot cpu dictates the cpu type. */ + boot_cpu_type = info->cpu[cpu].type; + break; + } + if (cpu >= info->combined) + panic("Could not find boot CPU type"); } + +#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP) + /* Collect CPU state of previous system */ + smp_store_cpu_states(info); +#endif + + /* Set multi-threading state for the current system */ + mtid = sclp_get_mtid(boot_cpu_type); + mtid = (mtid < smp_max_threads) ? mtid : smp_max_threads - 1; + pcpu_set_smt(mtid); + + /* Print number of CPUs */ c_cpus = s_cpus = 0; for (cpu = 0; cpu < info->combined; cpu++) { if (info->has_cpu_type && info->cpu[cpu].type != boot_cpu_type) continue; - if (cpu < info->configured) { - smp_get_save_area(c_cpus, info->cpu[cpu].address); - c_cpus++; - } else - s_cpus++; + if (cpu < info->configured) + c_cpus += smp_cpu_mtid + 1; + else + s_cpus += smp_cpu_mtid + 1; } pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus); + + /* Add CPUs present at boot */ get_online_cpus(); __smp_rescan_cpus(info, 0); put_online_cpus(); @@ -696,12 +820,23 @@ static void smp_start_secondary(void *cpuvoid) int __cpu_up(unsigned int cpu, struct task_struct *tidle) { struct pcpu *pcpu; - int rc; + int base, i, rc; pcpu = pcpu_devices + cpu; if (pcpu->state != CPU_STATE_CONFIGURED) return -EIO; - if (pcpu_sigp_retry(pcpu, SIGP_INITIAL_CPU_RESET, 0) != + base = cpu - (cpu % (smp_cpu_mtid + 1)); + for (i = 0; i <= smp_cpu_mtid; i++) { + if (base + i < nr_cpu_ids) + if (cpu_online(base + i)) + break; + } + /* + * If this is the first CPU of the core to get online + * do an initial CPU reset. + */ + if (i > smp_cpu_mtid && + pcpu_sigp_retry(pcpu_devices + base, SIGP_INITIAL_CPU_RESET, 0) != SIGP_CC_ORDER_CODE_ACCEPTED) return -EIO; @@ -774,7 +909,8 @@ void __init smp_fill_possible_mask(void) { unsigned int possible, sclp, cpu; - sclp = sclp_get_max_cpu() ?: nr_cpu_ids; + sclp = min(smp_max_threads, sclp_get_mtid_max() + 1); + sclp = sclp_get_max_cpu()*sclp ?: nr_cpu_ids; possible = setup_possible_cpus ?: nr_cpu_ids; possible = min(possible, sclp); for (cpu = 0; cpu < possible && cpu < nr_cpu_ids; cpu++) @@ -796,9 +932,8 @@ void __init smp_prepare_boot_cpu(void) { struct pcpu *pcpu = pcpu_devices; - boot_cpu_address = stap(); pcpu->state = CPU_STATE_CONFIGURED; - pcpu->address = boot_cpu_address; + pcpu->address = stap(); pcpu->lowcore = (struct _lowcore *)(unsigned long) store_prefix(); pcpu->async_stack = S390_lowcore.async_stack - ASYNC_SIZE + STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); @@ -848,7 +983,7 @@ static ssize_t cpu_configure_store(struct device *dev, const char *buf, size_t count) { struct pcpu *pcpu; - int cpu, val, rc; + int cpu, val, rc, i; char delim; if (sscanf(buf, "%d %c", &val, &delim) != 1) @@ -860,29 +995,43 @@ static ssize_t cpu_configure_store(struct device *dev, rc = -EBUSY; /* disallow configuration changes of online cpus and cpu 0 */ cpu = dev->id; - if (cpu_online(cpu) || cpu == 0) + cpu -= cpu % (smp_cpu_mtid + 1); + if (cpu == 0) goto out; + for (i = 0; i <= smp_cpu_mtid; i++) + if (cpu_online(cpu + i)) + goto out; pcpu = pcpu_devices + cpu; rc = 0; switch (val) { case 0: if (pcpu->state != CPU_STATE_CONFIGURED) break; - rc = sclp_cpu_deconfigure(pcpu->address); + rc = sclp_cpu_deconfigure(pcpu->address >> smp_cpu_mt_shift); if (rc) break; - pcpu->state = CPU_STATE_STANDBY; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + for (i = 0; i <= smp_cpu_mtid; i++) { + if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) + continue; + pcpu[i].state = CPU_STATE_STANDBY; + smp_cpu_set_polarization(cpu + i, + POLARIZATION_UNKNOWN); + } topology_expect_change(); break; case 1: if (pcpu->state != CPU_STATE_STANDBY) break; - rc = sclp_cpu_configure(pcpu->address); + rc = sclp_cpu_configure(pcpu->address >> smp_cpu_mt_shift); if (rc) break; - pcpu->state = CPU_STATE_CONFIGURED; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + for (i = 0; i <= smp_cpu_mtid; i++) { + if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) + continue; + pcpu[i].state = CPU_STATE_CONFIGURED; + smp_cpu_set_polarization(cpu + i, + POLARIZATION_UNKNOWN); + } topology_expect_change(); break; default: diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index 811f542b8ed4ad..85565f1ff4743a 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -194,6 +194,14 @@ static void stsi_2_2_2(struct seq_file *m, struct sysinfo_2_2_2 *info) seq_printf(m, "LPAR CPUs Reserved: %d\n", info->cpus_reserved); seq_printf(m, "LPAR CPUs Dedicated: %d\n", info->cpus_dedicated); seq_printf(m, "LPAR CPUs Shared: %d\n", info->cpus_shared); + if (info->mt_installed & 0x80) { + seq_printf(m, "LPAR CPUs G-MTID: %d\n", + info->mt_general & 0x1f); + seq_printf(m, "LPAR CPUs S-MTID: %d\n", + info->mt_installed & 0x1f); + seq_printf(m, "LPAR CPUs PS-MTID: %d\n", + info->mt_psmtid & 0x1f); + } } static void stsi_3_2_2(struct seq_file *m, struct sysinfo_3_2_2 *info) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index b93bed76ea9402..24ee33f1af2422 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -59,32 +59,50 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu) return mask; } -static struct mask_info *add_cpus_to_mask(struct topology_cpu *tl_cpu, +static cpumask_t cpu_thread_map(unsigned int cpu) +{ + cpumask_t mask; + int i; + + cpumask_copy(&mask, cpumask_of(cpu)); + if (!topology_enabled || !MACHINE_HAS_TOPOLOGY) + return mask; + cpu -= cpu % (smp_cpu_mtid + 1); + for (i = 0; i <= smp_cpu_mtid; i++) + if (cpu_present(cpu + i)) + cpumask_set_cpu(cpu + i, &mask); + return mask; +} + +static struct mask_info *add_cpus_to_mask(struct topology_core *tl_core, struct mask_info *book, struct mask_info *socket, int one_socket_per_cpu) { - unsigned int cpu; + unsigned int core; - for_each_set_bit(cpu, &tl_cpu->mask[0], TOPOLOGY_CPU_BITS) { - unsigned int rcpu; - int lcpu; + for_each_set_bit(core, &tl_core->mask[0], TOPOLOGY_CORE_BITS) { + unsigned int rcore; + int lcpu, i; - rcpu = TOPOLOGY_CPU_BITS - 1 - cpu + tl_cpu->origin; - lcpu = smp_find_processor_id(rcpu); + rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin; + lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift); if (lcpu < 0) continue; - cpumask_set_cpu(lcpu, &book->mask); - cpu_topology[lcpu].book_id = book->id; - cpumask_set_cpu(lcpu, &socket->mask); - cpu_topology[lcpu].core_id = rcpu; - if (one_socket_per_cpu) { - cpu_topology[lcpu].socket_id = rcpu; - socket = socket->next; - } else { - cpu_topology[lcpu].socket_id = socket->id; + for (i = 0; i <= smp_cpu_mtid; i++) { + cpu_topology[lcpu + i].book_id = book->id; + cpu_topology[lcpu + i].core_id = rcore; + cpu_topology[lcpu + i].thread_id = lcpu + i; + cpumask_set_cpu(lcpu + i, &book->mask); + cpumask_set_cpu(lcpu + i, &socket->mask); + if (one_socket_per_cpu) + cpu_topology[lcpu + i].socket_id = rcore; + else + cpu_topology[lcpu + i].socket_id = socket->id; + smp_cpu_set_polarization(lcpu + i, tl_core->pp); } - smp_cpu_set_polarization(lcpu, tl_cpu->pp); + if (one_socket_per_cpu) + socket = socket->next; } return socket; } @@ -108,7 +126,7 @@ static void clear_masks(void) static union topology_entry *next_tle(union topology_entry *tle) { if (!tle->nl) - return (union topology_entry *)((struct topology_cpu *)tle + 1); + return (union topology_entry *)((struct topology_core *)tle + 1); return (union topology_entry *)((struct topology_container *)tle + 1); } @@ -231,9 +249,11 @@ static void update_cpu_masks(void) spin_lock_irqsave(&topology_lock, flags); for_each_possible_cpu(cpu) { + cpu_topology[cpu].thread_mask = cpu_thread_map(cpu); cpu_topology[cpu].core_mask = cpu_group_map(&socket_info, cpu); cpu_topology[cpu].book_mask = cpu_group_map(&book_info, cpu); if (!MACHINE_HAS_TOPOLOGY) { + cpu_topology[cpu].thread_id = cpu; cpu_topology[cpu].core_id = cpu; cpu_topology[cpu].socket_id = cpu; cpu_topology[cpu].book_id = cpu; @@ -445,6 +465,12 @@ int topology_cpu_init(struct cpu *cpu) return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group); } +const struct cpumask *cpu_thread_mask(int cpu) +{ + return &cpu_topology[cpu].thread_mask; +} + + const struct cpumask *cpu_coregroup_mask(int cpu) { return &cpu_topology[cpu].core_mask; @@ -456,6 +482,7 @@ static const struct cpumask *cpu_book_mask(int cpu) } static struct sched_domain_topology_level s390_topology[] = { + { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) }, { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, { cpu_book_mask, SD_INIT_NAME(BOOK) }, { cpu_cpu_mask, SD_INIT_NAME(DIE) }, diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index e34122e539a16b..e53d3595a7c8c1 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include static void virt_timer_expire(void); @@ -23,6 +25,10 @@ static DEFINE_SPINLOCK(virt_timer_lock); static atomic64_t virt_timer_current; static atomic64_t virt_timer_elapsed; +static DEFINE_PER_CPU(u64, mt_cycles[32]); +static DEFINE_PER_CPU(u64, mt_scaling_mult) = { 1 }; +static DEFINE_PER_CPU(u64, mt_scaling_div) = { 1 }; + static inline u64 get_vtimer(void) { u64 timer; @@ -61,6 +67,8 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset) { struct thread_info *ti = task_thread_info(tsk); u64 timer, clock, user, system, steal; + u64 user_scaled, system_scaled; + int i; timer = S390_lowcore.last_update_timer; clock = S390_lowcore.last_update_clock; @@ -76,15 +84,49 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset) S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock; + /* Do MT utilization calculation */ + if (smp_cpu_mtid) { + u64 cycles_new[32], *cycles_old; + u64 delta, mult, div; + + cycles_old = this_cpu_ptr(mt_cycles); + if (stcctm5(smp_cpu_mtid + 1, cycles_new) < 2) { + mult = div = 0; + for (i = 0; i <= smp_cpu_mtid; i++) { + delta = cycles_new[i] - cycles_old[i]; + mult += delta; + div += (i + 1) * delta; + } + if (mult > 0) { + /* Update scaling factor */ + __this_cpu_write(mt_scaling_mult, mult); + __this_cpu_write(mt_scaling_div, div); + memcpy(cycles_old, cycles_new, + sizeof(u64) * (smp_cpu_mtid + 1)); + } + } + } + user = S390_lowcore.user_timer - ti->user_timer; S390_lowcore.steal_timer -= user; ti->user_timer = S390_lowcore.user_timer; - account_user_time(tsk, user, user); system = S390_lowcore.system_timer - ti->system_timer; S390_lowcore.steal_timer -= system; ti->system_timer = S390_lowcore.system_timer; - account_system_time(tsk, hardirq_offset, system, system); + + user_scaled = user; + system_scaled = system; + /* Do MT utilization scaling */ + if (smp_cpu_mtid) { + u64 mult = __this_cpu_read(mt_scaling_mult); + u64 div = __this_cpu_read(mt_scaling_div); + + user_scaled = (user_scaled * mult) / div; + system_scaled = (system_scaled * mult) / div; + } + account_user_time(tsk, user, user_scaled); + account_system_time(tsk, hardirq_offset, system, system_scaled); steal = S390_lowcore.steal_timer; if ((s64) steal > 0) { @@ -126,7 +168,7 @@ void vtime_account_user(struct task_struct *tsk) void vtime_account_irq_enter(struct task_struct *tsk) { struct thread_info *ti = task_thread_info(tsk); - u64 timer, system; + u64 timer, system, system_scaled; timer = S390_lowcore.last_update_timer; S390_lowcore.last_update_timer = get_vtimer(); @@ -135,7 +177,15 @@ void vtime_account_irq_enter(struct task_struct *tsk) system = S390_lowcore.system_timer - ti->system_timer; S390_lowcore.steal_timer -= system; ti->system_timer = S390_lowcore.system_timer; - account_system_time(tsk, 0, system, system); + system_scaled = system; + /* Do MT utilization scaling */ + if (smp_cpu_mtid) { + u64 mult = __this_cpu_read(mt_scaling_mult); + u64 div = __this_cpu_read(mt_scaling_div); + + system_scaled = (system_scaled * mult) / div; + } + account_system_time(tsk, 0, system, system_scaled); virt_timer_forward(system); } diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 5bd6cb145a87d4..daf6cd5079ec3e 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -20,26 +20,31 @@ struct read_info_sccb { struct sccb_header header; /* 0-7 */ u16 rnmax; /* 8-9 */ u8 rnsize; /* 10 */ - u8 _reserved0[16 - 11]; /* 11-15 */ + u8 _pad_11[16 - 11]; /* 11-15 */ u16 ncpurl; /* 16-17 */ u16 cpuoff; /* 18-19 */ - u8 _reserved7[24 - 20]; /* 20-23 */ + u8 _pad_20[24 - 20]; /* 20-23 */ u8 loadparm[8]; /* 24-31 */ - u8 _reserved1[48 - 32]; /* 32-47 */ + u8 _pad_32[42 - 32]; /* 32-41 */ + u8 fac42; /* 42 */ + u8 fac43; /* 43 */ + u8 _pad_44[48 - 44]; /* 44-47 */ u64 facilities; /* 48-55 */ - u8 _reserved2a[76 - 56]; /* 56-75 */ + u8 _pad_56[66 - 56]; /* 56-65 */ + u8 fac66; /* 66 */ + u8 _pad_67[76 - 67]; /* 67-83 */ u32 ibc; /* 76-79 */ - u8 _reserved2b[84 - 80]; /* 80-83 */ + u8 _pad80[84 - 80]; /* 80-83 */ u8 fac84; /* 84 */ u8 fac85; /* 85 */ - u8 _reserved3[91 - 86]; /* 86-90 */ + u8 _pad_86[91 - 86]; /* 86-90 */ u8 flags; /* 91 */ - u8 _reserved4[100 - 92]; /* 92-99 */ + u8 _pad_92[100 - 92]; /* 92-99 */ u32 rnsize2; /* 100-103 */ u64 rnmax2; /* 104-111 */ - u8 _reserved5[120 - 112]; /* 112-119 */ + u8 _pad_112[120 - 112]; /* 112-119 */ u16 hcpua; /* 120-121 */ - u8 _reserved6[4096 - 122]; /* 122-4095 */ + u8 _pad_122[4096 - 122]; /* 122-4095 */ } __packed __aligned(PAGE_SIZE); static char sccb_early[PAGE_SIZE] __aligned(PAGE_SIZE) __initdata; @@ -50,6 +55,10 @@ static unsigned int sclp_max_cpu; static struct sclp_ipl_info sclp_ipl_info; static unsigned char sclp_siif; static u32 sclp_ibc; +static unsigned int sclp_mtid; +static unsigned int sclp_mtid_cp; +static unsigned int sclp_mtid_max; +static unsigned int sclp_mtid_prev; u64 sclp_facilities; u8 sclp_fac84; @@ -128,7 +137,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) boot_cpu_address = stap(); cpue = (void *)sccb + sccb->cpuoff; for (cpu = 0; cpu < sccb->ncpurl; cpue++, cpu++) { - if (boot_cpu_address != cpue->address) + if (boot_cpu_address != cpue->core_id) continue; sclp_siif = cpue->siif; break; @@ -139,6 +148,11 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) if (sccb->flags & 0x2) sclp_ipl_info.has_dump = 1; memcpy(&sclp_ipl_info.loadparm, &sccb->loadparm, LOADPARM_LEN); + + sclp_mtid = (sccb->fac42 & 0x80) ? (sccb->fac42 & 31) : 0; + sclp_mtid_cp = (sccb->fac42 & 0x80) ? (sccb->fac43 & 31) : 0; + sclp_mtid_max = max(sclp_mtid, sclp_mtid_cp); + sclp_mtid_prev = (sccb->fac42 & 0x80) ? (sccb->fac66 & 31) : 0; } bool __init sclp_has_linemode(void) @@ -178,6 +192,21 @@ unsigned int sclp_get_ibc(void) } EXPORT_SYMBOL(sclp_get_ibc); +unsigned int sclp_get_mtid(u8 cpu_type) +{ + return cpu_type ? sclp_mtid : sclp_mtid_cp; +} + +unsigned int sclp_get_mtid_max(void) +{ + return sclp_mtid_max; +} + +unsigned int sclp_get_mtid_prev(void) +{ + return sclp_mtid_prev; +} + /* * This function will be called after sclp_facilities_detect(), which gets * called from early.c code. The sclp_facilities_detect() function retrieves diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 10eb738fc81a88..3578105989a0d7 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -938,7 +938,7 @@ void reipl_ccw_dev(struct ccw_dev_id *devid) { struct subchannel_id uninitialized_var(schid); - s390_reset_system(NULL, NULL); + s390_reset_system(NULL, NULL, NULL); if (reipl_find_schid(devid, &schid) != 0) panic("IPL Device not found\n"); do_reipl_asm(*((__u32*)&schid)); From 5be6fdc090e156bc67b63310b65f4f54523e7b6c Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Fri, 16 Jan 2015 18:29:04 +0100 Subject: [PATCH 201/601] dcssblk: issue warning when trying to save SN/EN type DCSS The content of a DCSS of type SN or EN cannot be saved. Issue a warning when trying to save such a DCSS. Depending on the setup, this may be a user error or intended behaviour e.g. with a multi-DCSS device. Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dcssblk.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index b550c8c8d0103e..7f900229404dfd 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -438,7 +438,13 @@ dcssblk_save_store(struct device *dev, struct device_attribute *attr, const char pr_info("All DCSSs that map to device %s are " "saved\n", dev_info->segment_name); list_for_each_entry(entry, &dev_info->seg_list, lh) { - segment_save(entry->segment_name); + if (entry->segment_type == SEG_TYPE_EN || + entry->segment_type == SEG_TYPE_SN) + pr_warn("DCSS %s is of type SN or EN" + " and cannot be saved\n", + entry->segment_name); + else + segment_save(entry->segment_name); } } else { // device is busy => we save it when it becomes @@ -797,7 +803,12 @@ dcssblk_release(struct gendisk *disk, fmode_t mode) pr_info("Device %s has become idle and is being saved " "now\n", dev_info->segment_name); list_for_each_entry(entry, &dev_info->seg_list, lh) { - segment_save(entry->segment_name); + if (entry->segment_type == SEG_TYPE_EN || + entry->segment_type == SEG_TYPE_SN) + pr_warn("DCSS %s is of type SN or EN and cannot" + " be saved\n", entry->segment_name); + else + segment_save(entry->segment_name); } dev_info->save_pending = 0; } From 032f1be056a2e84f8817b56fe6d76e8e83285578 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Fri, 16 Jan 2015 10:35:05 +0100 Subject: [PATCH 202/601] s390/smp: remove check for CONFIG_ZFCPDUMP Commit 725908110a1f ("s390: add SMT support") added a check for CONFIG_ZFCPDUMP. But the Kconfig symbol ZFCPDUMP was removed in v3.16 through commit bf28a5970de3 ("s390/dump: Remove CONFIG_ZFCPDUMP"). So this check will always evaluate to false. No one noticed probably because the code also checks for CONFIG_CRASH_DUMP which "also enables s390 zfcpdump". Dump the unneeded check. Signed-off-by: Paul Bolle Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 370ff3a092a395..a668993ff577f9 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -762,7 +762,7 @@ static void __init smp_detect_cpus(void) panic("Could not find boot CPU type"); } -#if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_CRASH_DUMP) +#ifdef CONFIG_CRASH_DUMP /* Collect CPU state of previous system */ smp_store_cpu_states(info); #endif From 93568d6fc509923db204320062b1883bc9962b0d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 16 Jun 2014 10:21:31 -0700 Subject: [PATCH 203/601] s390: update default configuration Signed-off-by: Martin Schwidefsky --- arch/s390/configs/default_defconfig | 2 +- arch/s390/configs/gcov_defconfig | 1 + arch/s390/configs/performance_defconfig | 1 + arch/s390/configs/zfcpdump_defconfig | 1 + arch/s390/defconfig | 7 +------ 5 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 9432d0f202ef20..64707750c78071 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -555,7 +555,6 @@ CONFIG_DEBUG_OBJECTS_RCU_HEAD=y CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y CONFIG_SLUB_DEBUG_ON=y CONFIG_SLUB_STATS=y -CONFIG_DEBUG_KMEMLEAK=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_VM=y CONFIG_DEBUG_VM_RB=y @@ -563,6 +562,7 @@ CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m CONFIG_DEBUG_PER_CPU_MAPS=y CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y +CONFIG_PANIC_ON_OOPS=y CONFIG_TIMER_STATS=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index 219dca6ea92684..5c3097272cd8c8 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -540,6 +540,7 @@ CONFIG_UNUSED_SYMBOLS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m +CONFIG_PANIC_ON_OOPS=y CONFIG_TIMER_STATS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 822c2f2e0c25d4..bda70f1ffd2c59 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -537,6 +537,7 @@ CONFIG_FRAME_WARN=1024 CONFIG_UNUSED_SYMBOLS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y +CONFIG_PANIC_ON_OOPS=y CONFIG_TIMER_STATS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 9d63051ebec425..1b0184a0f7f213 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -71,6 +71,7 @@ CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_KERNEL=y +CONFIG_PANIC_ON_OOPS=y # CONFIG_SCHED_DEBUG is not set CONFIG_RCU_CPU_STALL_TIMEOUT=60 # CONFIG_FTRACE is not set diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 785c5f24d6f9e5..83ef702d240368 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -14,7 +14,6 @@ CONFIG_IKCONFIG_PROC=y CONFIG_CGROUPS=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y -CONFIG_RESOURCE_COUNTERS=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y @@ -22,12 +21,8 @@ CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y CONFIG_NAMESPACES=y CONFIG_BLK_DEV_INITRD=y -CONFIG_RD_BZIP2=y -CONFIG_RD_LZMA=y -CONFIG_RD_XZ=y -CONFIG_RD_LZO=y -CONFIG_RD_LZ4=y CONFIG_EXPERT=y +CONFIG_BPF_SYSCALL=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=y From 2ec504934e43e6c47dfdd4436ca58b36f1ab3218 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Mon, 19 Jan 2015 23:02:00 +0100 Subject: [PATCH 204/601] s390/hmcdrv: free memory on error path Free allocated page in case of error returned by hmcdrv_ftp_startup. [heiko.carstens@de.ibm.com]: slightly changed Christophe's patch Signed-off-by: Christophe Jaillet Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/hmcdrv_ftp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/s390/char/hmcdrv_ftp.c b/drivers/s390/char/hmcdrv_ftp.c index 4bd63322fc29e6..d4b61d9088fb73 100644 --- a/drivers/s390/char/hmcdrv_ftp.c +++ b/drivers/s390/char/hmcdrv_ftp.c @@ -200,10 +200,9 @@ int hmcdrv_ftp_probe(void) rc = hmcdrv_ftp_startup(); if (rc) - return rc; + goto out; rc = hmcdrv_ftp_do(&ftp); - free_page((unsigned long) ftp.buf); hmcdrv_ftp_shutdown(); switch (rc) { @@ -216,7 +215,8 @@ int hmcdrv_ftp_probe(void) rc = 0; /* clear length (success) */ break; } /* switch */ - +out: + free_page((unsigned long) ftp.buf); return rc; } EXPORT_SYMBOL(hmcdrv_ftp_probe); From 926a7336a776263b5fdfa7b77fec704be1cae33f Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 16 Dec 2013 10:48:00 +0100 Subject: [PATCH 205/601] s390/hvc_iucv: add simple wildcard matches to the iucv allow filter Introduce a wildcard character to filter a range of z/VM user IDs with a single filter entry. Only the leading portion up to the wildcard of an filter entry contributes to the match. This reduces the filter size and avoids configuration updates when deploying new terminal server instances. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- drivers/tty/hvc/hvc_iucv.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/drivers/tty/hvc/hvc_iucv.c b/drivers/tty/hvc/hvc_iucv.c index ea74460f363862..f78a87b07872ff 100644 --- a/drivers/tty/hvc/hvc_iucv.c +++ b/drivers/tty/hvc/hvc_iucv.c @@ -1,10 +1,10 @@ /* - * hvc_iucv.c - z/VM IUCV hypervisor console (HVC) device driver + * z/VM IUCV hypervisor console (HVC) device driver * * This HVC device driver provides terminal access using * z/VM IUCV communication paths. * - * Copyright IBM Corp. 2008, 2009 + * Copyright IBM Corp. 2008, 2013 * * Author(s): Hendrik Brueckner */ @@ -102,6 +102,7 @@ static struct hvc_iucv_private *hvc_iucv_table[MAX_HVC_IUCV_LINES]; #define IUCV_HVC_CON_IDX (0) /* List of z/VM user ID filter entries (struct iucv_vmid_filter) */ #define MAX_VMID_FILTER (500) +#define FILTER_WILDCARD_CHAR '*' static size_t hvc_iucv_filter_size; static void *hvc_iucv_filter; static const char *hvc_iucv_filter_string; @@ -734,20 +735,31 @@ static void hvc_iucv_notifier_del(struct hvc_struct *hp, int id) * hvc_iucv_filter_connreq() - Filter connection request based on z/VM user ID * @ipvmid: Originating z/VM user ID (right padded with blanks) * - * Returns 0 if the z/VM user ID @ipvmid is allowed to connection, otherwise - * non-zero. + * Returns 0 if the z/VM user ID that is specified with @ipvmid is permitted to + * connect, otherwise non-zero. */ static int hvc_iucv_filter_connreq(u8 ipvmid[8]) { - size_t i; + const char *wildcard, *filter_entry; + size_t i, len; /* Note: default policy is ACCEPT if no filter is set */ if (!hvc_iucv_filter_size) return 0; - for (i = 0; i < hvc_iucv_filter_size; i++) - if (0 == memcmp(ipvmid, hvc_iucv_filter + (8 * i), 8)) + for (i = 0; i < hvc_iucv_filter_size; i++) { + filter_entry = hvc_iucv_filter + (8 * i); + + /* If a filter entry contains the filter wildcard character, + * reduce the length to match the leading portion of the user + * ID only (wildcard match). Characters following the wildcard + * are ignored. + */ + wildcard = strnchr(filter_entry, 8, FILTER_WILDCARD_CHAR); + len = (wildcard) ? wildcard - filter_entry : 8; + if (0 == memcmp(ipvmid, filter_entry, len)) return 0; + } return 1; } @@ -1166,6 +1178,7 @@ static void __init hvc_iucv_destroy(struct hvc_iucv_private *priv) /** * hvc_iucv_parse_filter() - Parse filter for a single z/VM user ID * @filter: String containing a comma-separated list of z/VM user IDs + * @dest: Location where to store the parsed z/VM user ID */ static const char *hvc_iucv_parse_filter(const char *filter, char *dest) { @@ -1188,6 +1201,10 @@ static const char *hvc_iucv_parse_filter(const char *filter, char *dest) if (filter[len - 1] == '\n') len--; + /* prohibit filter entries containing the wildcard character only */ + if (len == 1 && *filter == FILTER_WILDCARD_CHAR) + return ERR_PTR(-EINVAL); + if (len > 8) return ERR_PTR(-EINVAL); From 2f896d5866107e2926dcdec34a7d40bc56dd2951 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 22 Jan 2015 01:36:05 +0000 Subject: [PATCH 206/601] arm64: use fixmap for text patching When kernel text is marked as read only, it cannot be modified directly. Use a fixmap to modify the text instead in a similar manner to x86 and arm. Reviewed-by: Kees Cook Reviewed-by: Mark Rutland Tested-by: Kees Cook Tested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fixmap.h | 1 + arch/arm64/kernel/insn.c | 47 ++++++++++++++++++++++++++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 9ef6eca905cae9..defa0ff982509c 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -49,6 +49,7 @@ enum fixed_addresses { FIX_BTMAP_END = __end_of_permanent_fixed_addresses, FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, + FIX_TEXT_POKE0, __end_of_fixed_addresses }; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 7e9327a0986dc5..27d4864577e5d4 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -17,14 +17,19 @@ * along with this program. If not, see . */ #include +#include #include #include +#include #include +#include #include +#include #include #include #include +#include #include #define AARCH64_INSN_SF_BIT BIT(31) @@ -72,6 +77,29 @@ bool __kprobes aarch64_insn_is_nop(u32 insn) } } +static DEFINE_SPINLOCK(patch_lock); + +static void __kprobes *patch_map(void *addr, int fixmap) +{ + unsigned long uintaddr = (uintptr_t) addr; + bool module = !core_kernel_text(uintaddr); + struct page *page; + + if (module && IS_ENABLED(CONFIG_DEBUG_SET_MODULE_RONX)) + page = vmalloc_to_page(addr); + else + page = virt_to_page(addr); + + BUG_ON(!page); + set_fixmap(fixmap, page_to_phys(page)); + + return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK)); +} + +static void __kprobes patch_unmap(int fixmap) +{ + clear_fixmap(fixmap); +} /* * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always * little-endian. @@ -88,10 +116,27 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp) return ret; } +static int __kprobes __aarch64_insn_write(void *addr, u32 insn) +{ + void *waddr = addr; + unsigned long flags = 0; + int ret; + + spin_lock_irqsave(&patch_lock, flags); + waddr = patch_map(addr, FIX_TEXT_POKE0); + + ret = probe_kernel_write(waddr, &insn, AARCH64_INSN_SIZE); + + patch_unmap(FIX_TEXT_POKE0); + spin_unlock_irqrestore(&patch_lock, flags); + + return ret; +} + int __kprobes aarch64_insn_write(void *addr, u32 insn) { insn = cpu_to_le32(insn); - return probe_kernel_write(addr, &insn, AARCH64_INSN_SIZE); + return __aarch64_insn_write(addr, insn); } static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) From 55b5eb75e7ccdfe94b6ea1be6bba0c21149abecf Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 22 Jan 2015 11:10:39 +0100 Subject: [PATCH 207/601] s390/tape: remove redundant if statement The unit check handler for 3480 / 3490 tapes used to print a different warning message for erpa code 0x57 dependent on the device type. The warning messages have been remove in a cleanup, the if statement is a left over. Remove it. Reported-by: Fraser Brown Signed-off-by: Martin Schwidefsky --- drivers/s390/char/tape_34xx.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/s390/char/tape_34xx.c b/drivers/s390/char/tape_34xx.c index 9aa79702b3707a..de69f0ddc321de 100644 --- a/drivers/s390/char/tape_34xx.c +++ b/drivers/s390/char/tape_34xx.c @@ -773,13 +773,11 @@ tape_34xx_unit_check(struct tape_device *device, struct tape_request *request, "occurred\n"); return tape_34xx_erp_failed(request, -EIO); case 0x57: - if (device->cdev->id.driver_info == tape_3480) { - /* Attention intercept. */ - return tape_34xx_erp_retry(request); - } else { - /* Global status intercept. */ - return tape_34xx_erp_retry(request); - } + /* + * 3480: Attention intercept. + * 3490: Global status intercept. + */ + return tape_34xx_erp_retry(request); case 0x5a: /* * Tape length incompatible. The tape inserted is too long, From da141706aea52c1a9fbd28cb8d289b78819f5436 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 21 Jan 2015 17:36:06 -0800 Subject: [PATCH 208/601] arm64: add better page protections to arm64 Add page protections for arm64 similar to those in arm. This is for security reasons to prevent certain classes of exploits. The current method: - Map all memory as either RWX or RW. We round to the nearest section to avoid creating page tables before everything is mapped - Once everything is mapped, if either end of the RWX section should not be X, we split the PMD and remap as necessary - When initmem is to be freed, we change the permissions back to RW (using stop machine if necessary to flush the TLB) - If CONFIG_DEBUG_RODATA is set, the read only sections are set read only. Acked-by: Ard Biesheuvel Tested-by: Kees Cook Tested-by: Ard Biesheuvel Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig.debug | 23 +++ arch/arm64/include/asm/cacheflush.h | 5 + arch/arm64/kernel/vmlinux.lds.S | 17 ++- arch/arm64/mm/init.c | 1 + arch/arm64/mm/mm.h | 2 + arch/arm64/mm/mmu.c | 211 ++++++++++++++++++++++++---- 6 files changed, 233 insertions(+), 26 deletions(-) diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 5fdd6dce806125..4a8741073c90c0 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -66,4 +66,27 @@ config DEBUG_SET_MODULE_RONX against certain classes of kernel exploits. If in doubt, say "N". +config DEBUG_RODATA + bool "Make kernel text and rodata read-only" + help + If this is set, kernel text and rodata will be made read-only. This + is to help catch accidental or malicious attempts to change the + kernel's executable code. Additionally splits rodata from kernel + text so it can be made explicitly non-executable. + + If in doubt, say Y + +config DEBUG_ALIGN_RODATA + depends on DEBUG_RODATA && !ARM64_64K_PAGES + bool "Align linker sections up to SECTION_SIZE" + help + If this option is enabled, sections that may potentially be marked as + read only or non-executable will be aligned up to the section size of + the kernel. This prevents sections from being split into pages and + avoids a potential TLB penalty. The downside is an increase in + alignment and potentially wasted space. Turn on this option if + performance is more important than memory pressure. + + If in doubt, say N + endmenu diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 7ae31a2cc6c0ba..67d309cc3b6b80 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -152,4 +152,9 @@ int set_memory_ro(unsigned long addr, int numpages); int set_memory_rw(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); + +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void); +#endif + #endif diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 9965ec87cbecfc..5d9d2dca530d97 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -8,6 +8,7 @@ #include #include #include +#include #include "image.h" @@ -49,6 +50,14 @@ PECOFF_FILE_ALIGNMENT = 0x200; #define PECOFF_EDATA_PADDING #endif +#ifdef CONFIG_DEBUG_ALIGN_RODATA +#define ALIGN_DEBUG_RO . = ALIGN(1< #include #include +#include #include #include @@ -59,21 +60,43 @@ EXPORT_SYMBOL(phys_mem_access_prot); static void __init *early_alloc(unsigned long sz) { void *ptr = __va(memblock_alloc(sz, sz)); + BUG_ON(!ptr); memset(ptr, 0, sz); return ptr; } -static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, +/* + * remap a PMD into pages + */ +static void split_pmd(pmd_t *pmd, pte_t *pte) +{ + unsigned long pfn = pmd_pfn(*pmd); + int i = 0; + + do { + /* + * Need to have the least restrictive permissions available + * permissions will be fixed up later + */ + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); + pfn++; + } while (pte++, i++, i < PTRS_PER_PTE); +} + +static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, - pgprot_t prot) + pgprot_t prot, + void *(*alloc)(unsigned long size)) { pte_t *pte; - if (pmd_none(*pmd)) { - pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t)); + if (pmd_none(*pmd) || pmd_bad(*pmd)) { + pte = alloc(PTRS_PER_PTE * sizeof(pte_t)); + if (pmd_sect(*pmd)) + split_pmd(pmd, pte); __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE); + flush_tlb_all(); } - BUG_ON(pmd_bad(*pmd)); pte = pte_offset_kernel(pmd, addr); do { @@ -82,9 +105,22 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, +void split_pud(pud_t *old_pud, pmd_t *pmd) +{ + unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT; + pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr); + int i = 0; + + do { + set_pmd(pmd, __pmd(addr | prot)); + addr += PMD_SIZE; + } while (pmd++, i++, i < PTRS_PER_PMD); +} + +static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot) + phys_addr_t phys, pgprot_t prot, + void *(*alloc)(unsigned long size)) { pmd_t *pmd; unsigned long next; @@ -93,8 +129,16 @@ static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, * Check for initial section mappings in the pgd/pud and remove them. */ if (pud_none(*pud) || pud_bad(*pud)) { - pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t)); + pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t)); + if (pud_sect(*pud)) { + /* + * need to have the 1G of mappings continue to be + * present + */ + split_pud(pud, pmd); + } pud_populate(mm, pud, pmd); + flush_tlb_all(); } pmd = pmd_offset(pud, addr); @@ -113,21 +157,34 @@ static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, flush_tlb_all(); } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot); + prot, alloc); } phys += next - addr; } while (pmd++, addr = next, addr != end); } -static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, +static inline bool use_1G_block(unsigned long addr, unsigned long next, + unsigned long phys) +{ + if (PAGE_SHIFT != 12) + return false; + + if (((addr | next | phys) & ~PUD_MASK) != 0) + return false; + + return true; +} + +static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot) + phys_addr_t phys, pgprot_t prot, + void *(*alloc)(unsigned long size)) { pud_t *pud; unsigned long next; if (pgd_none(*pgd)) { - pud = early_alloc(PTRS_PER_PUD * sizeof(pud_t)); + pud = alloc(PTRS_PER_PUD * sizeof(pud_t)); pgd_populate(mm, pgd, pud); } BUG_ON(pgd_bad(*pgd)); @@ -139,8 +196,7 @@ static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, /* * For 4K granule only, attempt to put down a 1GB block */ - if ((PAGE_SHIFT == 12) && - ((addr | next | phys) & ~PUD_MASK) == 0) { + if (use_1G_block(addr, next, phys)) { pud_t old_pud = *pud; set_pud(pud, __pud(phys | pgprot_val(mk_sect_prot(prot)))); @@ -158,7 +214,7 @@ static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, flush_tlb_all(); } } else { - alloc_init_pmd(mm, pud, addr, next, phys, prot); + alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc); } phys += next - addr; } while (pud++, addr = next, addr != end); @@ -168,9 +224,10 @@ static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, * Create the page directory entries and any necessary page tables for the * mapping specified by 'md'. */ -static void __init __create_mapping(struct mm_struct *mm, pgd_t *pgd, +static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot) + phys_addr_t size, pgprot_t prot, + void *(*alloc)(unsigned long size)) { unsigned long addr, length, end, next; @@ -180,13 +237,23 @@ static void __init __create_mapping(struct mm_struct *mm, pgd_t *pgd, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(mm, pgd, addr, next, phys, prot); + alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc); phys += next - addr; } while (pgd++, addr = next, addr != end); } -static void __init create_mapping(phys_addr_t phys, unsigned long virt, - phys_addr_t size) +static void *late_alloc(unsigned long size) +{ + void *ptr; + + BUG_ON(size > PAGE_SIZE); + ptr = (void *)__get_free_page(PGALLOC_GFP); + BUG_ON(!ptr); + return ptr; +} + +static void __ref create_mapping(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) { if (virt < VMALLOC_START) { pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", @@ -194,15 +261,71 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, return; } __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt, - size, PAGE_KERNEL_EXEC); + size, prot, early_alloc); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot); + __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot, + early_alloc); +} + +static void create_mapping_late(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) +{ + if (virt < VMALLOC_START) { + pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", + &phys, virt); + return; + } + + return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), + phys, virt, size, prot, late_alloc); +} + +#ifdef CONFIG_DEBUG_RODATA +static void __init __map_memblock(phys_addr_t start, phys_addr_t end) +{ + /* + * Set up the executable regions using the existing section mappings + * for now. This will get more fine grained later once all memory + * is mapped + */ + unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); + unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); + + if (end < kernel_x_start) { + create_mapping(start, __phys_to_virt(start), + end - start, PAGE_KERNEL); + } else if (start >= kernel_x_end) { + create_mapping(start, __phys_to_virt(start), + end - start, PAGE_KERNEL); + } else { + if (start < kernel_x_start) + create_mapping(start, __phys_to_virt(start), + kernel_x_start - start, + PAGE_KERNEL); + create_mapping(kernel_x_start, + __phys_to_virt(kernel_x_start), + kernel_x_end - kernel_x_start, + PAGE_KERNEL_EXEC); + if (kernel_x_end < end) + create_mapping(kernel_x_end, + __phys_to_virt(kernel_x_end), + end - kernel_x_end, + PAGE_KERNEL); + } + +} +#else +static void __init __map_memblock(phys_addr_t start, phys_addr_t end) +{ + create_mapping(start, __phys_to_virt(start), end - start, + PAGE_KERNEL_EXEC); } +#endif static void __init map_mem(void) { @@ -248,14 +371,53 @@ static void __init map_mem(void) memblock_set_current_limit(limit); } #endif - - create_mapping(start, __phys_to_virt(start), end - start); + __map_memblock(start, end); } /* Limit no longer required. */ memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } +void __init fixup_executable(void) +{ +#ifdef CONFIG_DEBUG_RODATA + /* now that we are actually fully mapped, make the start/end more fine grained */ + if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) { + unsigned long aligned_start = round_down(__pa(_stext), + SECTION_SIZE); + + create_mapping(aligned_start, __phys_to_virt(aligned_start), + __pa(_stext) - aligned_start, + PAGE_KERNEL); + } + + if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) { + unsigned long aligned_end = round_up(__pa(__init_end), + SECTION_SIZE); + create_mapping(__pa(__init_end), (unsigned long)__init_end, + aligned_end - __pa(__init_end), + PAGE_KERNEL); + } +#endif +} + +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void) +{ + create_mapping_late(__pa(_stext), (unsigned long)_stext, + (unsigned long)_etext - (unsigned long)_stext, + PAGE_KERNEL_EXEC | PTE_RDONLY); + +} +#endif + +void fixup_init(void) +{ + create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin, + (unsigned long)__init_end - (unsigned long)__init_begin, + PAGE_KERNEL); +} + /* * paging_init() sets up the page tables, initialises the zone memory * maps and sets up the zero page. @@ -265,6 +427,7 @@ void __init paging_init(void) void *zero_page; map_mem(); + fixup_executable(); /* * Finally flush the caches and tlb to ensure that we're in a From 60305db9884515ca063474e262b454f6da04e4e2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 22 Jan 2015 10:01:40 +0000 Subject: [PATCH 209/601] arm64/efi: move virtmap init to early initcall Now that the create_mapping() code in mm/mmu.c is able to support setting up kernel page tables at initcall time, we can move the whole virtmap creation to arm64_enable_runtime_services() instead of having a distinct stage during early boot. This also allows us to drop the arm64-specific EFI_VIRTMAP flag. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/efi.h | 18 ++---- arch/arm64/kernel/efi.c | 111 +++++++++++++++++------------------ arch/arm64/kernel/setup.c | 1 - arch/arm64/mm/mmu.c | 2 +- 4 files changed, 59 insertions(+), 73 deletions(-) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 7baf2cc04e1e0c..ef572206f1c3eb 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -6,10 +6,8 @@ #ifdef CONFIG_EFI extern void efi_init(void); -extern void efi_virtmap_init(void); #else #define efi_init() -#define efi_virtmap_init() #endif #define efi_call_virt(f, ...) \ @@ -53,23 +51,17 @@ extern void efi_virtmap_init(void); #define EFI_ALLOC_ALIGN SZ_64K /* - * On ARM systems, virtually remapped UEFI runtime services are set up in three + * On ARM systems, virtually remapped UEFI runtime services are set up in two * distinct stages: * - The stub retrieves the final version of the memory map from UEFI, populates * the virt_addr fields and calls the SetVirtualAddressMap() [SVAM] runtime * service to communicate the new mapping to the firmware (Note that the new * mapping is not live at this time) - * - During early boot, the page tables are allocated and populated based on the - * virt_addr fields in the memory map, but only if all descriptors with the - * EFI_MEMORY_RUNTIME attribute have a non-zero value for virt_addr. If this - * succeeds, the EFI_VIRTMAP flag is set to indicate that the virtual mappings - * have been installed successfully. - * - During an early initcall(), the UEFI Runtime Services are enabled and the - * EFI_RUNTIME_SERVICES bit set if some conditions are met, i.e., we need a - * non-early mapping of the UEFI system table, and we need to have the virtmap - * installed. + * - During an early initcall(), the EFI system table is permanently remapped + * and the virtual remapping of the UEFI Runtime Services regions is loaded + * into a private set of page tables. If this all succeeds, the Runtime + * Services are enabled and the EFI_RUNTIME_SERVICES bit set. */ -#define EFI_VIRTMAP EFI_ARCH_1 void efi_virtmap_load(void); void efi_virtmap_unload(void); diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index c9cb0fbe7aa45a..b42c7b480e1ee3 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -38,6 +38,19 @@ struct efi_memory_map memmap; static u64 efi_system_table; +static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss; + +static struct mm_struct efi_mm = { + .mm_rb = RB_ROOT, + .pgd = efi_pgd, + .mm_users = ATOMIC_INIT(2), + .mm_count = ATOMIC_INIT(1), + .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), + .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), + .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), + INIT_MM_CONTEXT(efi_mm) +}; + static int uefi_debug __initdata; static int __init uefi_debug_setup(char *str) { @@ -213,6 +226,45 @@ void __init efi_init(void) return; reserve_regions(); + early_memunmap(memmap.map, params.mmap_size); +} + +static bool __init efi_virtmap_init(void) +{ + efi_memory_desc_t *md; + + for_each_efi_memory_desc(&memmap, md) { + u64 paddr, npages, size; + pgprot_t prot; + + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + if (md->virt_addr == 0) + return false; + + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + pr_info(" EFI remap 0x%016llx => %p\n", + md->phys_addr, (void *)md->virt_addr); + + /* + * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be + * executable, everything else can be mapped with the XN bits + * set. + */ + if (!is_normal_ram(md)) + prot = __pgprot(PROT_DEVICE_nGnRE); + else if (md->type == EFI_RUNTIME_SERVICES_CODE) + prot = PAGE_KERNEL_EXEC; + else + prot = PAGE_KERNEL; + + create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot); + } + return true; } /* @@ -254,7 +306,7 @@ static int __init arm64_enable_runtime_services(void) } set_bit(EFI_SYSTEM_TABLES, &efi.flags); - if (!efi_enabled(EFI_VIRTMAP)) { + if (!efi_virtmap_init()) { pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n"); return -1; } @@ -283,19 +335,6 @@ static int __init arm64_dmi_init(void) } core_initcall(arm64_dmi_init); -static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss; - -static struct mm_struct efi_mm = { - .mm_rb = RB_ROOT, - .pgd = efi_pgd, - .mm_users = ATOMIC_INIT(2), - .mm_count = ATOMIC_INIT(1), - .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem), - .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock), - .mmlist = LIST_HEAD_INIT(efi_mm.mmlist), - INIT_MM_CONTEXT(efi_mm) -}; - static void efi_set_pgd(struct mm_struct *mm) { cpu_switch_mm(mm->pgd, mm); @@ -315,47 +354,3 @@ void efi_virtmap_unload(void) efi_set_pgd(current->active_mm); preempt_enable(); } - -void __init efi_virtmap_init(void) -{ - efi_memory_desc_t *md; - - if (!efi_enabled(EFI_BOOT)) - return; - - for_each_efi_memory_desc(&memmap, md) { - u64 paddr, npages, size; - pgprot_t prot; - - if (!(md->attribute & EFI_MEMORY_RUNTIME)) - continue; - if (WARN(md->virt_addr == 0, - "UEFI virtual mapping incomplete or missing -- no entry found for 0x%llx\n", - md->phys_addr)) - return; - - paddr = md->phys_addr; - npages = md->num_pages; - memrange_efi_to_native(&paddr, &npages); - size = npages << PAGE_SHIFT; - - pr_info(" EFI remap 0x%016llx => %p\n", - md->phys_addr, (void *)md->virt_addr); - - /* - * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be - * executable, everything else can be mapped with the XN bits - * set. - */ - if (!is_normal_ram(md)) - prot = __pgprot(PROT_DEVICE_nGnRE); - else if (md->type == EFI_RUNTIME_SERVICES_CODE) - prot = PAGE_KERNEL_EXEC; - else - prot = PAGE_KERNEL; - - create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot); - } - set_bit(EFI_VIRTMAP, &efi.flags); - early_memunmap(memmap.map, memmap.map_end - memmap.map); -} diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 207413fe08a02c..bb10903887d439 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -382,7 +382,6 @@ void __init setup_arch(char **cmdline_p) paging_init(); request_standard_resources(); - efi_virtmap_init(); early_ioremap_reset(); unflatten_device_tree(); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 91d55b6efd8af6..155cbb0a74b601 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -269,7 +269,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, pgprot_t prot) { __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot, - early_alloc); + late_alloc); } static void create_mapping_late(phys_addr_t phys, unsigned long virt, From bfd33c4b4b1ac718d481efee10f3a16d88757577 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Thu, 15 Jan 2015 14:18:18 +0200 Subject: [PATCH 210/601] MAINTAINERS: email update Changed to my private email address as I left Samsung. Signed-off-by: Dmitry Kasatkin --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index ddf762511cb3ca..e9098d034b9d60 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4888,7 +4888,7 @@ F: drivers/ipack/ INTEGRITY MEASUREMENT ARCHITECTURE (IMA) M: Mimi Zohar -M: Dmitry Kasatkin +M: Dmitry Kasatkin L: linux-ima-devel@lists.sourceforge.net L: linux-ima-user@lists.sourceforge.net L: linux-security-module@vger.kernel.org From 3efb5f21a36fbddd524cffe36426a84622ce580e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 20 Jan 2015 11:28:28 -0500 Subject: [PATCH 211/601] tracing: Remove unneeded includes of debugfs.h and fs.h The creation of tracing files and directories is for the most part encapsulated in helper functions in trace.c. Other files do not need to include debugfs.h or fs.h, as they may have needed to in the past. Remove them from the files that do not need them. Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 2 -- kernel/trace/trace_branch.c | 1 - kernel/trace/trace_export.c | 2 -- kernel/trace/trace_irqsoff.c | 2 -- kernel/trace/trace_nop.c | 2 -- kernel/trace/trace_printk.c | 2 -- kernel/trace/trace_sched_switch.c | 2 -- kernel/trace/trace_sched_wakeup.c | 2 -- kernel/trace/trace_stack.c | 2 -- 9 files changed, 17 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 7a4104cb95cb28..96079180de3d66 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include /* for self test */ @@ -23,7 +22,6 @@ #include #include #include -#include #include diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 7d6e2afde66909..57cbf1efdd4405 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index d4ddde28a81ad0..12e2b99be862f9 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -6,12 +6,10 @@ #include #include #include -#include #include #include #include #include -#include #include "trace_output.h" diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 9bb104f748d0c0..8523ea345f2b1a 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -10,11 +10,9 @@ * Copyright (C) 2004 Nadia Yvette Chambers */ #include -#include #include #include #include -#include #include "trace.h" diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c index fcf0a9e4891620..8bb2071474dd01 100644 --- a/kernel/trace/trace_nop.c +++ b/kernel/trace/trace_nop.c @@ -6,8 +6,6 @@ */ #include -#include -#include #include #include "trace.h" diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index c4e70b6bd7faa2..7ee4b5cc1ce501 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -5,7 +5,6 @@ * */ #include -#include #include #include #include @@ -15,7 +14,6 @@ #include #include #include -#include #include "trace.h" diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 2e293beb186e94..419ca37e72c954 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -5,8 +5,6 @@ * */ #include -#include -#include #include #include #include diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 8fb84b362816ec..d6e1003724e960 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -10,8 +10,6 @@ * Copyright (C) 2004 Nadia Yvette Chambers */ #include -#include -#include #include #include #include diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 16eddb308c336a..e80927b88eb046 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -7,12 +7,10 @@ #include #include #include -#include #include #include #include #include -#include #include From 14a5ae40f0def33a422a45b2ed09198adb7bf11c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 20 Jan 2015 11:14:16 -0500 Subject: [PATCH 212/601] tracing: Use IS_ERR() check for return value of tracing_init_dentry() tracing_init_dentry() will soon return NULL as a valid pointer for the top level tracing directroy. NULL can not be used as an error value. Instead, switch to ERR_PTR() and check the return status with IS_ERR(). Signed-off-by: Steven Rostedt --- kernel/trace/ftrace.c | 2 +- kernel/trace/trace.c | 8 ++++---- kernel/trace/trace_events.c | 2 +- kernel/trace/trace_functions_graph.c | 2 +- kernel/trace/trace_kprobe.c | 2 +- kernel/trace/trace_printk.c | 2 +- kernel/trace/trace_stack.c | 2 +- kernel/trace/trace_stat.c | 2 +- kernel/trace/trace_uprobe.c | 2 +- 9 files changed, 12 insertions(+), 12 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 929a733d302e0d..80c9d34540ddbc 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5419,7 +5419,7 @@ static __init int ftrace_init_debugfs(void) struct dentry *d_tracer; d_tracer = tracing_init_dentry(); - if (!d_tracer) + if (IS_ERR(d_tracer)) return 0; ftrace_init_dyn_debugfs(d_tracer); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7669b1f3234e0d..acd27555dc5be8 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5820,7 +5820,7 @@ struct dentry *tracing_init_dentry_tr(struct trace_array *tr) return tr->dir; if (!debugfs_initialized()) - return NULL; + return ERR_PTR(-ENODEV); if (tr->flags & TRACE_ARRAY_FL_GLOBAL) tr->dir = debugfs_create_dir("tracing", NULL); @@ -5844,7 +5844,7 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) return tr->percpu_dir; d_tracer = tracing_init_dentry_tr(tr); - if (!d_tracer) + if (IS_ERR(d_tracer)) return NULL; tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer); @@ -6047,7 +6047,7 @@ static struct dentry *trace_options_init_dentry(struct trace_array *tr) return tr->options; d_tracer = tracing_init_dentry_tr(tr); - if (!d_tracer) + if (IS_ERR(d_tracer)) return NULL; tr->options = debugfs_create_dir("options", d_tracer); @@ -6538,7 +6538,7 @@ static __init int tracer_init_debugfs(void) trace_access_lock_init(); d_tracer = tracing_init_dentry(); - if (!d_tracer) + if (IS_ERR(d_tracer)) return 0; init_tracer_debugfs(&global_trace, d_tracer); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 366a78a3e61e21..4ff8c1394017a2 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2490,7 +2490,7 @@ static __init int event_trace_init(void) return -ENODEV; d_tracer = tracing_init_dentry(); - if (!d_tracer) + if (IS_ERR(d_tracer)) return 0; entry = debugfs_create_file("available_events", 0444, d_tracer, diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index ba476009e5de49..2d25ad1526bb16 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -1437,7 +1437,7 @@ static __init int init_graph_debugfs(void) struct dentry *d_tracer; d_tracer = tracing_init_dentry(); - if (!d_tracer) + if (IS_ERR(d_tracer)) return 0; trace_create_file("max_graph_depth", 0644, d_tracer, diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 5edb518be3458a..b4a00def88f5c5 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1320,7 +1320,7 @@ static __init int init_kprobe_trace(void) return -EINVAL; d_tracer = tracing_init_dentry(); - if (!d_tracer) + if (IS_ERR(d_tracer)) return 0; entry = debugfs_create_file("kprobe_events", 0644, d_tracer, diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 7ee4b5cc1ce501..36c1455b7567ee 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -347,7 +347,7 @@ static __init int init_trace_printk_function_export(void) struct dentry *d_tracer; d_tracer = tracing_init_dentry(); - if (!d_tracer) + if (IS_ERR(d_tracer)) return 0; trace_create_file("printk_formats", 0444, d_tracer, diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index e80927b88eb046..c3e4fcfddd4580 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -460,7 +460,7 @@ static __init int stack_trace_init(void) struct dentry *d_tracer; d_tracer = tracing_init_dentry(); - if (!d_tracer) + if (IS_ERR(d_tracer)) return 0; trace_create_file("stack_max_size", 0644, d_tracer, diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 7af67360b33074..75e19e86c95480 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -276,7 +276,7 @@ static int tracing_stat_init(void) struct dentry *d_tracing; d_tracing = tracing_init_dentry(); - if (!d_tracing) + if (IS_ERR(d_tracing)) return 0; stat_dir = debugfs_create_dir("trace_stat", d_tracing); diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 8520acc34b185f..5f0eba9e5e6bcb 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1321,7 +1321,7 @@ static __init int init_uprobe_trace(void) struct dentry *d_tracer; d_tracer = tracing_init_dentry(); - if (!d_tracer) + if (IS_ERR(d_tracer)) return 0; trace_create_file("uprobe_events", 0644, d_tracer, From fca8ba4ee24d17f8845ed1c8edcc3fd81c4650c2 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Thu, 22 Jan 2015 17:07:36 +0000 Subject: [PATCH 213/601] of/unittest: Remove obsolete code Commit 3ce04b4a9, "Removes OF_UNITTEST dependency on OF_DYNAMIC config symbol" removes a bunch of code, but missed a few minor bits. Clean it up by removing the node removal cache and flag. Signed-off-by: Grant Likely Cc: Gaurav Minocha Cc: Rob Herring --- drivers/of/unittest.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 139363af5c887f..7aa1d6dae5ba72 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -27,11 +27,6 @@ static struct selftest_results { int failed; } selftest_results; -#define NO_OF_NODES 3 -static struct device_node *nodes[NO_OF_NODES]; -static int last_node_index; -static bool selftest_live_tree; - #define selftest(result, fmt, ...) ({ \ bool failed = !(result); \ if (failed) { \ @@ -830,13 +825,6 @@ static int attach_node_and_children(struct device_node *np) return 0; } - /* Children of the root need to be remembered for removal */ - if (np->parent == of_root) { - if (WARN_ON(last_node_index >= NO_OF_NODES)) - return -EINVAL; - nodes[last_node_index++] = np; - } - child = np->child; np->child = NULL; @@ -899,10 +887,7 @@ static int __init selftest_data_add(void) } if (!of_root) { - /* enabling flag for removing nodes */ - selftest_live_tree = true; of_root = selftest_data_node; - for_each_of_allnodes(np) __of_attach_node_sysfs(np); of_aliases = of_find_node_by_path("/aliases"); From dabd39cc2fb1b0e97313ebbe7309ea8e05b7cfb5 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Jan 2015 22:34:32 +0000 Subject: [PATCH 214/601] KEYS: Make /proc/keys unconditional if CONFIG_KEYS=y Now that /proc/keys is used by libkeyutils to look up a key by type and description, we should make it unconditional and remove CONFIG_DEBUG_PROC_KEYS. Reported-by: Jiri Kosina Signed-off-by: David Howells Tested-by: Jiri Kosina --- Documentation/security/keys.txt | 2 -- security/keys/Kconfig | 18 ------------------ security/keys/proc.c | 8 -------- 3 files changed, 28 deletions(-) diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index 821c936e1a634f..c9e7f4f223a55b 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt @@ -323,8 +323,6 @@ about the status of the key service: U Under construction by callback to userspace N Negative key - This file must be enabled at kernel configuration time as it allows anyone - to list the keys database. (*) /proc/key-users diff --git a/security/keys/Kconfig b/security/keys/Kconfig index a4f3f8c48d6e3f..72483b8f1be5b0 100644 --- a/security/keys/Kconfig +++ b/security/keys/Kconfig @@ -80,21 +80,3 @@ config ENCRYPTED_KEYS Userspace only ever sees/stores encrypted blobs. If you are unsure as to whether this is required, answer N. - -config KEYS_DEBUG_PROC_KEYS - bool "Enable the /proc/keys file by which keys may be viewed" - depends on KEYS - help - This option turns on support for the /proc/keys file - through which - can be listed all the keys on the system that are viewable by the - reading process. - - The only keys included in the list are those that grant View - permission to the reading process whether or not it possesses them. - Note that LSM security checks are still performed, and may further - filter out keys that the current process is not authorised to view. - - Only key attributes are listed here; key payloads are not included in - the resulting table. - - If you are unsure as to whether this is required, answer N. diff --git a/security/keys/proc.c b/security/keys/proc.c index 972eeb336b8145..f0611a6368cd25 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -18,7 +18,6 @@ #include #include "internal.h" -#ifdef CONFIG_KEYS_DEBUG_PROC_KEYS static int proc_keys_open(struct inode *inode, struct file *file); static void *proc_keys_start(struct seq_file *p, loff_t *_pos); static void *proc_keys_next(struct seq_file *p, void *v, loff_t *_pos); @@ -38,7 +37,6 @@ static const struct file_operations proc_keys_fops = { .llseek = seq_lseek, .release = seq_release, }; -#endif static int proc_key_users_open(struct inode *inode, struct file *file); static void *proc_key_users_start(struct seq_file *p, loff_t *_pos); @@ -67,11 +65,9 @@ static int __init key_proc_init(void) { struct proc_dir_entry *p; -#ifdef CONFIG_KEYS_DEBUG_PROC_KEYS p = proc_create("keys", 0, NULL, &proc_keys_fops); if (!p) panic("Cannot create /proc/keys\n"); -#endif p = proc_create("key-users", 0, NULL, &proc_key_users_fops); if (!p) @@ -86,8 +82,6 @@ __initcall(key_proc_init); * Implement "/proc/keys" to provide a list of the keys on the system that * grant View permission to the caller. */ -#ifdef CONFIG_KEYS_DEBUG_PROC_KEYS - static struct rb_node *key_serial_next(struct seq_file *p, struct rb_node *n) { struct user_namespace *user_ns = seq_user_ns(p); @@ -275,8 +269,6 @@ static int proc_keys_show(struct seq_file *m, void *v) return 0; } -#endif /* CONFIG_KEYS_DEBUG_PROC_KEYS */ - static struct rb_node *__key_user_next(struct user_namespace *user_ns, struct rb_node *n) { while (n) { From e9863e687d87855900bd7cd96b51c018172467e1 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Fri, 12 Dec 2014 12:39:37 +0800 Subject: [PATCH 215/601] powerpc/powernv: Print the M64 range information in bootup log The M64 range information is missed in dmesg, which would be helpful in debug. This patch prints the M64 range information in the same format as M32. Signed-off-by: Wei Yang Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci-ioda.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 5d52d6f274f81d..4f1e43c05e84ab 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -357,6 +357,9 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb) phb->ioda.m64_segsize = phb->ioda.m64_size / phb->ioda.total_pe; phb->ioda.m64_base = pci_addr; + pr_info(" MEM64 0x%016llx..0x%016llx -> 0x%016llx\n", + res->start, res->end, pci_addr); + /* Use last M64 BAR to cover M64 window */ phb->ioda.m64_bar_idx = 15; phb->init_m64 = pnv_ioda2_init_m64; From 79872e35469b6db1c5243abded7b24367bd0a353 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Tue, 21 Oct 2014 13:41:29 +0530 Subject: [PATCH 216/601] powerpc/pseries: All events of EPOW_SYSTEM_SHUTDOWN must initiate shutdown The current handling of EPOW_SHUTDOWN_ON_UPS event does not shutdown the system after logging the message. All the events of EPOW_SYSTEM_SHUTDOWN action code (EPOW_SHUTDOWN_ON_UPS is a part of it) must initiate system shutdown as per the SPAPR spec. If the LPAR does not shutdown after receiving this rtas based event, it will expose itself to a forced abrupt shutdown initiated by the platform firmware. This patch fixes the situation. Signed-off-by: Anshuman Khandual Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/ras.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index c3b2a7e81ddb29..02e4a17455164d 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -89,6 +89,8 @@ static void handle_system_shutdown(char event_modifier) case EPOW_SHUTDOWN_ON_UPS: pr_emerg("Loss of power reported by firmware, system is " "running on UPS/battery"); + pr_emerg("Check RTAS error log for details"); + orderly_poweroff(true); break; case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: From 4e28784024a0d87f6f04250e46e8c9ac4f30e361 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 23 Oct 2014 19:19:35 -0200 Subject: [PATCH 217/601] powernv/iommu: disable IOMMU bypass with param iommu=nobypass When IOMMU bypass is enabled, a PCI device can read and write memory that was not mapped by the driver without causing an EEH. That might cause memory corruption, for example. When we disable bypass, DMA reads and writes to addresses not mapped by the IOMMU will cause an EEH, allowing us to debug such issues. Signed-off-by: Thadeu Lima de Souza Cascardo Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- Documentation/kernel-parameters.txt | 2 ++ arch/powerpc/platforms/powernv/pci-ioda.c | 26 ++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4df73da11adc58..7dedfe56c3f30a 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1493,6 +1493,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. forcesac soft pt [x86, IA-64] + nobypass [PPC/POWERNV] + Disable IOMMU bypass, using IOMMU for PCI devices. io7= [HW] IO7 for Marvel based alpha systems diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 4f1e43c05e84ab..85b473823fdad0 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -75,6 +75,28 @@ static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, #define pe_info(pe, fmt, ...) \ pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__) +static bool pnv_iommu_bypass_disabled __read_mostly; + +static int __init iommu_setup(char *str) +{ + if (!str) + return -EINVAL; + + while (*str) { + if (!strncmp(str, "nobypass", 8)) { + pnv_iommu_bypass_disabled = true; + pr_info("PowerNV: IOMMU bypass window disabled.\n"); + break; + } + str += strcspn(str, ","); + if (*str == ',') + str++; + } + + return 0; +} +early_param("iommu", iommu_setup); + /* * stdcix is only supposed to be used in hypervisor real mode as per * the architecture spec @@ -1351,7 +1373,9 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, pnv_ioda_setup_bus_dma(pe, pe->pbus, true); /* Also create a bypass window */ - pnv_pci_ioda2_setup_bypass_pe(phb, pe); + if (!pnv_iommu_bypass_disabled) + pnv_pci_ioda2_setup_bypass_pe(phb, pe); + return; fail: if (pe->tce32_seg >= 0) From 145a2d0427a88e874f85e82e6fc42933a36f5a2b Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Mon, 15 Dec 2014 09:45:00 +0800 Subject: [PATCH 218/601] powerpc/pci: remove the multi-init for pci_dn->phb pci_dn->phb is set to phb in update_dn_pci_info(), if succeed. This patch removes the duplication of pci_dn->phb initialization. Signed-off-by: Wei Yang Reviewed-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/pci_dn.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 1f61fab59d9b28..83df3075d3df50 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -147,10 +147,8 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb) /* PHB nodes themselves must not match */ update_dn_pci_info(dn, phb); pdn = dn->data; - if (pdn) { + if (pdn) pdn->devfn = pdn->busno = -1; - pdn->phb = phb; - } /* Update dn->phb ptrs for new phb and children devices */ traverse_pci_devices(dn, update_dn_pci_info, phb); From b64eedb88b43da9e1ecfe4c197892e6562f4e1df Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Wed, 14 Jan 2015 12:38:50 +0530 Subject: [PATCH 219/601] selftests/powerpc: Make git ignore all binaries in powerpc test suite This patch includes all of the powerpc test binaries into the .gitignore file listing in their respective directories. This will make sure that git ignores all of these test binaries when displaying status. Signed-off-by: Anshuman Khandual Signed-off-by: Michael Ellerman --- .../selftests/powerpc/copyloops/.gitignore | 4 ++++ tools/testing/selftests/powerpc/mm/.gitignore | 1 + .../testing/selftests/powerpc/pmu/.gitignore | 3 +++ .../selftests/powerpc/pmu/ebb/.gitignore | 22 +++++++++++++++++++ .../selftests/powerpc/primitives/.gitignore | 1 + tools/testing/selftests/powerpc/tm/.gitignore | 1 + 6 files changed, 32 insertions(+) create mode 100644 tools/testing/selftests/powerpc/copyloops/.gitignore create mode 100644 tools/testing/selftests/powerpc/mm/.gitignore create mode 100644 tools/testing/selftests/powerpc/pmu/.gitignore create mode 100644 tools/testing/selftests/powerpc/pmu/ebb/.gitignore create mode 100644 tools/testing/selftests/powerpc/primitives/.gitignore create mode 100644 tools/testing/selftests/powerpc/tm/.gitignore diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore new file mode 100644 index 00000000000000..25a192f62c4de4 --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/.gitignore @@ -0,0 +1,4 @@ +copyuser_64 +copyuser_power7 +memcpy_64 +memcpy_power7 diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore new file mode 100644 index 00000000000000..156f4e89c2f1c7 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -0,0 +1 @@ +hugetlb_vs_thp_test diff --git a/tools/testing/selftests/powerpc/pmu/.gitignore b/tools/testing/selftests/powerpc/pmu/.gitignore new file mode 100644 index 00000000000000..e748f336eed355 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/.gitignore @@ -0,0 +1,3 @@ +count_instructions +l3_bank_test +per_event_excludes diff --git a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore new file mode 100644 index 00000000000000..42bddbed8b6464 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore @@ -0,0 +1,22 @@ +reg_access_test +event_attributes_test +cycles_test +cycles_with_freeze_test +pmc56_overflow_test +ebb_vs_cpu_event_test +cpu_event_vs_ebb_test +cpu_event_pinned_vs_ebb_test +task_event_vs_ebb_test +task_event_pinned_vs_ebb_test +multi_ebb_procs_test +multi_counter_test +pmae_handling_test +close_clears_pmcc_test +instruction_count_test +fork_cleanup_test +ebb_on_child_test +ebb_on_willing_child_test +back_to_back_ebbs_test +lost_exception_test +no_handler_test +cycles_with_mmcr2_test diff --git a/tools/testing/selftests/powerpc/primitives/.gitignore b/tools/testing/selftests/powerpc/primitives/.gitignore new file mode 100644 index 00000000000000..4cc4e31bed1d50 --- /dev/null +++ b/tools/testing/selftests/powerpc/primitives/.gitignore @@ -0,0 +1 @@ +load_unaligned_zeropad diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore new file mode 100644 index 00000000000000..33d02cc54a3ed1 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -0,0 +1 @@ +tm-resched-dscr From 3776c2096790233b06c8b4e82046daa77c63fced Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 9 Dec 2014 11:44:07 +0530 Subject: [PATCH 220/601] selftests/powerpc: Add subpage protection self test. Signed-off-by: Paul Mackerras Signed-off-by: Aneesh Kumar K.V mpe: Fix compile errors and formatting. Add tempfile logic to Makefile. Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/mm/.gitignore | 2 + tools/testing/selftests/powerpc/mm/Makefile | 9 +- .../selftests/powerpc/mm/subpage_prot.c | 220 ++++++++++++++++++ 3 files changed, 228 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/powerpc/mm/subpage_prot.c diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index 156f4e89c2f1c7..b43ade0ec861b8 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -1 +1,3 @@ hugetlb_vs_thp_test +subpage_prot +tempfile diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 357ccbd6bad9f0..a14c538dd7f8d6 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -1,9 +1,9 @@ noarg: $(MAKE) -C ../ -PROGS := hugetlb_vs_thp_test +PROGS := hugetlb_vs_thp_test subpage_prot -all: $(PROGS) +all: $(PROGS) tempfile $(PROGS): ../harness.c @@ -12,7 +12,10 @@ run_tests: all ./$$PROG; \ done; +tempfile: + dd if=/dev/zero of=tempfile bs=64k count=1 + clean: - rm -f $(PROGS) + rm -f $(PROGS) tempfile .PHONY: all run_tests clean diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c new file mode 100644 index 00000000000000..440180ff8089d5 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c @@ -0,0 +1,220 @@ +/* + * Copyright IBM Corp. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +char *file_name; + +int in_test; +volatile int faulted; +volatile void *dar; +int errors; + +static void segv(int signum, siginfo_t *info, void *ctxt_v) +{ + ucontext_t *ctxt = (ucontext_t *)ctxt_v; + struct pt_regs *regs = ctxt->uc_mcontext.regs; + + if (!in_test) { + fprintf(stderr, "Segfault outside of test !\n"); + exit(1); + } + + faulted = 1; + dar = (void *)regs->dar; + regs->nip += 4; +} + +static inline void do_read(const volatile void *addr) +{ + int ret; + + asm volatile("lwz %0,0(%1); twi 0,%0,0; isync;\n" + : "=r" (ret) : "r" (addr) : "memory"); +} + +static inline void do_write(const volatile void *addr) +{ + int val = 0x1234567; + + asm volatile("stw %0,0(%1); sync; \n" + : : "r" (val), "r" (addr) : "memory"); +} + +static inline void check_faulted(void *addr, long page, long subpage, int write) +{ + int want_fault = (subpage == ((page + 3) % 16)); + + if (write) + want_fault |= (subpage == ((page + 1) % 16)); + + if (faulted != want_fault) { + printf("Failed at 0x%p (p=%ld,sp=%ld,w=%d), want=%s, got=%s !\n", + addr, page, subpage, write, + want_fault ? "fault" : "pass", + faulted ? "fault" : "pass"); + ++errors; + } + + if (faulted) { + if (dar != addr) { + printf("Fault expected at 0x%p and happened at 0x%p !\n", + addr, dar); + } + faulted = 0; + asm volatile("sync" : : : "memory"); + } +} + +static int run_test(void *addr, unsigned long size) +{ + unsigned int *map; + long i, j, pages, err; + + pages = size / 0x10000; + map = malloc(pages * 4); + assert(map); + + /* + * for each page, mark subpage i % 16 read only and subpage + * (i + 3) % 16 inaccessible + */ + for (i = 0; i < pages; i++) { + map[i] = (0x40000000 >> (((i + 1) * 2) % 32)) | + (0xc0000000 >> (((i + 3) * 2) % 32)); + } + + err = syscall(__NR_subpage_prot, addr, size, map); + if (err) { + perror("subpage_perm"); + return 1; + } + free(map); + + in_test = 1; + errors = 0; + for (i = 0; i < pages; i++) { + for (j = 0; j < 16; j++, addr += 0x1000) { + do_read(addr); + check_faulted(addr, i, j, 0); + do_write(addr); + check_faulted(addr, i, j, 1); + } + } + + in_test = 0; + if (errors) { + printf("%d errors detected\n", errors); + return 1; + } + + return 0; +} + +int test_anon(void) +{ + unsigned long align; + struct sigaction act = { + .sa_sigaction = segv, + .sa_flags = SA_SIGINFO + }; + void *mallocblock; + unsigned long mallocsize; + + if (getpagesize() != 0x10000) { + fprintf(stderr, "Kernel page size must be 64K!\n"); + return 1; + } + + sigaction(SIGSEGV, &act, NULL); + + mallocsize = 4 * 16 * 1024 * 1024; + + FAIL_IF(posix_memalign(&mallocblock, 64 * 1024, mallocsize)); + + align = (unsigned long)mallocblock; + if (align & 0xffff) + align = (align | 0xffff) + 1; + + mallocblock = (void *)align; + + printf("allocated malloc block of 0x%lx bytes at 0x%p\n", + mallocsize, mallocblock); + + printf("testing malloc block...\n"); + + return run_test(mallocblock, mallocsize); +} + +int test_file(void) +{ + struct sigaction act = { + .sa_sigaction = segv, + .sa_flags = SA_SIGINFO + }; + void *fileblock; + off_t filesize; + int fd; + + fd = open(file_name, O_RDWR); + if (fd == -1) { + perror("failed to open file"); + return 1; + } + sigaction(SIGSEGV, &act, NULL); + + filesize = lseek(fd, 0, SEEK_END); + if (filesize & 0xffff) + filesize &= ~0xfffful; + + fileblock = mmap(NULL, filesize, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + if (fileblock == MAP_FAILED) { + perror("failed to map file"); + return 1; + } + printf("allocated %s for 0x%lx bytes at 0x%p\n", + file_name, filesize, fileblock); + + printf("testing file map...\n"); + + return run_test(fileblock, filesize); +} + +int main(int argc, char *argv[]) +{ + test_harness(test_anon, "subpage_prot_anon"); + + if (argc > 1) + file_name = argv[1]; + else + file_name = "tempfile"; + + test_harness(test_file, "subpage_prot_file"); + + return 0; +} From ed77d4182ba9c5c5ab1b28728ae1ce750d575dfa Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 15 Jan 2015 12:24:00 +1100 Subject: [PATCH 221/601] powerpc: Remove unused CPU_FTR_IABR We removed the last usage of CPU_FTR_IABR in commit 1ad7d70562ee "powerpc/xmon: Enable HW instruction breakpoint on POWER8". Mark it as free. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cputable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 22d5a7da9e6816..5cf5a6d1068599 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -165,7 +165,7 @@ extern const char *powerpc_base_platform; #define CPU_FTR_ARCH_201 LONG_ASM_CONST(0x0000000200000000) #define CPU_FTR_ARCH_206 LONG_ASM_CONST(0x0000000400000000) #define CPU_FTR_ARCH_207S LONG_ASM_CONST(0x0000000800000000) -#define CPU_FTR_IABR LONG_ASM_CONST(0x0000001000000000) +/* Free LONG_ASM_CONST(0x0000001000000000) */ #define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000002000000000) #define CPU_FTR_CTRL LONG_ASM_CONST(0x0000004000000000) #define CPU_FTR_SMT LONG_ASM_CONST(0x0000008000000000) From 10ea834364c8670b3bf9bbbf6b9d27b4d2ebc9de Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 15 Jan 2015 12:01:42 +1100 Subject: [PATCH 222/601] powerpc: Rename _TIF_SYSCALL_T_OR_A to _TIF_SYSCALL_DOTRACE Once upon a time, at least 9 years ago (< 2.6.12), _TIF_SYSCALL_T_OR_A meant "TRACE or AUDIT". But these days it means TRACE or AUDIT or SECCOMP or TRACEPOINT or NOHZ. All of those are implemented via syscall_dotrace() so rename the flag to that to try and clarify things. Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/thread_info.h | 2 +- arch/powerpc/kernel/entry_32.S | 6 +++--- arch/powerpc/kernel/entry_64.S | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index ebc4f165690a9f..c1efa05613f0d5 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -124,7 +124,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_SYSCALL_TRACEPOINT (1< Date: Mon, 15 Dec 2014 20:20:31 +0530 Subject: [PATCH 223/601] powerpc/kprobes: Fix kallsyms lookup across powerpc ABIv1 and ABIv2 Currently, all non-dot symbols are being treated as function descriptors in ABIv1. This is incorrect and is resulting in perf probe not working: # perf probe do_fork Added new event: Failed to write event: Invalid argument Error: Failed to add events. # dmesg | tail -1 [192268.073063] Could not insert probe at _text+768432: -22 perf probe bases all kernel probes on _text and writes, for example, "p:probe/do_fork _text+768432" to /sys/kernel/debug/tracing/kprobe_events. In-kernel, _text is being considered to be a function descriptor and is resulting in the above error. Fix this by changing how we lookup symbol addresses on ppc64. We first check for the dot variant of a symbol and look at the non-dot variant only if that fails. In this manner, we avoid having to look at the function descriptor. While at it, also separate out how this works on ABIv2 where we don't have dot symbols, but need to use the local entry point. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/kprobes.h | 63 +++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index af15d4d8d604c7..039b583db02923 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -41,34 +41,59 @@ typedef ppc_opcode_t kprobe_opcode_t; #define MAX_INSN_SIZE 1 #ifdef CONFIG_PPC64 +#if defined(_CALL_ELF) && _CALL_ELF == 2 +/* PPC64 ABIv2 needs local entry point */ +#define kprobe_lookup_name(name, addr) \ +{ \ + addr = (kprobe_opcode_t *)kallsyms_lookup_name(name); \ + if (addr) \ + addr = (kprobe_opcode_t *)ppc_function_entry(addr); \ +} +#else /* - * 64bit powerpc uses function descriptors. - * Handle cases where: - * - User passes a <.symbol> or - * - User passes a or - * - User passes a non-existent symbol, kallsyms_lookup_name - * returns 0. Don't deref the NULL pointer in that case + * 64bit powerpc ABIv1 uses function descriptors: + * - Check for the dot variant of the symbol first. + * - If that fails, try looking up the symbol provided. + * + * This ensures we always get to the actual symbol and not the descriptor. + * Also handle format. */ #define kprobe_lookup_name(name, addr) \ { \ - addr = (kprobe_opcode_t *)kallsyms_lookup_name(name); \ - if (addr) { \ - char *colon; \ - if ((colon = strchr(name, ':')) != NULL) { \ - colon++; \ - if (*colon != '\0' && *colon != '.') \ - addr = (kprobe_opcode_t *)ppc_function_entry(addr); \ - } else if (name[0] != '.') \ - addr = (kprobe_opcode_t *)ppc_function_entry(addr); \ - } else { \ - char dot_name[KSYM_NAME_LEN]; \ + char dot_name[MODULE_NAME_LEN + 1 + KSYM_NAME_LEN]; \ + char *modsym; \ + bool dot_appended = false; \ + if ((modsym = strchr(name, ':')) != NULL) { \ + modsym++; \ + if (*modsym != '\0' && *modsym != '.') { \ + /* Convert to */ \ + strncpy(dot_name, name, modsym - name); \ + dot_name[modsym - name] = '.'; \ + dot_name[modsym - name + 1] = '\0'; \ + strncat(dot_name, modsym, \ + sizeof(dot_name) - (modsym - name) - 2);\ + dot_appended = true; \ + } else { \ + dot_name[0] = '\0'; \ + strncat(dot_name, name, sizeof(dot_name) - 1); \ + } \ + } else if (name[0] != '.') { \ dot_name[0] = '.'; \ dot_name[1] = '\0'; \ strncat(dot_name, name, KSYM_NAME_LEN - 2); \ - addr = (kprobe_opcode_t *)kallsyms_lookup_name(dot_name); \ + dot_appended = true; \ + } else { \ + dot_name[0] = '\0'; \ + strncat(dot_name, name, KSYM_NAME_LEN - 1); \ + } \ + addr = (kprobe_opcode_t *)kallsyms_lookup_name(dot_name); \ + if (!addr && dot_appended) { \ + /* Let's try the original non-dot symbol lookup */ \ + addr = (kprobe_opcode_t *)kallsyms_lookup_name(name); \ } \ } -#endif +#endif /* defined(_CALL_ELF) && _CALL_ELF == 2 */ +#endif /* CONFIG_PPC64 */ #define flush_insn_slot(p) do { } while (0) #define kretprobe_blacklist_size 0 From 53a448c3d5d721cd85e3ad3e38c222a4dbb17f13 Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Wed, 21 Jan 2015 16:21:14 -0600 Subject: [PATCH 224/601] powerpc: Replace cpumask_weight(cpu_possible_mask) with num_possible_cpus() num_possible_cpus() is just a shorthand for it. Signed-off-by: Emil Medve Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/cell/smp.c | 2 +- arch/powerpc/platforms/pseries/hotplug-cpu.c | 2 +- arch/powerpc/sysdev/mpic.c | 2 +- arch/powerpc/sysdev/xics/xics-common.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index c8017a7bcabd2b..b64e7ead752f20 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -106,7 +106,7 @@ static int __init smp_iic_probe(void) { iic_request_IPIs(); - return cpumask_weight(cpu_possible_mask); + return num_possible_cpus(); } static void smp_cell_setup_cpu(int cpu) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index f30cf4d136a475..62475440fd4531 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -272,7 +272,7 @@ static int pseries_add_processor(struct device_node *np) */ printk(KERN_ERR "Cannot add cpu %s; this system configuration" " supports %d logical cpus.\n", np->full_name, - cpumask_weight(cpu_possible_mask)); + num_possible_cpus()); goto out_unlock; } diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index c4648ad5c1f36b..bbfbbf2025fde5 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -1929,7 +1929,7 @@ int __init smp_mpic_probe(void) DBG("smp_mpic_probe()...\n"); - nr_cpus = cpumask_weight(cpu_possible_mask); + nr_cpus = num_possible_cpus(); DBG("nr_cpus: %d\n", nr_cpus); diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 365249cd346bb3..125743b58c7097 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -148,7 +148,7 @@ int __init xics_smp_probe(void) /* Register all the IPIs */ xics_request_ipi(); - return cpumask_weight(cpu_possible_mask); + return num_possible_cpus(); } #endif /* CONFIG_SMP */ From 6f20e7f2e930211613a66d0603fa4abaaf3ce662 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 8 Jan 2015 16:40:51 +1100 Subject: [PATCH 225/601] powerpc/kernel: Avoid memory corruption at early stage When calling to early_setup(), we pick "boot_paca" up for the master CPU and initialize that with initialise_paca(). At that point, the SLB shadow buffer isn't populated yet. Updating the SLB shadow buffer should corrupt what we had in physical address 0 where the trap instruction is usually stored. This hasn't been observed to cause any trouble in practice, but is obviously fishy. Fixes: 6f4441ef7009 ("powerpc: Dynamically allocate slb_shadow from memblock") Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/paca.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index d6e195e8cd4c4e..5a23b69f812972 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -115,6 +115,14 @@ static struct slb_shadow * __init init_slb_shadow(int cpu) { struct slb_shadow *s = &slb_shadow[cpu]; + /* + * When we come through here to initialise boot_paca, the slb_shadow + * buffers are not allocated yet. That's OK, we'll get one later in + * boot, but make sure we don't corrupt memory at 0. + */ + if (!slb_shadow) + return NULL; + s->persistent = cpu_to_be32(SLB_NUM_BOLTED); s->buffer_length = cpu_to_be32(sizeof(*s)); From 2aa5cf9e48f2f39cc255f8e29964df3ff9ca017b Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Tue, 25 Nov 2014 09:27:00 +1100 Subject: [PATCH 226/601] powerpc/eeh: Fix missed PE#0 on P7IOC PE#0 should be regarded as valid for P7IOC, while it's invalid for PHB3. The patch adds flag EEH_VALID_PE_ZERO to differentiate those two cases. Without the patch, we possibly see frozen PE#0 state is cleared without EEH recovery taken on P7IOC as following kernel logs indicate: [root@ltcfbl8eb ~]# dmesg : pci 0000:00 : [PE# 000] Secondary bus 0 associated with PE#0 pci 0000:01 : [PE# 001] Secondary bus 1 associated with PE#1 pci 0001:00 : [PE# 000] Secondary bus 0 associated with PE#0 pci 0001:01 : [PE# 001] Secondary bus 1 associated with PE#1 pci 0002:00 : [PE# 000] Secondary bus 0 associated with PE#0 pci 0002:01 : [PE# 001] Secondary bus 1 associated with PE#1 pci 0003:00 : [PE# 000] Secondary bus 0 associated with PE#0 pci 0003:01 : [PE# 001] Secondary bus 1 associated with PE#1 pci 0003:20 : [PE# 002] Secondary bus 32..63 associated with PE#2 : EEH: Clear non-existing PHB#3-PE#0 EEH: PHB location: U78AE.001.WZS00M9-P1-002 Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 5 +++-- arch/powerpc/kernel/eeh_pe.c | 14 +++++++++++--- arch/powerpc/platforms/powernv/eeh-powernv.c | 11 +++++++++++ 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 0652ebe117af66..9c11d1ed6a368f 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -38,8 +38,9 @@ struct device_node; #define EEH_FORCE_DISABLED 0x02 /* EEH disabled */ #define EEH_PROBE_MODE_DEV 0x04 /* From PCI device */ #define EEH_PROBE_MODE_DEVTREE 0x08 /* From device tree */ -#define EEH_ENABLE_IO_FOR_LOG 0x10 /* Enable IO for log */ -#define EEH_EARLY_DUMP_LOG 0x20 /* Dump log immediately */ +#define EEH_VALID_PE_ZERO 0x10 /* PE#0 is valid */ +#define EEH_ENABLE_IO_FOR_LOG 0x20 /* Enable IO for log */ +#define EEH_EARLY_DUMP_LOG 0x40 /* Dump log immediately */ /* * Delay for PE reset, all in ms diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 5a63e2b0f65b61..fa950fbc2d970b 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -239,10 +239,18 @@ static void *__eeh_pe_get(void *data, void *flag) if (pe->type & EEH_PE_PHB) return NULL; - /* We prefer PE address */ - if (edev->pe_config_addr && - (edev->pe_config_addr == pe->addr)) + /* + * We prefer PE address. For most cases, we should + * have non-zero PE address + */ + if (eeh_has_flag(EEH_VALID_PE_ZERO)) { + if (edev->pe_config_addr == pe->addr) + return pe; + } else { + if (edev->pe_config_addr && + (edev->pe_config_addr == pe->addr)) return pe; + } /* Try BDF address */ if (edev->config_addr && diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 1d19e7917d7fc5..e261869adc86d5 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -68,6 +68,17 @@ static int powernv_eeh_init(void) if (phb->model == PNV_PHB_MODEL_P7IOC) eeh_add_flag(EEH_ENABLE_IO_FOR_LOG); + + /* + * PE#0 should be regarded as valid by EEH core + * if it's not the reserved one. Currently, we + * have the reserved PE#0 and PE#127 for PHB3 + * and P7IOC separately. So we should regard + * PE#0 as valid for P7IOC. + */ + if (phb->ioda.reserved_pe != 0) + eeh_add_flag(EEH_VALID_PE_ZERO); + break; } From 432227e9077eec13b3caf3aec6087f94a2f4327f Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 11 Dec 2014 14:28:55 +1100 Subject: [PATCH 227/601] powerpc/eeh: Introduce flag EEH_PE_REMOVED The conditions that one specific PE's frozen count exceeds the maximal allowed times (EEH_MAX_ALLOWED_FREEZES) and it's in isolated or recovery state indicate the PE was removed permanently implicitly. The patch introduces flag EEH_PE_REMOVED to indicate that explicitly so that we don't depend on the fixed maximal allowed times, which can be varied as we do in subsequent patch. Flag EEH_PE_REMOVED is expected to be marked for the PE whose frozen count exceeds the maximal allowed times, or just failed from recovery. Requested-by: Ryan Grimm Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 1 + arch/powerpc/kernel/eeh_driver.c | 2 +- arch/powerpc/kernel/eeh_pe.c | 6 ++---- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 9c11d1ed6a368f..3e4dd34f2bd2fc 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -78,6 +78,7 @@ struct device_node; #define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */ #define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */ +#define EEH_PE_REMOVED (1 << 10) /* Removed permanently */ struct eeh_pe { int type; /* PE type: PHB/Bus/Device */ diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index b17e793ba67ed6..ac00672af7d199 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -806,7 +806,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); /* Mark the PE to be removed permanently */ - pe->freeze_count = EEH_MAX_ALLOWED_FREEZES + 1; + eeh_pe_state_mark(pe, EEH_PE_REMOVED); /* * Shut down the device drivers for good. We mark diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index fa950fbc2d970b..1e4946c36f9ecf 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -526,8 +526,7 @@ static void *__eeh_pe_state_mark(void *data, void *flag) struct pci_dev *pdev; /* Keep the state of permanently removed PE intact */ - if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) && - (state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING))) + if (pe->state & EEH_PE_REMOVED) return NULL; pe->state |= state; @@ -600,8 +599,7 @@ static void *__eeh_pe_state_clear(void *data, void *flag) struct pci_dev *pdev; /* Keep the state of permanently removed PE intact */ - if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) && - (state & EEH_PE_ISOLATED)) + if (pe->state & EEH_PE_REMOVED) return NULL; pe->state &= ~state; From 1b28f170d99170a1fdd22818a9610a73196b391d Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 11 Dec 2014 14:28:56 +1100 Subject: [PATCH 228/601] powerpc/eeh: Allow to set maximal frozen times When PE's frozen count hits maximal allowed frozen times, which is 5 currently, it will be forced to be offline permanently. Once the PE is removed permanently, rebooting machine is required to bring the PE back. It's not convienent when testing EEH functionality. The patch exports the maximal allowed frozen times through debugfs entry (/sys/kernel/debug/powerpc/eeh_max_freezes). Requested-by: Ryan Grimm Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/eeh.h | 7 +------ arch/powerpc/kernel/eeh.c | 24 ++++++++++++++++++++++++ arch/powerpc/kernel/eeh_driver.c | 2 +- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 3e4dd34f2bd2fc..55abfd09e47fc7 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h @@ -218,6 +218,7 @@ struct eeh_ops { }; extern int eeh_subsystem_flags; +extern int eeh_max_freezes; extern struct eeh_ops *eeh_ops; extern raw_spinlock_t confirm_error_lock; @@ -255,12 +256,6 @@ static inline void eeh_serialize_unlock(unsigned long flags) raw_spin_unlock_irqrestore(&confirm_error_lock, flags); } -/* - * Max number of EEH freezes allowed before we consider the device - * to be permanently disabled. - */ -#define EEH_MAX_ALLOWED_FREEZES 5 - typedef void *(*eeh_traverse_func)(void *data, void *flag); void eeh_set_pe_aux_size(int size); int eeh_phb_pe_create(struct pci_controller *phb); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index e1b6d8e172893c..3b2252e7731b61 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -104,6 +104,13 @@ int eeh_subsystem_flags; EXPORT_SYMBOL(eeh_subsystem_flags); +/* + * EEH allowed maximal frozen times. If one particular PE's + * frozen count in last hour exceeds this limit, the PE will + * be forced to be offline permanently. + */ +int eeh_max_freezes = 5; + /* Platform dependent EEH operations */ struct eeh_ops *eeh_ops = NULL; @@ -1652,8 +1659,22 @@ static int eeh_enable_dbgfs_get(void *data, u64 *val) return 0; } +static int eeh_freeze_dbgfs_set(void *data, u64 val) +{ + eeh_max_freezes = val; + return 0; +} + +static int eeh_freeze_dbgfs_get(void *data, u64 *val) +{ + *val = eeh_max_freezes; + return 0; +} + DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get, eeh_enable_dbgfs_set, "0x%llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get, + eeh_freeze_dbgfs_set, "0x%llx\n"); #endif static int __init eeh_init_proc(void) @@ -1664,6 +1685,9 @@ static int __init eeh_init_proc(void) debugfs_create_file("eeh_enable", 0600, powerpc_debugfs_root, NULL, &eeh_enable_dbgfs_ops); + debugfs_create_file("eeh_max_freezes", 0600, + powerpc_debugfs_root, NULL, + &eeh_freeze_dbgfs_ops); #endif } diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index ac00672af7d199..d099540c0f560d 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -667,7 +667,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) eeh_pe_update_time_stamp(pe); pe->freeze_count++; - if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) + if (pe->freeze_count > eeh_max_freezes) goto excess_failures; pr_warn("EEH: This PCI device has failed %d times in the last hour\n", pe->freeze_count); From a113de373bcb7651196e29a49483c8e24e1e6aa9 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 11 Dec 2014 17:00:58 +1100 Subject: [PATCH 229/601] powerpc/powernv: Remove pnv_pci_probe_mode() The callback (ppc_md.pci_probe_mode()) is used to determine if the child PCI devices of the indicated PCI bus should be probed from device-tree or hardware. On PowerNV platform, we always expect probing PCI devices from hardware, which is PowerPC PCI core's default behaviour. Also, the callback had some delay implemented based on PHB's device node property "reset-clear-timestamp", which wasn't exported from skiboot. So we don't need this function and it's safe to remove it. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/pci.c | 30 ---------------------------- 1 file changed, 30 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 4945e87f12dca5..e69142f4af089c 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -781,35 +781,6 @@ static void pnv_p7ioc_rc_quirk(struct pci_dev *dev) } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_IBM, 0x3b9, pnv_p7ioc_rc_quirk); -static int pnv_pci_probe_mode(struct pci_bus *bus) -{ - struct pci_controller *hose = pci_bus_to_host(bus); - const __be64 *tstamp; - u64 now, target; - - - /* We hijack this as a way to ensure we have waited long - * enough since the reset was lifted on the PCI bus - */ - if (bus != hose->bus) - return PCI_PROBE_NORMAL; - tstamp = of_get_property(hose->dn, "reset-clear-timestamp", NULL); - if (!tstamp || !*tstamp) - return PCI_PROBE_NORMAL; - - now = mftb() / tb_ticks_per_usec; - target = (be64_to_cpup(tstamp) / tb_ticks_per_usec) - + PCI_RESET_DELAY_US; - - pr_devel("pci %04d: Reset target: 0x%llx now: 0x%llx\n", - hose->global_number, target, now); - - if (now < target) - msleep((target - now + 999) / 1000); - - return PCI_PROBE_NORMAL; -} - void __init pnv_pci_init(void) { struct device_node *np; @@ -856,7 +827,6 @@ void __init pnv_pci_init(void) ppc_md.tce_build_rm = pnv_tce_build_rm; ppc_md.tce_free_rm = pnv_tce_free_rm; ppc_md.tce_get = pnv_tce_get; - ppc_md.pci_probe_mode = pnv_pci_probe_mode; set_pci_dma_ops(&dma_iommu_ops); /* Configure MSIs */ From 15c2d45d17418cc4a712608c78ff3b5f0583d83b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 21 Jan 2015 12:27:38 +1100 Subject: [PATCH 230/601] powerpc: Add 64bit optimised memcmp I noticed ksm spending quite a lot of time in memcmp on a large KVM box. The current memcmp loop is very unoptimised - byte at a time compares with no loop unrolling. We can do much much better. Optimise the loop in a few ways: - Unroll the byte at a time loop - For large (at least 32 byte) comparisons that are also 8 byte aligned, use an unrolled modulo scheduled loop using 8 byte loads. This is similar to our glibc memcmp. A simple microbenchmark testing 10000000 iterations of an 8192 byte memcmp was used to measure the performance: baseline: 29.93 s modified: 1.70 s Just over 17x faster. v2: Incorporated some suggestions from Segher: - Use andi. instead of rdlicl. - Convert bdnzt eq, to bdnz. It's just duplicating the earlier compare and was a relic from a previous version. - Don't use cr5, we have plans to use that CR field for fast local atomics. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/lib/Makefile | 3 +- arch/powerpc/lib/memcmp_64.S | 233 +++++++++++++++++++++++++++++++++++ arch/powerpc/lib/string.S | 2 + 3 files changed, 237 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/lib/memcmp_64.S diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 1b01159b81f3ea..5526156aae5f91 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -15,7 +15,8 @@ obj-$(CONFIG_PPC32) += div64.o copy_32.o obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \ usercopy_64.o mem_64.o hweight_64.o \ - copyuser_power7.o string_64.o copypage_power7.o + copyuser_power7.o string_64.o copypage_power7.o \ + memcmp_64.o ifeq ($(CONFIG_GENERIC_CSUM),) obj-y += checksum_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC64) += checksum_wrappers_64.o diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S new file mode 100644 index 00000000000000..8953d2382a653a --- /dev/null +++ b/arch/powerpc/lib/memcmp_64.S @@ -0,0 +1,233 @@ +/* + * Author: Anton Blanchard + * Copyright 2015 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include + +#define off8 r6 +#define off16 r7 +#define off24 r8 + +#define rA r9 +#define rB r10 +#define rC r11 +#define rD r27 +#define rE r28 +#define rF r29 +#define rG r30 +#define rH r31 + +#ifdef __LITTLE_ENDIAN__ +#define LD ldbrx +#else +#define LD ldx +#endif + +_GLOBAL(memcmp) + cmpdi cr1,r5,0 + + /* Use the short loop if both strings are not 8B aligned */ + or r6,r3,r4 + andi. r6,r6,7 + + /* Use the short loop if length is less than 32B */ + cmpdi cr6,r5,31 + + beq cr1,.Lzero + bne .Lshort + bgt cr6,.Llong + +.Lshort: + mtctr r5 + +1: lbz rA,0(r3) + lbz rB,0(r4) + subf. rC,rB,rA + bne .Lnon_zero + bdz .Lzero + + lbz rA,1(r3) + lbz rB,1(r4) + subf. rC,rB,rA + bne .Lnon_zero + bdz .Lzero + + lbz rA,2(r3) + lbz rB,2(r4) + subf. rC,rB,rA + bne .Lnon_zero + bdz .Lzero + + lbz rA,3(r3) + lbz rB,3(r4) + subf. rC,rB,rA + bne .Lnon_zero + + addi r3,r3,4 + addi r4,r4,4 + + bdnz 1b + +.Lzero: + li r3,0 + blr + +.Lnon_zero: + mr r3,rC + blr + +.Llong: + li off8,8 + li off16,16 + li off24,24 + + std r31,-8(r1) + std r30,-16(r1) + std r29,-24(r1) + std r28,-32(r1) + std r27,-40(r1) + + srdi r0,r5,5 + mtctr r0 + andi. r5,r5,31 + + LD rA,0,r3 + LD rB,0,r4 + + LD rC,off8,r3 + LD rD,off8,r4 + + LD rE,off16,r3 + LD rF,off16,r4 + + LD rG,off24,r3 + LD rH,off24,r4 + cmpld cr0,rA,rB + + addi r3,r3,32 + addi r4,r4,32 + + bdz .Lfirst32 + + LD rA,0,r3 + LD rB,0,r4 + cmpld cr1,rC,rD + + LD rC,off8,r3 + LD rD,off8,r4 + cmpld cr6,rE,rF + + LD rE,off16,r3 + LD rF,off16,r4 + cmpld cr7,rG,rH + bne cr0,.LcmpAB + + LD rG,off24,r3 + LD rH,off24,r4 + cmpld cr0,rA,rB + bne cr1,.LcmpCD + + addi r3,r3,32 + addi r4,r4,32 + + bdz .Lsecond32 + + .balign 16 + +1: LD rA,0,r3 + LD rB,0,r4 + cmpld cr1,rC,rD + bne cr6,.LcmpEF + + LD rC,off8,r3 + LD rD,off8,r4 + cmpld cr6,rE,rF + bne cr7,.LcmpGH + + LD rE,off16,r3 + LD rF,off16,r4 + cmpld cr7,rG,rH + bne cr0,.LcmpAB + + LD rG,off24,r3 + LD rH,off24,r4 + cmpld cr0,rA,rB + bne cr1,.LcmpCD + + addi r3,r3,32 + addi r4,r4,32 + + bdnz 1b + +.Lsecond32: + cmpld cr1,rC,rD + bne cr6,.LcmpEF + + cmpld cr6,rE,rF + bne cr7,.LcmpGH + + cmpld cr7,rG,rH + bne cr0,.LcmpAB + + bne cr1,.LcmpCD + bne cr6,.LcmpEF + bne cr7,.LcmpGH + +.Ltail: + ld r31,-8(r1) + ld r30,-16(r1) + ld r29,-24(r1) + ld r28,-32(r1) + ld r27,-40(r1) + + cmpdi r5,0 + beq .Lzero + b .Lshort + +.Lfirst32: + cmpld cr1,rC,rD + cmpld cr6,rE,rF + cmpld cr7,rG,rH + + bne cr0,.LcmpAB + bne cr1,.LcmpCD + bne cr6,.LcmpEF + bne cr7,.LcmpGH + + b .Ltail + +.LcmpAB: + li r3,1 + bgt cr0,.Lout + li r3,-1 + b .Lout + +.LcmpCD: + li r3,1 + bgt cr1,.Lout + li r3,-1 + b .Lout + +.LcmpEF: + li r3,1 + bgt cr6,.Lout + li r3,-1 + b .Lout + +.LcmpGH: + li r3,1 + bgt cr7,.Lout + li r3,-1 + +.Lout: + ld r31,-8(r1) + ld r30,-16(r1) + ld r29,-24(r1) + ld r28,-32(r1) + ld r27,-40(r1) + blr diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S index 1b5a0a09d60966..c80fb49ce607f6 100644 --- a/arch/powerpc/lib/string.S +++ b/arch/powerpc/lib/string.S @@ -93,6 +93,7 @@ _GLOBAL(strlen) subf r3,r3,r4 blr +#ifdef CONFIG_PPC32 _GLOBAL(memcmp) PPC_LCMPI 0,r5,0 beq- 2f @@ -106,6 +107,7 @@ _GLOBAL(memcmp) blr 2: li r3,0 blr +#endif _GLOBAL(memchr) PPC_LCMPI 0,r5,0 From 521adf5357105f6f750fbe7bca958fab3b19df2e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 21 Jan 2015 12:27:39 +1100 Subject: [PATCH 231/601] selftests/powerpc: Add memcmp testcase Add a testcase for the new ppc64 memcmp. Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/Makefile | 2 +- .../selftests/powerpc/stringloops/.gitignore | 1 + .../selftests/powerpc/stringloops/Makefile | 20 ++++ .../powerpc/stringloops/asm/ppc_asm.h | 7 ++ .../selftests/powerpc/stringloops/memcmp.c | 103 ++++++++++++++++++ .../selftests/powerpc/stringloops/memcmp_64.S | 1 + 6 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/powerpc/stringloops/.gitignore create mode 100644 tools/testing/selftests/powerpc/stringloops/Makefile create mode 100644 tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h create mode 100644 tools/testing/selftests/powerpc/stringloops/memcmp.c create mode 120000 tools/testing/selftests/powerpc/stringloops/memcmp_64.S diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index f6ff90a76bd76c..1d5e7ad2c46008 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -13,7 +13,7 @@ CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CUR export CC CFLAGS -TARGETS = pmu copyloops mm tm primitives +TARGETS = pmu copyloops mm tm primitives stringloops endif diff --git a/tools/testing/selftests/powerpc/stringloops/.gitignore b/tools/testing/selftests/powerpc/stringloops/.gitignore new file mode 100644 index 00000000000000..0b43da74ee46ac --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/.gitignore @@ -0,0 +1 @@ +memcmp diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile new file mode 100644 index 00000000000000..506d7734647764 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/Makefile @@ -0,0 +1,20 @@ +# The loops are all 64-bit code +CFLAGS += -m64 +CFLAGS += -I$(CURDIR) + +PROGS := memcmp +EXTRA_SOURCES := memcmp_64.S ../harness.c + +all: $(PROGS) + +$(PROGS): $(EXTRA_SOURCES) + +run_tests: all + @-for PROG in $(PROGS); do \ + ./$$PROG; \ + done; + +clean: + rm -f $(PROGS) *.o + +.PHONY: all run_tests clean diff --git a/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h new file mode 100644 index 00000000000000..11bece87e88083 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h @@ -0,0 +1,7 @@ +#include + +#ifndef r1 +#define r1 sp +#endif + +#define _GLOBAL(A) FUNC_START(test_ ## A) diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c new file mode 100644 index 00000000000000..17417dd7070847 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c @@ -0,0 +1,103 @@ +#include +#include +#include +#include "../utils.h" + +#define SIZE 256 +#define ITERATIONS 10000 + +int test_memcmp(const void *s1, const void *s2, size_t n); + +/* test all offsets and lengths */ +static void test_one(char *s1, char *s2) +{ + unsigned long offset, size; + + for (offset = 0; offset < SIZE; offset++) { + for (size = 0; size < (SIZE-offset); size++) { + int x, y; + unsigned long i; + + y = memcmp(s1+offset, s2+offset, size); + x = test_memcmp(s1+offset, s2+offset, size); + + if (((x ^ y) < 0) && /* Trick to compare sign */ + ((x | y) != 0)) { /* check for zero */ + printf("memcmp returned %d, should have returned %d (offset %ld size %ld)\n", x, y, offset, size); + + for (i = offset; i < offset+size; i++) + printf("%02x ", s1[i]); + printf("\n"); + + for (i = offset; i < offset+size; i++) + printf("%02x ", s2[i]); + printf("\n"); + abort(); + } + } + } +} + +static int testcase(void) +{ + char *s1; + char *s2; + unsigned long i; + + s1 = memalign(128, SIZE); + if (!s1) { + perror("memalign"); + exit(1); + } + + s2 = memalign(128, SIZE); + if (!s2) { + perror("memalign"); + exit(1); + } + + srandom(1); + + for (i = 0; i < ITERATIONS; i++) { + unsigned long j; + unsigned long change; + + for (j = 0; j < SIZE; j++) + s1[j] = random(); + + memcpy(s2, s1, SIZE); + + /* change one byte */ + change = random() % SIZE; + s2[change] = random() & 0xff; + + test_one(s1, s2); + } + + srandom(1); + + for (i = 0; i < ITERATIONS; i++) { + unsigned long j; + unsigned long change; + + for (j = 0; j < SIZE; j++) + s1[j] = random(); + + memcpy(s2, s1, SIZE); + + /* change multiple bytes, 1/8 of total */ + for (j = 0; j < SIZE / 8; j++) { + change = random() % SIZE; + s2[change] = random() & 0xff; + } + + test_one(s1, s2); + } + + return 0; +} + +int main(void) +{ + return test_harness(testcase, "memcmp"); +} diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp_64.S b/tools/testing/selftests/powerpc/stringloops/memcmp_64.S new file mode 120000 index 00000000000000..9bc87e438ae982 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/memcmp_64.S @@ -0,0 +1 @@ +../../../../../arch/powerpc/lib/memcmp_64.S \ No newline at end of file From 89f703f0932341b316b2312581dacddba14b3876 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Jan 2015 22:24:00 +0100 Subject: [PATCH 232/601] X.509: shut up about included cert for silent build Every kernel build that includes X.509 support prints out a message like - Including cert signing_key.x509 This may be useful for some cases, but when doing automated build tests, it just means noise. To hide the message, this uses '$(kecho)' for printing the message, which means we still see it when building with V=1, but not at the normal level or when building with 'make -s'. Signed-off-by: Arnd Bergmann Signed-off-by: David Howells --- kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/Makefile b/kernel/Makefile index a59481a3fa6cff..23e17a7e7a638f 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -142,7 +142,7 @@ endif kernel/system_certificates.o: $(obj)/x509_certificate_list quiet_cmd_x509certs = CERTS $@ - cmd_x509certs = cat $(X509_CERTIFICATES) /dev/null >$@ $(foreach X509,$(X509_CERTIFICATES),; echo " - Including cert $(X509)") + cmd_x509certs = cat $(X509_CERTIFICATES) /dev/null >$@ $(foreach X509,$(X509_CERTIFICATES),; $(kecho) " - Including cert $(X509)") targets += $(obj)/x509_certificate_list $(obj)/x509_certificate_list: $(X509_CERTIFICATES) $(obj)/.x509.list From e994393acd65e729a574aaca466eab22b5b39cc6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 13 Jan 2015 22:24:31 +0100 Subject: [PATCH 233/601] X.509: silence asn1 compiler debug output The asn1_compiler process is particularly chatty and produces about the only stdout output for an allmodconfig kernel. In order to follow the general concept of 'no news is good news' for building kernels, this hides all the existing output unless the KBUILD_VERBOSE environment variable is set. Signed-off-by: Arnd Bergmann Signed-off-by: David Howells --- scripts/asn1_compiler.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/scripts/asn1_compiler.c b/scripts/asn1_compiler.c index 91c4117637ae1f..7750e9c31483b9 100644 --- a/scripts/asn1_compiler.c +++ b/scripts/asn1_compiler.c @@ -311,6 +311,9 @@ struct token { static struct token *token_list; static unsigned nr_tokens; +static _Bool verbose; + +#define debug(fmt, ...) do { if (verbose) printf(fmt, ## __VA_ARGS__); } while (0) static int directive_compare(const void *_key, const void *_pdir) { @@ -322,21 +325,21 @@ static int directive_compare(const void *_key, const void *_pdir) dlen = strlen(dir); clen = (dlen < token->size) ? dlen : token->size; - //printf("cmp(%*.*s,%s) = ", + //debug("cmp(%*.*s,%s) = ", // (int)token->size, (int)token->size, token->value, // dir); val = memcmp(token->value, dir, clen); if (val != 0) { - //printf("%d [cmp]\n", val); + //debug("%d [cmp]\n", val); return val; } if (dlen == token->size) { - //printf("0\n"); + //debug("0\n"); return 0; } - //printf("%d\n", (int)dlen - (int)token->size); + //debug("%d\n", (int)dlen - (int)token->size); return dlen - token->size; /* shorter -> negative */ } @@ -515,13 +518,13 @@ static void tokenise(char *buffer, char *end) } nr_tokens = tix; - printf("Extracted %u tokens\n", nr_tokens); + debug("Extracted %u tokens\n", nr_tokens); #if 0 { int n; for (n = 0; n < nr_tokens; n++) - printf("Token %3u: '%*.*s'\n", + debug("Token %3u: '%*.*s'\n", n, (int)token_list[n].size, (int)token_list[n].size, token_list[n].value); @@ -542,6 +545,7 @@ int main(int argc, char **argv) ssize_t readlen; FILE *out, *hdr; char *buffer, *p; + char *kbuild_verbose; int fd; if (argc != 4) { @@ -550,6 +554,10 @@ int main(int argc, char **argv) exit(2); } + kbuild_verbose = getenv("KBUILD_VERBOSE"); + if (kbuild_verbose) + verbose = atoi(kbuild_verbose); + filename = argv[1]; outputname = argv[2]; headername = argv[3]; @@ -748,11 +756,11 @@ static void build_type_list(void) qsort(type_index, nr, sizeof(type_index[0]), type_index_compare); - printf("Extracted %u types\n", nr_types); + debug("Extracted %u types\n", nr_types); #if 0 for (n = 0; n < nr_types; n++) { struct type *type = type_index[n]; - printf("- %*.*s\n", + debug("- %*.*s\n", (int)type->name->size, (int)type->name->size, type->name->value); @@ -793,7 +801,7 @@ static void parse(void) } while (type++, !(type->flags & TYPE_STOP_MARKER)); - printf("Extracted %u actions\n", nr_actions); + debug("Extracted %u actions\n", nr_actions); } static struct element *element_list; @@ -1284,7 +1292,7 @@ static void render(FILE *out, FILE *hdr) } /* We do two passes - the first one calculates all the offsets */ - printf("Pass 1\n"); + debug("Pass 1\n"); nr_entries = 0; root = &type_list[0]; render_element(NULL, root->element, NULL); @@ -1295,7 +1303,7 @@ static void render(FILE *out, FILE *hdr) e->flags &= ~ELEMENT_RENDERED; /* And then we actually render */ - printf("Pass 2\n"); + debug("Pass 2\n"); fprintf(out, "\n"); fprintf(out, "static const unsigned char %s_machine[] = {\n", grammar_name); From aa03c428e67881795f4190f9ffdb62fc14978608 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 22 Jan 2015 18:20:35 +0000 Subject: [PATCH 234/601] arm64: Fix overlapping VA allocations PCI IO space was intended to be 16MiB, at 32MiB below MODULES_VADDR, but commit d1e6dc91b532d3d3 ("arm64: Add architectural support for PCI") extended this to cover the full 32MiB. The final 8KiB of this 32MiB is also allocated for the fixmap, allowing for potential clashes between the two. This change was masked by assumptions in mem_init and the page table dumping code, which assumed the I/O space to be 16MiB long through seaparte hard-coded definitions. This patch changes the definition of the PCI I/O space allocation to live in asm/memory.h, along with the other VA space allocations. As the fixmap allocation depends on the number of fixmap entries, this is moved below the PCI I/O space allocation. Both the fixmap and PCI I/O space are guarded with 2MB of padding. Sites assuming the I/O space was 16MiB are moved over use new PCI_IO_{START,END} definitions, which will keep in sync with the size of the IO space (now restored to 16MiB). As a useful side effect, the use of the new PCI_IO_{START,END} definitions prevents a build issue in the dumping code due to a (now redundant) missing include of io.h for PCI_IOBASE. Signed-off-by: Mark Rutland Cc: Kees Cook Cc: Laura Abbott Cc: Liviu Dudau Cc: Steve Capper Cc: Will Deacon [catalin.marinas@arm.com: reorder FIXADDR and PCI_IO address_markers_idx enum] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/io.h | 5 +++-- arch/arm64/include/asm/memory.h | 10 +++++++++- arch/arm64/mm/dump.c | 8 ++++---- arch/arm64/mm/init.c | 5 +++-- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 949c406d4df431..540f7c0aea8250 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -145,8 +146,8 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) * I/O port access primitives. */ #define arch_has_dev_port() (1) -#define IO_SPACE_LIMIT (SZ_32M - 1) -#define PCI_IOBASE ((void __iomem *)(MODULES_VADDR - SZ_32M)) +#define IO_SPACE_LIMIT (PCI_IO_SIZE - 1) +#define PCI_IOBASE ((void __iomem *)PCI_IO_START) /* * String version of I/O memory access operations. diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 6486b2bfd56263..f800d45ea2265b 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -32,6 +32,12 @@ */ #define UL(x) _AC(x, UL) +/* + * Size of the PCI I/O space. This must remain a power of two so that + * IO_SPACE_LIMIT acts as a mask for the low bits of I/O addresses. + */ +#define PCI_IO_SIZE SZ_16M + /* * PAGE_OFFSET - the virtual address of the start of the kernel image (top * (VA_BITS - 1)) @@ -45,7 +51,9 @@ #define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) #define MODULES_END (PAGE_OFFSET) #define MODULES_VADDR (MODULES_END - SZ_64M) -#define FIXADDR_TOP (MODULES_VADDR - SZ_2M - PAGE_SIZE) +#define PCI_IO_END (MODULES_VADDR - SZ_2M) +#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) +#define FIXADDR_TOP (PCI_IO_START - SZ_2M) #define TASK_SIZE_64 (UL(1) << VA_BITS) #ifdef CONFIG_COMPAT diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index cf33f33333ccd2..77d39a1b6f8d4d 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -36,10 +36,10 @@ enum address_markers_idx { VMEMMAP_START_NR, VMEMMAP_END_NR, #endif - PCI_START_NR, - PCI_END_NR, FIXADDR_START_NR, FIXADDR_END_NR, + PCI_START_NR, + PCI_END_NR, MODULES_START_NR, MODUELS_END_NR, KERNEL_SPACE_NR, @@ -52,10 +52,10 @@ static struct addr_marker address_markers[] = { { 0, "vmemmap start" }, { 0, "vmemmap end" }, #endif - { (unsigned long) PCI_IOBASE, "PCI I/O start" }, - { (unsigned long) PCI_IOBASE + SZ_16M, "PCI I/O end" }, { FIXADDR_START, "Fixmap start" }, { FIXADDR_TOP, "Fixmap end" }, + { PCI_IO_START, "PCI I/O start" }, + { PCI_IO_END, "PCI I/O end" }, { MODULES_VADDR, "Modules start" }, { MODULES_END, "Modules end" }, { PAGE_OFFSET, "Kernel Mapping" }, diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 43cccb5101c02c..a8dfb40eefa82a 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -296,8 +297,8 @@ void __init mem_init(void) " vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n" " 0x%16lx - 0x%16lx (%6ld MB actual)\n" #endif - " PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n" " fixed : 0x%16lx - 0x%16lx (%6ld KB)\n" + " PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n" " modules : 0x%16lx - 0x%16lx (%6ld MB)\n" " memory : 0x%16lx - 0x%16lx (%6ld MB)\n" " .init : 0x%p" " - 0x%p" " (%6ld KB)\n" @@ -310,8 +311,8 @@ void __init mem_init(void) MLM((unsigned long)virt_to_page(PAGE_OFFSET), (unsigned long)virt_to_page(high_memory)), #endif - MLM((unsigned long)PCI_IOBASE, (unsigned long)PCI_IOBASE + SZ_16M), MLK(FIXADDR_START, FIXADDR_TOP), + MLM(PCI_IO_START, PCI_IO_END), MLM(MODULES_VADDR, MODULES_END), MLM(PAGE_OFFSET, (unsigned long)high_memory), MLK_ROUNDUP(__init_begin, __init_end), From 764011ca8247808e066a182bcfe42a2b14c99a9a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 22 Jan 2015 18:20:36 +0000 Subject: [PATCH 235/601] arm64: mm: dump: add missing includes The arm64 dump code is currently relying on some definitions which are pulled in via transitive dependencies. It seems we have implicit dependencies on the following definitions: * MODULES_VADDR (asm/memory.h) * MODULES_END (asm/memory.h) * PAGE_OFFSET (asm/memory.h) * PTE_* (asm/pgtable-hwdef.h) * ENOMEM (linux/errno.h) * device_initcall (linux/init.h) This patch ensures we explicitly include the relevant headers for the above items, fixing the observed build issue and hopefully preventing future issues as headers are refactored. Signed-off-by: Mark Rutland Reported-by: Mark Brown Acked-by: Steve Capper Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/dump.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 77d39a1b6f8d4d..bbc5a29ecaaf79 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -14,13 +14,17 @@ * of the License. */ #include +#include #include +#include #include #include #include #include +#include #include +#include #define LOWEST_ADDR (UL(0xffffffffffffffff) << VA_BITS) From 2c72a44ecdf2a7ceac73844226e97ed2d5dd1e82 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 14 Jan 2015 17:52:33 +0100 Subject: [PATCH 236/601] s390/spinlock: add compare-and-delay to lock wait loops Add the compare-and-delay instruction to the spin-lock and rw-lock retry loops. A CPU executing the compare-and-delay instruction stops until the lock value has changed. This is done to make the locking code for contended locks to behave better in regard to the multi- hreading facility. A thread of a core executing a compare-and-delay will allow the other threads of a core to get a larger share of the core resources. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/setup.h | 3 ++ arch/s390/kernel/early.c | 18 ++++++++++++ arch/s390/lib/spinlock.c | 52 ++++++++++++++++++++++++++++++----- 3 files changed, 66 insertions(+), 7 deletions(-) diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 7736fdd725953c..b8d1e54b473308 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -57,6 +57,7 @@ extern void detect_memory_memblock(void); #define MACHINE_FLAG_TE (1UL << 15) #define MACHINE_FLAG_TLB_LC (1UL << 17) #define MACHINE_FLAG_VX (1UL << 18) +#define MACHINE_FLAG_CAD (1UL << 19) #define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) #define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) @@ -80,6 +81,7 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_TE (0) #define MACHINE_HAS_TLB_LC (0) #define MACHINE_HAS_VX (0) +#define MACHINE_HAS_CAD (0) #else /* CONFIG_64BIT */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) @@ -93,6 +95,7 @@ extern void detect_memory_memblock(void); #define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE) #define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC) #define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX) +#define MACHINE_HAS_CAD (S390_lowcore.machine_flags & MACHINE_FLAG_CAD) #endif /* CONFIG_64BIT */ /* diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 302ac1f7f8e769..70a3294509018e 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -393,9 +393,27 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC; if (test_facility(129)) S390_lowcore.machine_flags |= MACHINE_FLAG_VX; + if (test_facility(128)) + S390_lowcore.machine_flags |= MACHINE_FLAG_CAD; #endif } +static int __init nocad_setup(char *str) +{ + S390_lowcore.machine_flags &= ~MACHINE_FLAG_CAD; + return 0; +} +early_param("nocad", nocad_setup); + +static int __init cad_init(void) +{ + if (MACHINE_HAS_CAD) + /* Enable problem state CAD. */ + __ctl_set_bit(2, 3); + return 0; +} +early_initcall(cad_init); + static __init void rescue_initrd(void) { #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 034a35a3e9c11a..d6c9991f77975e 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -12,7 +12,15 @@ #include #include -int spin_retry = 1000; +int spin_retry = -1; + +static int __init spin_retry_init(void) +{ + if (spin_retry < 0) + spin_retry = MACHINE_HAS_CAD ? 10 : 1000; + return 0; +} +early_initcall(spin_retry_init); /** * spin_retry= parameter @@ -24,6 +32,11 @@ static int __init spin_retry_setup(char *str) } __setup("spin_retry=", spin_retry_setup); +static inline void _raw_compare_and_delay(unsigned int *lock, unsigned int old) +{ + asm(".insn rsy,0xeb0000000022,%0,0,%1" : : "d" (old), "Q" (*lock)); +} + void arch_spin_lock_wait(arch_spinlock_t *lp) { unsigned int cpu = SPINLOCK_LOCKVAL; @@ -46,6 +59,8 @@ void arch_spin_lock_wait(arch_spinlock_t *lp) /* Loop for a while on the lock value. */ count = spin_retry; do { + if (MACHINE_HAS_CAD) + _raw_compare_and_delay(&lp->lock, owner); owner = ACCESS_ONCE(lp->lock); } while (owner && count-- > 0); if (!owner) @@ -84,6 +99,8 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags) /* Loop for a while on the lock value. */ count = spin_retry; do { + if (MACHINE_HAS_CAD) + _raw_compare_and_delay(&lp->lock, owner); owner = ACCESS_ONCE(lp->lock); } while (owner && count-- > 0); if (!owner) @@ -100,11 +117,19 @@ EXPORT_SYMBOL(arch_spin_lock_wait_flags); int arch_spin_trylock_retry(arch_spinlock_t *lp) { + unsigned int cpu = SPINLOCK_LOCKVAL; + unsigned int owner; int count; - for (count = spin_retry; count > 0; count--) - if (arch_spin_trylock_once(lp)) - return 1; + for (count = spin_retry; count > 0; count--) { + owner = ACCESS_ONCE(lp->lock); + /* Try to get the lock if it is free. */ + if (!owner) { + if (_raw_compare_and_swap(&lp->lock, 0, cpu)) + return 1; + } else if (MACHINE_HAS_CAD) + _raw_compare_and_delay(&lp->lock, owner); + } return 0; } EXPORT_SYMBOL(arch_spin_trylock_retry); @@ -126,8 +151,11 @@ void _raw_read_lock_wait(arch_rwlock_t *rw) } old = ACCESS_ONCE(rw->lock); owner = ACCESS_ONCE(rw->owner); - if ((int) old < 0) + if ((int) old < 0) { + if (MACHINE_HAS_CAD) + _raw_compare_and_delay(&rw->lock, old); continue; + } if (_raw_compare_and_swap(&rw->lock, old, old + 1)) return; } @@ -141,8 +169,11 @@ int _raw_read_trylock_retry(arch_rwlock_t *rw) while (count-- > 0) { old = ACCESS_ONCE(rw->lock); - if ((int) old < 0) + if ((int) old < 0) { + if (MACHINE_HAS_CAD) + _raw_compare_and_delay(&rw->lock, old); continue; + } if (_raw_compare_and_swap(&rw->lock, old, old + 1)) return 1; } @@ -173,6 +204,8 @@ void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev) } if ((old & 0x7fffffff) == 0 && (int) prev >= 0) break; + if (MACHINE_HAS_CAD) + _raw_compare_and_delay(&rw->lock, old); } } EXPORT_SYMBOL(_raw_write_lock_wait); @@ -201,6 +234,8 @@ void _raw_write_lock_wait(arch_rwlock_t *rw) smp_rmb(); if ((old & 0x7fffffff) == 0 && (int) prev >= 0) break; + if (MACHINE_HAS_CAD) + _raw_compare_and_delay(&rw->lock, old); } } EXPORT_SYMBOL(_raw_write_lock_wait); @@ -214,8 +249,11 @@ int _raw_write_trylock_retry(arch_rwlock_t *rw) while (count-- > 0) { old = ACCESS_ONCE(rw->lock); - if (old) + if (old) { + if (MACHINE_HAS_CAD) + _raw_compare_and_delay(&rw->lock, old); continue; + } if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000)) return 1; } From 5bc334bff9a6e189113140ed6dce0ce61d768943 Mon Sep 17 00:00:00 2001 From: Ingo Tuchscherer Date: Fri, 23 Jan 2015 13:27:04 +0100 Subject: [PATCH 237/601] s390/zcrypt: Number of supported ap domains is not retrievable. Upcoming versions of secure key management facilities (CCA and EP11) require information about the maximum number of supported ap domains in order to service TKE requests properly. With IBM z13 the number of available domains (so far 16) has increased up to 85. This number varies depending on machine types and models. Therefore the new sysfs attribute 'ap_max_domain_id' provides this limit of supported ap domains. Upcoming releases for CCA and EP11 will use this new information. Without this problem fix it is not possible to retrieve reliable information about the maximum number of supported ap domains. Thus, customers are not able to perform key management for CCA and EP11 coprocessor adapters. Signed-off-by: Ingo Tuchscherer Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 137 ++++++++++++++++++++++++----------- 1 file changed, 96 insertions(+), 41 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 4d41bf75c23318..faa058016b5ca7 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -203,6 +203,24 @@ ap_test_queue(ap_qid_t qid, int *queue_depth, int *device_type) return reg1; } +/** + * ap_query_facilities(): PQAP(TAPQ) query facilities. + * @qid: The AP queue number + * + * Returns content of general register 2 after the PQAP(TAPQ) + * instruction was called. + */ +static inline unsigned long ap_query_facilities(ap_qid_t qid) +{ + register unsigned long reg0 asm ("0") = qid | 0x00800000UL; + register unsigned long reg1 asm ("1"); + register unsigned long reg2 asm ("2") = 0UL; + + asm volatile(".long 0xb2af0000" /* PQAP(TAPQ) */ + : "+d" (reg0), "=d" (reg1), "+d" (reg2) : : "cc"); + return reg2; +} + /** * ap_reset_queue(): Reset adjunct processor queue. * @qid: The AP queue number @@ -1006,6 +1024,47 @@ void ap_bus_force_rescan(void) } EXPORT_SYMBOL(ap_bus_force_rescan); +/* + * ap_test_config(): helper function to extract the nrth bit + * within the unsigned int array field. + */ +static inline int ap_test_config(unsigned int *field, unsigned int nr) +{ + if (nr > 0xFFu) + return 0; + return ap_test_bit((field + (nr >> 5)), (nr & 0x1f)); +} + +/* + * ap_test_config_card_id(): Test, whether an AP card ID is configured. + * @id AP card ID + * + * Returns 0 if the card is not configured + * 1 if the card is configured or + * if the configuration information is not available + */ +static inline int ap_test_config_card_id(unsigned int id) +{ + if (!ap_configuration) + return 1; + return ap_test_config(ap_configuration->apm, id); +} + +/* + * ap_test_config_domain(): Test, whether an AP usage domain is configured. + * @domain AP usage domain ID + * + * Returns 0 if the usage domain is not configured + * 1 if the usage domain is configured or + * if the configuration information is not available + */ +static inline int ap_test_config_domain(unsigned int domain) +{ + if (!ap_configuration) + return 1; + return ap_test_config(ap_configuration->aqm, domain); +} + /* * AP bus attributes. */ @@ -1121,6 +1180,42 @@ static ssize_t poll_timeout_store(struct bus_type *bus, const char *buf, static BUS_ATTR(poll_timeout, 0644, poll_timeout_show, poll_timeout_store); +static ssize_t ap_max_domain_id_show(struct bus_type *bus, char *buf) +{ + ap_qid_t qid; + int i, nd, max_domain_id = -1; + unsigned long fbits; + + if (ap_configuration) { + if (ap_domain_index >= 0 && ap_domain_index < AP_DOMAINS) { + for (i = 0; i < AP_DEVICES; i++) { + if (!ap_test_config_card_id(i)) + continue; + qid = AP_MKQID(i, ap_domain_index); + fbits = ap_query_facilities(qid); + if (fbits & (1UL << 57)) { + /* the N bit is 0, Nd field is filled */ + nd = (int)((fbits & 0x00FF0000UL)>>16); + if (nd > 0) + max_domain_id = nd; + else + max_domain_id = 15; + } else { + /* N bit is 1, max 16 domains */ + max_domain_id = 15; + } + break; + } + } + } else { + /* no APXA support, older machines with max 16 domains */ + max_domain_id = 15; + } + return snprintf(buf, PAGE_SIZE, "%d\n", max_domain_id); +} + +static BUS_ATTR(ap_max_domain_id, 0444, ap_max_domain_id_show, NULL); + static struct bus_attribute *const ap_bus_attrs[] = { &bus_attr_ap_domain, &bus_attr_ap_control_domain_mask, @@ -1128,50 +1223,10 @@ static struct bus_attribute *const ap_bus_attrs[] = { &bus_attr_poll_thread, &bus_attr_ap_interrupts, &bus_attr_poll_timeout, + &bus_attr_ap_max_domain_id, NULL, }; -static inline int ap_test_config(unsigned int *field, unsigned int nr) -{ - if (nr > 0xFFu) - return 0; - return ap_test_bit((field + (nr >> 5)), (nr & 0x1f)); -} - -/* - * ap_test_config_card_id(): Test, whether an AP card ID is configured. - * @id AP card ID - * - * Returns 0 if the card is not configured - * 1 if the card is configured or - * if the configuration information is not available - */ -static inline int ap_test_config_card_id(unsigned int id) -{ - if (!ap_configuration) - return 1; - return ap_test_config(ap_configuration->apm, id); -} - -/* - * ap_test_config_domain(): Test, whether an AP usage domain is configured. - * @domain AP usage domain ID - * - * Returns 0 if the usage domain is not configured - * 1 if the usage domain is configured or - * if the configuration information is not available - */ -static inline int ap_test_config_domain(unsigned int domain) -{ - if (!ap_configuration) /* QCI not supported */ - if (domain < 16) - return 1; /* then domains 0...15 are configured */ - else - return 0; - else - return ap_test_config(ap_configuration->aqm, domain); -} - /** * ap_query_configuration(): Query AP configuration information. * From bdea1f1bb273383312f0eca56241794b06ed4205 Mon Sep 17 00:00:00 2001 From: Ingo Tuchscherer Date: Fri, 23 Jan 2015 14:56:25 +0100 Subject: [PATCH 238/601] s390/zcrypt: Add support for new crypto express (CEX5S) adapter. Extends the generic cryptographic device driver (zcrypt) to support the Crypto Express 5S adapter. Signed-off-by: Ingo Tuchscherer Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 3 --- drivers/s390/crypto/ap_bus.h | 1 + drivers/s390/crypto/zcrypt_api.h | 1 + drivers/s390/crypto/zcrypt_cex4.c | 33 +++++++++++++++++++++++++------ 4 files changed, 29 insertions(+), 9 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index faa058016b5ca7..a60fc2f9f4b25a 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1489,9 +1489,6 @@ static void ap_scan_bus(struct work_struct *unused) continue; } break; - case 11: - ap_dev->device_type = 10; - break; default: ap_dev->device_type = device_type; } diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index 055a0f956d1718..2737d261a32400 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -117,6 +117,7 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr) #define AP_DEVICE_TYPE_CEX3A 8 #define AP_DEVICE_TYPE_CEX3C 9 #define AP_DEVICE_TYPE_CEX4 10 +#define AP_DEVICE_TYPE_CEX5 11 /* * Known function facilities diff --git a/drivers/s390/crypto/zcrypt_api.h b/drivers/s390/crypto/zcrypt_api.h index b3d496bfaa7ead..75087689193162 100644 --- a/drivers/s390/crypto/zcrypt_api.h +++ b/drivers/s390/crypto/zcrypt_api.h @@ -75,6 +75,7 @@ struct ica_z90_status { #define ZCRYPT_CEX3C 7 #define ZCRYPT_CEX3A 8 #define ZCRYPT_CEX4 10 +#define ZCRYPT_CEX5 11 /** * Large random numbers are pulled in 4096 byte chunks from the crypto cards diff --git a/drivers/s390/crypto/zcrypt_cex4.c b/drivers/s390/crypto/zcrypt_cex4.c index 569f8b1d86c053..71e698b8577286 100644 --- a/drivers/s390/crypto/zcrypt_cex4.c +++ b/drivers/s390/crypto/zcrypt_cex4.c @@ -26,6 +26,10 @@ #define CEX4A_SPEED_RATING 900 /* TODO new card, new speed rating */ #define CEX4C_SPEED_RATING 6500 /* TODO new card, new speed rating */ +#define CEX4P_SPEED_RATING 7000 /* TODO new card, new speed rating */ +#define CEX5A_SPEED_RATING 450 /* TODO new card, new speed rating */ +#define CEX5C_SPEED_RATING 3250 /* TODO new card, new speed rating */ +#define CEX5P_SPEED_RATING 3500 /* TODO new card, new speed rating */ #define CEX4A_MAX_MESSAGE_SIZE MSGTYPE50_CRB3_MAX_MSG_SIZE #define CEX4C_MAX_MESSAGE_SIZE MSGTYPE06_MAX_MSG_SIZE @@ -39,6 +43,7 @@ static struct ap_device_id zcrypt_cex4_ids[] = { { AP_DEVICE(AP_DEVICE_TYPE_CEX4) }, + { AP_DEVICE(AP_DEVICE_TYPE_CEX5) }, { /* end of list */ }, }; @@ -70,11 +75,18 @@ static int zcrypt_cex4_probe(struct ap_device *ap_dev) switch (ap_dev->device_type) { case AP_DEVICE_TYPE_CEX4: + case AP_DEVICE_TYPE_CEX5: if (ap_test_bit(&ap_dev->functions, AP_FUNC_ACCEL)) { zdev = zcrypt_device_alloc(CEX4A_MAX_MESSAGE_SIZE); if (!zdev) return -ENOMEM; - zdev->type_string = "CEX4A"; + if (ap_dev->device_type == AP_DEVICE_TYPE_CEX4) { + zdev->type_string = "CEX4A"; + zdev->speed_rating = CEX4A_SPEED_RATING; + } else { + zdev->type_string = "CEX5A"; + zdev->speed_rating = CEX5A_SPEED_RATING; + } zdev->user_space_type = ZCRYPT_CEX3A; zdev->min_mod_size = CEX4A_MIN_MOD_SIZE; if (ap_test_bit(&ap_dev->functions, AP_FUNC_MEX4K) && @@ -90,33 +102,42 @@ static int zcrypt_cex4_probe(struct ap_device *ap_dev) CEX4A_MAX_MOD_SIZE_2K; } zdev->short_crt = 1; - zdev->speed_rating = CEX4A_SPEED_RATING; zdev->ops = zcrypt_msgtype_request(MSGTYPE50_NAME, MSGTYPE50_VARIANT_DEFAULT); } else if (ap_test_bit(&ap_dev->functions, AP_FUNC_COPRO)) { zdev = zcrypt_device_alloc(CEX4C_MAX_MESSAGE_SIZE); if (!zdev) return -ENOMEM; - zdev->type_string = "CEX4C"; + if (ap_dev->device_type == AP_DEVICE_TYPE_CEX4) { + zdev->type_string = "CEX4C"; + zdev->speed_rating = CEX4C_SPEED_RATING; + } else { + zdev->type_string = "CEX5C"; + zdev->speed_rating = CEX5C_SPEED_RATING; + } zdev->user_space_type = ZCRYPT_CEX3C; zdev->min_mod_size = CEX4C_MIN_MOD_SIZE; zdev->max_mod_size = CEX4C_MAX_MOD_SIZE; zdev->max_exp_bit_length = CEX4C_MAX_MOD_SIZE; zdev->short_crt = 0; - zdev->speed_rating = CEX4C_SPEED_RATING; zdev->ops = zcrypt_msgtype_request(MSGTYPE06_NAME, MSGTYPE06_VARIANT_DEFAULT); } else if (ap_test_bit(&ap_dev->functions, AP_FUNC_EP11)) { zdev = zcrypt_device_alloc(CEX4C_MAX_MESSAGE_SIZE); if (!zdev) return -ENOMEM; - zdev->type_string = "CEX4P"; + if (ap_dev->device_type == AP_DEVICE_TYPE_CEX4) { + zdev->type_string = "CEX4P"; + zdev->speed_rating = CEX4P_SPEED_RATING; + } else { + zdev->type_string = "CEX5P"; + zdev->speed_rating = CEX5P_SPEED_RATING; + } zdev->user_space_type = ZCRYPT_CEX4; zdev->min_mod_size = CEX4C_MIN_MOD_SIZE; zdev->max_mod_size = CEX4C_MAX_MOD_SIZE; zdev->max_exp_bit_length = CEX4C_MAX_MOD_SIZE; zdev->short_crt = 0; - zdev->speed_rating = CEX4C_SPEED_RATING; zdev->ops = zcrypt_msgtype_request(MSGTYPE06_NAME, MSGTYPE06_VARIANT_EP11); } From da1f2b82054c171166ca0069cd60aa9300127cf1 Mon Sep 17 00:00:00 2001 From: Min-Hua Chen Date: Fri, 26 Dec 2014 16:52:10 +0000 Subject: [PATCH 239/601] arm64: add ioremap physical address information In /proc/vmallocinfo, it's good to show the physical address of each ioremap in vmallocinfo. Add physical address information in arm64 ioremap. 0xffffc900047f2000-0xffffc900047f4000 8192 _nv013519rm+0x57/0xa0 [nvidia] phys=f8100000 ioremap 0xffffc900047f4000-0xffffc900047f6000 8192 _nv013519rm+0x57/0xa0 [nvidia] phys=f8008000 ioremap 0xffffc90004800000-0xffffc90004821000 135168 e1000_probe+0x22c/0xb95 [e1000e] phys=f4300000 ioremap 0xffffc900049c0000-0xffffc900049e1000 135168 _nv013521rm+0x4d/0xd0 [nvidia] phys=e0140000 ioremap Signed-off-by: Min-Hua Chen Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/ioremap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index cbb99c8f1e0436..01e88c8bcab0fd 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -62,6 +62,7 @@ static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size, if (!area) return NULL; addr = (unsigned long)area->addr; + area->phys_addr = phys_addr; err = ioremap_page_range(addr, addr + size, phys_addr, prot); if (err) { From 9f71ac961be8d44ad9d7742a5d1129577514660d Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Wed, 17 Dec 2014 15:50:21 +0000 Subject: [PATCH 240/601] arm64: Fix SCTLR_EL1 initialisation We initialise the SCTLR_EL1 value by read-modify-writeback of the desired bits, leaving the other bits (including reserved bits(RESx)) untouched. However, sometimes the boot monitor could leave garbage values in the RESx bits which could have different implications. This patch makes sure that all the bits, including the RESx bits, are set to the proper state, except for the 'endianness' control bits, EE(25) & E0E(24)- which are set early in the el2_setup. Updated the state of the Bit[6] in the comment to RES0 in the comment. Signed-off-by: Suzuki K. Poulose Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/proc.S | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 4e778b13291b74..b98fc8ce6a161b 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -244,14 +244,18 @@ ENTRY(__cpu_setup) ENDPROC(__cpu_setup) /* + * We set the desired value explicitly, including those of the + * reserved bits. The values of bits EE & E0E were set early in + * el2_setup, which are left untouched below. + * * n n T * U E WT T UD US IHBS * CE0 XWHW CZ ME TEEA S * .... .IEE .... NEAI TE.I ..AD DEN0 ACAM - * 0011 0... 1101 ..0. ..0. 10.. .... .... < hardware reserved - * .... .1.. .... 01.1 11.1 ..01 0001 1101 < software settings + * 0011 0... 1101 ..0. ..0. 10.. .0.. .... < hardware reserved + * .... .1.. .... 01.1 11.1 ..01 0.01 1101 < software settings */ .type crval, #object crval: - .word 0x000802e2 // clear - .word 0x0405d11d // set + .word 0xfcffffff // clear + .word 0x34d5d91d // set From 9d3bfbb4df58a8025b46b2676da2cf450385b754 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 12 Jan 2015 20:48:53 +0000 Subject: [PATCH 241/601] arm64: Combine coherent and non-coherent swiotlb dma_ops Since dev_archdata now has a dma_coherent state, combine the two coherent and non-coherent operations and remove their declaration, together with set_dma_ops, from the arch dma-mapping.h file. Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/dma-mapping.h | 11 +-- arch/arm64/mm/dma-mapping.c | 116 ++++++++++++--------------- 2 files changed, 54 insertions(+), 73 deletions(-) diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 9ce3e680ae1c6f..6932bb57dba021 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -28,8 +28,6 @@ #define DMA_ERROR_CODE (~(dma_addr_t)0) extern struct dma_map_ops *dma_ops; -extern struct dma_map_ops coherent_swiotlb_dma_ops; -extern struct dma_map_ops noncoherent_swiotlb_dma_ops; static inline struct dma_map_ops *__generic_dma_ops(struct device *dev) { @@ -47,23 +45,18 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) return __generic_dma_ops(dev); } -static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) -{ - dev->archdata.dma_ops = ops; -} - static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, struct iommu_ops *iommu, bool coherent) { dev->archdata.dma_coherent = coherent; - if (coherent) - set_dma_ops(dev, &coherent_swiotlb_dma_ops); } #define arch_setup_dma_ops arch_setup_dma_ops /* do not use this function in a driver */ static inline bool is_device_dma_coherent(struct device *dev) { + if (!dev) + return false; return dev->archdata.dma_coherent; } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index d9209420391381..0a24b9b8c6982d 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -134,16 +134,17 @@ static void __dma_free_coherent(struct device *dev, size_t size, swiotlb_free_coherent(dev, size, vaddr, dma_handle); } -static void *__dma_alloc_noncoherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) +static void *__dma_alloc(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) { struct page *page; void *ptr, *coherent_ptr; + bool coherent = is_device_dma_coherent(dev); size = PAGE_ALIGN(size); - if (!(flags & __GFP_WAIT)) { + if (!coherent && !(flags & __GFP_WAIT)) { struct page *page = NULL; void *addr = __alloc_from_pool(size, &page); @@ -151,13 +152,16 @@ static void *__dma_alloc_noncoherent(struct device *dev, size_t size, *dma_handle = phys_to_dma(dev, page_to_phys(page)); return addr; - } ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs); if (!ptr) goto no_mem; + /* no need for non-cacheable mapping if coherent */ + if (coherent) + return ptr; + /* remove any dirty cache lines on the kernel alias */ __dma_flush_range(ptr, ptr + size); @@ -179,15 +183,17 @@ static void *__dma_alloc_noncoherent(struct device *dev, size_t size, return NULL; } -static void __dma_free_noncoherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - struct dma_attrs *attrs) +static void __dma_free(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) { void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle)); - if (__free_from_pool(vaddr, size)) - return; - vunmap(vaddr); + if (!is_device_dma_coherent(dev)) { + if (__free_from_pool(vaddr, size)) + return; + vunmap(vaddr); + } __dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs); } @@ -199,7 +205,8 @@ static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page, dma_addr_t dev_addr; dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs); - __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); + if (!is_device_dma_coherent(dev)) + __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); return dev_addr; } @@ -209,7 +216,8 @@ static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); + if (!is_device_dma_coherent(dev)) + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); swiotlb_unmap_page(dev, dev_addr, size, dir, attrs); } @@ -221,9 +229,10 @@ static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int i, ret; ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs); - for_each_sg(sgl, sg, ret, i) - __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), - sg->length, dir); + if (!is_device_dma_coherent(dev)) + for_each_sg(sgl, sg, ret, i) + __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); return ret; } @@ -236,9 +245,10 @@ static void __swiotlb_unmap_sg_attrs(struct device *dev, struct scatterlist *sg; int i; - for_each_sg(sgl, sg, nelems, i) - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), - sg->length, dir); + if (!is_device_dma_coherent(dev)) + for_each_sg(sgl, sg, nelems, i) + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs); } @@ -246,7 +256,8 @@ static void __swiotlb_sync_single_for_cpu(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir) { - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); + if (!is_device_dma_coherent(dev)) + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir); } @@ -255,7 +266,8 @@ static void __swiotlb_sync_single_for_device(struct device *dev, enum dma_data_direction dir) { swiotlb_sync_single_for_device(dev, dev_addr, size, dir); - __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); + if (!is_device_dma_coherent(dev)) + __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); } static void __swiotlb_sync_sg_for_cpu(struct device *dev, @@ -265,9 +277,10 @@ static void __swiotlb_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg; int i; - for_each_sg(sgl, sg, nelems, i) - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), - sg->length, dir); + if (!is_device_dma_coherent(dev)) + for_each_sg(sgl, sg, nelems, i) + __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir); } @@ -279,9 +292,10 @@ static void __swiotlb_sync_sg_for_device(struct device *dev, int i; swiotlb_sync_sg_for_device(dev, sgl, nelems, dir); - for_each_sg(sgl, sg, nelems, i) - __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), - sg->length, dir); + if (!is_device_dma_coherent(dev)) + for_each_sg(sgl, sg, nelems, i) + __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), + sg->length, dir); } /* vma->vm_page_prot must be set appropriately before calling this function */ @@ -308,28 +322,20 @@ static int __dma_common_mmap(struct device *dev, struct vm_area_struct *vma, return ret; } -static int __swiotlb_mmap_noncoherent(struct device *dev, - struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - struct dma_attrs *attrs) -{ - vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, false); - return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); -} - -static int __swiotlb_mmap_coherent(struct device *dev, - struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - struct dma_attrs *attrs) +static int __swiotlb_mmap(struct device *dev, + struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) { - /* Just use whatever page_prot attributes were specified */ + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, + is_device_dma_coherent(dev)); return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); } -struct dma_map_ops noncoherent_swiotlb_dma_ops = { - .alloc = __dma_alloc_noncoherent, - .free = __dma_free_noncoherent, - .mmap = __swiotlb_mmap_noncoherent, +static struct dma_map_ops swiotlb_dma_ops = { + .alloc = __dma_alloc, + .free = __dma_free, + .mmap = __swiotlb_mmap, .map_page = __swiotlb_map_page, .unmap_page = __swiotlb_unmap_page, .map_sg = __swiotlb_map_sg_attrs, @@ -341,24 +347,6 @@ struct dma_map_ops noncoherent_swiotlb_dma_ops = { .dma_supported = swiotlb_dma_supported, .mapping_error = swiotlb_dma_mapping_error, }; -EXPORT_SYMBOL(noncoherent_swiotlb_dma_ops); - -struct dma_map_ops coherent_swiotlb_dma_ops = { - .alloc = __dma_alloc_coherent, - .free = __dma_free_coherent, - .mmap = __swiotlb_mmap_coherent, - .map_page = swiotlb_map_page, - .unmap_page = swiotlb_unmap_page, - .map_sg = swiotlb_map_sg_attrs, - .unmap_sg = swiotlb_unmap_sg_attrs, - .sync_single_for_cpu = swiotlb_sync_single_for_cpu, - .sync_single_for_device = swiotlb_sync_single_for_device, - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = swiotlb_sync_sg_for_device, - .dma_supported = swiotlb_dma_supported, - .mapping_error = swiotlb_dma_mapping_error, -}; -EXPORT_SYMBOL(coherent_swiotlb_dma_ops); extern int swiotlb_late_init_with_default_size(size_t default_size); @@ -427,7 +415,7 @@ static int __init swiotlb_late_init(void) { size_t swiotlb_size = min(SZ_64M, MAX_ORDER_NR_PAGES << PAGE_SHIFT); - dma_ops = &noncoherent_swiotlb_dma_ops; + dma_ops = &swiotlb_dma_ops; return swiotlb_late_init_with_default_size(swiotlb_size); } From 78d51e0b8b57728099a3da74f4a10b6f8c71b764 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 12 Jan 2015 20:48:54 +0000 Subject: [PATCH 242/601] arm64: implement generic IOMMU configuration Add the necessary call to of_iommu_init. Acked-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index bb10903887d439..e8420f635bd447 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -405,6 +406,7 @@ void __init setup_arch(char **cmdline_p) static int __init arm64_device_init(void) { + of_iommu_init(); of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); return 0; } From 04597a65c5efc207257a736d339c6f2f5b00250f Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Wed, 21 Jan 2015 12:43:09 +0000 Subject: [PATCH 243/601] arm64: Track system support for mixed endian EL0 This patch keeps track of the mixed endian EL0 support across the system and provides helper functions to export it. The status is a boolean indicating whether all the CPUs on the system supports mixed endian at EL0. Signed-off-by: Suzuki K. Poulose Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 2 ++ arch/arm64/include/asm/cputype.h | 14 ++++++++++++++ arch/arm64/kernel/cpuinfo.c | 22 ++++++++++++++++++++++ 3 files changed, 38 insertions(+) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 07547ccc1f2bd2..b6c16d5f622fd0 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -52,6 +52,8 @@ static inline void cpus_set_cap(unsigned int num) } void check_local_cpu_errata(void); +bool cpu_supports_mixed_endian_el0(void); +bool system_supports_mixed_endian_el0(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 8adb986a3086a3..ee6403df9fe4c1 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -72,6 +72,15 @@ #define APM_CPU_PART_POTENZA 0x000 +#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16 +#define ID_AA64MMFR0_BIGENDEL0_MASK (0xf << ID_AA64MMFR0_BIGENDEL0_SHIFT) +#define ID_AA64MMFR0_BIGENDEL0(mmfr0) \ + (((mmfr0) & ID_AA64MMFR0_BIGENDEL0_MASK) >> ID_AA64MMFR0_BIGENDEL0_SHIFT) +#define ID_AA64MMFR0_BIGEND_SHIFT 8 +#define ID_AA64MMFR0_BIGEND_MASK (0xf << ID_AA64MMFR0_BIGEND_SHIFT) +#define ID_AA64MMFR0_BIGEND(mmfr0) \ + (((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT) + #ifndef __ASSEMBLY__ /* @@ -104,6 +113,11 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void) return read_cpuid(CTR_EL0); } +static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) +{ + return (ID_AA64MMFR0_BIGEND(mmfr0) == 0x1) || + (ID_AA64MMFR0_BIGENDEL0(mmfr0) == 0x1); +} #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 49782282a0271a..929855691dae62 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -35,6 +35,7 @@ */ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); static struct cpuinfo_arm64 boot_cpu_data; +static bool mixed_endian_el0 = true; static char *icache_policy_str[] = { [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN", @@ -68,6 +69,26 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); } +bool cpu_supports_mixed_endian_el0(void) +{ + return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); +} + +bool system_supports_mixed_endian_el0(void) +{ + return mixed_endian_el0; +} + +static void update_mixed_endian_el0_support(struct cpuinfo_arm64 *info) +{ + mixed_endian_el0 &= id_aa64mmfr0_mixed_endian_el0(info->reg_id_aa64mmfr0); +} + +static void update_cpu_features(struct cpuinfo_arm64 *info) +{ + update_mixed_endian_el0_support(info); +} + static int check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu) { if ((boot & mask) == (cur & mask)) @@ -215,6 +236,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) cpuinfo_detect_icache_policy(info); check_local_cpu_errata(); + update_cpu_features(info); } void cpuinfo_store_cpu(void) From 736d474f0fafd1486f178570bc47660ee9dfdef8 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Wed, 21 Jan 2015 12:43:10 +0000 Subject: [PATCH 244/601] arm64: Consolidate hotplug notifier for instruction emulation As of now each insn_emulation has a cpu hotplug notifier that enables/disables the CPU feature bit for the functionality. This patch re-arranges the code, such that there is only one notifier that runs through the list of registered emulation hooks and runs their corresponding set_hw_mode. We do nothing when a CPU is dying as we will set the appropriate bits as it comes back online based on the state of the hooks. Signed-off-by: Mark Rutland Signed-off-by: Suzuki K. Poulose Cc: Will Deacon Cc: Punit Agrawal [catalin.marinas@arm.com: fix pr_warn compilation error] [catalin.marinas@arm.com: remove unnecessary "insn" check] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 2 + arch/arm64/kernel/armv8_deprecated.c | 125 +++++++++++++++++---------- 2 files changed, 79 insertions(+), 48 deletions(-) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index ee6403df9fe4c1..68732e9a02fb6b 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -81,6 +81,8 @@ #define ID_AA64MMFR0_BIGEND(mmfr0) \ (((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT) +#define SCTLR_EL1_CP15BEN (0x1 << 5) + #ifndef __ASSEMBLY__ /* diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index c363671d750932..68b955e1fd99a0 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -19,6 +19,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "trace-events-emulation.h" @@ -85,6 +86,57 @@ static void remove_emulation_hooks(struct insn_emulation_ops *ops) pr_notice("Removed %s emulation handler\n", ops->name); } +static void enable_insn_hw_mode(void *data) +{ + struct insn_emulation *insn = (struct insn_emulation *)data; + if (insn->ops->set_hw_mode) + insn->ops->set_hw_mode(true); +} + +static void disable_insn_hw_mode(void *data) +{ + struct insn_emulation *insn = (struct insn_emulation *)data; + if (insn->ops->set_hw_mode) + insn->ops->set_hw_mode(false); +} + +/* Run set_hw_mode(mode) on all active CPUs */ +static int run_all_cpu_set_hw_mode(struct insn_emulation *insn, bool enable) +{ + if (!insn->ops->set_hw_mode) + return -EINVAL; + if (enable) + on_each_cpu(enable_insn_hw_mode, (void *)insn, true); + else + on_each_cpu(disable_insn_hw_mode, (void *)insn, true); + return 0; +} + +/* + * Run set_hw_mode for all insns on a starting CPU. + * Returns: + * 0 - If all the hooks ran successfully. + * -EINVAL - At least one hook is not supported by the CPU. + */ +static int run_all_insn_set_hw_mode(unsigned long cpu) +{ + int rc = 0; + unsigned long flags; + struct insn_emulation *insn; + + raw_spin_lock_irqsave(&insn_emulation_lock, flags); + list_for_each_entry(insn, &insn_emulation, node) { + bool enable = (insn->current_mode == INSN_HW); + if (insn->ops->set_hw_mode && insn->ops->set_hw_mode(enable)) { + pr_warn("CPU[%ld] cannot support the emulation of %s", + cpu, insn->ops->name); + rc = -EINVAL; + } + } + raw_spin_unlock_irqrestore(&insn_emulation_lock, flags); + return rc; +} + static int update_insn_emulation_mode(struct insn_emulation *insn, enum insn_emulation_mode prev) { @@ -97,10 +149,8 @@ static int update_insn_emulation_mode(struct insn_emulation *insn, remove_emulation_hooks(insn->ops); break; case INSN_HW: - if (insn->ops->set_hw_mode) { - insn->ops->set_hw_mode(false); + if (!run_all_cpu_set_hw_mode(insn, false)) pr_notice("Disabled %s support\n", insn->ops->name); - } break; } @@ -111,10 +161,9 @@ static int update_insn_emulation_mode(struct insn_emulation *insn, register_emulation_hooks(insn->ops); break; case INSN_HW: - if (insn->ops->set_hw_mode && insn->ops->set_hw_mode(true)) + ret = run_all_cpu_set_hw_mode(insn, true); + if (!ret) pr_notice("Enabled %s support\n", insn->ops->name); - else - ret = -EINVAL; break; } @@ -133,6 +182,8 @@ static void register_insn_emulation(struct insn_emulation_ops *ops) switch (ops->status) { case INSN_DEPRECATED: insn->current_mode = INSN_EMULATE; + /* Disable the HW mode if it was turned on at early boot time */ + run_all_cpu_set_hw_mode(insn, false); insn->max = INSN_HW; break; case INSN_OBSOLETE: @@ -453,8 +504,6 @@ static int cp15barrier_handler(struct pt_regs *regs, u32 instr) return 0; } -#define SCTLR_EL1_CP15BEN (1 << 5) - static inline void config_sctlr_el1(u32 clear, u32 set) { u32 val; @@ -465,48 +514,13 @@ static inline void config_sctlr_el1(u32 clear, u32 set) asm volatile("msr sctlr_el1, %0" : : "r" (val)); } -static void enable_cp15_ben(void *info) -{ - config_sctlr_el1(0, SCTLR_EL1_CP15BEN); -} - -static void disable_cp15_ben(void *info) -{ - config_sctlr_el1(SCTLR_EL1_CP15BEN, 0); -} - -static int cpu_hotplug_notify(struct notifier_block *b, - unsigned long action, void *hcpu) -{ - switch (action) { - case CPU_STARTING: - case CPU_STARTING_FROZEN: - enable_cp15_ben(NULL); - return NOTIFY_DONE; - case CPU_DYING: - case CPU_DYING_FROZEN: - disable_cp15_ben(NULL); - return NOTIFY_DONE; - } - - return NOTIFY_OK; -} - -static struct notifier_block cpu_hotplug_notifier = { - .notifier_call = cpu_hotplug_notify, -}; - static int cp15_barrier_set_hw_mode(bool enable) { - if (enable) { - register_cpu_notifier(&cpu_hotplug_notifier); - on_each_cpu(enable_cp15_ben, NULL, true); - } else { - unregister_cpu_notifier(&cpu_hotplug_notifier); - on_each_cpu(disable_cp15_ben, NULL, true); - } - - return true; + if (enable) + config_sctlr_el1(0, SCTLR_EL1_CP15BEN); + else + config_sctlr_el1(SCTLR_EL1_CP15BEN, 0); + return 0; } static struct undef_hook cp15_barrier_hooks[] = { @@ -534,6 +548,20 @@ static struct insn_emulation_ops cp15_barrier_ops = { .set_hw_mode = cp15_barrier_set_hw_mode, }; +static int insn_cpu_hotplug_notify(struct notifier_block *b, + unsigned long action, void *hcpu) +{ + int rc = 0; + if ((action & ~CPU_TASKS_FROZEN) == CPU_STARTING) + rc = run_all_insn_set_hw_mode((unsigned long)hcpu); + + return notifier_from_errno(rc); +} + +static struct notifier_block insn_cpu_hotplug_notifier = { + .notifier_call = insn_cpu_hotplug_notify, +}; + /* * Invoked as late_initcall, since not needed before init spawned. */ @@ -545,6 +573,7 @@ static int __init armv8_deprecated_init(void) if (IS_ENABLED(CONFIG_CP15_BARRIER_EMULATION)) register_insn_emulation(&cp15_barrier_ops); + register_cpu_notifier(&insn_cpu_hotplug_notifier); register_insn_emulation_sysctl(ctl_abi); return 0; From 2d888f48e056119495847a269a435d5c3d9df349 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Wed, 21 Jan 2015 12:43:11 +0000 Subject: [PATCH 245/601] arm64: Emulate SETEND for AArch32 tasks Emulate deprecated 'setend' instruction for AArch32 bit tasks. setend [le/be] - Sets the endianness of EL0 On systems with CPUs which support mixed endian at EL0, the hardware support for the instruction can be enabled by setting the SCTLR_EL1.SED bit. Like the other emulated instructions it is controlled by an entry in /proc/sys/abi/. For more information see : Documentation/arm64/legacy_instructions.txt The instruction is emulated by setting/clearing the SPSR_EL1.E bit, which will be reflected in the PSTATE.E in AArch32 context. This patch also restores the native endianness for the execution of signal handlers, since the process could have changed the endianness. Note: All CPUs on the system must have mixed endian support at EL0. Once the handler is registered, hotplugging a CPU which doesn't support mixed endian, could lead to unexpected results/behavior in applications. Signed-off-by: Suzuki K. Poulose Cc: Will Deacon Cc: Punit Agrawal Signed-off-by: Catalin Marinas --- Documentation/arm64/legacy_instructions.txt | 12 ++++ arch/arm64/Kconfig | 15 ++++ arch/arm64/include/asm/cputype.h | 1 + arch/arm64/include/asm/ptrace.h | 7 ++ arch/arm64/kernel/armv8_deprecated.c | 80 +++++++++++++++++++++ arch/arm64/kernel/signal32.c | 5 +- 6 files changed, 119 insertions(+), 1 deletion(-) diff --git a/Documentation/arm64/legacy_instructions.txt b/Documentation/arm64/legacy_instructions.txt index a3b3da2ec6edb4..01bf3d9fac858b 100644 --- a/Documentation/arm64/legacy_instructions.txt +++ b/Documentation/arm64/legacy_instructions.txt @@ -32,6 +32,9 @@ The default mode depends on the status of the instruction in the architecture. Deprecated instructions should default to emulation while obsolete instructions must be undefined by default. +Note: Instruction emulation may not be possible in all cases. See +individual instruction notes for further information. + Supported legacy instructions ----------------------------- * SWP{B} @@ -43,3 +46,12 @@ Default: Undef (0) Node: /proc/sys/abi/cp15_barrier Status: Deprecated Default: Emulate (1) + +* SETEND +Node: /proc/sys/abi/setend +Status: Deprecated +Default: Emulate (1)* +Note: All the cpus on the system must have mixed endian support at EL0 +for this feature to be enabled. If a new CPU - which doesn't support mixed +endian - is hotplugged in after this feature has been enabled, there could +be unexpected results in the application. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b1f9a20a367746..21a59bf37145e7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -540,6 +540,21 @@ config CP15_BARRIER_EMULATION If unsure, say Y +config SETEND_EMULATION + bool "Emulate SETEND instruction" + help + The SETEND instruction alters the data-endianness of the + AArch32 EL0, and is deprecated in ARMv8. + + Say Y here to enable software emulation of the instruction + for AArch32 userspace code. + + Note: All the cpus on the system must have mixed endian support at EL0 + for this feature to be enabled. If a new CPU - which doesn't support mixed + endian - is hotplugged in after this feature has been enabled, there could + be unexpected results in the applications. + + If unsure, say Y endif endmenu diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 68732e9a02fb6b..a84ec605bed819 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -82,6 +82,7 @@ (((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT) #define SCTLR_EL1_CP15BEN (0x1 << 5) +#define SCTLR_EL1_SED (0x1 << 8) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 41ed9e13795e59..d6dd9fdbc3bee6 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -58,6 +58,13 @@ #define COMPAT_PSR_Z_BIT 0x40000000 #define COMPAT_PSR_N_BIT 0x80000000 #define COMPAT_PSR_IT_MASK 0x0600fc00 /* If-Then execution state mask */ + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define COMPAT_PSR_ENDSTATE COMPAT_PSR_E_BIT +#else +#define COMPAT_PSR_ENDSTATE 0 +#endif + /* * These are 'magic' values for PTRACE_PEEKUSR that return info about where a * process is located in memory. diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 68b955e1fd99a0..7922c2e710cadf 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -548,6 +548,79 @@ static struct insn_emulation_ops cp15_barrier_ops = { .set_hw_mode = cp15_barrier_set_hw_mode, }; +static int setend_set_hw_mode(bool enable) +{ + if (!cpu_supports_mixed_endian_el0()) + return -EINVAL; + + if (enable) + config_sctlr_el1(SCTLR_EL1_SED, 0); + else + config_sctlr_el1(0, SCTLR_EL1_SED); + return 0; +} + +static int compat_setend_handler(struct pt_regs *regs, u32 big_endian) +{ + char *insn; + + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->pc); + + if (big_endian) { + insn = "setend be"; + regs->pstate |= COMPAT_PSR_E_BIT; + } else { + insn = "setend le"; + regs->pstate &= ~COMPAT_PSR_E_BIT; + } + + trace_instruction_emulation(insn, regs->pc); + pr_warn_ratelimited("\"%s\" (%ld) uses deprecated setend instruction at 0x%llx\n", + current->comm, (unsigned long)current->pid, regs->pc); + + return 0; +} + +static int a32_setend_handler(struct pt_regs *regs, u32 instr) +{ + int rc = compat_setend_handler(regs, (instr >> 9) & 1); + regs->pc += 4; + return rc; +} + +static int t16_setend_handler(struct pt_regs *regs, u32 instr) +{ + int rc = compat_setend_handler(regs, (instr >> 3) & 1); + regs->pc += 2; + return rc; +} + +static struct undef_hook setend_hooks[] = { + { + .instr_mask = 0xfffffdff, + .instr_val = 0xf1010000, + .pstate_mask = COMPAT_PSR_MODE_MASK, + .pstate_val = COMPAT_PSR_MODE_USR, + .fn = a32_setend_handler, + }, + { + /* Thumb mode */ + .instr_mask = 0x0000fff7, + .instr_val = 0x0000b650, + .pstate_mask = (COMPAT_PSR_T_BIT | COMPAT_PSR_MODE_MASK), + .pstate_val = (COMPAT_PSR_T_BIT | COMPAT_PSR_MODE_USR), + .fn = t16_setend_handler, + }, + {} +}; + +static struct insn_emulation_ops setend_ops = { + .name = "setend", + .status = INSN_DEPRECATED, + .hooks = setend_hooks, + .set_hw_mode = setend_set_hw_mode, +}; + static int insn_cpu_hotplug_notify(struct notifier_block *b, unsigned long action, void *hcpu) { @@ -573,6 +646,13 @@ static int __init armv8_deprecated_init(void) if (IS_ENABLED(CONFIG_CP15_BARRIER_EMULATION)) register_insn_emulation(&cp15_barrier_ops); + if (IS_ENABLED(CONFIG_SETEND_EMULATION)) { + if(system_supports_mixed_endian_el0()) + register_insn_emulation(&setend_ops); + else + pr_info("setend instruction emulation is not supported on the system"); + } + register_cpu_notifier(&insn_cpu_hotplug_notifier); register_insn_emulation_sysctl(ctl_abi); diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 192d900c058ff5..e299de396e9b33 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -440,7 +440,7 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, { compat_ulong_t handler = ptr_to_compat(ka->sa.sa_handler); compat_ulong_t retcode; - compat_ulong_t spsr = regs->pstate & ~PSR_f; + compat_ulong_t spsr = regs->pstate & ~(PSR_f | COMPAT_PSR_E_BIT); int thumb; /* Check if the handler is written for ARM or Thumb */ @@ -454,6 +454,9 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, /* The IT state must be cleared for both ARM and Thumb-2 */ spsr &= ~COMPAT_PSR_IT_MASK; + /* Restore the original endianness */ + spsr |= COMPAT_PSR_ENDSTATE; + if (ka->sa.sa_flags & SA_RESTORER) { retcode = ptr_to_compat(ka->sa.sa_restorer); } else { From 0aaf0dae81b586134faeb52e28b7ad567629dd68 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 23 Jan 2015 05:36:42 +0000 Subject: [PATCH 246/601] smp, ARM64: Kill SMP single function call interrupt Commit 9a46ad6d6df3b54 "smp: make smp_call_function_many() use logic similar to smp_call_function_single()" has unified the way to handle single and multiple cross-CPU function calls. Now only one interrupt is needed for architecture specific code to support generic SMP function call interfaces, so kill the redundant single function call interrupt. Signed-off-by: Jiang Liu Acked-by: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: Arnd Bergmann Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/hardirq.h | 2 +- arch/arm64/kernel/smp.c | 10 +--------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index e8a3268a891c7c..6aae421f4d733d 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -20,7 +20,7 @@ #include #include -#define NR_IPI 6 +#define NR_IPI 5 typedef struct { unsigned int __softirq_pending; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 7ae6ee0852618e..328b8ce4b00703 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -65,7 +65,6 @@ struct secondary_data secondary_data; enum ipi_msg_type { IPI_RESCHEDULE, IPI_CALL_FUNC, - IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, IPI_TIMER, IPI_IRQ_WORK, @@ -483,7 +482,6 @@ static const char *ipi_types[NR_IPI] __tracepoint_string = { #define S(x,s) [x] = s S(IPI_RESCHEDULE, "Rescheduling interrupts"), S(IPI_CALL_FUNC, "Function call interrupts"), - S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), S(IPI_TIMER, "Timer broadcast interrupts"), S(IPI_IRQ_WORK, "IRQ work interrupts"), @@ -527,7 +525,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) void arch_send_call_function_single_ipi(int cpu) { - smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); + smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC); } #ifdef CONFIG_IRQ_WORK @@ -585,12 +583,6 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; - case IPI_CALL_FUNC_SINGLE: - irq_enter(); - generic_smp_call_function_single_interrupt(); - irq_exit(); - break; - case IPI_CPU_STOP: irq_enter(); ipi_cpu_stop(cpu); From 33b36543df336d9158e1a763fe97251885f52c5c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 16 Jan 2015 13:52:14 +0000 Subject: [PATCH 247/601] arm64: uapi: expose our struct ucontext to the uapi headers arm64 defines its own ucontext structure which is incompatible with the struct defined (and exposed to userspace by) the asm-generic headers. glibc carries its own struct definition that is compatible with the arm64 definition, but we should expose our format in the uapi headers in case other libraries want to make use of the ucontext pushed as part of an arm64 sigframe. This patch moves the arm64 asm/ucontext.h to the uapi headers, along with the necessary #include of linux/types.h. Cc: Arnd Bergmann Cc: Marcus Shawcroft Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/uapi/asm/Kbuild | 1 + arch/arm64/include/{ => uapi}/asm/ucontext.h | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) rename arch/arm64/include/{ => uapi}/asm/ucontext.h (88%) diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild index 942376d37d220f..825b0fe51c2b41 100644 --- a/arch/arm64/include/uapi/asm/Kbuild +++ b/arch/arm64/include/uapi/asm/Kbuild @@ -18,4 +18,5 @@ header-y += siginfo.h header-y += signal.h header-y += stat.h header-y += statfs.h +header-y += ucontext.h header-y += unistd.h diff --git a/arch/arm64/include/asm/ucontext.h b/arch/arm64/include/uapi/asm/ucontext.h similarity index 88% rename from arch/arm64/include/asm/ucontext.h rename to arch/arm64/include/uapi/asm/ucontext.h index 42e04c87742837..791de8e89e359d 100644 --- a/arch/arm64/include/asm/ucontext.h +++ b/arch/arm64/include/uapi/asm/ucontext.h @@ -13,8 +13,10 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -#ifndef __ASM_UCONTEXT_H -#define __ASM_UCONTEXT_H +#ifndef _UAPI__ASM_UCONTEXT_H +#define _UAPI__ASM_UCONTEXT_H + +#include struct ucontext { unsigned long uc_flags; @@ -27,4 +29,4 @@ struct ucontext { struct sigcontext uc_mcontext; }; -#endif /* __ASM_UCONTEXT_H */ +#endif /* _UAPI__ASM_UCONTEXT_H */ From 82b0b2c2b1e64ad6c5309a9eeba806af9812666b Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Fri, 23 Jan 2015 09:31:01 -0800 Subject: [PATCH 248/601] Smack: Repair netfilter dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 1/23/2015 8:20 AM, Jim Davis wrote: > Building with the attached random configuration file, > > security/smack/smack_netfilter.c: In function ‘smack_ipv4_output’: > security/smack/smack_netfilter.c:55:6: error: ‘struct sk_buff’ has no > member named ‘secmark’ > skb->secmark = skp->smk_secid; > ^ > make[2]: *** [security/smack/smack_netfilter.o] Error 1 The existing Makefile used the wrong configuration option to determine if smack_netfilter should be built. This sets it right. Signed-off-by: Casey Schaufler --- security/smack/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/smack/Makefile b/security/smack/Makefile index 616cf93b368ed7..ee2ebd50454126 100644 --- a/security/smack/Makefile +++ b/security/smack/Makefile @@ -5,4 +5,4 @@ obj-$(CONFIG_SECURITY_SMACK) := smack.o smack-y := smack_lsm.o smack_access.o smackfs.o -smack-$(CONFIG_NETFILTER) += smack_netfilter.o +smack-$(CONFIG_SECURITY_SMACK_NETFILTER) += smack_netfilter.o From 566fcec60b7458784d4ed9bca974c5a56dacf214 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Jan 2015 15:32:46 -0500 Subject: [PATCH 249/601] NFSv4: Fix an atomicity problem in CLOSE If we are to remove the serialisation of OPEN/CLOSE, then we need to ensure that the stateid sent as part of a CLOSE operation does not change after we test the state in nfs4_close_prepare. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 7 ++++++- fs/nfs/nfs4xdr.c | 4 ++-- include/linux/nfs_xdr.h | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c347705b016104..4863dec108652d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2587,6 +2587,11 @@ static void nfs4_close_done(struct rpc_task *task, void *data) case -NFS4ERR_OLD_STATEID: case -NFS4ERR_BAD_STATEID: case -NFS4ERR_EXPIRED: + if (!nfs4_stateid_match(&calldata->arg.stateid, + &state->stateid)) { + rpc_restart_call_prepare(task); + goto out_release; + } if (calldata->arg.fmode == 0) break; default: @@ -2619,6 +2624,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags); is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags); is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags); + nfs4_stateid_copy(&calldata->arg.stateid, &state->stateid); /* Calculate the change in open mode */ calldata->arg.fmode = 0; if (state->n_rdwr == 0) { @@ -2757,7 +2763,6 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) calldata->inode = state->inode; calldata->state = state; calldata->arg.fh = NFS_FH(state->inode); - calldata->arg.stateid = &state->open_stateid; /* Serialization for the sequence id */ calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid, gfp_mask); if (calldata->arg.seqid == NULL) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index cb4376b78ed917..7e7be5ab70bb68 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1125,7 +1125,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg { encode_op_hdr(xdr, OP_CLOSE, decode_close_maxsz, hdr); encode_nfs4_seqid(xdr, arg->seqid); - encode_nfs4_stateid(xdr, arg->stateid); + encode_nfs4_stateid(xdr, &arg->stateid); } static void encode_commit(struct xdr_stream *xdr, const struct nfs_commitargs *args, struct compound_hdr *hdr) @@ -1530,7 +1530,7 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_closeargs *arg, struct compound_hdr *hdr) { encode_op_hdr(xdr, OP_OPEN_DOWNGRADE, decode_open_downgrade_maxsz, hdr); - encode_nfs4_stateid(xdr, arg->stateid); + encode_nfs4_stateid(xdr, &arg->stateid); encode_nfs4_seqid(xdr, arg->seqid); encode_share_access(xdr, arg->fmode); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 467c84efb5960f..7e38d641236e5c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -389,7 +389,7 @@ struct nfs_open_confirmres { struct nfs_closeargs { struct nfs4_sequence_args seq_args; struct nfs_fh * fh; - nfs4_stateid * stateid; + nfs4_stateid stateid; struct nfs_seqid * seqid; fmode_t fmode; const u32 * bitmask; From f95549cf24660255c880b3ea7ee2d7d08de1f5c5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Jan 2015 18:06:09 -0500 Subject: [PATCH 250/601] NFSv4: More CLOSE/OPEN races If an OPEN RPC call races with a CLOSE or OPEN_DOWNGRADE so that it updates the nfs_state structure before the CLOSE/OPEN_DOWNGRADE has a chance to do so, then we know that the state->flags need to be recalculated from scratch. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4863dec108652d..a6c04a5812d0c7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1167,6 +1167,16 @@ static bool nfs_need_update_open_stateid(struct nfs4_state *state, return false; } +static void nfs_resync_open_stateid_locked(struct nfs4_state *state) +{ + if (state->n_wronly) + set_bit(NFS_O_WRONLY_STATE, &state->flags); + if (state->n_rdonly) + set_bit(NFS_O_RDONLY_STATE, &state->flags); + if (state->n_rdwr) + set_bit(NFS_O_RDWR_STATE, &state->flags); +} + static void nfs_clear_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) { @@ -1185,8 +1195,12 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state, } if (stateid == NULL) return; - if (!nfs_need_update_open_stateid(state, stateid)) + /* Handle races with OPEN */ + if (!nfs4_stateid_match_other(stateid, &state->open_stateid) || + !nfs4_stateid_is_newer(stateid, &state->open_stateid)) { + nfs_resync_open_stateid_locked(state); return; + } if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) nfs4_stateid_copy(&state->stateid, stateid); nfs4_stateid_copy(&state->open_stateid, stateid); From badc76dd0dc6d55a86c79e952f19d3af24708058 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Jan 2015 18:48:00 -0500 Subject: [PATCH 251/601] NFSv4: Convert nfs_alloc_seqid() to return an ERR_PTR() if allocation fails When we relax the sequencing on the NFSv4.1 OPEN/CLOSE code, we will want to use the value NULL to indicate that no sequencing is needed. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 10 +++++----- fs/nfs/nfs4state.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a6c04a5812d0c7..c9c38077075c44 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -988,7 +988,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, goto err_free_p; p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask); - if (p->o_arg.seqid == NULL) + if (IS_ERR(p->o_arg.seqid)) goto err_free_label; nfs_sb_active(dentry->d_sb); p->dentry = dget(dentry); @@ -2779,7 +2779,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) calldata->arg.fh = NFS_FH(state->inode); /* Serialization for the sequence id */ calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid, gfp_mask); - if (calldata->arg.seqid == NULL) + if (IS_ERR(calldata->arg.seqid)) goto out_free_calldata; calldata->arg.fmode = 0; calldata->arg.bitmask = server->cache_consistency_bitmask; @@ -5517,7 +5517,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * goto out; seqid = nfs_alloc_seqid(&lsp->ls_seqid, GFP_KERNEL); status = -ENOMEM; - if (seqid == NULL) + if (IS_ERR(seqid)) goto out; task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid); status = PTR_ERR(task); @@ -5558,10 +5558,10 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->arg.fh = NFS_FH(inode); p->arg.fl = &p->fl; p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid, gfp_mask); - if (p->arg.open_seqid == NULL) + if (IS_ERR(p->arg.open_seqid)) goto out_free; p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid, gfp_mask); - if (p->arg.lock_seqid == NULL) + if (IS_ERR(p->arg.lock_seqid)) goto out_free_seqid; p->arg.lock_stateid = &lsp->ls_stateid; p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5194933ed419c1..b922e43d69b88b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1003,11 +1003,11 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_m struct nfs_seqid *new; new = kmalloc(sizeof(*new), gfp_mask); - if (new != NULL) { - new->sequence = counter; - INIT_LIST_HEAD(&new->list); - new->task = NULL; - } + if (new == NULL) + return ERR_PTR(-ENOMEM); + new->sequence = counter; + INIT_LIST_HEAD(&new->list); + new->task = NULL; return new; } From a67964197c946adbb14d91c3d878af22de47091c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Jan 2015 19:04:44 -0500 Subject: [PATCH 252/601] NFSv4: Check for NULL argument in nfs_*_seqid() functions Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 21 ++++++++++++++------- fs/nfs/nfs4xdr.c | 5 ++++- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index b922e43d69b88b..590f096fd01179 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1015,7 +1015,7 @@ void nfs_release_seqid(struct nfs_seqid *seqid) { struct nfs_seqid_counter *sequence; - if (list_empty(&seqid->list)) + if (seqid == NULL || list_empty(&seqid->list)) return; sequence = seqid->sequence; spin_lock(&sequence->lock); @@ -1071,13 +1071,15 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) { - struct nfs4_state_owner *sp = container_of(seqid->sequence, - struct nfs4_state_owner, so_seqid); - struct nfs_server *server = sp->so_server; + struct nfs4_state_owner *sp; + + if (seqid == NULL) + return; + sp = container_of(seqid->sequence, struct nfs4_state_owner, so_seqid); if (status == -NFS4ERR_BAD_SEQID) nfs4_drop_state_owner(sp); - if (!nfs4_has_session(server->nfs_client)) + if (!nfs4_has_session(sp->so_server->nfs_client)) nfs_increment_seqid(status, seqid); } @@ -1088,14 +1090,18 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid) */ void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid) { - nfs_increment_seqid(status, seqid); + if (seqid != NULL) + nfs_increment_seqid(status, seqid); } int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) { - struct nfs_seqid_counter *sequence = seqid->sequence; + struct nfs_seqid_counter *sequence; int status = 0; + if (seqid == NULL) + goto out; + sequence = seqid->sequence; spin_lock(&sequence->lock); seqid->task = task; if (list_empty(&seqid->list)) @@ -1106,6 +1112,7 @@ int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task) status = -EAGAIN; unlock: spin_unlock(&sequence->lock); +out: return status; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7e7be5ab70bb68..d05fada4929ced 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -946,7 +946,10 @@ static void encode_uint64(struct xdr_stream *xdr, u64 n) static void encode_nfs4_seqid(struct xdr_stream *xdr, const struct nfs_seqid *seqid) { - encode_uint32(xdr, seqid->sequence->counter); + if (seqid != NULL) + encode_uint32(xdr, seqid->sequence->counter); + else + encode_uint32(xdr, 0); } static void encode_compound_hdr(struct xdr_stream *xdr, From 63f5f796af613898669b23ccfc091ec77de7591c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Jan 2015 19:19:25 -0500 Subject: [PATCH 253/601] NFSv4.1: Allow parallel OPEN/OPEN_DOWNGRADE/CLOSE Remove the serialisation of OPEN/OPEN_DOWNGRADE and CLOSE calls for the case of NFSv4.1 and newer. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 3 +++ fs/nfs/nfs4proc.c | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a08178764cf96e..e57290a66b761e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -44,6 +44,7 @@ enum nfs4_client_state { #define NFS4_RENEW_TIMEOUT 0x01 #define NFS4_RENEW_DELEGATION_CB 0x02 +struct nfs_seqid_counter; struct nfs4_minor_version_ops { u32 minor_version; unsigned init_caps; @@ -56,6 +57,8 @@ struct nfs4_minor_version_ops { struct nfs_fsinfo *); void (*free_lock_state)(struct nfs_server *, struct nfs4_lock_state *); + struct nfs_seqid * + (*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); const struct rpc_call_ops *call_sync_ops; const struct nfs4_state_recovery_ops *reboot_recovery_ops; const struct nfs4_state_recovery_ops *nograce_recovery_ops; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c9c38077075c44..0a279ad5421fff 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -977,6 +977,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, struct dentry *parent = dget_parent(dentry); struct inode *dir = parent->d_inode; struct nfs_server *server = NFS_SERVER(dir); + struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); struct nfs4_opendata *p; p = kzalloc(sizeof(*p), gfp_mask); @@ -987,7 +988,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, if (IS_ERR(p->f_label)) goto err_free_p; - p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask); + alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid; + p->o_arg.seqid = alloc_seqid(&sp->so_seqid, gfp_mask); if (IS_ERR(p->o_arg.seqid)) goto err_free_label; nfs_sb_active(dentry->d_sb); @@ -2751,6 +2753,7 @@ static bool nfs4_roc(struct inode *inode) int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) { struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); struct nfs4_closedata *calldata; struct nfs4_state_owner *sp = state->owner; struct rpc_task *task; @@ -2778,7 +2781,8 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) calldata->state = state; calldata->arg.fh = NFS_FH(state->inode); /* Serialization for the sequence id */ - calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid, gfp_mask); + alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid; + calldata->arg.seqid = alloc_seqid(&state->owner->so_seqid, gfp_mask); if (IS_ERR(calldata->arg.seqid)) goto out_free_calldata; calldata->arg.fmode = 0; @@ -8414,6 +8418,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .match_stateid = nfs4_match_stateid, .find_root_sec = nfs4_find_root_sec, .free_lock_state = nfs4_release_lockowner, + .alloc_seqid = nfs_alloc_seqid, .call_sync_ops = &nfs40_call_sync_ops, .reboot_recovery_ops = &nfs40_reboot_recovery_ops, .nograce_recovery_ops = &nfs40_nograce_recovery_ops, @@ -8422,6 +8427,12 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { }; #if defined(CONFIG_NFS_V4_1) +static struct nfs_seqid * +nfs_alloc_no_seqid(struct nfs_seqid_counter *arg1, gfp_t arg2) +{ + return NULL; +} + static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, .init_caps = NFS_CAP_READDIRPLUS @@ -8435,6 +8446,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .match_stateid = nfs41_match_stateid, .find_root_sec = nfs41_find_root_sec, .free_lock_state = nfs41_free_lock_state, + .alloc_seqid = nfs_alloc_no_seqid, .call_sync_ops = &nfs41_call_sync_ops, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, @@ -8461,6 +8473,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { .find_root_sec = nfs41_find_root_sec, .free_lock_state = nfs41_free_lock_state, .call_sync_ops = &nfs41_call_sync_ops, + .alloc_seqid = nfs_alloc_no_seqid, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, .state_renewal_ops = &nfs41_state_renewal_ops, From 39071e6fff7d7e11a5993afd67240ef04a4d05a0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 24 Jan 2015 15:07:56 -0500 Subject: [PATCH 254/601] NFSv4: Fix atomicity problems with lock stateid updates When we update the lock stateid, we really do need to ensure that this is done under the state->state_lock, and that we are indeed only updating confirmed locks with a newer version of the same stateid. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0a279ad5421fff..db9d98eda07be8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1297,6 +1297,23 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat return ret; } +static bool nfs4_update_lock_stateid(struct nfs4_lock_state *lsp, + const nfs4_stateid *stateid) +{ + struct nfs4_state *state = lsp->ls_state; + bool ret = false; + + spin_lock(&state->state_lock); + if (!nfs4_stateid_match_other(stateid, &lsp->ls_stateid)) + goto out_noupdate; + if (!nfs4_stateid_is_newer(stateid, &lsp->ls_stateid)) + goto out_noupdate; + nfs4_stateid_copy(&lsp->ls_stateid, stateid); + ret = true; +out_noupdate: + spin_unlock(&state->state_lock); + return ret; +} static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmode) { @@ -5403,9 +5420,9 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) return; switch (task->tk_status) { case 0: - nfs4_stateid_copy(&calldata->lsp->ls_stateid, - &calldata->res.stateid); renew_lease(calldata->server, calldata->timestamp); + nfs4_update_lock_stateid(calldata->lsp, + &calldata->res.stateid); break; case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OLD_STATEID: @@ -5626,6 +5643,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) static void nfs4_lock_done(struct rpc_task *task, void *calldata) { struct nfs4_lockdata *data = calldata; + struct nfs4_lock_state *lsp = data->lsp; dprintk("%s: begin!\n", __func__); @@ -5633,18 +5651,16 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) return; data->rpc_status = task->tk_status; - if (data->arg.new_lock_owner != 0) { - if (data->rpc_status == 0) - nfs_confirm_seqid(&data->lsp->ls_seqid, 0); - else - goto out; - } - if (data->rpc_status == 0) { - nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid); - set_bit(NFS_LOCK_INITIALIZED, &data->lsp->ls_flags); - renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); + if (task->tk_status == 0) { + renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), + data->timestamp); + if (data->arg.new_lock_owner != 0) { + nfs_confirm_seqid(&lsp->ls_seqid, 0); + nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid); + set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); + } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid)) + rpc_restart_call_prepare(task); } -out: dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); } From 6b447539aa9aaac0a0215f3e28a0839553210e7e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 24 Jan 2015 18:38:15 -0500 Subject: [PATCH 255/601] NFSv4: Always do open_to_lock_owner if the lock stateid is uninitialised The original text in RFC3530 was terribly confusing since it conflated lockowners and lock stateids. RFC3530bis clarifies that you must use open_to_lock_owner when there is no lock state for that file+lockowner combination. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index db9d98eda07be8..f12ded041a426c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5611,7 +5611,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0) goto out_wait; /* Do we need to do an open_to_lock_owner? */ - if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) { + if (!test_bit(NFS_LOCK_INITIALIZED, &data->lsp->ls_flags)) { if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) { goto out_release_lock_seqid; } From 425c1d4e5b6d4bd700eb94ad8318bdb05431fdc7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 24 Jan 2015 14:57:53 -0500 Subject: [PATCH 256/601] NFSv4: Fix lock on-wire reordering issues This patch ensures that the server cannot reorder our LOCK/LOCKU requests if they are sent in parallel on the wire. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 29 ++++++++++++++++++++++++----- fs/nfs/nfs4xdr.c | 6 +++--- include/linux/nfs_xdr.h | 6 +++--- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f12ded041a426c..41e7c2fc046e69 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5393,7 +5393,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, p->arg.fl = &p->fl; p->arg.seqid = seqid; p->res.seqid = seqid; - p->arg.stateid = &lsp->ls_stateid; p->lsp = lsp; atomic_inc(&lsp->ls_count); /* Ensure we don't close file until we're done freeing locks! */ @@ -5428,6 +5427,9 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: + if (!nfs4_stateid_match(&calldata->arg.stateid, + &calldata->lsp->ls_stateid)) + rpc_restart_call_prepare(task); break; default: if (nfs4_async_handle_error(task, calldata->server, @@ -5443,6 +5445,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data) if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) goto out_wait; + nfs4_stateid_copy(&calldata->arg.stateid, &calldata->lsp->ls_stateid); if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) { /* Note: exit _without_ running nfs4_locku_done */ goto out_no_action; @@ -5584,7 +5587,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid, gfp_mask); if (IS_ERR(p->arg.lock_seqid)) goto out_free_seqid; - p->arg.lock_stateid = &lsp->ls_stateid; p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; p->arg.lock_owner.id = lsp->ls_seqid.owner_id; p->arg.lock_owner.s_dev = server->s_dev; @@ -5615,11 +5617,15 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata) if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) { goto out_release_lock_seqid; } - data->arg.open_stateid = &state->open_stateid; + nfs4_stateid_copy(&data->arg.open_stateid, + &state->open_stateid); data->arg.new_lock_owner = 1; data->res.open_seqid = data->arg.open_seqid; - } else + } else { data->arg.new_lock_owner = 0; + nfs4_stateid_copy(&data->arg.lock_stateid, + &data->lsp->ls_stateid); + } if (!nfs4_valid_open_stateid(state)) { data->rpc_status = -EBADF; task->tk_action = NULL; @@ -5651,7 +5657,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) return; data->rpc_status = task->tk_status; - if (task->tk_status == 0) { + switch (task->tk_status) { + case 0: renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); if (data->arg.new_lock_owner != 0) { @@ -5660,6 +5667,18 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) set_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags); } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid)) rpc_restart_call_prepare(task); + break; + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OLD_STATEID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + if (data->arg.new_lock_owner != 0) { + if (!nfs4_stateid_match(&data->arg.open_stateid, + &lsp->ls_state->open_stateid)) + rpc_restart_call_prepare(task); + } else if (!nfs4_stateid_match(&data->arg.lock_stateid, + &lsp->ls_stateid)) + rpc_restart_call_prepare(task); } dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index d05fada4929ced..e3018e7a316c82 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1304,12 +1304,12 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args *p = cpu_to_be32(args->new_lock_owner); if (args->new_lock_owner){ encode_nfs4_seqid(xdr, args->open_seqid); - encode_nfs4_stateid(xdr, args->open_stateid); + encode_nfs4_stateid(xdr, &args->open_stateid); encode_nfs4_seqid(xdr, args->lock_seqid); encode_lockowner(xdr, &args->lock_owner); } else { - encode_nfs4_stateid(xdr, args->lock_stateid); + encode_nfs4_stateid(xdr, &args->lock_stateid); encode_nfs4_seqid(xdr, args->lock_seqid); } } @@ -1333,7 +1333,7 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar encode_op_hdr(xdr, OP_LOCKU, decode_locku_maxsz, hdr); encode_uint32(xdr, nfs4_lock_type(args->fl, 0)); encode_nfs4_seqid(xdr, args->seqid); - encode_nfs4_stateid(xdr, args->stateid); + encode_nfs4_stateid(xdr, &args->stateid); p = reserve_space(xdr, 16); p = xdr_encode_hyper(p, args->fl->fl_start); xdr_encode_hyper(p, nfs4_lock_length(args->fl)); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7e38d641236e5c..b6a6953c0f0958 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -416,9 +416,9 @@ struct nfs_lock_args { struct nfs_fh * fh; struct file_lock * fl; struct nfs_seqid * lock_seqid; - nfs4_stateid * lock_stateid; + nfs4_stateid lock_stateid; struct nfs_seqid * open_seqid; - nfs4_stateid * open_stateid; + nfs4_stateid open_stateid; struct nfs_lowner lock_owner; unsigned char block : 1; unsigned char reclaim : 1; @@ -437,7 +437,7 @@ struct nfs_locku_args { struct nfs_fh * fh; struct file_lock * fl; struct nfs_seqid * seqid; - nfs4_stateid * stateid; + nfs4_stateid stateid; }; struct nfs_locku_res { From c69899a17ca4836230720e65493942d9582a0424 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 24 Jan 2015 16:03:52 -0500 Subject: [PATCH 257/601] NFSv4: Update of VFS byte range lock must be atomic with the stateid update Ensure that we test the lock stateid remained unchanged while we were updating the VFS tracking of the byte range lock. Have the process replay the lock to the server if we detect that was not the case. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 37 +++++++++++++++---------------------- include/linux/nfs_xdr.h | 1 + 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 41e7c2fc046e69..9f6baf98942c23 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5420,9 +5420,10 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) switch (task->tk_status) { case 0: renew_lease(calldata->server, calldata->timestamp); - nfs4_update_lock_stateid(calldata->lsp, - &calldata->res.stateid); - break; + do_vfs_lock(calldata->fl.fl_file, &calldata->fl); + if (nfs4_update_lock_stateid(calldata->lsp, + &calldata->res.stateid)) + break; case -NFS4ERR_BAD_STATEID: case -NFS4ERR_OLD_STATEID: case -NFS4ERR_STALE_STATEID: @@ -5661,6 +5662,13 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) case 0: renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); + if (data->arg.new_lock) { + data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS); + if (do_vfs_lock(data->fl.fl_file, &data->fl) < 0) { + rpc_restart_call_prepare(task); + break; + } + } if (data->arg.new_lock_owner != 0) { nfs_confirm_seqid(&lsp->ls_seqid, 0); nfs4_stateid_copy(&lsp->ls_stateid, &data->res.stateid); @@ -5760,7 +5768,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f if (recovery_type == NFS_LOCK_RECLAIM) data->arg.reclaim = NFS_LOCK_RECLAIM; nfs4_set_sequence_privileged(&data->arg.seq_args); - } + } else + data->arg.new_lock = 1; task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); @@ -5884,10 +5893,8 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { - struct nfs4_state_owner *sp = state->owner; struct nfs_inode *nfsi = NFS_I(state->inode); unsigned char fl_flags = request->fl_flags; - unsigned int seq; int status = -ENOLCK; if ((fl_flags & FL_POSIX) && @@ -5907,25 +5914,11 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock /* ...but avoid races with delegation recall... */ request->fl_flags = fl_flags & ~FL_SLEEP; status = do_vfs_lock(request->fl_file, request); - goto out_unlock; - } - seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); - up_read(&nfsi->rwsem); - status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW); - if (status != 0) + up_read(&nfsi->rwsem); goto out; - down_read(&nfsi->rwsem); - if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) { - status = -NFS4ERR_DELAY; - goto out_unlock; } - /* Note: we always want to sleep here! */ - request->fl_flags = fl_flags | FL_SLEEP; - if (do_vfs_lock(request->fl_file, request) < 0) - printk(KERN_WARNING "NFS: %s: VFS is out of sync with lock " - "manager!\n", __func__); -out_unlock: up_read(&nfsi->rwsem); + status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW); out: request->fl_flags = fl_flags; return status; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index b6a6953c0f0958..e5c3b620a6099a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -422,6 +422,7 @@ struct nfs_lock_args { struct nfs_lowner lock_owner; unsigned char block : 1; unsigned char reclaim : 1; + unsigned char new_lock : 1; unsigned char new_lock_owner : 1; }; From b4019c0e219bb1301865f8b2efedb4773526ed91 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 24 Jan 2015 14:19:19 -0500 Subject: [PATCH 258/601] NFSv4.1: Allow parallel LOCK/LOCKU calls Note, however, that we still serialise on the open stateid if the lock stateid is unconfirmed. Hopefully that will not prove too much of a burden for first time locks; it should leave the ability to parallelise OPENs unchanged, since they no longer call the serialisation primitives. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9f6baf98942c23..66befb0dd241c5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5517,6 +5517,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * struct nfs_seqid *seqid; struct nfs4_lock_state *lsp; struct rpc_task *task; + struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); int status = 0; unsigned char fl_flags = request->fl_flags; @@ -5540,7 +5541,8 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * lsp = request->fl_u.nfs4_fl.owner; if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) == 0) goto out; - seqid = nfs_alloc_seqid(&lsp->ls_seqid, GFP_KERNEL); + alloc_seqid = NFS_SERVER(inode)->nfs_client->cl_mvops->alloc_seqid; + seqid = alloc_seqid(&lsp->ls_seqid, GFP_KERNEL); status = -ENOMEM; if (IS_ERR(seqid)) goto out; @@ -5575,6 +5577,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, struct nfs4_lockdata *p; struct inode *inode = lsp->ls_state->inode; struct nfs_server *server = NFS_SERVER(inode); + struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t); p = kzalloc(sizeof(*p), gfp_mask); if (p == NULL) @@ -5585,7 +5588,8 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid, gfp_mask); if (IS_ERR(p->arg.open_seqid)) goto out_free; - p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid, gfp_mask); + alloc_seqid = server->nfs_client->cl_mvops->alloc_seqid; + p->arg.lock_seqid = alloc_seqid(&lsp->ls_seqid, gfp_mask); if (IS_ERR(p->arg.lock_seqid)) goto out_free_seqid; p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; From 40dd4b7aee1a8c3b8dac7b67ba710692d7691b77 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 24 Jan 2015 13:54:37 -0500 Subject: [PATCH 259/601] NFSv4.1: Optimise layout return-on-close Optimise the layout return on close code by ensuring that 1) Add a check for whether we hold a layout before taking any spinlocks 2) Only take the spin lock once 3) Use nfs_state->state to speed up open file checks Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 37 +------------------------------------ fs/nfs/pnfs.c | 24 ++++++++++++++++++++---- fs/nfs/pnfs.h | 10 ++++++++++ 3 files changed, 31 insertions(+), 40 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 66befb0dd241c5..0f75b92767262a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2714,45 +2714,10 @@ static const struct rpc_call_ops nfs4_close_ops = { .rpc_release = nfs4_free_closedata, }; -static bool nfs4_state_has_opener(struct nfs4_state *state) -{ - /* first check existing openers */ - if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0 && - state->n_rdonly != 0) - return true; - - if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0 && - state->n_wronly != 0) - return true; - - if (test_bit(NFS_O_RDWR_STATE, &state->flags) != 0 && - state->n_rdwr != 0) - return true; - - return false; -} - static bool nfs4_roc(struct inode *inode) { - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_open_context *ctx; - struct nfs4_state *state; - - spin_lock(&inode->i_lock); - list_for_each_entry(ctx, &nfsi->open_files, list) { - state = ctx->state; - if (state == NULL) - continue; - if (nfs4_state_has_opener(state)) { - spin_unlock(&inode->i_lock); - return false; - } - } - spin_unlock(&inode->i_lock); - - if (nfs4_check_delegation(inode, FMODE_READ)) + if (!nfs_have_layout(inode)) return false; - return pnfs_roc(inode); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0a5dda4d85c27b..4d69076a60282f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -34,6 +34,7 @@ #include "pnfs.h" #include "iostat.h" #include "nfs4trace.h" +#include "delegation.h" #define NFSDBG_FACILITY NFSDBG_PNFS #define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ) @@ -954,30 +955,45 @@ pnfs_commit_and_return_layout(struct inode *inode) bool pnfs_roc(struct inode *ino) { + struct nfs_inode *nfsi = NFS_I(ino); + struct nfs_open_context *ctx; + struct nfs4_state *state; struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg, *tmp; LIST_HEAD(tmp_list); bool found = false; spin_lock(&ino->i_lock); - lo = NFS_I(ino)->layout; + lo = nfsi->layout; if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) - goto out_nolayout; + goto out_noroc; + + /* Don't return layout if we hold a delegation */ + if (nfs4_check_delegation(ino, FMODE_READ)) + goto out_noroc; + + list_for_each_entry(ctx, &nfsi->open_files, list) { + state = ctx->state; + /* Don't return layout if there is open file state */ + if (state != NULL && state->state != 0) + goto out_noroc; + } + list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { mark_lseg_invalid(lseg, &tmp_list); found = true; } if (!found) - goto out_nolayout; + goto out_noroc; lo->plh_block_lgets++; pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */ spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); return true; -out_nolayout: +out_noroc: spin_unlock(&ino->i_lock); return false; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 9ae5b765b07351..a98d8fd9637f56 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -275,6 +275,11 @@ void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node); bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); void nfs4_deviceid_purge_client(const struct nfs_client *); +static inline bool nfs_have_layout(struct inode *inode) +{ + return NFS_I(inode)->layout != NULL; +} + static inline struct nfs4_deviceid_node * nfs4_get_deviceid(struct nfs4_deviceid_node *d) { @@ -427,6 +432,11 @@ static inline void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id) #endif /* NFS_DEBUG */ #else /* CONFIG_NFS_V4_1 */ +static inline bool nfs_have_layout(struct inode *inode) +{ + return false; +} + static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) { } From 127b21b89f9d8ba0dc23e47b8c35d8a0bac9d6fc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Jan 2015 13:19:17 -0500 Subject: [PATCH 260/601] SUNRPC: Adjust rpciod workqueue parameters Increase the concurrency level for rpciod threads to allow for allocations etc that happen in the RPCSEC_GSS layer. Also note that the NFSv4 byte range locks may now need to allocate memory from inside rpciod. Add the WQ_HIGHPRI flag to improve latency guarantees while we're at it. Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index d20f2329eea3f4..4f65ec28d2b49c 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1069,7 +1069,8 @@ static int rpciod_start(void) * Create the rpciod thread and wait for it to start. */ dprintk("RPC: creating workqueue rpciod\n"); - wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 1); + /* Note: highpri because network receive is latency sensitive */ + wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); rpciod_workqueue = wq; return rpciod_workqueue != NULL; } From c4a7ca774949960064dac11b326908f28407e8c3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 23 Jan 2015 14:50:56 -0500 Subject: [PATCH 261/601] SUNRPC: Allow waiting on memory allocation We should be safe now, as long as we don't do GFP_IO or higher allocations Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 4f65ec28d2b49c..b91fd9c597b451 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -844,10 +844,10 @@ static void rpc_async_schedule(struct work_struct *work) void *rpc_malloc(struct rpc_task *task, size_t size) { struct rpc_buffer *buf; - gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN; + gfp_t gfp = GFP_NOIO | __GFP_NOWARN; if (RPC_IS_SWAPPER(task)) - gfp |= __GFP_MEMALLOC; + gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; size += sizeof(struct rpc_buffer); if (size <= RPC_BUFFER_MAXSIZE) From 89f0ff386cb1ebca0da7940d05bf609bc86f3972 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 3 Jan 2015 14:47:43 -0500 Subject: [PATCH 262/601] NFSv4.1: Replace usage of nfs_client->cl_addr in encode_create_session Replace the current code with something that is a little closer to what net/sunrpc/auth_unix.c uses. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e3018e7a316c82..41253393171f0b 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1804,9 +1804,8 @@ static void encode_create_session(struct xdr_stream *xdr, struct compound_hdr *hdr) { __be32 *p; - char machine_name[NFS4_MAX_MACHINE_NAME_LEN]; - uint32_t len; struct nfs_client *clp = args->client; + struct rpc_clnt *clnt = clp->cl_rpcclient; struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); u32 max_resp_sz_cached; @@ -1817,11 +1816,8 @@ static void encode_create_session(struct xdr_stream *xdr, max_resp_sz_cached = (NFS4_dec_open_sz + RPC_REPHDRSIZE + RPC_MAX_AUTH_SIZE + 2) * XDR_UNIT; - len = scnprintf(machine_name, sizeof(machine_name), "%s", - clp->cl_ipaddr); - encode_op_hdr(xdr, OP_CREATE_SESSION, decode_create_session_maxsz, hdr); - p = reserve_space(xdr, 16 + 2*28 + 20 + len + 12); + p = reserve_space(xdr, 16 + 2*28 + 20 + clnt->cl_nodelen + 12); p = xdr_encode_hyper(p, clp->cl_clientid); *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ *p++ = cpu_to_be32(args->flags); /*flags */ @@ -1850,7 +1846,7 @@ static void encode_create_session(struct xdr_stream *xdr, /* authsys_parms rfc1831 */ *p++ = cpu_to_be32(nn->boot_time.tv_nsec); /* stamp */ - p = xdr_encode_opaque(p, machine_name, len); + p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); *p++ = cpu_to_be32(0); /* UID */ *p++ = cpu_to_be32(0); /* GID */ *p = cpu_to_be32(0); /* No more gids */ From cf6726e2ee387b0eff303628eaa0beaf36a1aeb4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Dec 2014 11:22:28 -0500 Subject: [PATCH 263/601] NFSv4: Deal with atomic upgrades of an existing delegation Ensure that we deal correctly with the case where the server sends us a newer instance of the same delegation. If the stateids match, but the sequence numbers differ, then treat the new delegation as if it were an atomic upgrade. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 7f3f6064134443..16b754ee0d0909 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -301,6 +301,17 @@ nfs_inode_detach_delegation(struct inode *inode) return nfs_detach_delegation(nfsi, delegation, server); } +static void +nfs_update_inplace_delegation(struct nfs_delegation *delegation, + const struct nfs_delegation *update) +{ + if (nfs4_stateid_is_newer(&update->stateid, &delegation->stateid)) { + delegation->stateid.seqid = update->stateid.seqid; + smp_wmb(); + delegation->type = update->type; + } +} + /** * nfs_inode_set_delegation - set up a delegation on an inode * @inode: inode to which delegation applies @@ -334,9 +345,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct old_delegation = rcu_dereference_protected(nfsi->delegation, lockdep_is_held(&clp->cl_lock)); if (old_delegation != NULL) { - if (nfs4_stateid_match(&delegation->stateid, - &old_delegation->stateid) && - delegation->type == old_delegation->type) { + /* Is this an update of the existing delegation? */ + if (nfs4_stateid_match_other(&old_delegation->stateid, + &delegation->stateid)) { + nfs_update_inplace_delegation(old_delegation, + delegation); + nfsi->delegation_state = old_delegation->type; goto out; } /* From 860cd64d102d9b6c97830e09c447a9a850ea7641 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Thu, 15 Jan 2015 19:29:43 -0700 Subject: [PATCH 264/601] iommu: Fix trace_map() to report original iova and original size iommu_map() calls trace_map() with iova and size. trace_map() should report original iova and original size as opposed to iova and size after they get changed during mapping. size is always zero at the end of mapping which is useless to report and iova as it gets incremented, it is not as useful as the original iova. Change iommu_map() to call trace_map() to report original iova and original size. Signed-off-by: Shuah Khan Reported-by: Alex Williamson Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3a4fb6274c9939..9e0dcdbf4110e8 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1084,7 +1084,7 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, if (ret) iommu_unmap(domain, orig_iova, orig_size - size); else - trace_map(iova, paddr, size); + trace_map(orig_iova, paddr, orig_size); return ret; } From f20ed39f53145e45edd27ff9fba5a60429c40bac Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 20 Jan 2015 18:30:04 +0200 Subject: [PATCH 265/601] iommu/ipmmu-vmsa: Use the ARM LPAE page table allocator Replace the custom page table allocation implementation with the standard allocator. The driver loses the ability to map 64kB chunkgs using the PTE contiguous hint, hence the removal of the SZ_64K page size from the IOMMU page sizes bitmap. Signed-off-by: Laurent Pinchart Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 1 + drivers/iommu/ipmmu-vmsa.c | 554 ++++--------------------------------- 2 files changed, 61 insertions(+), 494 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 87060ad6829d78..b7c656c84f51ff 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -313,6 +313,7 @@ config IPMMU_VMSA depends on ARM_LPAE depends on ARCH_SHMOBILE || COMPILE_TEST select IOMMU_API + select IOMMU_IO_PGTABLE_LPAE select ARM_DMA_USE_IOMMU help Support for the Renesas VMSA-compatible IPMMU Renesas found in the diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 791c3daec7c0c8..3d7e7325a1e5f0 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -24,6 +24,8 @@ #include #include +#include "io-pgtable.h" + struct ipmmu_vmsa_device { struct device *dev; void __iomem *base; @@ -38,9 +40,11 @@ struct ipmmu_vmsa_domain { struct ipmmu_vmsa_device *mmu; struct iommu_domain *io_domain; + struct io_pgtable_cfg cfg; + struct io_pgtable_ops *iop; + unsigned int context_id; spinlock_t lock; /* Protects mappings */ - pgd_t *pgd; }; struct ipmmu_vmsa_archdata { @@ -172,52 +176,6 @@ static LIST_HEAD(ipmmu_devices); #define IMUASID_ASID0_MASK (0xff << 0) #define IMUASID_ASID0_SHIFT 0 -/* ----------------------------------------------------------------------------- - * Page Table Bits - */ - -/* - * VMSA states in section B3.6.3 "Control of Secure or Non-secure memory access, - * Long-descriptor format" that the NStable bit being set in a table descriptor - * will result in the NStable and NS bits of all child entries being ignored and - * considered as being set. The IPMMU seems not to comply with this, as it - * generates a secure access page fault if any of the NStable and NS bits isn't - * set when running in non-secure mode. - */ -#ifndef PMD_NSTABLE -#define PMD_NSTABLE (_AT(pmdval_t, 1) << 63) -#endif - -#define ARM_VMSA_PTE_XN (((pteval_t)3) << 53) -#define ARM_VMSA_PTE_CONT (((pteval_t)1) << 52) -#define ARM_VMSA_PTE_AF (((pteval_t)1) << 10) -#define ARM_VMSA_PTE_SH_NS (((pteval_t)0) << 8) -#define ARM_VMSA_PTE_SH_OS (((pteval_t)2) << 8) -#define ARM_VMSA_PTE_SH_IS (((pteval_t)3) << 8) -#define ARM_VMSA_PTE_SH_MASK (((pteval_t)3) << 8) -#define ARM_VMSA_PTE_NS (((pteval_t)1) << 5) -#define ARM_VMSA_PTE_PAGE (((pteval_t)3) << 0) - -/* Stage-1 PTE */ -#define ARM_VMSA_PTE_nG (((pteval_t)1) << 11) -#define ARM_VMSA_PTE_AP_UNPRIV (((pteval_t)1) << 6) -#define ARM_VMSA_PTE_AP_RDONLY (((pteval_t)2) << 6) -#define ARM_VMSA_PTE_AP_MASK (((pteval_t)3) << 6) -#define ARM_VMSA_PTE_ATTRINDX_MASK (((pteval_t)3) << 2) -#define ARM_VMSA_PTE_ATTRINDX_SHIFT 2 - -#define ARM_VMSA_PTE_ATTRS_MASK \ - (ARM_VMSA_PTE_XN | ARM_VMSA_PTE_CONT | ARM_VMSA_PTE_nG | \ - ARM_VMSA_PTE_AF | ARM_VMSA_PTE_SH_MASK | ARM_VMSA_PTE_AP_MASK | \ - ARM_VMSA_PTE_NS | ARM_VMSA_PTE_ATTRINDX_MASK) - -#define ARM_VMSA_PTE_CONT_ENTRIES 16 -#define ARM_VMSA_PTE_CONT_SIZE (PAGE_SIZE * ARM_VMSA_PTE_CONT_ENTRIES) - -#define IPMMU_PTRS_PER_PTE 512 -#define IPMMU_PTRS_PER_PMD 512 -#define IPMMU_PTRS_PER_PGD 4 - /* ----------------------------------------------------------------------------- * Read/Write Access */ @@ -307,18 +265,39 @@ static void ipmmu_utlb_disable(struct ipmmu_vmsa_domain *domain, ipmmu_write(mmu, IMUCTR(utlb), 0); } -static void ipmmu_flush_pgtable(struct ipmmu_vmsa_device *mmu, void *addr, - size_t size) +static void ipmmu_tlb_flush_all(void *cookie) { - unsigned long offset = (unsigned long)addr & ~PAGE_MASK; + struct ipmmu_vmsa_domain *domain = cookie; + + ipmmu_tlb_invalidate(domain); +} + +static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, bool leaf, + void *cookie) +{ + /* The hardware doesn't support selective TLB flush. */ +} + +static void ipmmu_flush_pgtable(void *ptr, size_t size, void *cookie) +{ + unsigned long offset = (unsigned long)ptr & ~PAGE_MASK; + struct ipmmu_vmsa_domain *domain = cookie; /* * TODO: Add support for coherent walk through CCI with DVM and remove * cache handling. */ - dma_map_page(mmu->dev, virt_to_page(addr), offset, size, DMA_TO_DEVICE); + dma_map_page(domain->mmu->dev, virt_to_page(ptr), offset, size, + DMA_TO_DEVICE); } +static struct iommu_gather_ops ipmmu_gather_ops = { + .tlb_flush_all = ipmmu_tlb_flush_all, + .tlb_add_flush = ipmmu_tlb_add_flush, + .tlb_sync = ipmmu_tlb_flush_all, + .flush_pgtable = ipmmu_flush_pgtable, +}; + /* ----------------------------------------------------------------------------- * Domain/Context Management */ @@ -326,7 +305,28 @@ static void ipmmu_flush_pgtable(struct ipmmu_vmsa_device *mmu, void *addr, static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) { phys_addr_t ttbr; - u32 reg; + + /* + * Allocate the page table operations. + * + * VMSA states in section B3.6.3 "Control of Secure or Non-secure memory + * access, Long-descriptor format" that the NStable bit being set in a + * table descriptor will result in the NStable and NS bits of all child + * entries being ignored and considered as being set. The IPMMU seems + * not to comply with this, as it generates a secure access page fault + * if any of the NStable and NS bits isn't set when running in + * non-secure mode. + */ + domain->cfg.quirks = IO_PGTABLE_QUIRK_ARM_NS; + domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, + domain->cfg.ias = 32; + domain->cfg.oas = 40; + domain->cfg.tlb = &ipmmu_gather_ops; + + domain->iop = alloc_io_pgtable_ops(ARM_32_LPAE_S1, &domain->cfg, + domain); + if (!domain->iop) + return -EINVAL; /* * TODO: When adding support for multiple contexts, find an unused @@ -335,9 +335,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) domain->context_id = 0; /* TTBR0 */ - ipmmu_flush_pgtable(domain->mmu, domain->pgd, - IPMMU_PTRS_PER_PGD * sizeof(*domain->pgd)); - ttbr = __pa(domain->pgd); + ttbr = domain->cfg.arm_lpae_s1_cfg.ttbr[0]; ipmmu_ctx_write(domain, IMTTLBR0, ttbr); ipmmu_ctx_write(domain, IMTTUBR0, ttbr >> 32); @@ -350,15 +348,8 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA | IMTTBCR_IRGN0_WB_WA | IMTTBCR_SL0_LVL_1); - /* - * MAIR0 - * We need three attributes only, non-cacheable, write-back read/write - * allocate and device memory. - */ - reg = (IMMAIR_ATTR_NC << IMMAIR_ATTR_SHIFT(IMMAIR_ATTR_IDX_NC)) - | (IMMAIR_ATTR_WBRWA << IMMAIR_ATTR_SHIFT(IMMAIR_ATTR_IDX_WBRWA)) - | (IMMAIR_ATTR_DEVICE << IMMAIR_ATTR_SHIFT(IMMAIR_ATTR_IDX_DEV)); - ipmmu_ctx_write(domain, IMMAIR0, reg); + /* MAIR0 */ + ipmmu_ctx_write(domain, IMMAIR0, domain->cfg.arm_lpae_s1_cfg.mair[0]); /* IMBUSCR */ ipmmu_ctx_write(domain, IMBUSCR, @@ -462,397 +453,6 @@ static irqreturn_t ipmmu_irq(int irq, void *dev) return ipmmu_domain_irq(domain); } -/* ----------------------------------------------------------------------------- - * Page Table Management - */ - -#define pud_pgtable(pud) pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK)) - -static void ipmmu_free_ptes(pmd_t *pmd) -{ - pgtable_t table = pmd_pgtable(*pmd); - __free_page(table); -} - -static void ipmmu_free_pmds(pud_t *pud) -{ - pmd_t *pmd = pmd_offset(pud, 0); - pgtable_t table; - unsigned int i; - - for (i = 0; i < IPMMU_PTRS_PER_PMD; ++i) { - if (!pmd_table(*pmd)) - continue; - - ipmmu_free_ptes(pmd); - pmd++; - } - - table = pud_pgtable(*pud); - __free_page(table); -} - -static void ipmmu_free_pgtables(struct ipmmu_vmsa_domain *domain) -{ - pgd_t *pgd, *pgd_base = domain->pgd; - unsigned int i; - - /* - * Recursively free the page tables for this domain. We don't care about - * speculative TLB filling, because the TLB will be nuked next time this - * context bank is re-allocated and no devices currently map to these - * tables. - */ - pgd = pgd_base; - for (i = 0; i < IPMMU_PTRS_PER_PGD; ++i) { - if (pgd_none(*pgd)) - continue; - ipmmu_free_pmds((pud_t *)pgd); - pgd++; - } - - kfree(pgd_base); -} - -/* - * We can't use the (pgd|pud|pmd|pte)_populate or the set_(pgd|pud|pmd|pte) - * functions as they would flush the CPU TLB. - */ - -static pte_t *ipmmu_alloc_pte(struct ipmmu_vmsa_device *mmu, pmd_t *pmd, - unsigned long iova) -{ - pte_t *pte; - - if (!pmd_none(*pmd)) - return pte_offset_kernel(pmd, iova); - - pte = (pte_t *)get_zeroed_page(GFP_ATOMIC); - if (!pte) - return NULL; - - ipmmu_flush_pgtable(mmu, pte, PAGE_SIZE); - *pmd = __pmd(__pa(pte) | PMD_NSTABLE | PMD_TYPE_TABLE); - ipmmu_flush_pgtable(mmu, pmd, sizeof(*pmd)); - - return pte + pte_index(iova); -} - -static pmd_t *ipmmu_alloc_pmd(struct ipmmu_vmsa_device *mmu, pgd_t *pgd, - unsigned long iova) -{ - pud_t *pud = (pud_t *)pgd; - pmd_t *pmd; - - if (!pud_none(*pud)) - return pmd_offset(pud, iova); - - pmd = (pmd_t *)get_zeroed_page(GFP_ATOMIC); - if (!pmd) - return NULL; - - ipmmu_flush_pgtable(mmu, pmd, PAGE_SIZE); - *pud = __pud(__pa(pmd) | PMD_NSTABLE | PMD_TYPE_TABLE); - ipmmu_flush_pgtable(mmu, pud, sizeof(*pud)); - - return pmd + pmd_index(iova); -} - -static u64 ipmmu_page_prot(unsigned int prot, u64 type) -{ - u64 pgprot = ARM_VMSA_PTE_nG | ARM_VMSA_PTE_AF - | ARM_VMSA_PTE_SH_IS | ARM_VMSA_PTE_AP_UNPRIV - | ARM_VMSA_PTE_NS | type; - - if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) - pgprot |= ARM_VMSA_PTE_AP_RDONLY; - - if (prot & IOMMU_CACHE) - pgprot |= IMMAIR_ATTR_IDX_WBRWA << ARM_VMSA_PTE_ATTRINDX_SHIFT; - - if (prot & IOMMU_NOEXEC) - pgprot |= ARM_VMSA_PTE_XN; - else if (!(prot & (IOMMU_READ | IOMMU_WRITE))) - /* If no access create a faulting entry to avoid TLB fills. */ - pgprot &= ~ARM_VMSA_PTE_PAGE; - - return pgprot; -} - -static int ipmmu_alloc_init_pte(struct ipmmu_vmsa_device *mmu, pmd_t *pmd, - unsigned long iova, unsigned long pfn, - size_t size, int prot) -{ - pteval_t pteval = ipmmu_page_prot(prot, ARM_VMSA_PTE_PAGE); - unsigned int num_ptes = 1; - pte_t *pte, *start; - unsigned int i; - - pte = ipmmu_alloc_pte(mmu, pmd, iova); - if (!pte) - return -ENOMEM; - - start = pte; - - /* - * Install the page table entries. We can be called both for a single - * page or for a block of 16 physically contiguous pages. In the latter - * case set the PTE contiguous hint. - */ - if (size == SZ_64K) { - pteval |= ARM_VMSA_PTE_CONT; - num_ptes = ARM_VMSA_PTE_CONT_ENTRIES; - } - - for (i = num_ptes; i; --i) - *pte++ = pfn_pte(pfn++, __pgprot(pteval)); - - ipmmu_flush_pgtable(mmu, start, sizeof(*pte) * num_ptes); - - return 0; -} - -static int ipmmu_alloc_init_pmd(struct ipmmu_vmsa_device *mmu, pmd_t *pmd, - unsigned long iova, unsigned long pfn, - int prot) -{ - pmdval_t pmdval = ipmmu_page_prot(prot, PMD_TYPE_SECT); - - *pmd = pfn_pmd(pfn, __pgprot(pmdval)); - ipmmu_flush_pgtable(mmu, pmd, sizeof(*pmd)); - - return 0; -} - -static int ipmmu_create_mapping(struct ipmmu_vmsa_domain *domain, - unsigned long iova, phys_addr_t paddr, - size_t size, int prot) -{ - struct ipmmu_vmsa_device *mmu = domain->mmu; - pgd_t *pgd = domain->pgd; - unsigned long flags; - unsigned long pfn; - pmd_t *pmd; - int ret; - - if (!pgd) - return -EINVAL; - - if (size & ~PAGE_MASK) - return -EINVAL; - - if (paddr & ~((1ULL << 40) - 1)) - return -ERANGE; - - pfn = __phys_to_pfn(paddr); - pgd += pgd_index(iova); - - /* Update the page tables. */ - spin_lock_irqsave(&domain->lock, flags); - - pmd = ipmmu_alloc_pmd(mmu, pgd, iova); - if (!pmd) { - ret = -ENOMEM; - goto done; - } - - switch (size) { - case SZ_2M: - ret = ipmmu_alloc_init_pmd(mmu, pmd, iova, pfn, prot); - break; - case SZ_64K: - case SZ_4K: - ret = ipmmu_alloc_init_pte(mmu, pmd, iova, pfn, size, prot); - break; - default: - ret = -EINVAL; - break; - } - -done: - spin_unlock_irqrestore(&domain->lock, flags); - - if (!ret) - ipmmu_tlb_invalidate(domain); - - return ret; -} - -static void ipmmu_clear_pud(struct ipmmu_vmsa_device *mmu, pud_t *pud) -{ - pgtable_t table = pud_pgtable(*pud); - - /* Clear the PUD. */ - *pud = __pud(0); - ipmmu_flush_pgtable(mmu, pud, sizeof(*pud)); - - /* Free the page table. */ - __free_page(table); -} - -static void ipmmu_clear_pmd(struct ipmmu_vmsa_device *mmu, pud_t *pud, - pmd_t *pmd) -{ - pmd_t pmdval = *pmd; - unsigned int i; - - /* Clear the PMD. */ - *pmd = __pmd(0); - ipmmu_flush_pgtable(mmu, pmd, sizeof(*pmd)); - - /* Free the page table. */ - if (pmd_table(pmdval)) { - pgtable_t table = pmd_pgtable(pmdval); - - __free_page(table); - } - - /* Check whether the PUD is still needed. */ - pmd = pmd_offset(pud, 0); - for (i = 0; i < IPMMU_PTRS_PER_PMD; ++i) { - if (!pmd_none(pmd[i])) - return; - } - - /* Clear the parent PUD. */ - ipmmu_clear_pud(mmu, pud); -} - -static void ipmmu_clear_pte(struct ipmmu_vmsa_device *mmu, pud_t *pud, - pmd_t *pmd, pte_t *pte, unsigned int num_ptes) -{ - unsigned int i; - - /* Clear the PTE. */ - for (i = num_ptes; i; --i) - pte[i-1] = __pte(0); - - ipmmu_flush_pgtable(mmu, pte, sizeof(*pte) * num_ptes); - - /* Check whether the PMD is still needed. */ - pte = pte_offset_kernel(pmd, 0); - for (i = 0; i < IPMMU_PTRS_PER_PTE; ++i) { - if (!pte_none(pte[i])) - return; - } - - /* Clear the parent PMD. */ - ipmmu_clear_pmd(mmu, pud, pmd); -} - -static int ipmmu_split_pmd(struct ipmmu_vmsa_device *mmu, pmd_t *pmd) -{ - pte_t *pte, *start; - pteval_t pteval; - unsigned long pfn; - unsigned int i; - - pte = (pte_t *)get_zeroed_page(GFP_ATOMIC); - if (!pte) - return -ENOMEM; - - /* Copy the PMD attributes. */ - pteval = (pmd_val(*pmd) & ARM_VMSA_PTE_ATTRS_MASK) - | ARM_VMSA_PTE_CONT | ARM_VMSA_PTE_PAGE; - - pfn = pmd_pfn(*pmd); - start = pte; - - for (i = IPMMU_PTRS_PER_PTE; i; --i) - *pte++ = pfn_pte(pfn++, __pgprot(pteval)); - - ipmmu_flush_pgtable(mmu, start, PAGE_SIZE); - *pmd = __pmd(__pa(start) | PMD_NSTABLE | PMD_TYPE_TABLE); - ipmmu_flush_pgtable(mmu, pmd, sizeof(*pmd)); - - return 0; -} - -static void ipmmu_split_pte(struct ipmmu_vmsa_device *mmu, pte_t *pte) -{ - unsigned int i; - - for (i = ARM_VMSA_PTE_CONT_ENTRIES; i; --i) - pte[i-1] = __pte(pte_val(*pte) & ~ARM_VMSA_PTE_CONT); - - ipmmu_flush_pgtable(mmu, pte, sizeof(*pte) * ARM_VMSA_PTE_CONT_ENTRIES); -} - -static int ipmmu_clear_mapping(struct ipmmu_vmsa_domain *domain, - unsigned long iova, size_t size) -{ - struct ipmmu_vmsa_device *mmu = domain->mmu; - unsigned long flags; - pgd_t *pgd = domain->pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - if (!pgd) - return -EINVAL; - - if (size & ~PAGE_MASK) - return -EINVAL; - - pgd += pgd_index(iova); - pud = (pud_t *)pgd; - - spin_lock_irqsave(&domain->lock, flags); - - /* If there's no PUD or PMD we're done. */ - if (pud_none(*pud)) - goto done; - - pmd = pmd_offset(pud, iova); - if (pmd_none(*pmd)) - goto done; - - /* - * When freeing a 2MB block just clear the PMD. In the unlikely case the - * block is mapped as individual pages this will free the corresponding - * PTE page table. - */ - if (size == SZ_2M) { - ipmmu_clear_pmd(mmu, pud, pmd); - goto done; - } - - /* - * If the PMD has been mapped as a section remap it as pages to allow - * freeing individual pages. - */ - if (pmd_sect(*pmd)) - ipmmu_split_pmd(mmu, pmd); - - pte = pte_offset_kernel(pmd, iova); - - /* - * When freeing a 64kB block just clear the PTE entries. We don't have - * to care about the contiguous hint of the surrounding entries. - */ - if (size == SZ_64K) { - ipmmu_clear_pte(mmu, pud, pmd, pte, ARM_VMSA_PTE_CONT_ENTRIES); - goto done; - } - - /* - * If the PTE has been mapped with the contiguous hint set remap it and - * its surrounding PTEs to allow unmapping a single page. - */ - if (pte_val(*pte) & ARM_VMSA_PTE_CONT) - ipmmu_split_pte(mmu, pte); - - /* Clear the PTE. */ - ipmmu_clear_pte(mmu, pud, pmd, pte, 1); - -done: - spin_unlock_irqrestore(&domain->lock, flags); - - ipmmu_tlb_invalidate(domain); - - return 0; -} - /* ----------------------------------------------------------------------------- * IOMMU Operations */ @@ -867,12 +467,6 @@ static int ipmmu_domain_init(struct iommu_domain *io_domain) spin_lock_init(&domain->lock); - domain->pgd = kzalloc(IPMMU_PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL); - if (!domain->pgd) { - kfree(domain); - return -ENOMEM; - } - io_domain->priv = domain; domain->io_domain = io_domain; @@ -888,7 +482,7 @@ static void ipmmu_domain_destroy(struct iommu_domain *io_domain) * been detached. */ ipmmu_domain_destroy_context(domain); - ipmmu_free_pgtables(domain); + free_io_pgtable_ops(domain->iop); kfree(domain); } @@ -957,53 +551,25 @@ static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova, if (!domain) return -ENODEV; - return ipmmu_create_mapping(domain, iova, paddr, size, prot); + return domain->iop->map(domain->iop, iova, paddr, size, prot); } static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova, size_t size) { struct ipmmu_vmsa_domain *domain = io_domain->priv; - int ret; - ret = ipmmu_clear_mapping(domain, iova, size); - return ret ? 0 : size; + return domain->iop->unmap(domain->iop, iova, size); } static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, dma_addr_t iova) { struct ipmmu_vmsa_domain *domain = io_domain->priv; - pgd_t pgd; - pud_t pud; - pmd_t pmd; - pte_t pte; /* TODO: Is locking needed ? */ - if (!domain->pgd) - return 0; - - pgd = *(domain->pgd + pgd_index(iova)); - if (pgd_none(pgd)) - return 0; - - pud = *pud_offset(&pgd, iova); - if (pud_none(pud)) - return 0; - - pmd = *pmd_offset(&pud, iova); - if (pmd_none(pmd)) - return 0; - - if (pmd_sect(pmd)) - return __pfn_to_phys(pmd_pfn(pmd)) | (iova & ~PMD_MASK); - - pte = *(pmd_page_vaddr(pmd) + pte_index(iova)); - if (pte_none(pte)) - return 0; - - return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK); + return domain->iop->iova_to_phys(domain->iop, iova); } static int ipmmu_find_utlbs(struct ipmmu_vmsa_device *mmu, struct device *dev, @@ -1188,7 +754,7 @@ static const struct iommu_ops ipmmu_ops = { .iova_to_phys = ipmmu_iova_to_phys, .add_device = ipmmu_add_device, .remove_device = ipmmu_remove_device, - .pgsize_bitmap = SZ_2M | SZ_64K | SZ_4K, + .pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K, }; /* ----------------------------------------------------------------------------- From 0f1fb99b62ce226f8d818852f812c5d79071ce58 Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Thu, 22 Jan 2015 08:47:20 -0600 Subject: [PATCH 266/601] iommu/fsl: Fix section mismatch Section mismatch in reference from the variable fsl_of_pamu_driver to the function .init.text:fsl_pamu_probe() The variable fsl_of_pamu_driver references the function __init fsl_pamu_probe() If the reference is valid then annotate the variable with __init* or __refdata (see linux/init.h) or name the variable: *_template, *_timer, *_sht, *_ops, *_probe, *_probe_one, *_console Signed-off-by: Emil Medve Acked-by: Varun Sethi Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index 80ac68d884c597..d478ad87073f35 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -1224,7 +1224,7 @@ static const struct of_device_id fsl_of_pamu_ids[] = { {}, }; -static struct platform_driver fsl_of_pamu_driver = { +static struct platform_driver fsl_of_pamu_driver __initdata = { .driver = { .name = "fsl-of-pamu", }, From ca4f55878732306cb47bd76bfa803c255b4a0e6f Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Thu, 22 Jan 2015 08:47:21 -0600 Subject: [PATCH 267/601] iommu/fsl: Remove unused fsl_of_pamu_ids[] Signed-off-by: Emil Medve Acked-by: Varun Sethi Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index d478ad87073f35..113fb7be8cfde9 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -1214,16 +1214,6 @@ static int __init fsl_pamu_probe(struct platform_device *pdev) return ret; } -static const struct of_device_id fsl_of_pamu_ids[] = { - { - .compatible = "fsl,p4080-pamu", - }, - { - .compatible = "fsl,pamu", - }, - {}, -}; - static struct platform_driver fsl_of_pamu_driver __initdata = { .driver = { .name = "fsl-of-pamu", From 3df76a9dcc74d5f012b94ea01ed6e7aaf8362c5a Mon Sep 17 00:00:00 2001 From: Cyril Bur Date: Wed, 21 Jan 2015 13:32:00 +1100 Subject: [PATCH 268/601] powerpc/pseries: Fix endian problems with LE migration RTAS events require arguments be passed in big endian while hypercalls have their arguments passed in registers and the values should therefore be in CPU endian. The "ibm,suspend_me" 'RTAS' call makes a sequence of hypercalls to setup one true RTAS call. This means that "ibm,suspend_me" is handled specially in the ppc_rtas() syscall. The ppc_rtas() syscall has its arguments in big endian and can therefore pass these arguments directly to the RTAS call. "ibm,suspend_me" is handled specially from within ppc_rtas() (by calling rtas_ibm_suspend_me()) which has left an endian bug on little endian systems due to the requirement of hypercalls. The return value from rtas_ibm_suspend_me() gets returned in cpu endian, and is left unconverted, also a bug on little endian systems. rtas_ibm_suspend_me() does not actually make use of the rtas_args that it is passed. This patch removes the convoluted use of the rtas_args struct to pass params to rtas_ibm_suspend_me() in favour of passing what it needs as actual arguments. This patch also ensures the two callers of rtas_ibm_suspend_me() pass function parameters in cpu endian and in the case of ppc_rtas(), converts the return value. migrate_store() (the other caller of rtas_ibm_suspend_me()) is from a sysfs file which deals with everything in cpu endian so this function only underwent cleanup. This patch has been tested with KVM both LE and BE and on PowerVM both LE and BE. Under QEMU/KVM the migration happens without touching these code pathes. For PowerVM there is no obvious regression on BE and the LE code path now provides the correct parameters to the hypervisor. Signed-off-by: Cyril Bur Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/rtas.h | 2 +- arch/powerpc/kernel/rtas.c | 22 +++++++++++++++------- arch/powerpc/platforms/pseries/mobility.c | 22 ++++++---------------- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index b390f55b0df137..2e23e92a43722f 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -327,7 +327,7 @@ extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); extern int rtas_online_cpus_mask(cpumask_var_t cpus); extern int rtas_offline_cpus_mask(cpumask_var_t cpus); -extern int rtas_ibm_suspend_me(struct rtas_args *); +extern int rtas_ibm_suspend_me(u64 handle, int *vasi_return); struct rtc_time; extern unsigned long rtas_get_boot_time(void); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 4af905e81ab0b2..21c45a2d07062d 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -897,7 +897,7 @@ int rtas_offline_cpus_mask(cpumask_var_t cpus) } EXPORT_SYMBOL(rtas_offline_cpus_mask); -int rtas_ibm_suspend_me(struct rtas_args *args) +int rtas_ibm_suspend_me(u64 handle, int *vasi_return) { long state; long rc; @@ -911,8 +911,7 @@ int rtas_ibm_suspend_me(struct rtas_args *args) return -ENOSYS; /* Make sure the state is valid */ - rc = plpar_hcall(H_VASI_STATE, retbuf, - ((u64)args->args[0] << 32) | args->args[1]); + rc = plpar_hcall(H_VASI_STATE, retbuf, handle); state = retbuf[0]; @@ -920,12 +919,12 @@ int rtas_ibm_suspend_me(struct rtas_args *args) printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned %ld\n",rc); return rc; } else if (state == H_VASI_ENABLED) { - args->args[args->nargs] = RTAS_NOT_SUSPENDABLE; + *vasi_return = RTAS_NOT_SUSPENDABLE; return 0; } else if (state != H_VASI_SUSPENDING) { printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned state %ld\n", state); - args->args[args->nargs] = -1; + *vasi_return = -1; return 0; } @@ -973,7 +972,7 @@ int rtas_ibm_suspend_me(struct rtas_args *args) return atomic_read(&data.error); } #else /* CONFIG_PPC_PSERIES */ -int rtas_ibm_suspend_me(struct rtas_args *args) +int rtas_ibm_suspend_me(u64 handle, int *vasi_return) { return -ENOSYS; } @@ -1053,7 +1052,16 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs) /* Need to handle ibm,suspend_me call specially */ if (token == ibm_suspend_me_token) { - rc = rtas_ibm_suspend_me(&args); + + /* + * rtas_ibm_suspend_me assumes args are in cpu endian, or at least the + * hcall within it requires it. + */ + int vasi_rc = 0; + u64 handle = ((u64)be32_to_cpu(args.args[0]) << 32) + | be32_to_cpu(args.args[1]); + rc = rtas_ibm_suspend_me(handle, &vasi_rc); + args.rets[0] = cpu_to_be32(vasi_rc); if (rc) return rc; goto copy_return; diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index e7cb6d4a871ae7..90cf3dcbd9f268 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -316,34 +316,24 @@ void post_mobility_fixup(void) static ssize_t migrate_store(struct class *class, struct class_attribute *attr, const char *buf, size_t count) { - struct rtas_args args; u64 streamid; int rc; + int vasi_rc = 0; rc = kstrtou64(buf, 0, &streamid); if (rc) return rc; - memset(&args, 0, sizeof(args)); - args.token = rtas_token("ibm,suspend-me"); - args.nargs = 2; - args.nret = 1; - - args.args[0] = streamid >> 32 ; - args.args[1] = streamid & 0xffffffff; - args.rets = &args.args[args.nargs]; - do { - args.rets[0] = 0; - rc = rtas_ibm_suspend_me(&args); - if (!rc && args.rets[0] == RTAS_NOT_SUSPENDABLE) + rc = rtas_ibm_suspend_me(streamid, &vasi_rc); + if (!rc && vasi_rc == RTAS_NOT_SUSPENDABLE) ssleep(1); - } while (!rc && args.rets[0] == RTAS_NOT_SUSPENDABLE); + } while (!rc && vasi_rc == RTAS_NOT_SUSPENDABLE); if (rc) return rc; - else if (args.rets[0]) - return args.rets[0]; + if (vasi_rc) + return vasi_rc; post_mobility_fixup(); return count; From 6501ab5e380cf25dad85705856bfdde439dbcbd2 Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Wed, 21 Jan 2015 21:26:09 -0500 Subject: [PATCH 269/601] powerpc/powernv: Skip registering log region when CONFIG_PRINTK=n When CONFIG_PRINTK=n, log_buf_addr_get() returns NULL and log_buf_len_get() return 0. Check for these return values and skip registering the dump buffer. Signed-off-by: Pranith Kumar Reviewed-by: Stewart Smith Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index f10b9ec8c1f5c1..1db119f0f4ee54 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -667,7 +667,13 @@ static void __init opal_dump_region_init(void) /* Register kernel log buffer */ addr = log_buf_addr_get(); + if (addr == NULL) + return; + size = log_buf_len_get(); + if (size == 0) + return; + rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF, __pa(addr), size); /* Don't warn if this is just an older OPAL that doesn't From e9fb8b7e4f6af17e3aa4835281e06fdc920341f9 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 6 Jan 2015 16:48:36 +0000 Subject: [PATCH 270/601] compat: Declare compat_sys_sigpending and compat_sys_sigprocmask prototypes __ARCH_WANT_SYS_SIGPENDING or __ARCH_WANT_SYS_SIGPROGMASK may be defined for compat support but the corresponding prototypes are missing from linux/compat.h. Signed-off-by: Catalin Marinas Acked-by: Andrew Morton Cc: Arnd Bergmann --- include/linux/compat.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/compat.h b/include/linux/compat.h index 7450ca2ac1fc6a..ab25814690bc64 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -689,6 +689,15 @@ asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd, asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr, compat_stack_t __user *uoss_ptr); +#ifdef __ARCH_WANT_SYS_SIGPENDING +asmlinkage long compat_sys_sigpending(compat_old_sigset_t __user *set); +#endif + +#ifdef __ARCH_WANT_SYS_SIGPROCMASK +asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *nset, + compat_old_sigset_t __user *oset); +#endif + int compat_restore_altstack(const compat_stack_t __user *uss); int __compat_save_altstack(compat_stack_t __user *, unsigned long); #define compat_save_altstack_ex(uss, sp) do { \ From 54e45c169dbce43cf46d00eb1521b655b6e4f9e9 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 6 Jan 2015 16:52:38 +0000 Subject: [PATCH 271/601] syscalls: Declare sys_*stat64 prototypes if __ARCH_WANT_(COMPAT_)STAT64 Currently, the sys_stat64, sys_fstat64 and sys_lstat64 prototpyes are only declared if BITS_PER_LONG == 32. Following commit 0753f70f07fb (fs: Build sys_stat64() and friends if __ARCH_WANT_COMPAT_STAT64), the implementation of these functions is allowed on 64-bit systems for compat support. The patch changes the condition on the prototype declaration from BITS_PER_LONG == 32 to defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64). In addition, it moves the sys_fstatat64 prototype under the same #if block Signed-off-by: Catalin Marinas Acked-by: Andrew Morton Cc: Arnd Bergmann --- include/linux/syscalls.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 85893d744901b0..76d1e38aabe1dd 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -410,12 +410,16 @@ asmlinkage long sys_newlstat(const char __user *filename, struct stat __user *statbuf); asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf); asmlinkage long sys_ustat(unsigned dev, struct ustat __user *ubuf); -#if BITS_PER_LONG == 32 +#if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64) asmlinkage long sys_stat64(const char __user *filename, struct stat64 __user *statbuf); asmlinkage long sys_fstat64(unsigned long fd, struct stat64 __user *statbuf); asmlinkage long sys_lstat64(const char __user *filename, struct stat64 __user *statbuf); +asmlinkage long sys_fstatat64(int dfd, const char __user *filename, + struct stat64 __user *statbuf, int flag); +#endif +#if BITS_PER_LONG == 32 asmlinkage long sys_truncate64(const char __user *path, loff_t length); asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length); #endif @@ -771,8 +775,6 @@ asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, umode_t mode); asmlinkage long sys_newfstatat(int dfd, const char __user *filename, struct stat __user *statbuf, int flag); -asmlinkage long sys_fstatat64(int dfd, const char __user *filename, - struct stat64 __user *statbuf, int flag); asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf, int bufsiz); asmlinkage long sys_utimensat(int dfd, const char __user *filename, From 0156411b1828771c09c92f034e7b81f702b84f07 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 6 Jan 2015 16:42:32 +0000 Subject: [PATCH 272/601] arm64: Implement the compat_sys_call_table in C Unlike the sys_call_table[], the compat one was implemented in sys32.S making it impossible to notice discrepancies between the number of compat syscalls and the __NR_compat_syscalls macro, the latter having to be defined in asm/unistd.h as including asm/unistd32.h would cause conflicts on __NR_* definitions. With this patch, incorrect __NR_compat_syscalls values will result in a build-time error. Signed-off-by: Catalin Marinas Suggested-by: Mark Rutland Acked-by: Mark Rutland --- arch/arm64/include/asm/unistd.h | 3 ++ arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/entry.S | 2 +- arch/arm64/kernel/{sys32.S => entry32.S} | 34 ++++++---------- arch/arm64/kernel/sys32.c | 51 ++++++++++++++++++++++++ 5 files changed, 68 insertions(+), 24 deletions(-) rename arch/arm64/kernel/{sys32.S => entry32.S} (80%) create mode 100644 arch/arm64/kernel/sys32.c diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index b780c6c76eec6a..159b44fff258bf 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -48,6 +48,9 @@ #endif #define __ARCH_WANT_SYS_CLONE + +#ifndef __COMPAT_SYSCALL_NR #include +#endif #define NR_syscalls (__NR_syscalls) diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 77d3d95683335c..3b3b1cd3e7f018 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -19,7 +19,7 @@ arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ cpuinfo.o cpu_errata.o alternative.o cacheinfo.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ - sys_compat.o \ + sys_compat.o entry32.o \ ../../arm/kernel/opcodes.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 02e6af11776269..cf21bb3bf7524c 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -433,7 +433,7 @@ el0_svc_compat: /* * AArch32 syscall handling */ - adr stbl, compat_sys_call_table // load compat syscall table pointer + adrp stbl, compat_sys_call_table // load compat syscall table pointer uxtw scno, w7 // syscall number in w7 (r7) mov sc_nr, #__NR_compat_syscalls b el0_svc_naked diff --git a/arch/arm64/kernel/sys32.S b/arch/arm64/kernel/entry32.S similarity index 80% rename from arch/arm64/kernel/sys32.S rename to arch/arm64/kernel/entry32.S index 423a5b3fc2be0d..9a8f6ae2530e70 100644 --- a/arch/arm64/kernel/sys32.S +++ b/arch/arm64/kernel/entry32.S @@ -27,26 +27,26 @@ * System call wrappers for the AArch32 compatibility layer. */ -compat_sys_sigreturn_wrapper: +ENTRY(compat_sys_sigreturn_wrapper) mov x0, sp mov x27, #0 // prevent syscall restart handling (why) b compat_sys_sigreturn ENDPROC(compat_sys_sigreturn_wrapper) -compat_sys_rt_sigreturn_wrapper: +ENTRY(compat_sys_rt_sigreturn_wrapper) mov x0, sp mov x27, #0 // prevent syscall restart handling (why) b compat_sys_rt_sigreturn ENDPROC(compat_sys_rt_sigreturn_wrapper) -compat_sys_statfs64_wrapper: +ENTRY(compat_sys_statfs64_wrapper) mov w3, #84 cmp w1, #88 csel w1, w3, w1, eq b compat_sys_statfs64 ENDPROC(compat_sys_statfs64_wrapper) -compat_sys_fstatfs64_wrapper: +ENTRY(compat_sys_fstatfs64_wrapper) mov w3, #84 cmp w1, #88 csel w1, w3, w1, eq @@ -58,33 +58,33 @@ ENDPROC(compat_sys_fstatfs64_wrapper) * in registers or that take 32-bit parameters which require sign * extension. */ -compat_sys_pread64_wrapper: +ENTRY(compat_sys_pread64_wrapper) regs_to_64 x3, x4, x5 b sys_pread64 ENDPROC(compat_sys_pread64_wrapper) -compat_sys_pwrite64_wrapper: +ENTRY(compat_sys_pwrite64_wrapper) regs_to_64 x3, x4, x5 b sys_pwrite64 ENDPROC(compat_sys_pwrite64_wrapper) -compat_sys_truncate64_wrapper: +ENTRY(compat_sys_truncate64_wrapper) regs_to_64 x1, x2, x3 b sys_truncate ENDPROC(compat_sys_truncate64_wrapper) -compat_sys_ftruncate64_wrapper: +ENTRY(compat_sys_ftruncate64_wrapper) regs_to_64 x1, x2, x3 b sys_ftruncate ENDPROC(compat_sys_ftruncate64_wrapper) -compat_sys_readahead_wrapper: +ENTRY(compat_sys_readahead_wrapper) regs_to_64 x1, x2, x3 mov w2, w4 b sys_readahead ENDPROC(compat_sys_readahead_wrapper) -compat_sys_fadvise64_64_wrapper: +ENTRY(compat_sys_fadvise64_64_wrapper) mov w6, w1 regs_to_64 x1, x2, x3 regs_to_64 x2, x4, x5 @@ -92,24 +92,14 @@ compat_sys_fadvise64_64_wrapper: b sys_fadvise64_64 ENDPROC(compat_sys_fadvise64_64_wrapper) -compat_sys_sync_file_range2_wrapper: +ENTRY(compat_sys_sync_file_range2_wrapper) regs_to_64 x2, x2, x3 regs_to_64 x3, x4, x5 b sys_sync_file_range2 ENDPROC(compat_sys_sync_file_range2_wrapper) -compat_sys_fallocate_wrapper: +ENTRY(compat_sys_fallocate_wrapper) regs_to_64 x2, x2, x3 regs_to_64 x3, x4, x5 b sys_fallocate ENDPROC(compat_sys_fallocate_wrapper) - -#undef __SYSCALL -#define __SYSCALL(x, y) .quad y // x - -/* - * The system calls table must be 4KB aligned. - */ - .align 12 -ENTRY(compat_sys_call_table) -#include diff --git a/arch/arm64/kernel/sys32.c b/arch/arm64/kernel/sys32.c new file mode 100644 index 00000000000000..2d5ab3c90b820f --- /dev/null +++ b/arch/arm64/kernel/sys32.c @@ -0,0 +1,51 @@ +/* + * arch/arm64/kernel/sys32.c + * + * Copyright (C) 2015 ARM Ltd. + * + * This program is free software(void); you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/* + * Needed to avoid conflicting __NR_* macros between uapi/asm/unistd.h and + * asm/unistd32.h. + */ +#define __COMPAT_SYSCALL_NR + +#include +#include + +asmlinkage long compat_sys_sigreturn_wrapper(void); +asmlinkage long compat_sys_rt_sigreturn_wrapper(void); +asmlinkage long compat_sys_statfs64_wrapper(void); +asmlinkage long compat_sys_fstatfs64_wrapper(void); +asmlinkage long compat_sys_pread64_wrapper(void); +asmlinkage long compat_sys_pwrite64_wrapper(void); +asmlinkage long compat_sys_truncate64_wrapper(void); +asmlinkage long compat_sys_ftruncate64_wrapper(void); +asmlinkage long compat_sys_readahead_wrapper(void); +asmlinkage long compat_sys_fadvise64_64_wrapper(void); +asmlinkage long compat_sys_sync_file_range2_wrapper(void); +asmlinkage long compat_sys_fallocate_wrapper(void); + +#undef __SYSCALL +#define __SYSCALL(nr, sym) [nr] = sym, + +/* + * The sys_call_table array must be 4K aligned to be accessible from + * kernel/entry.S. + */ +void * const compat_sys_call_table[__NR_compat_syscalls] __aligned(4096) = { + [0 ... __NR_compat_syscalls - 1] = sys_ni_syscall, +#include +}; From 9648606946b3b0c608846dddb30482b48a6f5c68 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 6 Jan 2015 17:01:56 +0000 Subject: [PATCH 273/601] arm64: Remove asm/syscalls.h This patch moves the sys_rt_sigreturn_wrapper prototype to arch/arm64/kernel/sys.c and removes the asm/syscalls.h header. Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/syscalls.h | 30 ------------------------------ arch/arm64/kernel/sys.c | 3 +-- 2 files changed, 1 insertion(+), 32 deletions(-) delete mode 100644 arch/arm64/include/asm/syscalls.h diff --git a/arch/arm64/include/asm/syscalls.h b/arch/arm64/include/asm/syscalls.h deleted file mode 100644 index 48fe7c600e987b..00000000000000 --- a/arch/arm64/include/asm/syscalls.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef __ASM_SYSCALLS_H -#define __ASM_SYSCALLS_H - -#include -#include -#include - -/* - * System call wrappers implemented in kernel/entry.S. - */ -asmlinkage long sys_rt_sigreturn_wrapper(void); - -#include - -#endif /* __ASM_SYSCALLS_H */ diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index 3fa98ff14f0eea..dec351a9f3d666 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -39,10 +39,9 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len, /* * Wrappers to pass the pt_regs argument. */ +asmlinkage long sys_rt_sigreturn_wrapper(void); #define sys_rt_sigreturn sys_rt_sigreturn_wrapper -#include - #undef __SYSCALL #define __SYSCALL(nr, sym) [nr] = sym, From c623b33b4e9599c6ac5076f7db7369eb9869aa04 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 8 Jan 2015 11:42:59 +0000 Subject: [PATCH 274/601] arm64: make sys_call_table const As with x86, mark the sys_call_table const such that it will be placed in the .rodata section. This will cause attempts to modify the table (accidental or deliberate) to fail when strict page permissions are in place. In the absence of strict page permissions, there should be no functional change. Signed-off-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/sys.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index dec351a9f3d666..75151aaf1a5202 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -49,7 +49,7 @@ asmlinkage long sys_rt_sigreturn_wrapper(void); * The sys_call_table array must be 4K aligned to be accessible from * kernel/entry.S. */ -void *sys_call_table[__NR_syscalls] __aligned(4096) = { +void * const sys_call_table[__NR_syscalls] __aligned(4096) = { [0 ... __NR_syscalls - 1] = sys_ni_syscall, #include }; From af3cfdbf56b91785650f54e7c9a899d814b4b9fb Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 26 Jan 2015 18:33:44 +0000 Subject: [PATCH 275/601] arm64: kernel: remove ARM64_CPU_SUSPEND config option ARM64_CPU_SUSPEND config option was introduced to make code providing context save/restore selectable only on platforms requiring power management capabilities. Currently ARM64_CPU_SUSPEND depends on the PM_SLEEP config option which in turn is set by the SUSPEND config option. The introduction of CPU_IDLE for arm64 requires that code configured by ARM64_CPU_SUSPEND (context save/restore) should be compiled in in order to enable the CPU idle driver to rely on CPU operations carrying out context save/restore. The ARM64_CPUIDLE config option (ARM64 generic idle driver) is therefore forced to select ARM64_CPU_SUSPEND, even if there may be (ie PM_SLEEP) failed dependencies, which is not a clean way of handling the kernel configuration option. For these reasons, this patch removes the ARM64_CPU_SUSPEND config option and makes the context save/restore dependent on CPU_PM, which is selected whenever either SUSPEND or CPU_IDLE are configured, cleaning up dependencies in the process. This way, code previously configured through ARM64_CPU_SUSPEND is compiled in whenever a power management subsystem requires it to be present in the kernel (SUSPEND || CPU_IDLE), which is the behaviour expected on ARM64 kernels. The cpu_suspend and cpu_init_idle CPU operations are added only if CPU_IDLE is selected, since they are CPU_IDLE specific methods and should be grouped and defined accordingly. PSCI CPU operations are updated to reflect the introduced changes. Signed-off-by: Lorenzo Pieralisi Cc: Arnd Bergmann Cc: Will Deacon Cc: Krzysztof Kozlowski Cc: Daniel Lezcano Cc: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 3 --- arch/arm64/include/asm/cpu_ops.h | 8 ++++---- arch/arm64/include/asm/cpuidle.h | 6 ++++++ arch/arm64/include/asm/suspend.h | 2 -- arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/asm-offsets.c | 2 +- arch/arm64/kernel/cpuidle.c | 20 ++++++++++++++++++++ arch/arm64/kernel/hw_breakpoint.c | 2 +- arch/arm64/kernel/psci.c | 2 -- arch/arm64/kernel/suspend.c | 21 --------------------- arch/arm64/mm/proc.S | 2 +- drivers/cpuidle/Kconfig.arm64 | 1 - drivers/cpuidle/cpuidle-arm64.c | 1 - 13 files changed, 34 insertions(+), 38 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 21a59bf37145e7..d3f7e49412316f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -642,9 +642,6 @@ source "kernel/power/Kconfig" config ARCH_SUSPEND_POSSIBLE def_bool y -config ARM64_CPU_SUSPEND - def_bool PM_SLEEP - endmenu menu "CPU Power Management" diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h index 6f8e2ef9094a78..da301ee7395c85 100644 --- a/arch/arm64/include/asm/cpu_ops.h +++ b/arch/arm64/include/asm/cpu_ops.h @@ -28,8 +28,6 @@ struct device_node; * enable-method property. * @cpu_init: Reads any data necessary for a specific enable-method from the * devicetree, for a given cpu node and proposed logical id. - * @cpu_init_idle: Reads any data necessary to initialize CPU idle states from - * devicetree, for a given cpu node and proposed logical id. * @cpu_prepare: Early one-time preparation step for a cpu. If there is a * mechanism for doing so, tests whether it is possible to boot * the given CPU. @@ -42,6 +40,8 @@ struct device_node; * @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the * cpu being killed. * @cpu_kill: Ensures a cpu has left the kernel. Called from another cpu. + * @cpu_init_idle: Reads any data necessary to initialize CPU idle states from + * devicetree, for a given cpu node and proposed logical id. * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing * to wrong parameters or error conditions. Called from the * CPU being suspended. Must be called with IRQs disabled. @@ -49,7 +49,6 @@ struct device_node; struct cpu_operations { const char *name; int (*cpu_init)(struct device_node *, unsigned int); - int (*cpu_init_idle)(struct device_node *, unsigned int); int (*cpu_prepare)(unsigned int); int (*cpu_boot)(unsigned int); void (*cpu_postboot)(void); @@ -58,7 +57,8 @@ struct cpu_operations { void (*cpu_die)(unsigned int cpu); int (*cpu_kill)(unsigned int cpu); #endif -#ifdef CONFIG_ARM64_CPU_SUSPEND +#ifdef CONFIG_CPU_IDLE + int (*cpu_init_idle)(struct device_node *, unsigned int); int (*cpu_suspend)(unsigned long); #endif }; diff --git a/arch/arm64/include/asm/cpuidle.h b/arch/arm64/include/asm/cpuidle.h index b52a9932e2b1fa..0710654631e789 100644 --- a/arch/arm64/include/asm/cpuidle.h +++ b/arch/arm64/include/asm/cpuidle.h @@ -3,11 +3,17 @@ #ifdef CONFIG_CPU_IDLE extern int cpu_init_idle(unsigned int cpu); +extern int cpu_suspend(unsigned long arg); #else static inline int cpu_init_idle(unsigned int cpu) { return -EOPNOTSUPP; } + +static inline int cpu_suspend(unsigned long arg) +{ + return -EOPNOTSUPP; +} #endif #endif diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index 456d67c1f0fac2..003802f5896336 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -23,6 +23,4 @@ struct sleep_save_sp { extern int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long)); extern void cpu_resume(void); -extern int cpu_suspend(unsigned long); - #endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 3b3b1cd3e7f018..bef04afd603190 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -27,7 +27,7 @@ arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o topology.o arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o -arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o +arm64-obj-$(CONFIG_CPU_PM) += sleep.o suspend.o arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o arm64-obj-$(CONFIG_KGDB) += kgdb.o diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 9a9fce090d58fd..a2ae19403abb05 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -152,7 +152,7 @@ int main(void) DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); #endif -#ifdef CONFIG_ARM64_CPU_SUSPEND +#ifdef CONFIG_CPU_PM DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx)); DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask)); diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 19d17f51db37a7..5c0896647fd14e 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -29,3 +29,23 @@ int cpu_init_idle(unsigned int cpu) of_node_put(cpu_node); return ret; } + +/** + * cpu_suspend() - function to enter a low-power idle state + * @arg: argument to pass to CPU suspend operations + * + * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU + * operations back-end error code otherwise. + */ +int cpu_suspend(unsigned long arg) +{ + int cpu = smp_processor_id(); + + /* + * If cpu_ops have not been registered or suspend + * has not been initialized, cpu_suspend call fails early. + */ + if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend) + return -EOPNOTSUPP; + return cpu_ops[cpu]->cpu_suspend(arg); +} diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index df1cf15377b44c..98bbe06e469c91 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -894,7 +894,7 @@ static struct notifier_block hw_breakpoint_reset_nb = { .notifier_call = hw_breakpoint_reset_notify, }; -#ifdef CONFIG_ARM64_CPU_SUSPEND +#ifdef CONFIG_CPU_PM extern void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)); #else static inline void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index f1dbca7d5c96cd..3425f311c49ed9 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -540,8 +540,6 @@ const struct cpu_operations cpu_psci_ops = { .name = "psci", #ifdef CONFIG_CPU_IDLE .cpu_init_idle = cpu_psci_cpu_init_idle, -#endif -#ifdef CONFIG_ARM64_CPU_SUSPEND .cpu_suspend = cpu_psci_cpu_suspend, #endif #ifdef CONFIG_SMP diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 2d6b6065fe7f4e..d7daf45ae7a253 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -1,7 +1,6 @@ #include #include #include -#include #include #include #include @@ -51,26 +50,6 @@ void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) hw_breakpoint_restore = hw_bp_restore; } -/** - * cpu_suspend() - function to enter a low-power state - * @arg: argument to pass to CPU suspend operations - * - * Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU - * operations back-end error code otherwise. - */ -int cpu_suspend(unsigned long arg) -{ - int cpu = smp_processor_id(); - - /* - * If cpu_ops have not been registered or suspend - * has not been initialized, cpu_suspend call fails early. - */ - if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend) - return -EOPNOTSUPP; - return cpu_ops[cpu]->cpu_suspend(arg); -} - /* * __cpu_suspend * diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index b98fc8ce6a161b..28eebfb6af763d 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -102,7 +102,7 @@ ENTRY(cpu_do_idle) ret ENDPROC(cpu_do_idle) -#ifdef CONFIG_ARM64_CPU_SUSPEND +#ifdef CONFIG_CPU_PM /** * cpu_do_suspend - save CPU registers context * diff --git a/drivers/cpuidle/Kconfig.arm64 b/drivers/cpuidle/Kconfig.arm64 index d0a08ed1b2ee62..6effb3656735b9 100644 --- a/drivers/cpuidle/Kconfig.arm64 +++ b/drivers/cpuidle/Kconfig.arm64 @@ -4,7 +4,6 @@ config ARM64_CPUIDLE bool "Generic ARM64 CPU idle Driver" - select ARM64_CPU_SUSPEND select DT_IDLE_STATES help Select this to enable generic cpuidle driver for ARM64. diff --git a/drivers/cpuidle/cpuidle-arm64.c b/drivers/cpuidle/cpuidle-arm64.c index 80704b931ba48e..39a2c62716c3b4 100644 --- a/drivers/cpuidle/cpuidle-arm64.c +++ b/drivers/cpuidle/cpuidle-arm64.c @@ -19,7 +19,6 @@ #include #include -#include #include "dt_idle_states.h" From 62fa5e20bd73385253d2b8d3a51f80d1e6d2e1c6 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 27 Jan 2015 18:25:25 +0000 Subject: [PATCH 276/601] arm64: Enable CPU_IDLE in defconfig This patch enables CPU_IDLE and the generic arm64 cpuidle driver (ARM64_CPUIDLE). Signed-off-by: Catalin Marinas --- arch/arm64/configs/defconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 5376d908eabedb..66b6cacc32514f 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -45,6 +45,8 @@ CONFIG_CMA=y CONFIG_CMDLINE="console=ttyAMA0" # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y +CONFIG_CPU_IDLE=y +CONFIG_ARM64_CPUIDLE=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y From b53486e08377506179e5da0abe8b960dba5c483a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 27 Jan 2015 17:02:02 -0500 Subject: [PATCH 277/601] ktest: Add timings for commands I find that I usually like to see how long a make or other command takes, and adding a start and end time and reporting how long each command runs (in seconds) is helpful. Signed-off-by: Steven Rostedt --- tools/testing/ktest/ktest.pl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index b9cd036f0442e8..27273c228d92a7 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1534,10 +1534,14 @@ sub fail { sub run_command { my ($command, $redirect) = @_; + my $start_time; + my $end_time; my $dolog = 0; my $dord = 0; my $pid; + $start_time = time; + $command =~ s/\$SSH_USER/$ssh_user/g; $command =~ s/\$MACHINE/$machine/g; @@ -1570,6 +1574,15 @@ sub run_command { close(LOG) if ($dolog); close(RD) if ($dord); + $end_time = time; + my $delta = $end_time - $start_time; + + if ($delta == 1) { + doprint "[1 second] "; + } else { + doprint "[$delta seconds] "; + } + if ($failed) { doprint "FAILED!\n"; } else { From 988427829bcd230c78106d28dbfa85d45d182909 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 27 Jan 2015 12:10:04 -0600 Subject: [PATCH 278/601] ktest: Restore tty settings after closing console When ktest runs the console program as a child process, the parent and child share the same tty for stdin and stderr. This is problematic when using a libvirt target. The "virsh console" program makes a lot of changes to the tty settings, making ktest's output hard to read (carriage returns don't work). After ktest exits, the terminal is unusable (CRs broken, stdin isn't echoed). I think the best way to fix this issue would be to create a pseudoterminal (pty pair) so the child process would have a dedicated tty, and then use pipes to connect the two ttys. I'm not sure if that's overkill, but it's far beyond my current Perl abilities. This patch is a much easier way to (partially) fix this issue. It saves the tty settings before opening the console and restores them after closing it. There are still a few places where ktest prints mangled output while the console is open, but the output is much more legible overall, and the terminal works just fine after ktest exits. Link: http://lkml.kernel.org/r/1bb89abc0025cf1d6da657c7ba58bbeb4381a515.1422382008.git.jpoimboe@redhat.com Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt --- tools/testing/ktest/ktest.pl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 27273c228d92a7..1dae000f79f87e 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -178,6 +178,7 @@ my $localversion; my $iteration = 0; my $successes = 0; +my $stty; my $bisect_good; my $bisect_bad; @@ -1349,6 +1350,9 @@ sub open_console { my $flags; + # save terminal settings + $stty = `stty -g`; + my $pid = open($fp, "$console|") or dodie "Can't open console $console"; @@ -1368,6 +1372,9 @@ sub close_console { print "closing!\n"; close($fp); + + # restore terminal settings + system("stty $stty"); } sub start_monitor { From 69a1c994cc540cc84469acf9952f72b899b38e8b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 27 Jan 2015 17:17:20 +0100 Subject: [PATCH 279/601] tracing: Remove newline from trace_printk warning banner Remove the output-confusing newline below: [ 0.191328] ********************************************************** [ 0.191493] ** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE ** [ 0.191586] ** ** ... Link: http://lkml.kernel.org/r/1422375440-31970-1-git-send-email-bp@alien8.de Signed-off-by: Borislav Petkov [ added an extra '\n' by itself, to keep what it was suppose to do ] Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index acd27555dc5be8..f82e53b0e5a7ca 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2036,7 +2036,8 @@ void trace_printk_init_buffers(void) /* trace_printk() is for debug use only. Don't use it in production. */ - pr_warning("\n**********************************************************\n"); + pr_warning("\n"); + pr_warning("**********************************************************\n"); pr_warning("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); pr_warning("** **\n"); pr_warning("** trace_printk() being used. Allocating extra memory. **\n"); From ce614c3c6c8d215c9a00d6c1fce70211f75d3310 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Mon, 26 Jan 2015 13:22:22 -0600 Subject: [PATCH 280/601] powerpc/mm: fix undefined reference to `.__kernel_map_pages' on FSL PPC64 arch/powerpc has __kernel_map_pages implementations in mm/pgtable_32.c, and mm/hash_utils_64.c, of which the former is built for PPC32, and the latter for PPC64 machines with PPC_STD_MMU. Fix arch/powerpc/Kconfig to not select ARCH_SUPPORTS_DEBUG_PAGEALLOC when CONFIG_PPC_STD_MMU_64 isn't defined, i.e., for 64-bit book3e builds to use the generic __kernel_map_pages() in mm/debug-pagealloc.c. LD init/built-in.o mm/built-in.o: In function `kernel_map_pages': include/linux/mm.h:2076: undefined reference to `.__kernel_map_pages' include/linux/mm.h:2076: undefined reference to `.__kernel_map_pages' include/linux/mm.h:2076: undefined reference to `.__kernel_map_pages' Makefile:925: recipe for target 'vmlinux' failed make: *** [vmlinux] Error 1 Signed-off-by: Kim Phillips Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a2a168e2dfe757..22b0940494bb50 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -256,6 +256,7 @@ config PPC_OF_PLATFORM_PCI default n config ARCH_SUPPORTS_DEBUG_PAGEALLOC + depends on PPC32 || PPC_STD_MMU_64 def_bool y config ARCH_SUPPORTS_UPROBES From 564ec2f2a06ffd23b9f99a4a2bd3af1fc79774f4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 22 Dec 2014 15:14:24 +1100 Subject: [PATCH 281/601] powerpc/lib: Makefile, consolidate obj-y sections Signed-off-by: Michael Ellerman --- arch/powerpc/lib/Makefile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 5526156aae5f91..31e1b34c0722f7 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -9,8 +9,9 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) CFLAGS_REMOVE_code-patching.o = -pg CFLAGS_REMOVE_feature-fixups.o = -pg -obj-y := string.o alloc.o \ - crtsavres.o ppc_ksyms.o +obj-y += string.o alloc.o crtsavres.o ppc_ksyms.o code-patching.o \ + feature-fixups.o + obj-$(CONFIG_PPC32) += div64.o copy_32.o obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \ @@ -33,8 +34,6 @@ endif obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o -obj-y += code-patching.o -obj-y += feature-fixups.o obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o obj-$(CONFIG_ALTIVEC) += xor_vmx.o From 1dcee55feadfef92664ac941d799e62668c1339b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 22 Dec 2014 15:18:43 +1100 Subject: [PATCH 282/601] powerpc/lib: Makefile, use obj64-y to consolidate 64-bit rules Signed-off-by: Michael Ellerman --- arch/powerpc/lib/Makefile | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 31e1b34c0722f7..7902802a19a56a 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -14,27 +14,25 @@ obj-y += string.o alloc.o crtsavres.o ppc_ksyms.o code-patching.o \ obj-$(CONFIG_PPC32) += div64.o copy_32.o -obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \ - usercopy_64.o mem_64.o hweight_64.o \ - copyuser_power7.o string_64.o copypage_power7.o \ - memcmp_64.o +obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \ + copyuser_power7.o string_64.o copypage_power7.o memcpy_power7.o \ + memcpy_64.o memcmp_64.o + +obj64-$(CONFIG_SMP) += locks.o +obj64-$(CONFIG_ALTIVEC) += vmx-helper.o + ifeq ($(CONFIG_GENERIC_CSUM),) obj-y += checksum_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_PPC64) += checksum_wrappers_64.o endif -obj-$(CONFIG_PPC64) += memcpy_power7.o memcpy_64.o - obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o -ifeq ($(CONFIG_PPC64),y) -obj-$(CONFIG_SMP) += locks.o -obj-$(CONFIG_ALTIVEC) += vmx-helper.o -endif - obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o obj-$(CONFIG_ALTIVEC) += xor_vmx.o CFLAGS_xor_vmx.o += -maltivec -mabi=altivec + +obj-$(CONFIG_PPC64) += $(obj64-y) From 8aa989b8fba1428b50a1be771c01285f1de0227b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 27 Jan 2015 16:48:03 +1100 Subject: [PATCH 283/601] powerpc: Remove some unused functions Remove slice_set_psize() which is not used. It was added in 3a8247cc2c85 "powerpc: Only demote individual slices rather than whole process" but was never used. Remove vsx_assist_exception() which is not used. It was added in ce48b2100785 "powerpc: Add VSX context save/restore, ptrace and signal support" but was never used. Remove generic_mach_cpu_die() which is not used. Its last caller was removed in 375f561a4131 "powerpc/powernv: Always go into nap mode when CPU is offline". Remove mpc7448_hpc2_power_off() and mpc7448_hpc2_halt() which are unused. These were introduced in c5d56332fd6c "[POWERPC] Add general support for mpc7448hpc2 (Taiga) platform" but were never used. This was partially found by using a static code analysis program called cppcheck. Signed-off-by: Rickard Strandqvist [mpe: Update changelog with details on when/why they are unused] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/smp.h | 1 - arch/powerpc/kernel/smp.c | 14 --------- arch/powerpc/kernel/traps.c | 15 ---------- arch/powerpc/mm/slice.c | 29 ------------------- .../platforms/embedded6xx/mpc7448_hpc2.c | 11 ------- 5 files changed, 70 deletions(-) diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 5a6614a7f0b23b..d607df5081a7c9 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -64,7 +64,6 @@ DECLARE_PER_CPU(unsigned int, cpu_pvr); extern void migrate_irqs(void); int generic_cpu_disable(void); void generic_cpu_die(unsigned int cpu); -void generic_mach_cpu_die(void); void generic_set_cpu_dead(unsigned int cpu); void generic_set_cpu_up(unsigned int cpu); int generic_check_cpu_restart(unsigned int cpu); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 8ec017cb444619..1cc4bdce19f3d7 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -434,20 +434,6 @@ void generic_cpu_die(unsigned int cpu) printk(KERN_ERR "CPU%d didn't die...\n", cpu); } -void generic_mach_cpu_die(void) -{ - unsigned int cpu; - - local_irq_disable(); - idle_task_exit(); - cpu = smp_processor_id(); - printk(KERN_DEBUG "CPU%d offline\n", cpu); - __this_cpu_write(cpu_state, CPU_DEAD); - smp_wmb(); - while (__this_cpu_read(cpu_state) != CPU_UP_PREPARE) - cpu_relax(); -} - void generic_set_cpu_dead(unsigned int cpu) { per_cpu(cpu_state, cpu) = CPU_DEAD; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index e6595b72269b5c..19e4744b6eba2a 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1707,21 +1707,6 @@ void altivec_assist_exception(struct pt_regs *regs) } #endif /* CONFIG_ALTIVEC */ -#ifdef CONFIG_VSX -void vsx_assist_exception(struct pt_regs *regs) -{ - if (!user_mode(regs)) { - printk(KERN_EMERG "VSX assist exception in kernel mode" - " at %lx\n", regs->nip); - die("Kernel VSX assist exception", regs, SIGILL); - } - - flush_vsx_to_thread(current); - printk(KERN_INFO "VSX assist not supported at %lx\n", regs->nip); - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); -} -#endif /* CONFIG_VSX */ - #ifdef CONFIG_FSL_BOOKE void CacheLockingException(struct pt_regs *regs, unsigned long address, unsigned long error_code) diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index ded0ea1afde402..0f432a702870fa 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -645,35 +645,6 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize) spin_unlock_irqrestore(&slice_convert_lock, flags); } -void slice_set_psize(struct mm_struct *mm, unsigned long address, - unsigned int psize) -{ - unsigned char *hpsizes; - unsigned long i, flags; - u64 *lpsizes; - - spin_lock_irqsave(&slice_convert_lock, flags); - if (address < SLICE_LOW_TOP) { - i = GET_LOW_SLICE_INDEX(address); - lpsizes = &mm->context.low_slices_psize; - *lpsizes = (*lpsizes & ~(0xful << (i * 4))) | - ((unsigned long) psize << (i * 4)); - } else { - int index, mask_index; - i = GET_HIGH_SLICE_INDEX(address); - hpsizes = mm->context.high_slices_psize; - mask_index = i & 0x1; - index = i >> 1; - hpsizes[index] = (hpsizes[index] & - ~(0xf << (mask_index * 4))) | - (((unsigned long)psize) << (mask_index * 4)); - } - - spin_unlock_irqrestore(&slice_convert_lock, flags); - - copro_flush_all_slbs(mm); -} - void slice_set_range_psize(struct mm_struct *mm, unsigned long start, unsigned long len, unsigned int psize) { diff --git a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c index beeaf4a173e15a..df4ad95f183e12 100644 --- a/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c +++ b/arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c @@ -156,17 +156,6 @@ void mpc7448_hpc2_restart(char *cmd) for (;;) ; /* Spin until reset happens */ } -void mpc7448_hpc2_power_off(void) -{ - local_irq_disable(); - for (;;) ; /* No way to shut power off with software */ -} - -void mpc7448_hpc2_halt(void) -{ - mpc7448_hpc2_power_off(); -} - /* * Called very early, device-tree isn't unflattened */ From 08135139430fabdeaa990da8a9e0d436aad0672b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 28 Jan 2015 15:10:33 +1100 Subject: [PATCH 284/601] powerpc/powernv: Remove "opal" prefix from pr_xxx()s In commit c8742f85125d "powerpc/powernv: Expose OPAL firmware symbol map" I added pr_fmt() to opal.c. This left some existing pr_xxx()s with duplicate "opal" prefixes, eg: opal: opal: Found 0 interrupts reserved for OPAL Fix them all up. Also make the "Not not found" message a bit more verbose. Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 1db119f0f4ee54..8f5bbfae9c32bc 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -208,7 +208,7 @@ static int __init opal_register_exception_handlers(void) * start catching/handling HMI directly in Linux. */ if (!opal_check_token(OPAL_HANDLE_HMI)) { - pr_info("opal: Old firmware detected, OPAL handles HMIs.\n"); + pr_info("Old firmware detected, OPAL handles HMIs.\n"); opal_register_exception_handler( OPAL_HYPERVISOR_MAINTENANCE_HANDLER, 0, glue); @@ -709,7 +709,7 @@ static int __init opal_init(void) opal_node = of_find_node_by_path("/ibm,opal"); if (!opal_node) { - pr_warn("opal: Node not found\n"); + pr_warn("Device node not found\n"); return -ENODEV; } @@ -732,7 +732,7 @@ static int __init opal_init(void) /* Find all OPAL interrupts and request them */ irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); - pr_debug("opal: Found %d interrupts reserved for OPAL\n", + pr_debug("Found %d interrupts reserved for OPAL\n", irqs ? (irqlen / 4) : 0); opal_irq_count = irqlen / 4; opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL); @@ -740,13 +740,13 @@ static int __init opal_init(void) unsigned int hwirq = be32_to_cpup(irqs); unsigned int irq = irq_create_mapping(NULL, hwirq); if (irq == NO_IRQ) { - pr_warning("opal: Failed to map irq 0x%x\n", hwirq); + pr_warning("Failed to map irq 0x%x\n", hwirq); continue; } rc = request_irq(irq, opal_interrupt, 0, "opal", NULL); if (rc) - pr_warning("opal: Error %d requesting irq %d" - " (0x%x)\n", rc, irq, hwirq); + pr_warning("Error %d requesting irq %d (0x%x)\n", + rc, irq, hwirq); opal_irqs[i] = irq; } From c1c3a526bb4ddbec7639a9fb3b84fede25b201d9 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 23 Jan 2015 14:25:05 +1100 Subject: [PATCH 285/601] powerpc/powernv: Separate function for OPAL IRQ setup The patch put the OPAL interrupt setup logic in opal_init() into seperate function opal_irq_init() for easier code maintaining. The patch doesn't introduce logic changes except: * Rename variable names. * Release virtual IRQ upon error from request_irq(). * Don't cache the virtual IRQ to opal_irqs[] upon error from request_irq(). Suggested-by: Michael Ellerman Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal.c | 61 ++++++++++++++++++--------- 1 file changed, 41 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 8f5bbfae9c32bc..933c7fbd6b54d0 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -701,11 +701,49 @@ static void opal_i2c_create_devs(void) of_platform_device_create(np, NULL, NULL); } +static void __init opal_irq_init(struct device_node *dn) +{ + const __be32 *irqs; + int i, irqlen; + + /* Get interrupt property */ + irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); + pr_debug("Found %d interrupts reserved for OPAL\n", + irqs ? (irqlen / 4) : 0); + + /* Install interrupt handlers */ + opal_irq_count = irqlen / 4; + opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL); + for (i = 0; irqs && i < opal_irq_count; i++, irqs++) { + unsigned int irq, virq; + int rc; + + /* Get hardware and virtual IRQ */ + irq = be32_to_cpup(irqs); + virq = irq_create_mapping(NULL, irq); + if (virq == NO_IRQ) { + pr_warn("Failed to map irq 0x%x\n", irq); + continue; + } + + /* Install interrupt handler */ + rc = request_irq(virq, opal_interrupt, 0, "opal", NULL); + if (rc) { + irq_dispose_mapping(virq); + pr_warn("Error %d requesting irq %d (0x%x)\n", + rc, virq, irq); + continue; + } + + /* Cache IRQ */ + opal_irqs[i] = virq; + } +} + static int __init opal_init(void) { struct device_node *np, *consoles; - const __be32 *irqs; - int rc, i, irqlen; + int rc; opal_node = of_find_node_by_path("/ibm,opal"); if (!opal_node) { @@ -731,24 +769,7 @@ static int __init opal_init(void) opal_i2c_create_devs(); /* Find all OPAL interrupts and request them */ - irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); - pr_debug("Found %d interrupts reserved for OPAL\n", - irqs ? (irqlen / 4) : 0); - opal_irq_count = irqlen / 4; - opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL); - for (i = 0; irqs && i < (irqlen / 4); i++, irqs++) { - unsigned int hwirq = be32_to_cpup(irqs); - unsigned int irq = irq_create_mapping(NULL, hwirq); - if (irq == NO_IRQ) { - pr_warning("Failed to map irq 0x%x\n", hwirq); - continue; - } - rc = request_irq(irq, opal_interrupt, 0, "opal", NULL); - if (rc) - pr_warning("Error %d requesting irq %d (0x%x)\n", - rc, irq, hwirq); - opal_irqs[i] = irq; - } + opal_irq_init(opal_node); /* Create "opal" kobject under /sys/firmware */ rc = opal_sysfs_init(); From 31494cf3532cfee0bf5c913ac9962971aab7b1d4 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 23 Jan 2015 14:25:06 +1100 Subject: [PATCH 286/601] powerpc/powernv: Don't alloc IRQ map if necessary On PowerNV platform, the OPAL interrupts are exported by firmware through device-node property (/ibm,opal::opal-interrupts). Under some extreme circumstances (e.g. simulator), we don't have this property found from the device tree. For that case, we shouldn't allocate the interrupt map. Otherwise, slab complains allocating zero sized memory chunk. Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 933c7fbd6b54d0..18fd4e71c9c13c 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -708,11 +708,12 @@ static void __init opal_irq_init(struct device_node *dn) /* Get interrupt property */ irqs = of_get_property(opal_node, "opal-interrupts", &irqlen); - pr_debug("Found %d interrupts reserved for OPAL\n", - irqs ? (irqlen / 4) : 0); + opal_irq_count = irqs ? (irqlen / 4) : 0; + pr_debug("Found %d interrupts reserved for OPAL\n", opal_irq_count); + if (!opal_irq_count) + return; /* Install interrupt handlers */ - opal_irq_count = irqlen / 4; opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL); for (i = 0; irqs && i < opal_irq_count; i++, irqs++) { unsigned int irq, virq; From 523d6e9fae9333a0e2a7baf4d11c8bcca544790e Mon Sep 17 00:00:00 2001 From: "zhichang.yuan" Date: Tue, 9 Dec 2014 07:26:47 +0000 Subject: [PATCH 287/601] arm64:mm: free the useless initial page table For 64K page system, after mapping a PMD section, the corresponding initial page table is not needed any more. That page can be freed. Signed-off-by: Zhichang Yuan [catalin.marinas@arm.com: added BUG_ON() to catch late memblock freeing] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 3 +++ arch/arm64/mm/mmu.c | 15 ++++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 59079248529d20..67f6ede394741f 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -342,9 +342,12 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, #ifdef CONFIG_ARM64_64K_PAGES #define pud_sect(pud) (0) +#define pud_table(pud) (1) #else #define pud_sect(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \ PUD_TYPE_SECT) +#define pud_table(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \ + PUD_TYPE_TABLE) #endif static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 155cbb0a74b601..eb293febfb5603 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -153,8 +153,14 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, * Check for previous table entries created during * boot (__create_page_tables) and flush them. */ - if (!pmd_none(old_pmd)) + if (!pmd_none(old_pmd)) { flush_tlb_all(); + if (pmd_table(old_pmd)) { + phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0)); + BUG_ON(alloc != early_alloc); + memblock_free(table, PAGE_SIZE); + } + } } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), prot, alloc); @@ -209,9 +215,12 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, * Look up the old pmd table and free it. */ if (!pud_none(old_pud)) { - phys_addr_t table = __pa(pmd_offset(&old_pud, 0)); - memblock_free(table, PAGE_SIZE); flush_tlb_all(); + if (pud_table(old_pud)) { + phys_addr_t table = __pa(pmd_offset(&old_pud, 0)); + BUG_ON(alloc != early_alloc); + memblock_free(table, PAGE_SIZE); + } } } else { alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc); From a3bba370c227b6cbc480aa8959bb32cf966d79a6 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 27 Jan 2015 16:52:50 +0000 Subject: [PATCH 288/601] arm64: drop unnecessary cache+tlb maintenance In paging_init, we call flush_cache_all, but this is backed by Set/Way operations which may not achieve anything in the presence of cache line migration and/or system caches. If the caches are already in an inconsistent state at this point, there is nothing we can do (short of flushing the entire physical address space by VA) to empty architected and system caches. As such, flush_cache_all only serves to mask other potential bugs. Hence, this patch removes the boot-time call to flush_cache_all. Immediately after the cache maintenance we flush the TLBs, but this is also unnecessary. Before enabling the MMU, the TLBs are invalidated, and thus are initially clean. When changing the contents of active tables (e.g. in fixup_executable() for DEBUG_RODATA) we perform the required TLB maintenance following the update, and therefore no additional maintenance is required to ensure the new table entries are in effect. Since activating the MMU we will not have modified system register fields permitted to be cached in a TLB, and therefore do not need maintenance for any cached system register fields. Hence, the TLB flush is unnecessary. Shortly after the unnecessary TLB flush, we update TTBR0 to point to an empty zero page rather than the idmap, and flush the TLBs. This maintenance is necessary to remove the global idmap entries from the TLBs (as they would conflict with userspace mappings), and is retained. Signed-off-by: Mark Rutland Acked-by: Marc Zyngier Acked-by: Steve Capper Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index eb293febfb5603..a421f535d35178 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -438,13 +438,6 @@ void __init paging_init(void) map_mem(); fixup_executable(); - /* - * Finally flush the caches and tlb to ensure that we're in a - * consistent state. - */ - flush_cache_all(); - flush_tlb_all(); - /* allocate the zero page. */ zero_page = early_alloc(PAGE_SIZE); From a1c76574f345342d23836b520ce44674d23bc267 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 27 Jan 2015 16:36:30 +0000 Subject: [PATCH 289/601] arm64: mm: use *_sect to check for section maps The {pgd,pud,pmd}_bad family of macros have slightly fuzzy cross-architecture semantics, and seem to imply a populated entry that is not a next-level table, rather than a particular type of entry (e.g. a section map). In arm64 code, for those cases where we care about whether an entry is a section mapping, we can instead use the {pud,pmd}_sect macros to explicitly check for this case. This helps to document precisely what we care about, making the code easier to read, and allows for future relaxation of the *_bad macros to check for other "bad" entries. To that end this patch updates the table dumping and initial table setup to check for section mappings with {pud,pmd}_sect, and adds/restores BUG_ON(*_bad((*p)) checks after we've handled the *_sect and *_none cases so as to catch remaining "bad" cases. In the fault handling code, show_pte is left with *_bad checks as it only cares about whether it can walk the next level table, and this path is used for both kernel and userspace fault handling. The former case will be followed by a die() where we'll report the address that triggered the fault, which can be useful context for debugging. Signed-off-by: Mark Rutland Acked-by: Steve Capper Cc: Ard Biesheuvel Cc: Kees Cook Cc: Laura Abbott Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/dump.c | 18 ++++++++++++------ arch/arm64/mm/mmu.c | 6 ++++-- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index bbc5a29ecaaf79..6129653754736c 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -249,10 +249,12 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) for (i = 0; i < PTRS_PER_PMD; i++, pmd++) { addr = start + i * PMD_SIZE; - if (pmd_none(*pmd) || pmd_sect(*pmd) || pmd_bad(*pmd)) + if (pmd_none(*pmd) || pmd_sect(*pmd)) { note_page(st, addr, 3, pmd_val(*pmd)); - else + } else { + BUG_ON(pmd_bad(*pmd)); walk_pte(st, pmd, addr); + } } } @@ -264,10 +266,12 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) for (i = 0; i < PTRS_PER_PUD; i++, pud++) { addr = start + i * PUD_SIZE; - if (pud_none(*pud) || pud_sect(*pud) || pud_bad(*pud)) + if (pud_none(*pud) || pud_sect(*pud)) { note_page(st, addr, 2, pud_val(*pud)); - else + } else { + BUG_ON(pud_bad(*pud)); walk_pmd(st, pud, addr); + } } } @@ -279,10 +283,12 @@ static void walk_pgd(struct pg_state *st, struct mm_struct *mm, unsigned long st for (i = 0; i < PTRS_PER_PGD; i++, pgd++) { addr = start + i * PGDIR_SIZE; - if (pgd_none(*pgd) || pgd_bad(*pgd)) + if (pgd_none(*pgd)) { note_page(st, addr, 1, pgd_val(*pgd)); - else + } else { + BUG_ON(pgd_bad(*pgd)); walk_pud(st, pgd, addr); + } } } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index a421f535d35178..2eeac10aa0cf1c 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -90,13 +90,14 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr, { pte_t *pte; - if (pmd_none(*pmd) || pmd_bad(*pmd)) { + if (pmd_none(*pmd) || pmd_sect(*pmd)) { pte = alloc(PTRS_PER_PTE * sizeof(pte_t)); if (pmd_sect(*pmd)) split_pmd(pmd, pte); __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE); flush_tlb_all(); } + BUG_ON(pmd_bad(*pmd)); pte = pte_offset_kernel(pmd, addr); do { @@ -128,7 +129,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, /* * Check for initial section mappings in the pgd/pud and remove them. */ - if (pud_none(*pud) || pud_bad(*pud)) { + if (pud_none(*pud) || pud_sect(*pud)) { pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t)); if (pud_sect(*pud)) { /* @@ -140,6 +141,7 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, pud_populate(mm, pud, pmd); flush_tlb_all(); } + BUG_ON(pud_bad(*pud)); pmd = pmd_offset(pud, addr); do { From 6ea22486ba46bcb665de36514094d74575cd1330 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 28 Jan 2015 12:48:53 +0000 Subject: [PATCH 290/601] tracing: Add array printing helper If a trace event contains an array, there is currently no standard way to format this for text output. Drivers are currently hacking around this by a) local hacks that use the trace_seq functionailty directly, or b) just not printing that information. For fixed size arrays, formatting of the elements can be open-coded, but this gets cumbersome for arrays of non-trivial size. These approaches result in non-standard content of the event format description delivered to userspace, so userland tools needs to be taught to understand and parse each array printing method individually. This patch implements a __print_array() helper that tracepoint implementations can use instead of reinventing it. A simple C-style syntax is used to delimit the array and its elements {like,this}. So that the helpers can be used with large static arrays as well as dynamic arrays, they take a pointer and element count: they can be used with __get_dynamic_array() for use with dynamic arrays. Link: http://lkml.kernel.org/r/1422449335-8289-2-git-send-email-javi.merino@arm.com Cc: Ingo Molnar Signed-off-by: Dave Martin Signed-off-by: Javi Merino Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 4 ++++ include/trace/ftrace.h | 9 ++++++++ kernel/trace/trace_output.c | 44 ++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+) diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 0bebb5c348b826..5aa4a926954720 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -44,6 +44,10 @@ const char *ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr, const char *ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int len); +const char *ftrace_print_array_seq(struct trace_seq *p, + const void *buf, int buf_len, + size_t el_size); + struct trace_iterator; struct trace_event; diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 139b5067345b2e..304901fc5f34c6 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -263,6 +263,14 @@ #undef __print_hex #define __print_hex(buf, buf_len) ftrace_print_hex_seq(p, buf, buf_len) +#undef __print_array +#define __print_array(array, count, el_size) \ + ({ \ + BUILD_BUG_ON(el_size != 1 && el_size != 2 && \ + el_size != 4 && el_size != 8); \ + ftrace_print_array_seq(p, array, count, el_size); \ + }) + #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static notrace enum print_line_t \ @@ -674,6 +682,7 @@ static inline void ftrace_test_probe_##call(void) \ #undef __get_dynamic_array_len #undef __get_str #undef __get_bitmask +#undef __print_array #undef TP_printk #define TP_printk(fmt, args...) "\"" fmt "\", " __stringify(args) diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index b77b9a697619f2..692bf7184c8c13 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -177,6 +177,50 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) } EXPORT_SYMBOL(ftrace_print_hex_seq); +const char * +ftrace_print_array_seq(struct trace_seq *p, const void *buf, int buf_len, + size_t el_size) +{ + const char *ret = trace_seq_buffer_ptr(p); + const char *prefix = ""; + void *ptr = (void *)buf; + + trace_seq_putc(p, '{'); + + while (ptr < buf + buf_len) { + switch (el_size) { + case 1: + trace_seq_printf(p, "%s0x%x", prefix, + *(u8 *)ptr); + break; + case 2: + trace_seq_printf(p, "%s0x%x", prefix, + *(u16 *)ptr); + break; + case 4: + trace_seq_printf(p, "%s0x%x", prefix, + *(u32 *)ptr); + break; + case 8: + trace_seq_printf(p, "%s0x%llx", prefix, + *(u64 *)ptr); + break; + default: + trace_seq_printf(p, "BAD SIZE:%zu 0x%x", el_size, + *(u8 *)ptr); + el_size = 1; + } + prefix = ","; + ptr += el_size; + } + + trace_seq_putc(p, '}'); + trace_seq_putc(p, 0); + + return ret; +} +EXPORT_SYMBOL(ftrace_print_array_seq); + int ftrace_raw_output_prep(struct trace_iterator *iter, struct trace_event *trace_event) { From da194930ede6d87945786f72b1c36c7a4ed0f3a6 Mon Sep 17 00:00:00 2001 From: Tina Ruchandani Date: Wed, 28 Jan 2015 19:46:11 +0530 Subject: [PATCH 291/601] trace: Use 64-bit timekeeping The ring_buffer_producer uses 'struct timeval' to measure its start and end times. 'struct timeval' on 32-bit systems will have its tv_sec value overflow in year 2038 and beyond. This patch replaces struct timeval with 'ktime_t' which uses 64-bit representation for nanoseconds. Link: http://lkml.kernel.org/r/20150128141611.GA2701@tinar Suggested-by: Arnd Bergmann Suggested-by: Steven Rostedt Signed-off-by: Tina Ruchandani Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer_benchmark.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index 3f9e328c30b529..13d945c0d03f2b 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include struct rb_page { @@ -17,7 +17,7 @@ struct rb_page { }; /* run time and sleep time in seconds */ -#define RUN_TIME 10 +#define RUN_TIME 10ULL #define SLEEP_TIME 10 /* number of events for writer to wake up the reader */ @@ -212,8 +212,7 @@ static void ring_buffer_consumer(void) static void ring_buffer_producer(void) { - struct timeval start_tv; - struct timeval end_tv; + ktime_t start_time, end_time, timeout; unsigned long long time; unsigned long long entries; unsigned long long overruns; @@ -227,7 +226,8 @@ static void ring_buffer_producer(void) * make the system stall) */ trace_printk("Starting ring buffer hammer\n"); - do_gettimeofday(&start_tv); + start_time = ktime_get(); + timeout = ktime_add_ns(start_time, RUN_TIME * NSEC_PER_SEC); do { struct ring_buffer_event *event; int *entry; @@ -244,7 +244,7 @@ static void ring_buffer_producer(void) ring_buffer_unlock_commit(buffer, event); } } - do_gettimeofday(&end_tv); + end_time = ktime_get(); cnt++; if (consumer && !(cnt % wakeup_interval)) @@ -264,7 +264,7 @@ static void ring_buffer_producer(void) cond_resched(); #endif - } while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test); + } while (ktime_before(end_time, timeout) && !kill_test); trace_printk("End ring buffer hammer\n"); if (consumer) { @@ -280,9 +280,7 @@ static void ring_buffer_producer(void) wait_for_completion(&read_done); } - time = end_tv.tv_sec - start_tv.tv_sec; - time *= USEC_PER_SEC; - time += (long long)((long)end_tv.tv_usec - (long)start_tv.tv_usec); + time = ktime_us_delta(end_time, start_time); entries = ring_buffer_entries(buffer); overruns = ring_buffer_overruns(buffer); From 4d92f50249eb3ed1c066276e214e8cc7be81e96d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 28 Jan 2015 07:43:56 +0100 Subject: [PATCH 292/601] s390: reintroduce diag 44 calls for cpu_relax() Christian Borntraeger reported that the now missing diag 44 calls (voluntary time slice end) does cause a performance regression for stop_machine() calls if a machine has more virtual cpus than the host has physical cpus. This patch mainly reverts 57f2ffe14fd125c2 ("s390: remove diag 44 calls from cpu_relax()") with the exception that we still do not issue diag 44 calls if running with smt enabled. Due to group scheduling algorithms when running in LPAR this would lead to significant latencies. However, when running in LPAR we do not have more virtual than physical cpus. Reported-and-tested-by: Christian Borntraeger Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 5 +---- arch/s390/kernel/processor.c | 10 +++++++++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index bed05ea7ec27eb..e7cbbdcdee1337 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -215,10 +215,7 @@ static inline unsigned short stap(void) /* * Give up the time slice of the virtual PU. */ -static inline void cpu_relax(void) -{ - barrier(); -} +void cpu_relax(void); #define cpu_relax_lowlatency() barrier() diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index dbdd33ee010204..26108232fcaaf0 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -8,16 +8,24 @@ #include #include -#include #include #include #include #include #include #include +#include static DEFINE_PER_CPU(struct cpuid, cpu_id); +void cpu_relax(void) +{ + if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) + asm volatile("diag 0,0,0x44"); + barrier(); +} +EXPORT_SYMBOL(cpu_relax); + /* * cpu_init - initializes state that is per-CPU. */ From c0a80c0c27e5e65b180a25e6c4c2f7ef9e386cd3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 9 Jan 2015 13:06:33 +0100 Subject: [PATCH 293/601] ftrace: allow architectures to specify ftrace compile options If the kernel is compiled with function tracer support the -pg compile option is passed to gcc to generate extra code into the prologue of each function. This patch replaces the "open-coded" -pg compile flag with a CC_FLAGS_FTRACE makefile variable which architectures can override if a different option should be used for code generation. Acked-by: Steven Rostedt Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- Makefile | 6 +++++- kernel/Makefile | 4 ++-- kernel/events/Makefile | 2 +- kernel/locking/Makefile | 8 ++++---- kernel/sched/Makefile | 2 +- kernel/trace/Makefile | 4 ++-- lib/Makefile | 2 +- scripts/Makefile.build | 5 +++-- 8 files changed, 19 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index fd80c6e9bc2367..11c6fe8f708fee 100644 --- a/Makefile +++ b/Makefile @@ -724,10 +724,14 @@ KBUILD_CFLAGS += $(call cc-option, -femit-struct-debug-baseonly) \ endif ifdef CONFIG_FUNCTION_TRACER +ifndef CC_FLAGS_FTRACE +CC_FLAGS_FTRACE := -pg +endif +export CC_FLAGS_FTRACE ifdef CONFIG_HAVE_FENTRY CC_USING_FENTRY := $(call cc-option, -mfentry -DCC_USING_FENTRY) endif -KBUILD_CFLAGS += -pg $(CC_USING_FENTRY) +KBUILD_CFLAGS += $(CC_FLAGS_FTRACE) $(CC_USING_FENTRY) KBUILD_AFLAGS += $(CC_USING_FENTRY) ifdef CONFIG_DYNAMIC_FTRACE ifdef CONFIG_HAVE_C_RECORDMCOUNT diff --git a/kernel/Makefile b/kernel/Makefile index a59481a3fa6cff..13af308f246032 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -13,8 +13,8 @@ obj-y = fork.o exec_domain.o panic.o \ ifdef CONFIG_FUNCTION_TRACER # Do not trace debug files and internal ftrace files -CFLAGS_REMOVE_cgroup-debug.o = -pg -CFLAGS_REMOVE_irq_work.o = -pg +CFLAGS_REMOVE_cgroup-debug.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_irq_work.o = $(CC_FLAGS_FTRACE) endif # cond_syscall is currently not LTO compatible diff --git a/kernel/events/Makefile b/kernel/events/Makefile index 103f5d147b2f9f..2925188f50eabe 100644 --- a/kernel/events/Makefile +++ b/kernel/events/Makefile @@ -1,5 +1,5 @@ ifdef CONFIG_FUNCTION_TRACER -CFLAGS_REMOVE_core.o = -pg +CFLAGS_REMOVE_core.o = $(CC_FLAGS_FTRACE) endif obj-y := core.o ring_buffer.o callchain.o diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 8541bfdfd232bb..4caca3f7af533b 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -2,10 +2,10 @@ obj-y += mutex.o semaphore.o rwsem.o mcs_spinlock.o ifdef CONFIG_FUNCTION_TRACER -CFLAGS_REMOVE_lockdep.o = -pg -CFLAGS_REMOVE_lockdep_proc.o = -pg -CFLAGS_REMOVE_mutex-debug.o = -pg -CFLAGS_REMOVE_rtmutex-debug.o = -pg +CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_lockdep_proc.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_mutex-debug.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_rtmutex-debug.o = $(CC_FLAGS_FTRACE) endif obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index ab32b7b0db5c6b..46be8702487561 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -1,5 +1,5 @@ ifdef CONFIG_FUNCTION_TRACER -CFLAGS_REMOVE_clock.o = -pg +CFLAGS_REMOVE_clock.o = $(CC_FLAGS_FTRACE) endif ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 67d6369ddf83e6..9a7e0e60a1a89e 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -3,11 +3,11 @@ ifdef CONFIG_FUNCTION_TRACER ORIG_CFLAGS := $(KBUILD_CFLAGS) -KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) +KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)) ifdef CONFIG_FTRACE_SELFTEST # selftest needs instrumentation -CFLAGS_trace_selftest_dynamic.o = -pg +CFLAGS_trace_selftest_dynamic.o = $(CC_FLAGS_FTRACE) obj-y += trace_selftest_dynamic.o endif endif diff --git a/lib/Makefile b/lib/Makefile index 3c3b30b9e020d2..f3f73e50519a87 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -4,7 +4,7 @@ ifdef CONFIG_FUNCTION_TRACER ORIG_CFLAGS := $(KBUILD_CFLAGS) -KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) +KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)) endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 649ce68440331c..01df30af4d4a50 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -234,8 +234,9 @@ sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH "$(if $(part-of-module),1,0)" "$(@)"; recordmcount_source := $(srctree)/scripts/recordmcount.pl endif -cmd_record_mcount = \ - if [ "$(findstring -pg,$(_c_flags))" = "-pg" ]; then \ +cmd_record_mcount = \ + if [ "$(findstring $(CC_FLAGS_FTRACE),$(_c_flags))" = \ + "$(CC_FLAGS_FTRACE)" ]; then \ $(sub_cmd_record_mcount) \ fi; endif From 61f552141c9c0e88b3fdc7046265781ffd8fa68a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 18 Jan 2015 16:45:42 +0100 Subject: [PATCH 294/601] ftrace: let notrace function attribute disable hotpatching if necessary gcc supports an s390 specific function attribute called "hotpatch". It can be used to specify the number of halfwords that shall be added before and after a function and which shall be filled with nops for runtime patching. s390 will use the hotpatch attribute for function tracing, therefore make sure that the notrace function attribute either disables the mcount call or in case of hotpatch nop generation. Acked-by: Steven Rostedt Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- include/linux/compiler.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d5ad7b1118fc10..1ef679f4b88e68 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -54,7 +54,11 @@ extern void __chk_io_ptr(const volatile void __iomem *); #include #endif +#ifdef CC_USING_HOTPATCH +#define notrace __attribute__((hotpatch(0,0))) +#else #define notrace __attribute__((no_instrument_function)) +#endif /* Intel compiler defines __GNUC__. So we will overwrite implementations * coming from above header files here From e6d60b368b45b9be3aa068f8e5fa98c3487c9d4e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 9 Jan 2015 13:08:28 +0100 Subject: [PATCH 295/601] s390/ftrace: hotpatch support for function tracing Make use of gcc's hotpatch support to generate better code for ftrace function tracing. The generated code now contains only a six byte nop in each function prologue instead of a 24 byte code block which will be runtime patched to support function tracing. With the new code generation the runtime overhead for supporting function tracing is close to zero, while the original code did show a significant performance impact. Acked-by: Steven Rostedt Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 1 - arch/s390/Makefile | 10 ++++++++++ arch/s390/include/asm/ftrace.h | 15 +++++++++++++++ arch/s390/kernel/Makefile | 4 ++-- arch/s390/kernel/ftrace.c | 15 ++++++++++++++- arch/s390/kernel/kprobes.c | 3 ++- arch/s390/kernel/mcount.S | 2 ++ scripts/recordmcount.pl | 9 +++++++-- 8 files changed, 52 insertions(+), 7 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 7eba5b5723e986..8d11babf9aa505 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -117,7 +117,6 @@ config S390 select HAVE_BPF_JIT if 64BIT && PACK_STACK select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL - select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK select HAVE_DYNAMIC_FTRACE if 64BIT select HAVE_DYNAMIC_FTRACE_WITH_REGS if 64BIT diff --git a/arch/s390/Makefile b/arch/s390/Makefile index e742ec5de9a7e5..acb6859c6a95f4 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -87,6 +87,16 @@ ifeq ($(call cc-option-yn,-mwarn-dynamicstack),y) cflags-$(CONFIG_WARN_DYNAMIC_STACK) += -mwarn-dynamicstack endif +ifdef CONFIG_FUNCTION_TRACER +# make use of hotpatch feature if the compiler supports it +cc_hotpatch := -mhotpatch=0,3 +ifeq ($(call cc-option-yn,$(cc_hotpatch)),y) +CC_FLAGS_FTRACE := $(cc_hotpatch) +KBUILD_AFLAGS += -DCC_USING_HOTPATCH +KBUILD_CFLAGS += -DCC_USING_HOTPATCH +endif +endif + KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y) KBUILD_CFLAGS += -pipe -fno-strength-reduce -Wno-sign-compare KBUILD_AFLAGS += $(aflags-y) diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index abb618f1ead25f..836c56290499b8 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -3,8 +3,12 @@ #define ARCH_SUPPORTS_FTRACE_OPS 1 +#ifdef CC_USING_HOTPATCH +#define MCOUNT_INSN_SIZE 6 +#else #define MCOUNT_INSN_SIZE 24 #define MCOUNT_RETURN_FIXUP 18 +#endif #ifndef __ASSEMBLY__ @@ -37,17 +41,28 @@ struct ftrace_insn { static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn) { #ifdef CONFIG_FUNCTION_TRACER +#ifdef CC_USING_HOTPATCH + /* brcl 0,0 */ + insn->opc = 0xc004; + insn->disp = 0; +#else /* jg .+24 */ insn->opc = 0xc0f4; insn->disp = MCOUNT_INSN_SIZE / 2; #endif +#endif } static inline int is_ftrace_nop(struct ftrace_insn *insn) { #ifdef CONFIG_FUNCTION_TRACER +#ifdef CC_USING_HOTPATCH + if (insn->disp == 0) + return 1; +#else if (insn->disp == MCOUNT_INSN_SIZE / 2) return 1; +#endif #endif return 0; } diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 204c43a4c245dd..31fab2676fe921 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -4,8 +4,8 @@ ifdef CONFIG_FUNCTION_TRACER # Don't trace early setup code and tracing code -CFLAGS_REMOVE_early.o = -pg -CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) endif # diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 3dabcae40e0421..82c19899574f83 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -46,6 +46,13 @@ * lg %r14,8(%r15) # offset 18 * The jg instruction branches to offset 24 to skip as many instructions * as possible. + * In case we use gcc's hotpatch feature the original and also the disabled + * function prologue contains only a single six byte instruction and looks + * like this: + * > brcl 0,0 # offset 0 + * To enable ftrace the code gets patched like above and afterwards looks + * like this: + * > brasl %r0,ftrace_caller # offset 0 */ unsigned long ftrace_plt; @@ -64,9 +71,15 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; if (addr == MCOUNT_ADDR) { - /* Initial code replacement; we expect to see stg r14,8(r15) */ + /* Initial code replacement */ +#ifdef CC_USING_HOTPATCH + /* We expect to see brcl 0,0 */ + ftrace_generate_nop_insn(&orig); +#else + /* We expect to see stg r14,8(r15) */ orig.opc = 0xe3e0; orig.disp = 0xf0080024; +#endif ftrace_generate_nop_insn(&new); } else if (old.opc == BREAKPOINT_INSTRUCTION) { /* diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 1e4c710dfb9233..f516edc1fbe31a 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -69,7 +69,8 @@ static void copy_instruction(struct kprobe *p) /* * If kprobes patches the instruction that is morphed by * ftrace make sure that kprobes always sees the branch - * "jg .+24" that skips the mcount block + * "jg .+24" that skips the mcount block or the "brcl 0,0" + * in case of hotpatch. */ ftrace_generate_nop_insn((struct ftrace_insn *)p->ainsn.insn); p->ainsn.is_ftrace_insn = 1; diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index b6dfc5bfcb8922..e499370fbccb32 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -27,7 +27,9 @@ ENTRY(ftrace_caller) .globl ftrace_regs_caller .set ftrace_regs_caller,ftrace_caller lgr %r1,%r15 +#ifndef CC_USING_HOTPATCH aghi %r0,MCOUNT_RETURN_FIXUP +#endif aghi %r15,-STACK_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 56ea99a12ab79c..be39be0abefca8 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -242,8 +242,13 @@ sub check_objcopy $cc .= " -m32"; } elsif ($arch eq "s390" && $bits == 64) { - $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$"; - $mcount_adjust = -14; + if ($cc =~ /-DCC_USING_HOTPATCH/) { + $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*brcl\\s*0,[0-9a-f]+ <([^\+]*)>\$"; + $mcount_adjust = 0; + } else { + $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$"; + $mcount_adjust = -14; + } $alignment = 8; $type = ".quad"; $ld .= " -m elf64_s390"; From 8ea55c95c372a7a51fa50cb7c75240bfbe8bd337 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 28 Jan 2015 18:44:17 +0100 Subject: [PATCH 296/601] s390/dasd: add locking for global_profile access Access to DASDs global statistics is done without locking which can lead to inconsistent data. Add locking to fix this. Also move the relevant structs in a global dasd_profile struct. Signed-off-by: Sebastian Ott Reviewed-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 32 ++++++++++++++++++++------------ drivers/s390/block/dasd_int.h | 2 +- drivers/s390/block/dasd_proc.c | 4 +++- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 4abf11965484b1..a67e8dae73c35b 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -674,8 +674,12 @@ EXPORT_SYMBOL(dasd_enable_device); unsigned int dasd_global_profile_level = DASD_PROFILE_OFF; #ifdef CONFIG_DASD_PROFILE -struct dasd_profile_info dasd_global_profile_data; -static struct dentry *dasd_global_profile_dentry; +static struct dasd_profile_info dasd_global_profile_data; +struct dasd_profile dasd_global_profile = { + .dentry = NULL, + .data = &dasd_global_profile_data, + .lock = __SPIN_LOCK_UNLOCKED(dasd_global_profile.lock), +}; static struct dentry *dasd_debugfs_global_entry; /* @@ -696,11 +700,13 @@ static void dasd_profile_start(struct dasd_block *block, if (++counter >= 31) break; + spin_lock(&dasd_global_profile.lock); if (dasd_global_profile_level) { - dasd_global_profile_data.dasd_io_nr_req[counter]++; + dasd_global_profile.data->dasd_io_nr_req[counter]++; if (rq_data_dir(req) == READ) - dasd_global_profile_data.dasd_read_nr_req[counter]++; + dasd_global_profile.data->dasd_read_nr_req[counter]++; } + spin_unlock(&dasd_global_profile.lock); spin_lock(&block->profile.lock); if (block->profile.data) { @@ -825,8 +831,9 @@ static void dasd_profile_end(struct dasd_block *block, dasd_profile_counter(irqtime / sectors, irqtimeps_ind); dasd_profile_counter(endtime, endtime_ind); + spin_lock(&dasd_global_profile.lock); if (dasd_global_profile_level) { - dasd_profile_end_add_data(&dasd_global_profile_data, + dasd_profile_end_add_data(dasd_global_profile.data, cqr->startdev != block->base, cqr->cpmode == 1, rq_data_dir(req) == READ, @@ -835,6 +842,7 @@ static void dasd_profile_end(struct dasd_block *block, irqtime_ind, irqtimeps_ind, endtime_ind); } + spin_unlock(&dasd_global_profile.lock); spin_lock(&block->profile.lock); if (block->profile.data) @@ -878,8 +886,7 @@ void dasd_profile_reset(struct dasd_profile *profile) void dasd_global_profile_reset(void) { - memset(&dasd_global_profile_data, 0, sizeof(dasd_global_profile_data)); - getnstimeofday(&dasd_global_profile_data.starttod); + dasd_profile_reset(&dasd_global_profile); } int dasd_profile_on(struct dasd_profile *profile) @@ -1077,7 +1084,9 @@ static int dasd_stats_global_show(struct seq_file *m, void *v) seq_puts(m, "disabled\n"); return 0; } - dasd_stats_seq_print(m, &dasd_global_profile_data); + spin_lock_bh(&dasd_global_profile.lock); + dasd_stats_seq_print(m, dasd_global_profile.data); + spin_unlock_bh(&dasd_global_profile.lock); return 0; } @@ -1123,8 +1132,8 @@ static void dasd_profile_exit(struct dasd_profile *profile) static void dasd_statistics_removeroot(void) { dasd_global_profile_level = DASD_PROFILE_OFF; - debugfs_remove(dasd_global_profile_dentry); - dasd_global_profile_dentry = NULL; + debugfs_remove(dasd_global_profile.dentry); + dasd_global_profile.dentry = NULL; debugfs_remove(dasd_debugfs_global_entry); debugfs_remove(dasd_debugfs_root_entry); } @@ -1136,7 +1145,6 @@ static void dasd_statistics_createroot(void) dasd_debugfs_root_entry = NULL; dasd_debugfs_global_entry = NULL; - dasd_global_profile_dentry = NULL; pde = debugfs_create_dir("dasd", NULL); if (!pde || IS_ERR(pde)) goto error; @@ -1151,7 +1159,7 @@ static void dasd_statistics_createroot(void) NULL, &dasd_stats_global_fops); if (!pde || IS_ERR(pde)) goto error; - dasd_global_profile_dentry = pde; + dasd_global_profile.dentry = pde; return; error: diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 8b5d4100abf7c7..91731fa05604ad 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -651,7 +651,7 @@ dasd_check_blocksize(int bsize) #define DASD_PROFILE_GLOBAL_ONLY 2 extern debug_info_t *dasd_debug_area; -extern struct dasd_profile_info dasd_global_profile_data; +extern struct dasd_profile dasd_global_profile; extern unsigned int dasd_global_profile_level; extern const struct block_device_operations dasd_device_operations; diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index 78ac905a5b7f18..76410084c48f4b 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -219,7 +219,8 @@ static int dasd_stats_proc_show(struct seq_file *m, void *v) "/proc/dasd/statistics'\n"); return 0; } - prof = &dasd_global_profile_data; + spin_lock_bh(&dasd_global_profile.lock); + prof = dasd_global_profile.data; /* prevent counter 'overflow' on output */ for (factor = 1; (prof->dasd_io_reqs / factor) > 9999999; @@ -255,6 +256,7 @@ static int dasd_stats_proc_show(struct seq_file *m, void *v) dasd_statistics_array(m, prof->dasd_io_time3, factor); seq_printf(m, "# of req in chanq at enqueuing (1..32) \n"); dasd_statistics_array(m, prof->dasd_io_nr_req, factor); + spin_unlock_bh(&dasd_global_profile.lock); #else seq_printf(m, "Statistics are not activated in this kernel\n"); #endif From 6765cc2ac60f124ffffd0232e095c5ec6eb70d57 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 28 Jan 2015 19:06:29 +0100 Subject: [PATCH 297/601] s390/dasd: cleanup profiling The dasd driver has a lot of duplicated code to handle dasd_global_profile. With this patch we use the same code for the global and the per device profiling data. Note that dasd_stats_write had to change slightly to maintain some odd differences between A) per device and global profile and B) proc and sysfs interface usage. Signed-off-by: Sebastian Ott Reviewed-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 90 ++++++---------------------------- drivers/s390/block/dasd_int.h | 1 - drivers/s390/block/dasd_proc.c | 21 +++++--- 3 files changed, 27 insertions(+), 85 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index a67e8dae73c35b..be34ef41b7c741 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -674,10 +674,7 @@ EXPORT_SYMBOL(dasd_enable_device); unsigned int dasd_global_profile_level = DASD_PROFILE_OFF; #ifdef CONFIG_DASD_PROFILE -static struct dasd_profile_info dasd_global_profile_data; struct dasd_profile dasd_global_profile = { - .dentry = NULL, - .data = &dasd_global_profile_data, .lock = __SPIN_LOCK_UNLOCKED(dasd_global_profile.lock), }; static struct dentry *dasd_debugfs_global_entry; @@ -701,7 +698,7 @@ static void dasd_profile_start(struct dasd_block *block, break; spin_lock(&dasd_global_profile.lock); - if (dasd_global_profile_level) { + if (dasd_global_profile.data) { dasd_global_profile.data->dasd_io_nr_req[counter]++; if (rq_data_dir(req) == READ) dasd_global_profile.data->dasd_read_nr_req[counter]++; @@ -832,7 +829,7 @@ static void dasd_profile_end(struct dasd_block *block, dasd_profile_counter(endtime, endtime_ind); spin_lock(&dasd_global_profile.lock); - if (dasd_global_profile_level) { + if (dasd_global_profile.data) { dasd_profile_end_add_data(dasd_global_profile.data, cqr->startdev != block->base, cqr->cpmode == 1, @@ -884,11 +881,6 @@ void dasd_profile_reset(struct dasd_profile *profile) spin_unlock_bh(&profile->lock); } -void dasd_global_profile_reset(void) -{ - dasd_profile_reset(&dasd_global_profile); -} - int dasd_profile_on(struct dasd_profile *profile) { struct dasd_profile_info *data; @@ -956,12 +948,20 @@ static ssize_t dasd_stats_write(struct file *file, dasd_profile_reset(prof); } else if (strncmp(str, "on", 2) == 0) { rc = dasd_profile_on(prof); - if (!rc) - rc = user_len; + if (rc) + goto out; + rc = user_len; + if (prof == &dasd_global_profile) { + dasd_profile_reset(prof); + dasd_global_profile_level = DASD_PROFILE_GLOBAL_ONLY; + } } else if (strncmp(str, "off", 3) == 0) { + if (prof == &dasd_global_profile) + dasd_global_profile_level = DASD_PROFILE_OFF; dasd_profile_off(prof); } else rc = -EINVAL; +out: vfree(buffer); return rc; } @@ -1051,59 +1051,6 @@ static const struct file_operations dasd_stats_raw_fops = { .write = dasd_stats_write, }; -static ssize_t dasd_stats_global_write(struct file *file, - const char __user *user_buf, - size_t user_len, loff_t *pos) -{ - char *buffer, *str; - ssize_t rc; - - if (user_len > 65536) - user_len = 65536; - buffer = dasd_get_user_string(user_buf, user_len); - if (IS_ERR(buffer)) - return PTR_ERR(buffer); - str = skip_spaces(buffer); - rc = user_len; - if (strncmp(str, "reset", 5) == 0) { - dasd_global_profile_reset(); - } else if (strncmp(str, "on", 2) == 0) { - dasd_global_profile_reset(); - dasd_global_profile_level = DASD_PROFILE_GLOBAL_ONLY; - } else if (strncmp(str, "off", 3) == 0) { - dasd_global_profile_level = DASD_PROFILE_OFF; - } else - rc = -EINVAL; - vfree(buffer); - return rc; -} - -static int dasd_stats_global_show(struct seq_file *m, void *v) -{ - if (!dasd_global_profile_level) { - seq_puts(m, "disabled\n"); - return 0; - } - spin_lock_bh(&dasd_global_profile.lock); - dasd_stats_seq_print(m, dasd_global_profile.data); - spin_unlock_bh(&dasd_global_profile.lock); - return 0; -} - -static int dasd_stats_global_open(struct inode *inode, struct file *file) -{ - return single_open(file, dasd_stats_global_show, NULL); -} - -static const struct file_operations dasd_stats_global_fops = { - .owner = THIS_MODULE, - .open = dasd_stats_global_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, - .write = dasd_stats_global_write, -}; - static void dasd_profile_init(struct dasd_profile *profile, struct dentry *base_dentry) { @@ -1132,19 +1079,16 @@ static void dasd_profile_exit(struct dasd_profile *profile) static void dasd_statistics_removeroot(void) { dasd_global_profile_level = DASD_PROFILE_OFF; - debugfs_remove(dasd_global_profile.dentry); - dasd_global_profile.dentry = NULL; + dasd_profile_exit(&dasd_global_profile); debugfs_remove(dasd_debugfs_global_entry); debugfs_remove(dasd_debugfs_root_entry); } static void dasd_statistics_createroot(void) { - umode_t mode; struct dentry *pde; dasd_debugfs_root_entry = NULL; - dasd_debugfs_global_entry = NULL; pde = debugfs_create_dir("dasd", NULL); if (!pde || IS_ERR(pde)) goto error; @@ -1153,13 +1097,7 @@ static void dasd_statistics_createroot(void) if (!pde || IS_ERR(pde)) goto error; dasd_debugfs_global_entry = pde; - - mode = (S_IRUSR | S_IWUSR | S_IFREG); - pde = debugfs_create_file("statistics", mode, dasd_debugfs_global_entry, - NULL, &dasd_stats_global_fops); - if (!pde || IS_ERR(pde)) - goto error; - dasd_global_profile.dentry = pde; + dasd_profile_init(&dasd_global_profile, dasd_debugfs_global_entry); return; error: diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 91731fa05604ad..227e3dea315526 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -728,7 +728,6 @@ int dasd_device_is_ro(struct dasd_device *); void dasd_profile_reset(struct dasd_profile *); int dasd_profile_on(struct dasd_profile *); void dasd_profile_off(struct dasd_profile *); -void dasd_global_profile_reset(void); char *dasd_get_user_string(const char __user *, size_t); /* externals in dasd_devmap.c */ diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index 76410084c48f4b..aa7bb2d1da8115 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -212,15 +212,15 @@ static int dasd_stats_proc_show(struct seq_file *m, void *v) struct dasd_profile_info *prof; int factor; - /* check for active profiling */ - if (!dasd_global_profile_level) { + spin_lock_bh(&dasd_global_profile.lock); + prof = dasd_global_profile.data; + if (!prof) { + spin_unlock_bh(&dasd_global_profile.lock); seq_printf(m, "Statistics are off - they might be " "switched on using 'echo set on > " "/proc/dasd/statistics'\n"); return 0; } - spin_lock_bh(&dasd_global_profile.lock); - prof = dasd_global_profile.data; /* prevent counter 'overflow' on output */ for (factor = 1; (prof->dasd_io_reqs / factor) > 9999999; @@ -293,14 +293,19 @@ static ssize_t dasd_stats_proc_write(struct file *file, dasd_stats_all_block_off(); goto out_error; } - dasd_global_profile_reset(); + rc = dasd_profile_on(&dasd_global_profile); + if (rc) { + dasd_stats_all_block_off(); + goto out_error; + } + dasd_profile_reset(&dasd_global_profile); dasd_global_profile_level = DASD_PROFILE_ON; pr_info("The statistics feature has been switched " "on\n"); } else if (strcmp(str, "off") == 0) { - /* switch off and reset statistics profiling */ + /* switch off statistics profiling */ dasd_global_profile_level = DASD_PROFILE_OFF; - dasd_global_profile_reset(); + dasd_profile_off(&dasd_global_profile); dasd_stats_all_block_off(); pr_info("The statistics feature has been switched " "off\n"); @@ -308,7 +313,7 @@ static ssize_t dasd_stats_proc_write(struct file *file, goto out_parse_error; } else if (strncmp(str, "reset", 5) == 0) { /* reset the statistics */ - dasd_global_profile_reset(); + dasd_profile_reset(&dasd_global_profile); dasd_stats_all_block_reset(); pr_info("The statistics have been reset\n"); } else From 38fa3dc15c6948135977d50ecf90061f21cebc15 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 28 Jan 2015 09:43:01 -0500 Subject: [PATCH 298/601] ktest: Show times for build, install, boot and test Seeing the times for how long a build, install, reboot and the test takes is helpful for analyzing the test process. Seeing how different changes affect the timings. Show the build, install, boot and test times when at the end of the test, or between each interval for tests that do those mulitple times (like bisect and patchcheck). Signed-off-by: Steven Rostedt --- tools/testing/ktest/ktest.pl | 109 +++++++++++++++++++++++++++++++++-- 1 file changed, 105 insertions(+), 4 deletions(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 1dae000f79f87e..177319822401f0 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -198,6 +198,11 @@ my $patchcheck_cherry; my $patchcheck_end; +my $build_time; +my $install_time; +my $reboot_time; +my $test_time; + # set when a test is something other that just building or install # which would require more options. my $buildonly = 1; @@ -555,6 +560,66 @@ sub get_mandatory_config { } } +sub show_time { + my ($time) = @_; + + my $hours = 0; + my $minutes = 0; + + if ($time > 3600) { + $hours = int($time / 3600); + $time -= $hours * 3600; + } + if ($time > 60) { + $minutes = int($time / 60); + $time -= $minutes * 60; + } + + if ($hours > 0) { + doprint "$hours hour"; + doprint "s" if ($hours > 1); + doprint " "; + } + + if ($minutes > 0) { + doprint "$minutes minute"; + doprint "s" if ($minutes > 1); + doprint " "; + } + + doprint "$time second"; + doprint "s" if ($time != 1); +} + +sub print_times { + doprint "\n"; + if ($build_time) { + doprint "Build time: "; + show_time($build_time); + doprint "\n"; + } + if ($install_time) { + doprint "Install time: "; + show_time($install_time); + doprint "\n"; + } + if ($reboot_time) { + doprint "Reboot time: "; + show_time($reboot_time); + doprint "\n"; + } + if ($test_time) { + doprint "Test time: "; + show_time($test_time); + doprint "\n"; + } + # reset for iterations like bisect + $build_time = 0; + $install_time = 0; + $reboot_time = 0; + $test_time = 0; +} + sub get_mandatory_configs { get_mandatory_config("MACHINE"); get_mandatory_config("BUILD_DIR"); @@ -1786,6 +1851,8 @@ sub monitor { my $skip_call_trace = 0; my $loops; + my $start_time = time; + wait_for_monitor 5; my $line; @@ -1910,6 +1977,9 @@ sub monitor { } } + my $end_time = time; + $reboot_time = $end_time - $start_time; + close(DMESG); if ($bug) { @@ -1958,6 +2028,8 @@ sub install { return if ($no_install); + my $start_time = time; + if (defined($pre_install)) { my $cp_pre_install = eval_kernel_version $pre_install; run_command "$cp_pre_install" or @@ -1989,6 +2061,8 @@ sub install { if (!$install_mods) { do_post_install; doprint "No modules needed\n"; + my $end_time = time; + $install_time = $end_time - $start_time; return; } @@ -2016,6 +2090,9 @@ sub install { run_ssh "rm -f /tmp/$modtar"; do_post_install; + + my $end_time = time; + $install_time = $end_time - $start_time; } sub get_version { @@ -2228,6 +2305,8 @@ sub build { unlink $buildlog; + my $start_time = time; + # Failed builds should not reboot the target my $save_no_reboot = $no_reboot; $no_reboot = 1; @@ -2313,6 +2392,9 @@ sub build { $no_reboot = $save_no_reboot; + my $end_time = time; + $build_time = $end_time - $start_time; + return 1; } @@ -2403,6 +2485,8 @@ sub do_run_test { my $bug = 0; my $bug_ignored = 0; + my $start_time = time; + wait_for_monitor 1; doprint "run test $run_test\n"; @@ -2469,6 +2553,9 @@ sub do_run_test { waitpid $child_pid, 0; $child_exit = $?; + my $end_time = time; + $test_time = $end_time - $start_time; + if (!$bug && $in_bisect) { if (defined($bisect_ret_good)) { if ($child_exit == $bisect_ret_good) { @@ -2775,6 +2862,7 @@ sub bisect { do { $result = run_bisect $type; $test = run_git_bisect "git bisect $result"; + print_times; } while ($test); run_command "git bisect log" or @@ -3188,6 +3276,7 @@ sub config_bisect { do { $ret = run_config_bisect \%good_configs, \%bad_configs; + print_times; } while (!$ret); return $ret if ($ret < 0); @@ -3317,10 +3406,12 @@ sub patchcheck { do_run_test or $failed = 1; } end_monitor; - return 0 if ($failed); - + if ($failed) { + print_times; + return 0; + } patchcheck_reboot; - + print_times; } $in_patchcheck = 0; success $i; @@ -4020,6 +4111,11 @@ sub set_test_option { $iteration = $i; + $build_time = 0; + $install_time = 0; + $reboot_time = 0; + $test_time = 0; + undef %force_config; my $makecmd = set_test_option("MAKE_CMD", $i); @@ -4183,9 +4279,14 @@ sub set_test_option { do_run_test or $failed = 1; } end_monitor; - next if ($failed); + if ($failed) { + print_times; + next; + } } + print_times; + success $i; } From 64d982838e89e0d3981bfe825e0e76b36e106332 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Wed, 28 Jan 2015 15:17:35 -0500 Subject: [PATCH 299/601] ktest: Rename start_monitor_and_boot to start_monitor_and_install The function start_monitor_and_boot is a misnomer. It use to, but now it starts the monitor and installs. It does not boot. Rename it before I get confused by it again. Signed-off-by: Steven Rostedt --- tools/testing/ktest/ktest.pl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 177319822401f0..6ae890a0d48681 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -2105,7 +2105,7 @@ sub get_version { $have_version = 1; } -sub start_monitor_and_boot { +sub start_monitor_and_install { # Make sure the stable kernel has finished booting # Install bisects, don't need console @@ -2656,7 +2656,7 @@ sub run_bisect_test { dodie "Failed on build" if $failed; # Now boot the box - start_monitor_and_boot or $failed = 1; + start_monitor_and_install or $failed = 1; if ($type ne "boot") { if ($failed && $bisect_skip) { @@ -3400,7 +3400,7 @@ sub patchcheck { my $failed = 0; - start_monitor_and_boot or $failed = 1; + start_monitor_and_install or $failed = 1; if (!$failed && $type ne "boot"){ do_run_test or $failed = 1; @@ -3864,7 +3864,7 @@ sub make_min_config { my $failed = 0; build "oldconfig" or $failed = 1; if (!$failed) { - start_monitor_and_boot or $failed = 1; + start_monitor_and_install or $failed = 1; if ($type eq "test" && !$failed) { do_run_test or $failed = 1; @@ -4273,7 +4273,7 @@ sub set_test_option { if ($test_type ne "build") { my $failed = 0; - start_monitor_and_boot or $failed = 1; + start_monitor_and_install or $failed = 1; if (!$failed && $test_type ne "boot" && defined($run_test)) { do_run_test or $failed = 1; From 83508093f448e929bf55d07dd08246d22b03d753 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Thu, 15 Jan 2015 02:29:16 +0100 Subject: [PATCH 300/601] ARM: 8278/1: sa1100: split irq handling for low GPIOs Low GPIO pins use an interrupt in SC interrupts space. However it's possible to handle them as if all the GPIO interrupts are instead tied to single GPIO handler, which later decodes GEDR register and chain-calls next IRQ handler. So split first 11 interrupts into system part (IRQ_GPIO0_SC - IRQ_GPIO10_SC) which work exactly like the rest of system controller interrupts and real GPIO interrupts (IRQ_GPIO0..IRQ_GPIO10). A single handler sa1100_gpio_handler then decodes and calls next handler. Signed-off-by: Dmitry Eremin-Solenikov Tested-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/mach-sa1100/include/mach/irqs.h | 73 +++++++++++--------- arch/arm/mach-sa1100/irq.c | 87 ++++++++++++++---------- drivers/gpio/gpio-sa1100.c | 2 +- 3 files changed, 93 insertions(+), 69 deletions(-) diff --git a/arch/arm/mach-sa1100/include/mach/irqs.h b/arch/arm/mach-sa1100/include/mach/irqs.h index de0983494c7e0a..734e30e406a354 100644 --- a/arch/arm/mach-sa1100/include/mach/irqs.h +++ b/arch/arm/mach-sa1100/include/mach/irqs.h @@ -8,17 +8,17 @@ * 2001/11/14 RMK Cleaned up and standardised a lot of the IRQs. */ -#define IRQ_GPIO0 1 -#define IRQ_GPIO1 2 -#define IRQ_GPIO2 3 -#define IRQ_GPIO3 4 -#define IRQ_GPIO4 5 -#define IRQ_GPIO5 6 -#define IRQ_GPIO6 7 -#define IRQ_GPIO7 8 -#define IRQ_GPIO8 9 -#define IRQ_GPIO9 10 -#define IRQ_GPIO10 11 +#define IRQ_GPIO0_SC 1 +#define IRQ_GPIO1_SC 2 +#define IRQ_GPIO2_SC 3 +#define IRQ_GPIO3_SC 4 +#define IRQ_GPIO4_SC 5 +#define IRQ_GPIO5_SC 6 +#define IRQ_GPIO6_SC 7 +#define IRQ_GPIO7_SC 8 +#define IRQ_GPIO8_SC 9 +#define IRQ_GPIO9_SC 10 +#define IRQ_GPIO10_SC 11 #define IRQ_GPIO11_27 12 #define IRQ_LCD 13 /* LCD controller */ #define IRQ_Ser0UDC 14 /* Ser. port 0 UDC */ @@ -41,32 +41,43 @@ #define IRQ_RTC1Hz 31 /* RTC 1 Hz clock */ #define IRQ_RTCAlrm 32 /* RTC Alarm */ -#define IRQ_GPIO11 33 -#define IRQ_GPIO12 34 -#define IRQ_GPIO13 35 -#define IRQ_GPIO14 36 -#define IRQ_GPIO15 37 -#define IRQ_GPIO16 38 -#define IRQ_GPIO17 39 -#define IRQ_GPIO18 40 -#define IRQ_GPIO19 41 -#define IRQ_GPIO20 42 -#define IRQ_GPIO21 43 -#define IRQ_GPIO22 44 -#define IRQ_GPIO23 45 -#define IRQ_GPIO24 46 -#define IRQ_GPIO25 47 -#define IRQ_GPIO26 48 -#define IRQ_GPIO27 49 +#define IRQ_GPIO0 33 +#define IRQ_GPIO1 34 +#define IRQ_GPIO2 35 +#define IRQ_GPIO3 36 +#define IRQ_GPIO4 37 +#define IRQ_GPIO5 38 +#define IRQ_GPIO6 39 +#define IRQ_GPIO7 40 +#define IRQ_GPIO8 41 +#define IRQ_GPIO9 42 +#define IRQ_GPIO10 43 +#define IRQ_GPIO11 44 +#define IRQ_GPIO12 45 +#define IRQ_GPIO13 46 +#define IRQ_GPIO14 47 +#define IRQ_GPIO15 48 +#define IRQ_GPIO16 49 +#define IRQ_GPIO17 50 +#define IRQ_GPIO18 51 +#define IRQ_GPIO19 52 +#define IRQ_GPIO20 53 +#define IRQ_GPIO21 54 +#define IRQ_GPIO22 55 +#define IRQ_GPIO23 56 +#define IRQ_GPIO24 57 +#define IRQ_GPIO25 58 +#define IRQ_GPIO26 59 +#define IRQ_GPIO27 60 /* * The next 16 interrupts are for board specific purposes. Since * the kernel can only run on one machine at a time, we can re-use * these. If you need more, increase IRQ_BOARD_END, but keep it - * within sensible limits. IRQs 49 to 64 are available. + * within sensible limits. IRQs 61 to 76 are available. */ -#define IRQ_BOARD_START 50 -#define IRQ_BOARD_END 66 +#define IRQ_BOARD_START 61 +#define IRQ_BOARD_END 77 /* * Figure out the MAX IRQ number. diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c index 63e2901db41634..2dc6a2a9c60c0a 100644 --- a/arch/arm/mach-sa1100/irq.c +++ b/arch/arm/mach-sa1100/irq.c @@ -87,7 +87,7 @@ static struct irq_domain *sa1100_normal_irqdomain; */ static int GPIO_IRQ_rising_edge; static int GPIO_IRQ_falling_edge; -static int GPIO_IRQ_mask = (1 << 11) - 1; +static int GPIO_IRQ_mask; static int sa1100_gpio_type(struct irq_data *d, unsigned int type) { @@ -124,6 +124,26 @@ static void sa1100_gpio_ack(struct irq_data *d) GEDR = BIT(d->hwirq); } +static void sa1100_gpio_mask(struct irq_data *d) +{ + unsigned int mask = BIT(d->hwirq); + + GPIO_IRQ_mask &= ~mask; + + GRER &= ~mask; + GFER &= ~mask; +} + +static void sa1100_gpio_unmask(struct irq_data *d) +{ + unsigned int mask = BIT(d->hwirq); + + GPIO_IRQ_mask |= mask; + + GRER = GPIO_IRQ_rising_edge & GPIO_IRQ_mask; + GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask; +} + static int sa1100_gpio_wake(struct irq_data *d, unsigned int on) { if (on) @@ -139,8 +159,8 @@ static int sa1100_gpio_wake(struct irq_data *d, unsigned int on) static struct irq_chip sa1100_low_gpio_chip = { .name = "GPIO-l", .irq_ack = sa1100_gpio_ack, - .irq_mask = sa1100_mask_irq, - .irq_unmask = sa1100_unmask_irq, + .irq_mask = sa1100_gpio_mask, + .irq_unmask = sa1100_gpio_unmask, .irq_set_type = sa1100_gpio_type, .irq_set_wake = sa1100_gpio_wake, }; @@ -163,16 +183,16 @@ static struct irq_domain_ops sa1100_low_gpio_irqdomain_ops = { static struct irq_domain *sa1100_low_gpio_irqdomain; /* - * IRQ11 (GPIO11 through 27) handler. We enter here with the + * IRQ 0-11 (GPIO) handler. We enter here with the * irq_controller_lock held, and IRQs disabled. Decode the IRQ * and call the handler. */ static void -sa1100_high_gpio_handler(unsigned int irq, struct irq_desc *desc) +sa1100_gpio_handler(unsigned int irq, struct irq_desc *desc) { unsigned int mask; - mask = GEDR & 0xfffff800; + mask = GEDR; do { /* * clear down all currently active IRQ sources. @@ -180,8 +200,7 @@ sa1100_high_gpio_handler(unsigned int irq, struct irq_desc *desc) */ GEDR = mask; - irq = IRQ_GPIO11; - mask >>= 11; + irq = IRQ_GPIO0; do { if (mask & 1) generic_handle_irq(irq); @@ -189,7 +208,7 @@ sa1100_high_gpio_handler(unsigned int irq, struct irq_desc *desc) irq++; } while (mask); - mask = GEDR & 0xfffff800; + mask = GEDR; } while (mask); } @@ -198,31 +217,11 @@ sa1100_high_gpio_handler(unsigned int irq, struct irq_desc *desc) * In addition, the IRQs are all collected up into one bit in the * interrupt controller registers. */ -static void sa1100_high_gpio_mask(struct irq_data *d) -{ - unsigned int mask = BIT(d->hwirq); - - GPIO_IRQ_mask &= ~mask; - - GRER &= ~mask; - GFER &= ~mask; -} - -static void sa1100_high_gpio_unmask(struct irq_data *d) -{ - unsigned int mask = BIT(d->hwirq); - - GPIO_IRQ_mask |= mask; - - GRER = GPIO_IRQ_rising_edge & GPIO_IRQ_mask; - GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask; -} - static struct irq_chip sa1100_high_gpio_chip = { .name = "GPIO-h", .irq_ack = sa1100_gpio_ack, - .irq_mask = sa1100_high_gpio_mask, - .irq_unmask = sa1100_high_gpio_unmask, + .irq_mask = sa1100_gpio_mask, + .irq_unmask = sa1100_gpio_unmask, .irq_set_type = sa1100_gpio_type, .irq_set_wake = sa1100_gpio_wake, }; @@ -325,7 +324,7 @@ sa1100_handle_irq(struct pt_regs *regs) if (mask == 0) break; - handle_IRQ(ffs(mask) - 1 + IRQ_GPIO0, regs); + handle_IRQ(ffs(mask) - 1 + IRQ_GPIO0_SC, regs); } while (1); } @@ -350,22 +349,36 @@ void __init sa1100_init_irq(void) */ ICCR = 1; + sa1100_normal_irqdomain = irq_domain_add_legacy(NULL, + 32, IRQ_GPIO0_SC, 0, + &sa1100_normal_irqdomain_ops, NULL); + sa1100_low_gpio_irqdomain = irq_domain_add_legacy(NULL, 11, IRQ_GPIO0, 0, &sa1100_low_gpio_irqdomain_ops, NULL); - sa1100_normal_irqdomain = irq_domain_add_legacy(NULL, - 21, IRQ_GPIO11_27, 11, - &sa1100_normal_irqdomain_ops, NULL); - sa1100_high_gpio_irqdomain = irq_domain_add_legacy(NULL, 17, IRQ_GPIO11, 11, &sa1100_high_gpio_irqdomain_ops, NULL); + /* + * Install handlers for GPIO 0-10 edge detect interrupts + */ + irq_set_chained_handler(IRQ_GPIO0_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO1_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO2_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO3_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO4_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO5_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO6_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO7_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO8_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO9_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO10_SC, sa1100_gpio_handler); /* * Install handler for GPIO 11-27 edge detect interrupts */ - irq_set_chained_handler(IRQ_GPIO11_27, sa1100_high_gpio_handler); + irq_set_chained_handler(IRQ_GPIO11_27, sa1100_gpio_handler); set_handle_irq(sa1100_handle_irq); diff --git a/drivers/gpio/gpio-sa1100.c b/drivers/gpio/gpio-sa1100.c index a90be34e4d5c24..5b5d3c7bb84e9a 100644 --- a/drivers/gpio/gpio-sa1100.c +++ b/drivers/gpio/gpio-sa1100.c @@ -50,7 +50,7 @@ static int sa1100_direction_output(struct gpio_chip *chip, unsigned offset, int static int sa1100_to_irq(struct gpio_chip *chip, unsigned offset) { - return offset < 11 ? (IRQ_GPIO0 + offset) : (IRQ_GPIO11 - 11 + offset); + return IRQ_GPIO0 + offset; } static struct gpio_chip sa1100_gpio_chip = { From 590f2661069823cb14c6ca695a71b85e591d12a6 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Thu, 15 Jan 2015 02:30:58 +0100 Subject: [PATCH 301/601] ARM: 8279/1: sa1100: merge both GPIO irqdomains Now there is no difference between low and high GPIO irqdomains. Merge them into single irqdomain handling all GPIOs. Signed-off-by: Dmitry Eremin-Solenikov Tested-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/mach-sa1100/irq.c | 57 ++++++++------------------------------ 1 file changed, 11 insertions(+), 46 deletions(-) diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c index 2dc6a2a9c60c0a..5589b23ed043e0 100644 --- a/arch/arm/mach-sa1100/irq.c +++ b/arch/arm/mach-sa1100/irq.c @@ -154,10 +154,10 @@ static int sa1100_gpio_wake(struct irq_data *d, unsigned int on) } /* - * This is for IRQs from 0 to 10. + * This is for GPIO IRQs */ -static struct irq_chip sa1100_low_gpio_chip = { - .name = "GPIO-l", +static struct irq_chip sa1100_gpio_chip = { + .name = "GPIO", .irq_ack = sa1100_gpio_ack, .irq_mask = sa1100_gpio_mask, .irq_unmask = sa1100_gpio_unmask, @@ -165,22 +165,22 @@ static struct irq_chip sa1100_low_gpio_chip = { .irq_set_wake = sa1100_gpio_wake, }; -static int sa1100_low_gpio_irqdomain_map(struct irq_domain *d, +static int sa1100_gpio_irqdomain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) { - irq_set_chip_and_handler(irq, &sa1100_low_gpio_chip, + irq_set_chip_and_handler(irq, &sa1100_gpio_chip, handle_edge_irq); set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); return 0; } -static struct irq_domain_ops sa1100_low_gpio_irqdomain_ops = { - .map = sa1100_low_gpio_irqdomain_map, +static struct irq_domain_ops sa1100_gpio_irqdomain_ops = { + .map = sa1100_gpio_irqdomain_map, .xlate = irq_domain_xlate_onetwocell, }; -static struct irq_domain *sa1100_low_gpio_irqdomain; +static struct irq_domain *sa1100_gpio_irqdomain; /* * IRQ 0-11 (GPIO) handler. We enter here with the @@ -212,37 +212,6 @@ sa1100_gpio_handler(unsigned int irq, struct irq_desc *desc) } while (mask); } -/* - * Like GPIO0 to 10, GPIO11-27 IRQs need to be handled specially. - * In addition, the IRQs are all collected up into one bit in the - * interrupt controller registers. - */ -static struct irq_chip sa1100_high_gpio_chip = { - .name = "GPIO-h", - .irq_ack = sa1100_gpio_ack, - .irq_mask = sa1100_gpio_mask, - .irq_unmask = sa1100_gpio_unmask, - .irq_set_type = sa1100_gpio_type, - .irq_set_wake = sa1100_gpio_wake, -}; - -static int sa1100_high_gpio_irqdomain_map(struct irq_domain *d, - unsigned int irq, irq_hw_number_t hwirq) -{ - irq_set_chip_and_handler(irq, &sa1100_high_gpio_chip, - handle_edge_irq); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); - - return 0; -} - -static struct irq_domain_ops sa1100_high_gpio_irqdomain_ops = { - .map = sa1100_high_gpio_irqdomain_map, - .xlate = irq_domain_xlate_onetwocell, -}; - -static struct irq_domain *sa1100_high_gpio_irqdomain; - static struct resource irq_resource = DEFINE_RES_MEM_NAMED(0x90050000, SZ_64K, "irqs"); @@ -353,13 +322,9 @@ void __init sa1100_init_irq(void) 32, IRQ_GPIO0_SC, 0, &sa1100_normal_irqdomain_ops, NULL); - sa1100_low_gpio_irqdomain = irq_domain_add_legacy(NULL, - 11, IRQ_GPIO0, 0, - &sa1100_low_gpio_irqdomain_ops, NULL); - - sa1100_high_gpio_irqdomain = irq_domain_add_legacy(NULL, - 17, IRQ_GPIO11, 11, - &sa1100_high_gpio_irqdomain_ops, NULL); + sa1100_gpio_irqdomain = irq_domain_add_legacy(NULL, + 28, IRQ_GPIO0, 0, + &sa1100_gpio_irqdomain_ops, NULL); /* * Install handlers for GPIO 0-10 edge detect interrupts From a82be3f0f1a4a03ec43750b12cb48871ec2a2e28 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Thu, 15 Jan 2015 02:31:48 +0100 Subject: [PATCH 302/601] ARM: 8280/1: sa1100: switch to irq_domain_add_simple() As now both SC and GPIO irq domains start from 0 hwirq and do not contain holes, switch to using irq_domain_add_simple() instead of irq_domain_add_legacy(). Signed-off-by: Dmitry Eremin-Solenikov Tested-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/mach-sa1100/irq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c index 5589b23ed043e0..a9dfe8e16d95d8 100644 --- a/arch/arm/mach-sa1100/irq.c +++ b/arch/arm/mach-sa1100/irq.c @@ -318,12 +318,12 @@ void __init sa1100_init_irq(void) */ ICCR = 1; - sa1100_normal_irqdomain = irq_domain_add_legacy(NULL, - 32, IRQ_GPIO0_SC, 0, + sa1100_normal_irqdomain = irq_domain_add_simple(NULL, + 32, IRQ_GPIO0_SC, &sa1100_normal_irqdomain_ops, NULL); - sa1100_gpio_irqdomain = irq_domain_add_legacy(NULL, - 28, IRQ_GPIO0, 0, + sa1100_gpio_irqdomain = irq_domain_add_simple(NULL, + 28, IRQ_GPIO0, &sa1100_gpio_irqdomain_ops, NULL); /* From a0ea298d325616b58760c5182705df76912e0d73 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Thu, 15 Jan 2015 02:32:26 +0100 Subject: [PATCH 303/601] ARM: 8281/1: sa1100: move GPIO-related IRQ code to gpio driver As a part of driver consolidation, move GPIO-related IRQ code to drivers/gpio/gpio-sa1100.c. The code does not use GPIOLIB_IRQCHIP (yet), because sa1100 does not have a device for gpios, which is a requirement for GPIOLIB_IRQCHIP. This will be the next step. Signed-off-by: Dmitry Eremin-Solenikov Tested-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/mach-sa1100/irq.c | 174 -------------------------------- drivers/gpio/gpio-sa1100.c | 197 +++++++++++++++++++++++++++++++++++++ 2 files changed, 197 insertions(+), 174 deletions(-) diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c index a9dfe8e16d95d8..a7d116af4a4f44 100644 --- a/arch/arm/mach-sa1100/irq.c +++ b/arch/arm/mach-sa1100/irq.c @@ -80,138 +80,6 @@ static struct irq_domain_ops sa1100_normal_irqdomain_ops = { static struct irq_domain *sa1100_normal_irqdomain; -/* - * SA1100 GPIO edge detection for IRQs: - * IRQs are generated on Falling-Edge, Rising-Edge, or both. - * Use this instead of directly setting GRER/GFER. - */ -static int GPIO_IRQ_rising_edge; -static int GPIO_IRQ_falling_edge; -static int GPIO_IRQ_mask; - -static int sa1100_gpio_type(struct irq_data *d, unsigned int type) -{ - unsigned int mask; - - mask = BIT(d->hwirq); - - if (type == IRQ_TYPE_PROBE) { - if ((GPIO_IRQ_rising_edge | GPIO_IRQ_falling_edge) & mask) - return 0; - type = IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING; - } - - if (type & IRQ_TYPE_EDGE_RISING) { - GPIO_IRQ_rising_edge |= mask; - } else - GPIO_IRQ_rising_edge &= ~mask; - if (type & IRQ_TYPE_EDGE_FALLING) { - GPIO_IRQ_falling_edge |= mask; - } else - GPIO_IRQ_falling_edge &= ~mask; - - GRER = GPIO_IRQ_rising_edge & GPIO_IRQ_mask; - GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask; - - return 0; -} - -/* - * GPIO IRQs must be acknowledged. - */ -static void sa1100_gpio_ack(struct irq_data *d) -{ - GEDR = BIT(d->hwirq); -} - -static void sa1100_gpio_mask(struct irq_data *d) -{ - unsigned int mask = BIT(d->hwirq); - - GPIO_IRQ_mask &= ~mask; - - GRER &= ~mask; - GFER &= ~mask; -} - -static void sa1100_gpio_unmask(struct irq_data *d) -{ - unsigned int mask = BIT(d->hwirq); - - GPIO_IRQ_mask |= mask; - - GRER = GPIO_IRQ_rising_edge & GPIO_IRQ_mask; - GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask; -} - -static int sa1100_gpio_wake(struct irq_data *d, unsigned int on) -{ - if (on) - PWER |= BIT(d->hwirq); - else - PWER &= ~BIT(d->hwirq); - return 0; -} - -/* - * This is for GPIO IRQs - */ -static struct irq_chip sa1100_gpio_chip = { - .name = "GPIO", - .irq_ack = sa1100_gpio_ack, - .irq_mask = sa1100_gpio_mask, - .irq_unmask = sa1100_gpio_unmask, - .irq_set_type = sa1100_gpio_type, - .irq_set_wake = sa1100_gpio_wake, -}; - -static int sa1100_gpio_irqdomain_map(struct irq_domain *d, - unsigned int irq, irq_hw_number_t hwirq) -{ - irq_set_chip_and_handler(irq, &sa1100_gpio_chip, - handle_edge_irq); - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); - - return 0; -} - -static struct irq_domain_ops sa1100_gpio_irqdomain_ops = { - .map = sa1100_gpio_irqdomain_map, - .xlate = irq_domain_xlate_onetwocell, -}; - -static struct irq_domain *sa1100_gpio_irqdomain; - -/* - * IRQ 0-11 (GPIO) handler. We enter here with the - * irq_controller_lock held, and IRQs disabled. Decode the IRQ - * and call the handler. - */ -static void -sa1100_gpio_handler(unsigned int irq, struct irq_desc *desc) -{ - unsigned int mask; - - mask = GEDR; - do { - /* - * clear down all currently active IRQ sources. - * We will be processing them all. - */ - GEDR = mask; - - irq = IRQ_GPIO0; - do { - if (mask & 1) - generic_handle_irq(irq); - mask >>= 1; - irq++; - } while (mask); - - mask = GEDR; - } while (mask); -} - static struct resource irq_resource = DEFINE_RES_MEM_NAMED(0x90050000, SZ_64K, "irqs"); @@ -238,17 +106,6 @@ static int sa1100irq_suspend(void) IC_GPIO6|IC_GPIO5|IC_GPIO4|IC_GPIO3|IC_GPIO2| IC_GPIO1|IC_GPIO0); - /* - * Set the appropriate edges for wakeup. - */ - GRER = PWER & GPIO_IRQ_rising_edge; - GFER = PWER & GPIO_IRQ_falling_edge; - - /* - * Clear any pending GPIO interrupts. - */ - GEDR = GEDR; - return 0; } @@ -260,9 +117,6 @@ static void sa1100irq_resume(void) ICCR = st->iccr; ICLR = st->iclr; - GRER = GPIO_IRQ_rising_edge & GPIO_IRQ_mask; - GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask; - ICMR = st->icmr; } } @@ -307,11 +161,6 @@ void __init sa1100_init_irq(void) /* all IRQs are IRQ, not FIQ */ ICLR = 0; - /* clear all GPIO edge detects */ - GFER = 0; - GRER = 0; - GEDR = -1; - /* * Whatever the doc says, this has to be set for the wait-on-irq * instruction to work... on a SA1100 rev 9 at least. @@ -322,29 +171,6 @@ void __init sa1100_init_irq(void) 32, IRQ_GPIO0_SC, &sa1100_normal_irqdomain_ops, NULL); - sa1100_gpio_irqdomain = irq_domain_add_simple(NULL, - 28, IRQ_GPIO0, - &sa1100_gpio_irqdomain_ops, NULL); - - /* - * Install handlers for GPIO 0-10 edge detect interrupts - */ - irq_set_chained_handler(IRQ_GPIO0_SC, sa1100_gpio_handler); - irq_set_chained_handler(IRQ_GPIO1_SC, sa1100_gpio_handler); - irq_set_chained_handler(IRQ_GPIO2_SC, sa1100_gpio_handler); - irq_set_chained_handler(IRQ_GPIO3_SC, sa1100_gpio_handler); - irq_set_chained_handler(IRQ_GPIO4_SC, sa1100_gpio_handler); - irq_set_chained_handler(IRQ_GPIO5_SC, sa1100_gpio_handler); - irq_set_chained_handler(IRQ_GPIO6_SC, sa1100_gpio_handler); - irq_set_chained_handler(IRQ_GPIO7_SC, sa1100_gpio_handler); - irq_set_chained_handler(IRQ_GPIO8_SC, sa1100_gpio_handler); - irq_set_chained_handler(IRQ_GPIO9_SC, sa1100_gpio_handler); - irq_set_chained_handler(IRQ_GPIO10_SC, sa1100_gpio_handler); - /* - * Install handler for GPIO 11-27 edge detect interrupts - */ - irq_set_chained_handler(IRQ_GPIO11_27, sa1100_gpio_handler); - set_handle_irq(sa1100_handle_irq); sa1100_init_gpio(); diff --git a/drivers/gpio/gpio-sa1100.c b/drivers/gpio/gpio-sa1100.c index 5b5d3c7bb84e9a..bec397a60204ba 100644 --- a/drivers/gpio/gpio-sa1100.c +++ b/drivers/gpio/gpio-sa1100.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -64,7 +65,203 @@ static struct gpio_chip sa1100_gpio_chip = { .ngpio = GPIO_MAX + 1, }; +/* + * SA1100 GPIO edge detection for IRQs: + * IRQs are generated on Falling-Edge, Rising-Edge, or both. + * Use this instead of directly setting GRER/GFER. + */ +static int GPIO_IRQ_rising_edge; +static int GPIO_IRQ_falling_edge; +static int GPIO_IRQ_mask; + +static int sa1100_gpio_type(struct irq_data *d, unsigned int type) +{ + unsigned int mask; + + mask = BIT(d->hwirq); + + if (type == IRQ_TYPE_PROBE) { + if ((GPIO_IRQ_rising_edge | GPIO_IRQ_falling_edge) & mask) + return 0; + type = IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING; + } + + if (type & IRQ_TYPE_EDGE_RISING) + GPIO_IRQ_rising_edge |= mask; + else + GPIO_IRQ_rising_edge &= ~mask; + if (type & IRQ_TYPE_EDGE_FALLING) + GPIO_IRQ_falling_edge |= mask; + else + GPIO_IRQ_falling_edge &= ~mask; + + GRER = GPIO_IRQ_rising_edge & GPIO_IRQ_mask; + GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask; + + return 0; +} + +/* + * GPIO IRQs must be acknowledged. + */ +static void sa1100_gpio_ack(struct irq_data *d) +{ + GEDR = BIT(d->hwirq); +} + +static void sa1100_gpio_mask(struct irq_data *d) +{ + unsigned int mask = BIT(d->hwirq); + + GPIO_IRQ_mask &= ~mask; + + GRER &= ~mask; + GFER &= ~mask; +} + +static void sa1100_gpio_unmask(struct irq_data *d) +{ + unsigned int mask = BIT(d->hwirq); + + GPIO_IRQ_mask |= mask; + + GRER = GPIO_IRQ_rising_edge & GPIO_IRQ_mask; + GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask; +} + +static int sa1100_gpio_wake(struct irq_data *d, unsigned int on) +{ + if (on) + PWER |= BIT(d->hwirq); + else + PWER &= ~BIT(d->hwirq); + return 0; +} + +/* + * This is for GPIO IRQs + */ +static struct irq_chip sa1100_gpio_irq_chip = { + .name = "GPIO", + .irq_ack = sa1100_gpio_ack, + .irq_mask = sa1100_gpio_mask, + .irq_unmask = sa1100_gpio_unmask, + .irq_set_type = sa1100_gpio_type, + .irq_set_wake = sa1100_gpio_wake, +}; + +static int sa1100_gpio_irqdomain_map(struct irq_domain *d, + unsigned int irq, irq_hw_number_t hwirq) +{ + irq_set_chip_and_handler(irq, &sa1100_gpio_irq_chip, + handle_edge_irq); + set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); + + return 0; +} + +static struct irq_domain_ops sa1100_gpio_irqdomain_ops = { + .map = sa1100_gpio_irqdomain_map, + .xlate = irq_domain_xlate_onetwocell, +}; + +static struct irq_domain *sa1100_gpio_irqdomain; + +/* + * IRQ 0-11 (GPIO) handler. We enter here with the + * irq_controller_lock held, and IRQs disabled. Decode the IRQ + * and call the handler. + */ +static void +sa1100_gpio_handler(unsigned int irq, struct irq_desc *desc) +{ + unsigned int mask; + + mask = GEDR; + do { + /* + * clear down all currently active IRQ sources. + * We will be processing them all. + */ + GEDR = mask; + + irq = IRQ_GPIO0; + do { + if (mask & 1) + generic_handle_irq(irq); + mask >>= 1; + irq++; + } while (mask); + + mask = GEDR; + } while (mask); +} + +static int sa1100_gpio_suspend(void) +{ + /* + * Set the appropriate edges for wakeup. + */ + GRER = PWER & GPIO_IRQ_rising_edge; + GFER = PWER & GPIO_IRQ_falling_edge; + + /* + * Clear any pending GPIO interrupts. + */ + GEDR = GEDR; + + return 0; +} + +static void sa1100_gpio_resume(void) +{ + GRER = GPIO_IRQ_rising_edge & GPIO_IRQ_mask; + GFER = GPIO_IRQ_falling_edge & GPIO_IRQ_mask; +} + +static struct syscore_ops sa1100_gpio_syscore_ops = { + .suspend = sa1100_gpio_suspend, + .resume = sa1100_gpio_resume, +}; + +static int __init sa1100_gpio_init_devicefs(void) +{ + register_syscore_ops(&sa1100_gpio_syscore_ops); + return 0; +} + +device_initcall(sa1100_gpio_init_devicefs); + void __init sa1100_init_gpio(void) { + /* clear all GPIO edge detects */ + GFER = 0; + GRER = 0; + GEDR = -1; + gpiochip_add(&sa1100_gpio_chip); + + sa1100_gpio_irqdomain = irq_domain_add_simple(NULL, + 28, IRQ_GPIO0, + &sa1100_gpio_irqdomain_ops, NULL); + + /* + * Install handlers for GPIO 0-10 edge detect interrupts + */ + irq_set_chained_handler(IRQ_GPIO0_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO1_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO2_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO3_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO4_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO5_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO6_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO7_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO8_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO9_SC, sa1100_gpio_handler); + irq_set_chained_handler(IRQ_GPIO10_SC, sa1100_gpio_handler); + /* + * Install handler for GPIO 11-27 edge detect interrupts + */ + irq_set_chained_handler(IRQ_GPIO11_27, sa1100_gpio_handler); + } From 364e386917c8cdfadc5abd5a9b2c5cbac1c2133e Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Thu, 15 Jan 2015 02:33:23 +0100 Subject: [PATCH 304/601] ARM: 8282/1: sa1100: use handle_domain_irq Use handle_domain_irq instead of handle_IRQ to automatically map hardware irq number to virq. Signed-off-by: Dmitry Eremin-Solenikov Tested-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/mach-sa1100/irq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-sa1100/irq.c b/arch/arm/mach-sa1100/irq.c index a7d116af4a4f44..65aebfa66fe5c2 100644 --- a/arch/arm/mach-sa1100/irq.c +++ b/arch/arm/mach-sa1100/irq.c @@ -147,7 +147,8 @@ sa1100_handle_irq(struct pt_regs *regs) if (mask == 0) break; - handle_IRQ(ffs(mask) - 1 + IRQ_GPIO0_SC, regs); + handle_domain_irq(sa1100_normal_irqdomain, + ffs(mask) - 1, regs); } while (1); } From 1ff990c01829adc24b3f88c07ec66a7c258a93e7 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Thu, 15 Jan 2015 03:04:41 +0100 Subject: [PATCH 305/601] ARM: 8283/1: sa1100: collie: clear PWER register on machine init Let kernel drivers to control wakeup sources instead of hardcoding them in the collie.c board file. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Russell King --- arch/arm/mach-sa1100/collie.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c index b90c7d8283910d..bd5155d04519a3 100644 --- a/arch/arm/mach-sa1100/collie.c +++ b/arch/arm/mach-sa1100/collie.c @@ -371,8 +371,7 @@ static void __init collie_init(void) PPC_LDD6 | PPC_LDD7 | PPC_L_PCLK | PPC_L_LCLK | PPC_L_FCLK | PPC_L_BIAS | PPC_TXD1 | PPC_TXD2 | PPC_TXD3 | PPC_TXD4 | PPC_SCLK | PPC_SFRM; - PWER = _COLLIE_GPIO_AC_IN | _COLLIE_GPIO_CO | _COLLIE_GPIO_ON_KEY | - _COLLIE_GPIO_WAKEUP | _COLLIE_GPIO_nREMOCON_INT | PWER_RTC; + PWER = 0; PGSR = _COLLIE_GPIO_nREMOCON_ON; From e461894dc2ce7778ccde1c3483c9b15a85a7fc5f Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Thu, 15 Jan 2015 03:06:22 +0100 Subject: [PATCH 306/601] ARM: 8284/1: sa1100: clear RCSR_SMR on resume StrongARM core uses RCSR SMR bit to tell to bootloader that it was reset by entering the sleep mode. After we have resumed, there is little point in having that bit enabled. Moreover, if this bit is set before reboot, the bootloader can become confused. Thus clear the SMR bit on resume just before clearing the scratchpad (resume address) register. Cc: stable@vger.kernel.org Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: Russell King --- arch/arm/mach-sa1100/pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-sa1100/pm.c b/arch/arm/mach-sa1100/pm.c index 6645d1e31f14d8..34853d5dfda28b 100644 --- a/arch/arm/mach-sa1100/pm.c +++ b/arch/arm/mach-sa1100/pm.c @@ -81,6 +81,7 @@ static int sa11x0_pm_enter(suspend_state_t state) /* * Ensure not to come back here if it wasn't intended */ + RCSR = RCSR_SMR; PSPR = 0; /* From db1177ee6206f957f579fdbfebb2c6a64e82c694 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 29 Jan 2015 14:38:38 +0100 Subject: [PATCH 307/601] s390/mm: correct missing space when reporting user process faults Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 232c14ea42695b..0d02e48cf04a91 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -261,7 +261,7 @@ static inline void report_user_fault(struct pt_regs *regs, long signr) return; if (!printk_ratelimit()) return; - printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d", + printk(KERN_ALERT "User process fault: interruption code %04x ilc:%d ", regs->int_code & 0xffff, regs->int_code >> 17); print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN); printk(KERN_CONT "\n"); From 5c6497c50f8d809eac6d01512c291a1f67382abd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 29 Jan 2015 13:45:35 +0100 Subject: [PATCH 308/601] s390/jump label: add sanity checks Add sanity checks to verify that only expected code will be replaced. If the code patterns do not match print the code patterns and panic, since something went terribly wrong. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/jump_label.c | 56 ++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index b987ab2c1541af..25aef40584f748 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -22,31 +22,59 @@ struct insn_args { enum jump_label_type type; }; +static void jump_label_make_nop(struct jump_entry *entry, struct insn *insn) +{ + /* brcl 0,0 */ + insn->opcode = 0xc004; + insn->offset = 0; +} + +static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn) +{ + /* brcl 15,offset */ + insn->opcode = 0xc0f4; + insn->offset = (entry->target - entry->code) >> 1; +} + +static void jump_label_bug(struct jump_entry *entry, struct insn *insn) +{ + unsigned char *ipc = (unsigned char *)entry->code; + unsigned char *ipe = (unsigned char *)insn; + + pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc); + pr_emerg("Found: %02x %02x %02x %02x %02x %02x\n", + ipc[0], ipc[1], ipc[2], ipc[3], ipc[4], ipc[5]); + pr_emerg("Expected: %02x %02x %02x %02x %02x %02x\n", + ipe[0], ipe[1], ipe[2], ipe[3], ipe[4], ipe[5]); + panic("Corrupted kernel text"); +} + static void __jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) + enum jump_label_type type, + int init) { - struct insn insn; - int rc; + struct insn old, new; if (type == JUMP_LABEL_ENABLE) { - /* brcl 15,offset */ - insn.opcode = 0xc0f4; - insn.offset = (entry->target - entry->code) >> 1; + jump_label_make_nop(entry, &old); + jump_label_make_branch(entry, &new); } else { - /* brcl 0,0 */ - insn.opcode = 0xc004; - insn.offset = 0; + if (init) + jump_label_make_nop(entry, &old); + else + jump_label_make_branch(entry, &old); + jump_label_make_nop(entry, &new); } - - rc = probe_kernel_write((void *)entry->code, &insn, JUMP_LABEL_NOP_SIZE); - WARN_ON_ONCE(rc < 0); + if (memcmp((void *)entry->code, &old, sizeof(old))) + jump_label_bug(entry, &old); + probe_kernel_write((void *)entry->code, &new, sizeof(new)); } static int __sm_arch_jump_label_transform(void *data) { struct insn_args *args = data; - __jump_label_transform(args->entry, args->type); + __jump_label_transform(args->entry, args->type, 0); return 0; } @@ -64,7 +92,7 @@ void arch_jump_label_transform(struct jump_entry *entry, void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type) { - __jump_label_transform(entry, type); + __jump_label_transform(entry, type, 1); } #endif From d5caa4dbf9bd2ad8cd7f6be0ca76722be947182b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 29 Jan 2015 14:10:22 +0100 Subject: [PATCH 309/601] s390/jump label: use different nop instruction Use a brcl 0,2 instruction for jump label nops during compile time, so we don't mix up the different nops during mcount/hotpatch call site detection. The initial jump label code instruction replacement will exchange these instructions with either a branch or a brcl 0,0 instruction. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/jump_label.h | 7 ++++++- arch/s390/kernel/jump_label.c | 19 +++++++++++++------ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h index 346b1c85ffb40d..58642fd29c878c 100644 --- a/arch/s390/include/asm/jump_label.h +++ b/arch/s390/include/asm/jump_label.h @@ -4,6 +4,7 @@ #include #define JUMP_LABEL_NOP_SIZE 6 +#define JUMP_LABEL_NOP_OFFSET 2 #ifdef CONFIG_64BIT #define ASM_PTR ".quad" @@ -13,9 +14,13 @@ #define ASM_ALIGN ".balign 4" #endif +/* + * We use a brcl 0,2 instruction for jump labels at compile time so it + * can be easily distinguished from a hotpatch generated instruction. + */ static __always_inline bool arch_static_branch(struct static_key *key) { - asm_volatile_goto("0: brcl 0,0\n" + asm_volatile_goto("0: brcl 0,"__stringify(JUMP_LABEL_NOP_OFFSET)"\n" ".pushsection __jump_table, \"aw\"\n" ASM_ALIGN "\n" ASM_PTR " 0b, %l[label], %0\n" diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index 25aef40584f748..cb2d51e779dfaf 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -49,6 +49,11 @@ static void jump_label_bug(struct jump_entry *entry, struct insn *insn) panic("Corrupted kernel text"); } +static struct insn orignop = { + .opcode = 0xc004, + .offset = JUMP_LABEL_NOP_OFFSET >> 1, +}; + static void __jump_label_transform(struct jump_entry *entry, enum jump_label_type type, int init) @@ -59,14 +64,16 @@ static void __jump_label_transform(struct jump_entry *entry, jump_label_make_nop(entry, &old); jump_label_make_branch(entry, &new); } else { - if (init) - jump_label_make_nop(entry, &old); - else - jump_label_make_branch(entry, &old); + jump_label_make_branch(entry, &old); jump_label_make_nop(entry, &new); } - if (memcmp((void *)entry->code, &old, sizeof(old))) - jump_label_bug(entry, &old); + if (init) { + if (memcmp((void *)entry->code, &orignop, sizeof(orignop))) + jump_label_bug(entry, &old); + } else { + if (memcmp((void *)entry->code, &old, sizeof(old))) + jump_label_bug(entry, &old); + } probe_kernel_write((void *)entry->code, &new, sizeof(new)); } From 6917c857e3ab5bc5e15d2b1ff34dc2443ccf5b0d Mon Sep 17 00:00:00 2001 From: Dave P Martin Date: Thu, 29 Jan 2015 16:24:43 +0000 Subject: [PATCH 310/601] arm64: Avoid breakage caused by .altmacro in fpsimd save/restore macros Alternate macro mode is not a property of a macro definition, but a gas runtime state that alters the way macros are expanded for ever after (until .noaltmacro is seen). This means that subsequent assembly code that calls other macros can break if fpsimdmacros.h is included. Since these instruction sequences are simple (if dull -- but in a good way), this patch solves the problem by simply expanding the .irp loops. The pre-existing fpsimd_{save,restore} macros weren't rolled with .irp anyway and the sequences affected are short, so this change restores consistency at little cost. Signed-off-by: Dave Martin Acked-by: Marc Zyngier Acked-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimdmacros.h | 43 ++++++++++++++++++++------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 007618b8188c08..a2daf129302841 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -76,7 +76,6 @@ fpsimd_restore_fpcr x\tmpnr, \state .endm -.altmacro .macro fpsimd_save_partial state, numnr, tmpnr1, tmpnr2 mrs x\tmpnr1, fpsr str w\numnr, [\state, #8] @@ -86,11 +85,22 @@ add \state, \state, x\numnr, lsl #4 sub x\tmpnr1, x\tmpnr1, x\numnr, lsl #1 br x\tmpnr1 - .irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0 - .irp qb, %(qa + 1) - stp q\qa, q\qb, [\state, # -16 * \qa - 16] - .endr - .endr + stp q30, q31, [\state, #-16 * 30 - 16] + stp q28, q29, [\state, #-16 * 28 - 16] + stp q26, q27, [\state, #-16 * 26 - 16] + stp q24, q25, [\state, #-16 * 24 - 16] + stp q22, q23, [\state, #-16 * 22 - 16] + stp q20, q21, [\state, #-16 * 20 - 16] + stp q18, q19, [\state, #-16 * 18 - 16] + stp q16, q17, [\state, #-16 * 16 - 16] + stp q14, q15, [\state, #-16 * 14 - 16] + stp q12, q13, [\state, #-16 * 12 - 16] + stp q10, q11, [\state, #-16 * 10 - 16] + stp q8, q9, [\state, #-16 * 8 - 16] + stp q6, q7, [\state, #-16 * 6 - 16] + stp q4, q5, [\state, #-16 * 4 - 16] + stp q2, q3, [\state, #-16 * 2 - 16] + stp q0, q1, [\state, #-16 * 0 - 16] 0: .endm @@ -103,10 +113,21 @@ add \state, \state, x\tmpnr2, lsl #4 sub x\tmpnr1, x\tmpnr1, x\tmpnr2, lsl #1 br x\tmpnr1 - .irp qa, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0 - .irp qb, %(qa + 1) - ldp q\qa, q\qb, [\state, # -16 * \qa - 16] - .endr - .endr + ldp q30, q31, [\state, #-16 * 30 - 16] + ldp q28, q29, [\state, #-16 * 28 - 16] + ldp q26, q27, [\state, #-16 * 26 - 16] + ldp q24, q25, [\state, #-16 * 24 - 16] + ldp q22, q23, [\state, #-16 * 22 - 16] + ldp q20, q21, [\state, #-16 * 20 - 16] + ldp q18, q19, [\state, #-16 * 18 - 16] + ldp q16, q17, [\state, #-16 * 16 - 16] + ldp q14, q15, [\state, #-16 * 14 - 16] + ldp q12, q13, [\state, #-16 * 12 - 16] + ldp q10, q11, [\state, #-16 * 10 - 16] + ldp q8, q9, [\state, #-16 * 8 - 16] + ldp q6, q7, [\state, #-16 * 6 - 16] + ldp q4, q5, [\state, #-16 * 4 - 16] + ldp q2, q3, [\state, #-16 * 2 - 16] + ldp q0, q1, [\state, #-16 * 0 - 16] 0: .endm From 41089357e1874559458f672b9591436ffd3a12e9 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 29 Jan 2015 17:33:35 +0000 Subject: [PATCH 311/601] arm64: Fix section mismatch on alloc_init_p[mu]d() Commit 523d6e9fae93 (arm64:mm: free the useless initial page table) introduced a BUG_ON checking for the allocation type but it was referring the early_alloc() function in the __init section. This patch changes the check to slab_is_available() and also relaxes the BUG to a WARN_ON_ONCE. Reported-by: Will Deacon Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 2eeac10aa0cf1c..c6daaf6c6f9790 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -159,8 +160,8 @@ static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, flush_tlb_all(); if (pmd_table(old_pmd)) { phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0)); - BUG_ON(alloc != early_alloc); - memblock_free(table, PAGE_SIZE); + if (!WARN_ON_ONCE(slab_is_available())) + memblock_free(table, PAGE_SIZE); } } } else { @@ -220,8 +221,8 @@ static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, flush_tlb_all(); if (pud_table(old_pud)) { phys_addr_t table = __pa(pmd_offset(&old_pud, 0)); - BUG_ON(alloc != early_alloc); - memblock_free(table, PAGE_SIZE); + if (!WARN_ON_ONCE(slab_is_available())) + memblock_free(table, PAGE_SIZE); } } } else { From 6d5f6a0eba15c1d2cfd367f1c3fb77ab2bfe8ca8 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 22 Jan 2015 19:05:06 -0600 Subject: [PATCH 312/601] powerpc/fsl_pci: Fix pci stack build bug with FRAME_WARN Fix this: CC arch/powerpc/sysdev/fsl_pci.o arch/powerpc/sysdev/fsl_pci.c: In function 'fsl_pcie_check_link': arch/powerpc/sysdev/fsl_pci.c:91:1: error: the frame size of 1360 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] when configuring FRAME_WARN, by refactoring indirect_read_config() to take hose and bus number instead of the 1344-byte struct pci_bus. Signed-off-by: Kim Phillips Signed-off-by: Scott Wood --- arch/powerpc/include/asm/pci-bridge.h | 4 ++++ arch/powerpc/sysdev/fsl_pci.c | 11 ++++------- arch/powerpc/sysdev/indirect_pci.c | 25 +++++++++++++++++-------- 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 725247beebecda..546d036fe925f4 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -119,6 +119,10 @@ extern void setup_indirect_pci(struct pci_controller* hose, extern int indirect_read_config(struct pci_bus *bus, unsigned int devfn, int offset, int len, u32 *val); +extern int __indirect_read_config(struct pci_controller *hose, + unsigned char bus_number, unsigned int devfn, + int offset, int len, u32 *val); + extern int indirect_write_config(struct pci_bus *bus, unsigned int devfn, int offset, int len, u32 val); diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 6455c1eada1a16..7cc215e86d8297 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -68,13 +68,10 @@ static int fsl_pcie_check_link(struct pci_controller *hose) u32 val = 0; if (hose->indirect_type & PPC_INDIRECT_TYPE_FSL_CFG_REG_LINK) { - if (hose->ops->read == fsl_indirect_read_config) { - struct pci_bus bus; - bus.number = hose->first_busno; - bus.sysdata = hose; - bus.ops = hose->ops; - indirect_read_config(&bus, 0, PCIE_LTSSM, 4, &val); - } else + if (hose->ops->read == fsl_indirect_read_config) + __indirect_read_config(hose, hose->first_busno, 0, + PCIE_LTSSM, 4, &val); + else early_read_config_dword(hose, 0, 0, PCIE_LTSSM, &val); if (val < PCIE_LTSSM_L0) return 1; diff --git a/arch/powerpc/sysdev/indirect_pci.c b/arch/powerpc/sysdev/indirect_pci.c index 1f6c570d66d45e..692de9dbc680d3 100644 --- a/arch/powerpc/sysdev/indirect_pci.c +++ b/arch/powerpc/sysdev/indirect_pci.c @@ -20,31 +20,31 @@ #include #include -int indirect_read_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 *val) +int __indirect_read_config(struct pci_controller *hose, + unsigned char bus_number, unsigned int devfn, + int offset, int len, u32 *val) { - struct pci_controller *hose = pci_bus_to_host(bus); volatile void __iomem *cfg_data; u8 cfg_type = 0; u32 bus_no, reg; if (hose->indirect_type & PPC_INDIRECT_TYPE_NO_PCIE_LINK) { - if (bus->number != hose->first_busno) + if (bus_number != hose->first_busno) return PCIBIOS_DEVICE_NOT_FOUND; if (devfn != 0) return PCIBIOS_DEVICE_NOT_FOUND; } if (ppc_md.pci_exclude_device) - if (ppc_md.pci_exclude_device(hose, bus->number, devfn)) + if (ppc_md.pci_exclude_device(hose, bus_number, devfn)) return PCIBIOS_DEVICE_NOT_FOUND; if (hose->indirect_type & PPC_INDIRECT_TYPE_SET_CFG_TYPE) - if (bus->number != hose->first_busno) + if (bus_number != hose->first_busno) cfg_type = 1; - bus_no = (bus->number == hose->first_busno) ? - hose->self_busno : bus->number; + bus_no = (bus_number == hose->first_busno) ? + hose->self_busno : bus_number; if (hose->indirect_type & PPC_INDIRECT_TYPE_EXT_REG) reg = ((offset & 0xf00) << 16) | (offset & 0xfc); @@ -77,6 +77,15 @@ int indirect_read_config(struct pci_bus *bus, unsigned int devfn, return PCIBIOS_SUCCESSFUL; } +int indirect_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + + return __indirect_read_config(hose, bus->number, devfn, offset, len, + val); +} + int indirect_write_config(struct pci_bus *bus, unsigned int devfn, int offset, int len, u32 val) { From e8d2081bb4d7a3bb6d779ac3829b81aceabad065 Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Wed, 21 Jan 2015 16:31:50 -0600 Subject: [PATCH 313/601] powerpc/dts: Remove T4240 emulator support Probably we should have not upstreamed this in the first place Signed-off-by: Emil Medve Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/t4240emu.dts | 281 ----------------------------- 1 file changed, 281 deletions(-) delete mode 100644 arch/powerpc/boot/dts/t4240emu.dts diff --git a/arch/powerpc/boot/dts/t4240emu.dts b/arch/powerpc/boot/dts/t4240emu.dts deleted file mode 100644 index decaf357db9c56..00000000000000 --- a/arch/powerpc/boot/dts/t4240emu.dts +++ /dev/null @@ -1,281 +0,0 @@ -/* - * T4240 emulator Device Tree Source - * - * Copyright 2013 Freescale Semiconductor Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Freescale Semiconductor nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * - * ALTERNATIVELY, this software may be distributed under the terms of the - * GNU General Public License ("GPL") as published by the Free Software - * Foundation, either version 2 of that License or (at your option) any - * later version. - * - * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/dts-v1/; - -/include/ "fsl/e6500_power_isa.dtsi" -/ { - compatible = "fsl,T4240"; - #address-cells = <2>; - #size-cells = <2>; - interrupt-parent = <&mpic>; - - aliases { - ccsr = &soc; - - serial0 = &serial0; - serial1 = &serial1; - serial2 = &serial2; - serial3 = &serial3; - dma0 = &dma0; - dma1 = &dma1; - }; - - cpus { - #address-cells = <1>; - #size-cells = <0>; - - cpu0: PowerPC,e6500@0 { - device_type = "cpu"; - reg = <0 1>; - next-level-cache = <&L2_1>; - fsl,portid-mapping = <0x80000000>; - }; - cpu1: PowerPC,e6500@2 { - device_type = "cpu"; - reg = <2 3>; - next-level-cache = <&L2_1>; - fsl,portid-mapping = <0x80000000>; - }; - cpu2: PowerPC,e6500@4 { - device_type = "cpu"; - reg = <4 5>; - next-level-cache = <&L2_1>; - fsl,portid-mapping = <0x80000000>; - }; - cpu3: PowerPC,e6500@6 { - device_type = "cpu"; - reg = <6 7>; - next-level-cache = <&L2_1>; - fsl,portid-mapping = <0x80000000>; - }; - - cpu4: PowerPC,e6500@8 { - device_type = "cpu"; - reg = <8 9>; - next-level-cache = <&L2_2>; - fsl,portid-mapping = <0x40000000>; - }; - cpu5: PowerPC,e6500@10 { - device_type = "cpu"; - reg = <10 11>; - next-level-cache = <&L2_2>; - fsl,portid-mapping = <0x40000000>; - }; - cpu6: PowerPC,e6500@12 { - device_type = "cpu"; - reg = <12 13>; - next-level-cache = <&L2_2>; - fsl,portid-mapping = <0x40000000>; - }; - cpu7: PowerPC,e6500@14 { - device_type = "cpu"; - reg = <14 15>; - next-level-cache = <&L2_2>; - fsl,portid-mapping = <0x40000000>; - }; - - cpu8: PowerPC,e6500@16 { - device_type = "cpu"; - reg = <16 17>; - next-level-cache = <&L2_3>; - fsl,portid-mapping = <0x20000000>; - }; - cpu9: PowerPC,e6500@18 { - device_type = "cpu"; - reg = <18 19>; - next-level-cache = <&L2_3>; - fsl,portid-mapping = <0x20000000>; - }; - cpu10: PowerPC,e6500@20 { - device_type = "cpu"; - reg = <20 21>; - next-level-cache = <&L2_3>; - fsl,portid-mapping = <0x20000000>; - }; - cpu11: PowerPC,e6500@22 { - device_type = "cpu"; - reg = <22 23>; - next-level-cache = <&L2_3>; - fsl,portid-mapping = <0x20000000>; - }; - }; -}; - -/ { - model = "fsl,T4240QDS"; - compatible = "fsl,T4240EMU", "fsl,T4240QDS"; - #address-cells = <2>; - #size-cells = <2>; - interrupt-parent = <&mpic>; - - ifc: localbus@ffe124000 { - reg = <0xf 0xfe124000 0 0x2000>; - ranges = <0 0 0xf 0xe8000000 0x08000000 - 2 0 0xf 0xff800000 0x00010000 - 3 0 0xf 0xffdf0000 0x00008000>; - - nor@0,0 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "cfi-flash"; - reg = <0x0 0x0 0x8000000>; - - bank-width = <2>; - device-width = <1>; - }; - - }; - - memory { - device_type = "memory"; - }; - - soc: soc@ffe000000 { - ranges = <0x00000000 0xf 0xfe000000 0x1000000>; - reg = <0xf 0xfe000000 0 0x00001000>; - - }; -}; - -&ifc { - #address-cells = <2>; - #size-cells = <1>; - compatible = "fsl,ifc", "simple-bus"; - interrupts = <25 2 0 0>; -}; - -&soc { - #address-cells = <1>; - #size-cells = <1>; - device_type = "soc"; - compatible = "simple-bus"; - - soc-sram-error { - compatible = "fsl,soc-sram-error"; - interrupts = <16 2 1 29>; - }; - - corenet-law@0 { - compatible = "fsl,corenet-law"; - reg = <0x0 0x1000>; - fsl,num-laws = <32>; - }; - - ddr1: memory-controller@8000 { - compatible = "fsl,qoriq-memory-controller-v4.7", - "fsl,qoriq-memory-controller"; - reg = <0x8000 0x1000>; - interrupts = <16 2 1 23>; - }; - - ddr2: memory-controller@9000 { - compatible = "fsl,qoriq-memory-controller-v4.7", - "fsl,qoriq-memory-controller"; - reg = <0x9000 0x1000>; - interrupts = <16 2 1 22>; - }; - - ddr3: memory-controller@a000 { - compatible = "fsl,qoriq-memory-controller-v4.7", - "fsl,qoriq-memory-controller"; - reg = <0xa000 0x1000>; - interrupts = <16 2 1 21>; - }; - - cpc: l3-cache-controller@10000 { - compatible = "fsl,t4240-l3-cache-controller", "cache"; - reg = <0x10000 0x1000 - 0x11000 0x1000 - 0x12000 0x1000>; - interrupts = <16 2 1 27 - 16 2 1 26 - 16 2 1 25>; - }; - - corenet-cf@18000 { - compatible = "fsl,corenet2-cf", "fsl,corenet-cf"; - reg = <0x18000 0x1000>; - interrupts = <16 2 1 31>; - fsl,ccf-num-csdids = <32>; - fsl,ccf-num-snoopids = <32>; - }; - - iommu@20000 { - compatible = "fsl,pamu-v1.0", "fsl,pamu"; - reg = <0x20000 0x6000>; - fsl,portid-mapping = <0x8000>; - interrupts = < - 24 2 0 0 - 16 2 1 30>; - }; - -/include/ "fsl/qoriq-mpic.dtsi" - - guts: global-utilities@e0000 { - compatible = "fsl,t4240-device-config", "fsl,qoriq-device-config-2.0"; - reg = <0xe0000 0xe00>; - fsl,has-rstcr; - fsl,liodn-bits = <12>; - }; - -/include/ "fsl/qoriq-clockgen2.dtsi" - global-utilities@e1000 { - compatible = "fsl,t4240-clockgen", "fsl,qoriq-clockgen-2.0"; - }; - -/include/ "fsl/qoriq-dma-0.dtsi" -/include/ "fsl/qoriq-dma-1.dtsi" - -/include/ "fsl/qoriq-i2c-0.dtsi" -/include/ "fsl/qoriq-i2c-1.dtsi" -/include/ "fsl/qoriq-duart-0.dtsi" -/include/ "fsl/qoriq-duart-1.dtsi" - - L2_1: l2-cache-controller@c20000 { - compatible = "fsl,t4240-l2-cache-controller"; - reg = <0xc20000 0x40000>; - next-level-cache = <&cpc>; - }; - L2_2: l2-cache-controller@c60000 { - compatible = "fsl,t4240-l2-cache-controller"; - reg = <0xc60000 0x40000>; - next-level-cache = <&cpc>; - }; - L2_3: l2-cache-controller@ca0000 { - compatible = "fsl,t4240-l2-cache-controller"; - reg = <0xca0000 0x40000>; - next-level-cache = <&cpc>; - }; -}; From 238cac16c03eef00bcb607e09defee79dadca958 Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Wed, 21 Jan 2015 16:22:52 -0600 Subject: [PATCH 314/601] powerpc: Remove duplicate tlbcam_index declarations They seem to be leftovers from '14cf11a powerpc: Merge enough to start building in arch/powerpc' Signed-off-by: Emil Medve Signed-off-by: Scott Wood --- arch/powerpc/mm/fsl_booke_mmu.c | 2 -- arch/powerpc/mm/pgtable_32.c | 1 - 2 files changed, 3 deletions(-) diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index 94cd728166d37a..b46912fee7cd6d 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -67,8 +67,6 @@ struct tlbcamrange { phys_addr_t phys; } tlbcam_addrs[NUM_TLBCAMS]; -extern unsigned int tlbcam_index; - unsigned long tlbcam_sz(int idx) { return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 50fad3801f3040..6eec88685a71b5 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -63,7 +63,6 @@ void setbat(int index, unsigned long virt, phys_addr_t phys, #endif /* HAVE_BATS */ #ifdef HAVE_TLBCAM -extern unsigned int tlbcam_index; extern phys_addr_t v_mapped_by_tlbcam(unsigned long va); extern unsigned long p_mapped_by_tlbcam(phys_addr_t pa); #else /* !HAVE_TLBCAM */ From d2caa3cebda8b626336e100b80a0ed6f909dccab Mon Sep 17 00:00:00 2001 From: Tom Huynh Date: Tue, 20 Jan 2015 16:19:50 -0600 Subject: [PATCH 315/601] powerpc/perf: fix fsl_emb_pmu_start to write correct pmc value PMCs on PowerPC increases towards 0x80000000 and triggers an overflow interrupt when the msb is set to collect a sample. Therefore, to setup for the next sample collection, pmu_start should set the pmc value to 0x80000000 - left instead of left which incorrectly delays the next overflow interrupt. Same as commit 9a45a9407c69 ("powerpc/perf: power_pmu_start restores incorrect values, breaking frequency events") for book3s. Signed-off-by: Tom Huynh Signed-off-by: Scott Wood --- arch/powerpc/perf/core-fsl-emb.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index 4acaea01fe0313..e9fe904b653804 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -389,6 +389,7 @@ static void fsl_emb_pmu_del(struct perf_event *event, int flags) static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags) { unsigned long flags; + unsigned long val; s64 left; if (event->hw.idx < 0 || !event->hw.sample_period) @@ -405,7 +406,10 @@ static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags) event->hw.state = 0; left = local64_read(&event->hw.period_left); - write_pmc(event->hw.idx, left); + val = 0; + if (left < 0x80000000L) + val = 0x80000000L - left; + write_pmc(event->hw.idx, val); perf_event_update_userpage(event); perf_pmu_enable(event->pmu); From a7b9f671f2d141528491c346e21e8a179cee9d21 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 19 Jan 2015 17:04:38 +0100 Subject: [PATCH 316/601] powerpc32: adds handling of _PAGE_RO Some powerpc like the 8xx don't have a RW bit in PTE bits but a RO (Read Only) bit. This patch implements the handling of a _PAGE_RO flag to be used in place of _PAGE_RW Signed-off-by: Christophe Leroy [scottwood@freescale.com: fix whitespace] Signed-off-by: Scott Wood --- arch/powerpc/include/asm/pgtable-ppc32.h | 8 +++++--- arch/powerpc/include/asm/pgtable.h | 7 +++++-- arch/powerpc/include/asm/pte-common.h | 25 ++++++++++++++++-------- arch/powerpc/mm/pgtable_32.c | 2 +- 4 files changed, 28 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 234e07c4780327..62a3e49a9a149c 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -275,7 +275,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), 0); + pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), _PAGE_RO); } static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) @@ -286,9 +286,11 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) { - unsigned long bits = pte_val(entry) & + unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC); - pte_update(ptep, 0, bits); + unsigned long clr = ~pte_val(entry) & _PAGE_RO; + + pte_update(ptep, clr, set); } #define __HAVE_ARCH_PTE_SAME diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index a8805fee0df91b..7e77f2ca51325a 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -30,7 +30,8 @@ struct mm_struct; #include /* Generic accessors to PTE bits */ -static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; } +static inline int pte_write(pte_t pte) +{ return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } @@ -115,12 +116,14 @@ static inline unsigned long pte_pfn(pte_t pte) { /* Generic modifiers for PTE bits */ static inline pte_t pte_wrprotect(pte_t pte) { - pte_val(pte) &= ~(_PAGE_RW | _PAGE_HWWRITE); return pte; } + pte_val(pte) &= ~(_PAGE_RW | _PAGE_HWWRITE); + pte_val(pte) |= _PAGE_RO; return pte; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~(_PAGE_DIRTY | _PAGE_HWWRITE); return pte; } static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { + pte_val(pte) &= ~_PAGE_RO; pte_val(pte) |= _PAGE_RW; return pte; } static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; } diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index e040c3595129e4..2aef9b7a0eb2af 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -34,6 +34,12 @@ #ifndef _PAGE_PSIZE #define _PAGE_PSIZE 0 #endif +/* _PAGE_RO and _PAGE_RW shall not be defined at the same time */ +#ifndef _PAGE_RO +#define _PAGE_RO 0 +#else +#define _PAGE_RW 0 +#endif #ifndef _PMD_PRESENT_MASK #define _PMD_PRESENT_MASK _PMD_PRESENT #endif @@ -42,10 +48,10 @@ #define PMD_PAGE_SIZE(pmd) bad_call_to_PMD_PAGE_SIZE() #endif #ifndef _PAGE_KERNEL_RO -#define _PAGE_KERNEL_RO 0 +#define _PAGE_KERNEL_RO (_PAGE_RO) #endif #ifndef _PAGE_KERNEL_ROX -#define _PAGE_KERNEL_ROX (_PAGE_EXEC) +#define _PAGE_KERNEL_ROX (_PAGE_EXEC | _PAGE_RO) #endif #ifndef _PAGE_KERNEL_RW #define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE) @@ -95,7 +101,7 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void); /* Mask of bits returned by pte_pgprot() */ #define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \ _PAGE_WRITETHRU | _PAGE_ENDIAN | _PAGE_4K_PFN | \ - _PAGE_USER | _PAGE_ACCESSED | \ + _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | \ _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC) #ifdef CONFIG_NUMA_BALANCING @@ -128,11 +134,14 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void); */ #define PAGE_NONE __pgprot(_PAGE_BASE) #define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC) -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC) +#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \ + _PAGE_EXEC) +#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO) +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO | \ + _PAGE_EXEC) +#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO) +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO | \ + _PAGE_EXEC) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 6eec88685a71b5..83313962043128 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -146,7 +146,7 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags) { /* writeable implies dirty for kernel addresses */ - if (flags & _PAGE_RW) + if ((flags & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO) flags |= _PAGE_DIRTY | _PAGE_HWWRITE; /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ From cadbfd01468663e8b9f9518a58ef67398a20d06b Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 19 Jan 2015 17:04:44 +0100 Subject: [PATCH 317/601] powerpc/8xx: use _PAGE_RO instead of _PAGE_RW On powerpc 8xx, in TLB entries, 0x400 bit is set to 1 for read-only pages and is set to 0 for RW pages. So we should use _PAGE_RO instead of _PAGE_RW Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/pgtable-ppc32.h | 11 +++++------ arch/powerpc/include/asm/pte-8xx.h | 9 ++++----- arch/powerpc/kernel/head_8xx.S | 3 --- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 62a3e49a9a149c..9cde3c1522e316 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -178,12 +178,11 @@ static inline unsigned long pte_update(pte_t *p, andc %1,%0,%5\n\ or %1,%1,%6\n\ /* 0x200 == Extended encoding, bit 22 */ \ - /* Bit 22 has to be 1 if neither _PAGE_USER nor _PAGE_RW are set */ \ - rlwimi %1,%1,32-2,0x200\n /* get _PAGE_USER */ \ - rlwinm %3,%1,32-1,0x200\n /* get _PAGE_RW */ \ - or %1,%3,%1\n\ - xori %1,%1,0x200\n" -" stwcx. %1,0,%4\n\ + /* Bit 22 has to be 1 when _PAGE_USER is unset and _PAGE_RO is set */ \ + rlwimi %1,%1,32-1,0x200\n /* get _PAGE_RO */ \ + rlwinm %3,%1,32-2,0x200\n /* get _PAGE_USER */ \ + andc %1,%1,%3\n\ + stwcx. %1,0,%4\n\ bne- 1b" : "=&r" (old), "=&r" (tmp), "=m" (*p), "=&r" (tmp2) : "r" (p), "r" (clr), "r" (set), "m" (*p) diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h index daa4616e61c4a2..eb6edb44f14042 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/pte-8xx.h @@ -46,9 +46,9 @@ * require a TLB exception handler change. It is assumed unused bits * are always zero. */ -#define _PAGE_RW 0x0400 /* lsb PP bits, inverted in HW */ +#define _PAGE_RO 0x0400 /* lsb PP bits */ #define _PAGE_USER 0x0800 /* msb PP bits */ -/* set when neither _PAGE_USER nor _PAGE_RW are set */ +/* set when _PAGE_USER is unset and _PAGE_RO is set */ #define _PAGE_KNLRO 0x0200 #define _PMD_PRESENT 0x0001 @@ -62,9 +62,8 @@ #define PTE_ATOMIC_UPDATES 1 /* We need to add _PAGE_SHARED to kernel pages */ -#define _PAGE_KERNEL_RO (_PAGE_SHARED | _PAGE_KNLRO) -#define _PAGE_KERNEL_ROX (_PAGE_EXEC | _PAGE_KNLRO) -#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE) +#define _PAGE_KERNEL_RO (_PAGE_SHARED | _PAGE_RO | _PAGE_KNLRO) +#define _PAGE_KERNEL_ROX (_PAGE_EXEC | _PAGE_RO | _PAGE_KNLRO) #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_PTE_8xx_H */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index d99aac0d69f1b6..65492d0439647a 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -441,9 +441,6 @@ DataStoreTLBMiss: and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT #endif - /* invert RW */ - xori r10, r10, _PAGE_RW - /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 22 and 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be From 7f93c9d90f4dc7084217cf94574e427bad40e37c Mon Sep 17 00:00:00 2001 From: Shaohui Xie Date: Wed, 28 Jan 2015 19:54:24 +0800 Subject: [PATCH 318/601] power/fsl: add MDIO dt binding for FMan This binding is for FMan MDIO, it covers FMan v2 & FMan v3. Signed-off-by: Shaohui Xie [scottwood@freescale.com: mark interrupts required only for external] Signed-off-by: Scott Wood --- .../devicetree/bindings/powerpc/fsl/fman.txt | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/Documentation/devicetree/bindings/powerpc/fsl/fman.txt b/Documentation/devicetree/bindings/powerpc/fsl/fman.txt index edeea160ca396b..edda55f7400415 100644 --- a/Documentation/devicetree/bindings/powerpc/fsl/fman.txt +++ b/Documentation/devicetree/bindings/powerpc/fsl/fman.txt @@ -7,6 +7,7 @@ CONTENTS - FMan MURAM Node - FMan dTSEC/XGEC/mEMAC Node - FMan IEEE 1588 Node + - FMan MDIO Node - Example ============================================================================= @@ -356,6 +357,69 @@ ptp-timer@fe000 { reg = <0xfe000 0x1000>; }; +============================================================================= +FMan MDIO Node + +DESCRIPTION + +The MDIO is a bus to which the PHY devices are connected. + +PROPERTIES + +- compatible + Usage: required + Value type: + Definition: A standard property. + Must include "fsl,fman-mdio" for 1 Gb/s MDIO from FMan v2. + Must include "fsl,fman-xmdio" for 10 Gb/s MDIO from FMan v2. + Must include "fsl,fman-memac-mdio" for 1/10 Gb/s MDIO from + FMan v3. + +- reg + Usage: required + Value type: + Definition: A standard property. + +- bus-frequency + Usage: optional + Value type: + Definition: Specifies the external MDIO bus clock speed to + be used, if different from the standard 2.5 MHz. + This may be due to the standard speed being unsupported (e.g. + due to a hardware problem), or to advertise that all relevant + components in the system support a faster speed. + +- interrupts + Usage: required for external MDIO + Value type: + Definition: Event interrupt of external MDIO controller. + +- fsl,fman-internal-mdio + Usage: required for internal MDIO + Value type: boolean + Definition: Fman has internal MDIO for internal PCS(Physical + Coding Sublayer) PHYs and external MDIO for external PHYs. + The settings and programming routines for internal/external + MDIO are different. Must be included for internal MDIO. + +EXAMPLE + +Example for FMan v2 external MDIO: + +mdio@f1000 { + compatible = "fsl,fman-xmdio"; + reg = <0xf1000 0x1000>; + interrupts = <101 2 0 0>; +}; + +Example for FMan v3 internal MDIO: + +mdio@f1000 { + compatible = "fsl,fman-memac-mdio"; + reg = <0xf1000 0x1000>; + fsl,fman-internal-mdio; +}; + ============================================================================= Example @@ -531,4 +595,10 @@ fman@400000 { compatible = "fsl,fman-ptp-timer"; reg = <0xfe000 0x1000>; }; + + mdio@f1000 { + compatible = "fsl,fman-xmdio"; + reg = <0xf1000 0x1000>; + interrupts = <101 2 0 0>; + }; }; From debddd95ec0bb8ac1bbe719b5e4588e453c7b2fc Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 19 Jan 2015 16:44:42 +0100 Subject: [PATCH 319/601] powerpc/8xx: reduce pressure on TLB due to context switches For nohash powerpc, when we run out of contexts, contexts are freed by stealing used contexts in-turn. When a victim has been selected, the associated TLB entries are freed using _tlbil_pid(). Unfortunatly, on the PPC 8xx, _tlbil_pid() does a tlbia, hence flushes ALL TLB entries and not only the one linked to the stolen context. Therefore, as implented today, at each task switch requiring a new context, all entries are flushed. This patch modifies the implementation so that when running out of contexts, all contexts get freed at once, hence dividing the number of calls to tlbia by 16. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/mm/mmu_context_nohash.c | 43 +++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 9cba6cba2e50a9..986afbc22c76b2 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -52,12 +52,15 @@ #include #include +#include "mmu_decl.h" + static unsigned int first_context, last_context; static unsigned int next_context, nr_free_contexts; static unsigned long *context_map; static unsigned long *stale_map[NR_CPUS]; static struct mm_struct **context_mm; static DEFINE_RAW_SPINLOCK(context_lock); +static bool no_selective_tlbil; #define CTX_MAP_SIZE \ (sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1)) @@ -133,6 +136,38 @@ static unsigned int steal_context_smp(unsigned int id) } #endif /* CONFIG_SMP */ +static unsigned int steal_all_contexts(void) +{ + struct mm_struct *mm; + int cpu = smp_processor_id(); + unsigned int id; + + for (id = first_context; id <= last_context; id++) { + /* Pick up the victim mm */ + mm = context_mm[id]; + + pr_hardcont(" | steal %d from 0x%p", id, mm); + + /* Mark this mm as having no context anymore */ + mm->context.id = MMU_NO_CONTEXT; + if (id != first_context) { + context_mm[id] = NULL; + __clear_bit(id, context_map); +#ifdef DEBUG_MAP_CONSISTENCY + mm->context.active = 0; +#endif + } + __clear_bit(id, stale_map[cpu]); + } + + /* Flush the TLB for all contexts (not to be used on SMP) */ + _tlbil_all(); + + nr_free_contexts = last_context - first_context; + + return first_context; +} + /* Note that this will also be called on SMP if all other CPUs are * offlined, which means that it may be called for cpu != 0. For * this to work, we somewhat assume that CPUs that are onlined @@ -241,7 +276,10 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) goto stolen; } #endif /* CONFIG_SMP */ - id = steal_context_up(id); + if (no_selective_tlbil) + id = steal_all_contexts(); + else + id = steal_context_up(id); goto stolen; } nr_free_contexts--; @@ -407,12 +445,15 @@ void __init mmu_context_init(void) if (mmu_has_feature(MMU_FTR_TYPE_8xx)) { first_context = 0; last_context = 15; + no_selective_tlbil = true; } else if (mmu_has_feature(MMU_FTR_TYPE_47x)) { first_context = 1; last_context = 65535; + no_selective_tlbil = false; } else { first_context = 1; last_context = 255; + no_selective_tlbil = false; } #ifdef DEBUG_CLAMP_LAST_CONTEXT From 2374d0af29c154d6d989987a94278caed8c13be5 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Tue, 20 Jan 2015 10:57:33 +0100 Subject: [PATCH 320/601] powerpc/8xx: remove remaining unnecessary code in FixupDAR Since commit 33fb845a6f01 ("powerpc/8xx: Don't use MD_TWC for walk"), MD_EPN and MD_TWC are not writen anymore in FixupDAR so saving r3 has become useless. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 65492d0439647a..73715cd0de452f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -529,9 +529,6 @@ DARFixed:/* Return from dcbx instruction bug workaround */ /* define if you don't want to use self modifying code */ #define NO_SELF_MODIFYING_CODE FixupDAR:/* Entry point for dcbx workaround. */ -#ifdef CONFIG_8xx_CPU6 - mtspr SPRN_DAR, r3 -#endif mtspr SPRN_SPRG_SCRATCH2, r10 /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 @@ -548,9 +545,6 @@ FixupDAR:/* Entry point for dcbx workaround. */ /* Extract level 2 index */ rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 lwzx r11, r10, r11 /* Get the pte */ -#ifdef CONFIG_8xx_CPU6 - mfspr r3, SPRN_DAR -#endif /* concat physical page address(r11) and page offset(r10) */ mfspr r10, SPRN_SRR0 rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 From 5ddb75cee5afab3bdaf6eb4efefc8029923a9cc7 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Tue, 20 Jan 2015 10:57:33 +0100 Subject: [PATCH 321/601] powerpc/8xx: remove tests on PGDIR entry validity Kernel MMU handling code handles validity of entries via _PMD_PRESENT which corresponds to V bit in MD_TWC and MI_TWC. When the V bit is not set, MPC8xx triggers TLBError exception. So we don't have to check that and branch ourself to TLBError. We can set TLB entries with non present entries, remove all those tests and let the 8xx handle it. This reduce the number of cycle when the entries are valid which is the case most of the time, and doesn't significantly increase the time for handling invalid entries. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 41 +++++++--------------------------- 1 file changed, 8 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 73715cd0de452f..32cc9e55b772ab 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -329,12 +329,9 @@ InstructionTLBMiss: /* Extract level 1 index */ rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwzx r11, r10, r11 /* Get the level 1 entry */ - rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ - beq 2f /* If zero, don't try to find a pte */ + rlwinm r10, r11,0,0,19 /* Extract page descriptor page address */ - /* We have a pte table, so load the MI_TWC with the attributes - * for this "segment." - */ + /* Load the MI_TWC with the attributes for this "segment." */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ /* Extract level 2 index */ @@ -342,13 +339,11 @@ InstructionTLBMiss: lwzx r10, r10, r11 /* Get the pte */ #ifdef CONFIG_SWAP - andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT - cmpwi cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT - li r11, RPN_PATTERN - bne- cr0, 2f -#else - li r11, RPN_PATTERN + rlwinm r11, r10, 32-5, _PAGE_PRESENT + and r11, r11, r10 + rlwimi r10, r11, 0, _PAGE_PRESENT #endif + li r11, RPN_PATTERN /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 21 and 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be @@ -366,21 +361,6 @@ InstructionTLBMiss: mfspr r10, SPRN_SPRG_SCRATCH2 EXCEPTION_EPILOG_0 rfi -2: - mfspr r10, SPRN_SRR1 - /* clear all error bits as TLB Miss - * sets a few unconditionally - */ - rlwinm r10, r10, 0, 0xffff - mtspr SPRN_SRR1, r10 - - /* Restore registers */ -#ifdef CONFIG_8xx_CPU6 - mfspr r3, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ -#endif - mfspr r10, SPRN_SPRG_SCRATCH2 - b InstructionTLBError1 . = 0x1200 DataStoreTLBMiss: @@ -403,8 +383,6 @@ DataStoreTLBMiss: /* Extract level 1 index */ rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwzx r11, r10, r11 /* Get the level 1 entry */ - rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ - beq 2f /* If zero, don't try to find a pte */ /* We have a pte table, so load fetch the pte from the table. */ @@ -447,7 +425,7 @@ DataStoreTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ -2: li r11, RPN_PATTERN + li r11, RPN_PATTERN rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */ @@ -466,10 +444,7 @@ DataStoreTLBMiss: */ . = 0x1300 InstructionTLBError: - EXCEPTION_PROLOG_0 -InstructionTLBError1: - EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 + EXCEPTION_PROLOG mr r4,r12 mr r5,r9 andis. r10,r5,0x4000 From ce67f5d0a00cce231e62334c3624737623c32d6a Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Tue, 20 Jan 2015 10:57:34 +0100 Subject: [PATCH 322/601] powerpc32: Use kmem_cache memory for PGDIR When pages are not 4K, PGDIR table is allocated with kmalloc(). In order to optimise TLB handlers, aligned memory is needed. kmalloc() doesn't provide aligned memory blocks, so lets use a kmem_cache pool instead. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/include/asm/pgtable-ppc32.h | 4 ++++ arch/powerpc/mm/pgtable_32.c | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 9cde3c1522e316..26ce0ab0a9e45f 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -347,10 +347,14 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) #define pte_to_pgoff(pte) (pte_val(pte) >> 3) #define pgoff_to_pte(off) ((pte_t) { ((off) << 3) | _PAGE_FILE }) +#ifndef CONFIG_PPC_4K_PAGES +void pgtable_cache_init(void); +#else /* * No page table caches to initialise */ #define pgtable_cache_init() do { } while (0) +#endif extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 83313962043128..03b1a3b0fbd539 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -72,13 +72,25 @@ extern unsigned long p_mapped_by_tlbcam(phys_addr_t pa); #define PGDIR_ORDER (32 + PGD_T_LOG2 - PGDIR_SHIFT) +#ifndef CONFIG_PPC_4K_PAGES +static struct kmem_cache *pgtable_cache; + +void pgtable_cache_init(void) +{ + pgtable_cache = kmem_cache_create("PGDIR cache", 1 << PGDIR_ORDER, + 1 << PGDIR_ORDER, 0, NULL); + if (pgtable_cache == NULL) + panic("Couldn't allocate pgtable caches"); +} +#endif + pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *ret; /* pgdir take page or two with 4K pages and a page fraction otherwise */ #ifndef CONFIG_PPC_4K_PAGES - ret = kzalloc(1 << PGDIR_ORDER, GFP_KERNEL); + ret = kmem_cache_alloc(pgtable_cache, GFP_KERNEL | __GFP_ZERO); #else ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER - PAGE_SHIFT); @@ -89,7 +101,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) void pgd_free(struct mm_struct *mm, pgd_t *pgd) { #ifndef CONFIG_PPC_4K_PAGES - kfree((void *)pgd); + kmem_cache_free(pgtable_cache, (void *)pgd); #else free_pages((unsigned long)pgd, PGDIR_ORDER - PAGE_SHIFT); #endif From 17bb312f4c75e59ef6f18015011c4efbb545a748 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Tue, 20 Jan 2015 10:57:34 +0100 Subject: [PATCH 323/601] powerpc/8xx: Take benefit of aligned PGDIR L1 base address is now aligned so we can insert L1 index into r11 directly and then preserve r10 Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 32cc9e55b772ab..0ab16b66d712d4 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -326,16 +326,15 @@ InstructionTLBMiss: ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: #endif - /* Extract level 1 index */ - rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 - lwzx r11, r10, r11 /* Get the level 1 entry */ - rlwinm r10, r11,0,0,19 /* Extract page descriptor page address */ + /* Insert level 1 index */ + rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 + lwz r11, 0(r11) /* Get the level 1 entry */ /* Load the MI_TWC with the attributes for this "segment." */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ - mfspr r11, SPRN_SRR0 /* Get effective address of fault */ + rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ /* Extract level 2 index */ - rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 + rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 lwzx r10, r10, r11 /* Get the pte */ #ifdef CONFIG_SWAP @@ -380,13 +379,12 @@ DataStoreTLBMiss: lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: - /* Extract level 1 index */ - rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 - lwzx r11, r10, r11 /* Get the level 1 entry */ + /* Insert level 1 index */ + rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 + lwz r11, 0(r11) /* Get the level 1 entry */ /* We have a pte table, so load fetch the pte from the table. */ - mfspr r10, SPRN_MD_EPN /* Get address of fault */ /* Extract level 2 index */ rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ @@ -512,16 +510,14 @@ FixupDAR:/* Entry point for dcbx workaround. */ beq- 3f /* Branch if user space */ lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l - /* Extract level 1 index */ -3: rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 - lwzx r11, r10, r11 /* Get the level 1 entry */ - rlwinm r10, r11,0,0,19 /* Extract page descriptor page address */ - mfspr r11, SPRN_SRR0 /* Get effective address of fault */ - /* Extract level 2 index */ - rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 - lwzx r11, r10, r11 /* Get the pte */ + /* Insert level 1 index */ +3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 + lwz r11, 0(r11) /* Get the level 1 entry */ + rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ + /* Insert level 2 index */ + rlwimi r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 + lwz r11, 0(r11) /* Get the pte */ /* concat physical page address(r11) and page offset(r10) */ - mfspr r10, SPRN_SRR0 rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 lwz r11,0(r11) /* Check if it really is a dcbx instruction. */ From fde5a9057fcf2e96da1de3b6acf815c1e7d73f1e Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Tue, 20 Jan 2015 10:57:34 +0100 Subject: [PATCH 324/601] powerpc/8xx: Optimise access to swapper_pg_dir All accessed to PGD entries are done via 0(r11). By using lower part of swapper_pg_dir as load index to r11, we can remove the ori instruction. Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 35 ++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 0ab16b66d712d4..24934e79cd2353 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -319,16 +319,15 @@ InstructionTLBMiss: * pin the first 8MB of kernel memory */ andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ #endif - mfspr r11, SPRN_M_TW /* Get level 1 table base address */ + mfspr r11, SPRN_M_TW /* Get level 1 table */ #ifdef CONFIG_MODULES beq 3f - lis r11, (swapper_pg_dir-PAGE_OFFSET)@h - ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l + lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: #endif /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 - lwz r11, 0(r11) /* Get the level 1 entry */ + lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ /* Load the MI_TWC with the attributes for this "segment." */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */ @@ -374,14 +373,13 @@ DataStoreTLBMiss: * kernel page tables. */ andis. r11, r10, 0x8000 - mfspr r11, SPRN_M_TW /* Get level 1 table base address */ + mfspr r11, SPRN_M_TW /* Get level 1 table */ beq 3f - lis r11, (swapper_pg_dir-PAGE_OFFSET)@h - ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l + lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 - lwz r11, 0(r11) /* Get the level 1 entry */ + lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ /* We have a pte table, so load fetch the pte from the table. */ @@ -506,13 +504,12 @@ FixupDAR:/* Entry point for dcbx workaround. */ /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 andis. r11, r10, 0x8000 /* Address >= 0x80000000 */ - mfspr r11, SPRN_M_TW /* Get level 1 table base address */ - beq- 3f /* Branch if user space */ - lis r11, (swapper_pg_dir-PAGE_OFFSET)@h - ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l + mfspr r11, SPRN_M_TW /* Get level 1 table */ + beq 3f + lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha /* Insert level 1 index */ 3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 - lwz r11, 0(r11) /* Get the level 1 entry */ + lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ /* Insert level 2 index */ rlwimi r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 @@ -667,8 +664,7 @@ start_here: * init's THREAD like the context switch code does, but this is * easier......until someone changes init's static structures. */ - lis r6, swapper_pg_dir@h - ori r6, r6, swapper_pg_dir@l + lis r6, swapper_pg_dir@ha tophys(r6,r6) #ifdef CONFIG_8xx_CPU6 lis r4, cpu6_errata_word@h @@ -847,6 +843,13 @@ _GLOBAL(set_context) stw r4, 0x4(r5) #endif + /* Register M_TW will contain base address of level 1 table minus the + * lower part of the kernel PGDIR base address, so that all accesses to + * level 1 table are done relative to lower part of kernel PGDIR base + * address. + */ + li r5, (swapper_pg_dir-PAGE_OFFSET)@l + sub r4, r4, r5 #ifdef CONFIG_8xx_CPU6 lis r6, cpu6_errata_word@h ori r6, r6, cpu6_errata_word@l @@ -854,7 +857,7 @@ _GLOBAL(set_context) li r7, 0x3f80 stw r7, 12(r6) lwz r7, 12(r6) - mtspr SPRN_M_TW, r4 /* Update MMU base address */ + mtspr SPRN_M_TW, r4 /* Update pointeur to level 1 table */ li r7, 0x3380 stw r7, 12(r6) lwz r7, 12(r6) From 4545ff7ed841df77a239a22bffed4fc977a7d7bc Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Tue, 20 Jan 2015 10:57:34 +0100 Subject: [PATCH 325/601] powerpc/8xx: Remove duplicated code in set_context() Signed-off-by: Christophe Leroy Signed-off-by: Scott Wood --- arch/powerpc/kernel/head_8xx.S | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 24934e79cd2353..9b53fe139bf6f5 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -850,23 +850,21 @@ _GLOBAL(set_context) */ li r5, (swapper_pg_dir-PAGE_OFFSET)@l sub r4, r4, r5 + tophys (r4, r4) #ifdef CONFIG_8xx_CPU6 lis r6, cpu6_errata_word@h ori r6, r6, cpu6_errata_word@l - tophys (r4, r4) li r7, 0x3f80 stw r7, 12(r6) lwz r7, 12(r6) +#endif mtspr SPRN_M_TW, r4 /* Update pointeur to level 1 table */ +#ifdef CONFIG_8xx_CPU6 li r7, 0x3380 stw r7, 12(r6) lwz r7, 12(r6) - mtspr SPRN_M_CASID, r3 /* Update context */ -#else - mtspr SPRN_M_CASID,r3 /* Update context */ - tophys (r4, r4) - mtspr SPRN_M_TW, r4 /* and pgd */ #endif + mtspr SPRN_M_CASID, r3 /* Update context */ SYNC blr From 974ff4e2d71148655f0ea23096dbca4555d24dab Mon Sep 17 00:00:00 2001 From: Esben Haabendal Date: Tue, 6 Jan 2015 12:26:59 +0100 Subject: [PATCH 326/601] powerpc: Add machine_check cpu function for e300c3 cpus Signed-off-by: Esben Haabendal Signed-off-by: Scott Wood --- arch/powerpc/kernel/cputable.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 8084059063365c..f337666768a765 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1133,6 +1133,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .cpu_setup = __setup_cpu_603, + .machine_check = machine_check_generic, .num_pmcs = 4, .oprofile_cpu_type = "ppc/e300", .oprofile_type = PPC_OPROFILE_FSL_EMB, From cbe8c43dfd9426104ef74389a287d4f200960811 Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Tue, 23 Dec 2014 18:17:56 +0800 Subject: [PATCH 327/601] powerpc/config: Enable MDIO support Also, enable Vitesse PHY and fixed PHY support. Signed-off-by: Andy Fleming Signed-off-by: Shaohui Xie Signed-off-by: Scott Wood --- arch/powerpc/configs/corenet32_smp_defconfig | 1 + arch/powerpc/configs/corenet64_smp_defconfig | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig index 2826bb4ff56058..4d0771aad9aa11 100644 --- a/arch/powerpc/configs/corenet32_smp_defconfig +++ b/arch/powerpc/configs/corenet32_smp_defconfig @@ -92,6 +92,7 @@ CONFIG_SATA_SIL=y CONFIG_PATA_SIL680=y CONFIG_NETDEVICES=y CONFIG_FSL_PQ_MDIO=y +CONFIG_FSL_XGMAC_MDIO=y CONFIG_E1000=y CONFIG_E1000E=y CONFIG_AT803X_PHY=y diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig index 6db97e4414b2ea..88b475185611ae 100644 --- a/arch/powerpc/configs/corenet64_smp_defconfig +++ b/arch/powerpc/configs/corenet64_smp_defconfig @@ -79,7 +79,11 @@ CONFIG_SATA_FSL=y CONFIG_SATA_SIL24=y CONFIG_NETDEVICES=y CONFIG_DUMMY=y +CONFIG_FSL_PQ_MDIO=y +CONFIG_FSL_XGMAC_MDIO=y CONFIG_E1000E=y +CONFIG_VITESSE_PHY=y +CONFIG_FIXED_PHY=y CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_KEYBOARD is not set From bb344ca5b90df62b1a3b7a35c6a9d00b306a170d Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 17 Dec 2014 19:06:31 -0600 Subject: [PATCH 328/601] powerpc/mpc85xx: Add ranges to etsec2 nodes Commit 746c9e9f92dd "of/base: Fix PowerPC address parsing hack" limited the applicability of the workaround whereby a missing ranges is treated as an empty ranges. This workaround was hiding a bug in the etsec2 device tree nodes, which have children with reg, but did not have ranges. Signed-off-by: Scott Wood Reported-by: Alexander Graf --- arch/powerpc/boot/dts/fsl/pq3-etsec2-0.dtsi | 1 + arch/powerpc/boot/dts/fsl/pq3-etsec2-1.dtsi | 1 + arch/powerpc/boot/dts/fsl/pq3-etsec2-2.dtsi | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec2-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec2-0.dtsi index 1382fec9e8c5f8..7fcb1ac0f2325f 100644 --- a/arch/powerpc/boot/dts/fsl/pq3-etsec2-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/pq3-etsec2-0.dtsi @@ -50,6 +50,7 @@ ethernet@b0000 { fsl,num_tx_queues = <0x8>; fsl,magic-packet; local-mac-address = [ 00 00 00 00 00 00 ]; + ranges; queue-group@b0000 { #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec2-1.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec2-1.dtsi index 221cd2ea5b3124..9f25427c152785 100644 --- a/arch/powerpc/boot/dts/fsl/pq3-etsec2-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/pq3-etsec2-1.dtsi @@ -50,6 +50,7 @@ ethernet@b1000 { fsl,num_tx_queues = <0x8>; fsl,magic-packet; local-mac-address = [ 00 00 00 00 00 00 ]; + ranges; queue-group@b1000 { #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec2-2.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec2-2.dtsi index 61456c317609ce..cd7c318ab131af 100644 --- a/arch/powerpc/boot/dts/fsl/pq3-etsec2-2.dtsi +++ b/arch/powerpc/boot/dts/fsl/pq3-etsec2-2.dtsi @@ -49,6 +49,7 @@ ethernet@b2000 { fsl,num_tx_queues = <0x8>; fsl,magic-packet; local-mac-address = [ 00 00 00 00 00 00 ]; + ranges; queue-group@b2000 { #address-cells = <1>; From 2727ed54716e606673121d5863934512bd4a5eb8 Mon Sep 17 00:00:00 2001 From: Alessio Igor Bogani Date: Thu, 11 Dec 2014 09:56:45 +0100 Subject: [PATCH 329/601] powerpc/85xx: Add support for Emerson/Artesyn MVME2500. Add support for the Artesyn MVME2500 Single Board Computer. The MVME2500 is a 6U form factor VME64 computer with: - A single Freescale QorIQ P2010 CPU - 1 GB of DDR3 onboard memory - Three Gigabit Ethernets - Five 16550 compatible UARTS - One USB 2.0 port, one SHDC socket and one SATA connector - One PCI/PCI eXpress Mezzanine Card (PMC/XMC) Slot - MultiProcessor Interrupt Controller (MPIC) - A DS1375T Real Time Clock (RTC) and 512 KB of Non-Volatile Memory - Two 64 KB EEPROMs - U-Boot in 16 SPI Flash This patch is based on linux-3.18 and has been boot tested. Signed-off-by: Alessio Igor Bogani Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/mvme2500.dts | 280 +++++++++++++++++++++++++ arch/powerpc/configs/mpc85xx_defconfig | 16 +- arch/powerpc/platforms/85xx/Kconfig | 6 + arch/powerpc/platforms/85xx/Makefile | 1 + arch/powerpc/platforms/85xx/mvme2500.c | 74 +++++++ 5 files changed, 374 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/boot/dts/mvme2500.dts create mode 100644 arch/powerpc/platforms/85xx/mvme2500.c diff --git a/arch/powerpc/boot/dts/mvme2500.dts b/arch/powerpc/boot/dts/mvme2500.dts new file mode 100644 index 00000000000000..67714cf0f745a3 --- /dev/null +++ b/arch/powerpc/boot/dts/mvme2500.dts @@ -0,0 +1,280 @@ +/* + * Device tree source for the Emerson/Artesyn MVME2500 + * + * Copyright 2014 Elettra-Sincrotrone Trieste S.C.p.A. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * Based on: P2020 DS Device Tree Source + * Copyright 2009 Freescale Semiconductor Inc. + */ + +/include/ "fsl/p2020si-pre.dtsi" + +/ { + model = "MVME2500"; + compatible = "artesyn,MVME2500"; + + aliases { + serial2 = &serial2; + serial3 = &serial3; + serial4 = &serial4; + serial5 = &serial5; + }; + + memory { + device_type = "memory"; + }; + + soc: soc@ffe00000 { + ranges = <0x0 0 0xffe00000 0x100000>; + + i2c@3000 { + hwmon@4c { + compatible = "adi,adt7461"; + reg = <0x4c>; + }; + + rtc@68 { + compatible = "dallas,ds1337"; + reg = <0x68>; + interrupts = <8 1 0 0>; + }; + + eeprom@54 { + compatible = "atmel,24c64"; + reg = <0x54>; + }; + + eeprom@52 { + compatible = "atmel,24c512"; + reg = <0x52>; + }; + + eeprom@53 { + compatible = "atmel,24c512"; + reg = <0x53>; + }; + + eeprom@50 { + compatible = "atmel,24c02"; + reg = <0x50>; + }; + + }; + + spi0: spi@7000 { + fsl,espi-num-chipselects = <2>; + + flash@0 { + compatible = "atmel,at25df641"; + reg = <0>; + spi-max-frequency = <10000000>; + }; + flash@1 { + compatible = "atmel,at25df641"; + reg = <1>; + spi-max-frequency = <10000000>; + }; + }; + + usb@22000 { + dr_mode = "host"; + phy_type = "ulpi"; + }; + + enet0: ethernet@24000 { + tbi-handle = <&tbi0>; + phy-handle = <&phy1>; + phy-connection-type = "rgmii-id"; + }; + + mdio@24520 { + phy1: ethernet-phy@1 { + compatible = "brcm,bcm54616S"; + interrupts = <6 1 0 0>; + reg = <0x1>; + }; + + phy2: ethernet-phy@2 { + compatible = "brcm,bcm54616S"; + interrupts = <6 1 0 0>; + reg = <0x2>; + }; + + phy3: ethernet-phy@3 { + compatible = "brcm,bcm54616S"; + interrupts = <5 1 0 0>; + reg = <0x3>; + }; + + phy7: ethernet-phy@7 { + compatible = "brcm,bcm54616S"; + interrupts = <7 1 0 0>; + reg = <0x7>; + }; + + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + enet1: ethernet@25000 { + tbi-handle = <&tbi1>; + phy-handle = <&phy7>; + phy-connection-type = "rgmii-id"; + }; + + mdio@25520 { + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + + enet2: ethernet@26000 { + tbi-handle = <&tbi2>; + phy-handle = <&phy3>; + phy-connection-type = "rgmii-id"; + }; + + mdio@26520 { + tbi2: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + }; + + lbc: localbus@ffe05000 { + reg = <0 0xffe05000 0 0x1000>; + + ranges = <0x0 0x0 0x0 0xfff00000 0x00080000 + 0x1 0x0 0x0 0xffc40000 0x00010000 + 0x2 0x0 0x0 0xffc50000 0x00010000 + 0x3 0x0 0x0 0xffc60000 0x00010000 + 0x4 0x0 0x0 0xffc70000 0x00010000 + 0x6 0x0 0x0 0xffc80000 0x00010000 + 0x5 0x0 0x0 0xffdf0000 0x00008000>; + + serial2: serial@1,0 { + device_type = "serial"; + compatible = "ns16550"; + reg = <0x1 0x0 0x100>; + clock-frequency = <1843200>; + interrupts = <11 2 0 0>; + }; + + serial3: serial@2,0 { + device_type = "serial"; + compatible = "ns16550"; + reg = <0x2 0x0 0x100>; + clock-frequency = <1843200>; + interrupts = <1 2 0 0>; + }; + + serial4: serial@3,0 { + device_type = "serial"; + compatible = "ns16550"; + reg = <0x3 0x0 0x100>; + clock-frequency = <1843200>; + interrupts = <2 2 0 0>; + }; + + serial5: serial@4,0 { + device_type = "serial"; + compatible = "ns16550"; + reg = <0x4 0x0 0x100>; + clock-frequency = <1843200>; + interrupts = <3 2 0 0>; + }; + + mram@0,0 { + compatible = "everspin,mram", "mtd-ram"; + reg = <0x0 0x0 0x80000>; + bank-width = <2>; + }; + + board-control@5,0 { + compatible = "artesyn,mvme2500-fpga"; + reg = <0x5 0x0 0x01000>; + }; + + cpld@6,0 { + compatible = "artesyn,mvme2500-cpld"; + reg = <0x6 0x0 0x10000>; + interrupts = <9 1 0 0>; + }; + }; + + pci0: pcie@ffe08000 { + reg = <0 0xffe08000 0 0x1000>; + ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000 + 0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>; + pcie@0 { + ranges = <0x2000000 0x0 0x80000000 + 0x2000000 0x0 0x80000000 + 0x0 0x20000000 + + 0x1000000 0x0 0x0 + 0x1000000 0x0 0x0 + 0x0 0x10000>; + }; + }; + + pci1: pcie@ffe09000 { + reg = <0 0xffe09000 0 0x1000>; + ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000 + 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>; + pcie@0 { + ranges = <0x2000000 0x0 0xa0000000 + 0x2000000 0x0 0xa0000000 + 0x0 0x20000000 + + 0x1000000 0x0 0x0 + 0x1000000 0x0 0x0 + 0x0 0x10000>; + }; + + }; + + pci2: pcie@ffe0a000 { + reg = <0 0xffe0a000 0 0x1000>; + ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000 + 0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>; + pcie@0 { + ranges = <0x2000000 0x0 0xc0000000 + 0x2000000 0x0 0xc0000000 + 0x0 0x20000000 + + 0x1000000 0x0 0x0 + 0x1000000 0x0 0x0 + 0x0 0x10000>; + }; + }; +}; + +/include/ "fsl/p2020si-post.dtsi" + +/ { + soc@ffe00000 { + serial@4600 { + status = "disabled"; + }; + + i2c@3100 { + status = "disabled"; + }; + + sdhc@2e000 { + compatible = "fsl,p2020-esdhc", "fsl,esdhc"; + non-removable; + }; + + }; + +}; diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig index cfae862d634742..8535c343dd577e 100644 --- a/arch/powerpc/configs/mpc85xx_defconfig +++ b/arch/powerpc/configs/mpc85xx_defconfig @@ -42,6 +42,7 @@ CONFIG_TQM8548=y CONFIG_TQM8555=y CONFIG_TQM8560=y CONFIG_SBC8548=y +CONFIG_MVME2500=y CONFIG_QUICC_ENGINE=y CONFIG_QE_GPIO=y CONFIG_HIGHMEM=y @@ -49,6 +50,8 @@ CONFIG_BINFMT_MISC=m CONFIG_MATH_EMULATION=y CONFIG_FORCE_MAX_ZONEORDER=12 CONFIG_PCI=y +CONFIG_PCIEPORTBUS=y +# CONFIG_PCIEASPM is not set CONFIG_PCI_MSI=y CONFIG_RAPIDIO=y CONFIG_NET=y @@ -85,10 +88,14 @@ CONFIG_FTL=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y CONFIG_MTD_CFI_AMDSTD=y +CONFIG_MTD_PHYSMAP=y CONFIG_MTD_PHYSMAP_OF=y +CONFIG_MTD_PLATRAM=y +CONFIG_MTD_M25P80=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_MTD_NAND_FSL_IFC=y +CONFIG_MTD_SPI_NOR=y CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=y @@ -120,6 +127,7 @@ CONFIG_MARVELL_PHY=y CONFIG_DAVICOM_PHY=y CONFIG_CICADA_PHY=y CONFIG_VITESSE_PHY=y +CONFIG_BROADCOM_PHY=y CONFIG_FIXED_PHY=y CONFIG_INPUT_FF_MEMLESS=m # CONFIG_INPUT_MOUSEDEV is not set @@ -128,8 +136,8 @@ CONFIG_INPUT_FF_MEMLESS=m CONFIG_SERIO_LIBPS2=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_SERIAL_8250_NR_UARTS=2 -CONFIG_SERIAL_8250_RUNTIME_UARTS=2 +CONFIG_SERIAL_8250_NR_UARTS=6 +CONFIG_SERIAL_8250_RUNTIME_UARTS=6 CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_DETECT_IRQ=y CONFIG_SERIAL_8250_RSA=y @@ -142,7 +150,8 @@ CONFIG_SPI=y CONFIG_SPI_FSL_SPI=y CONFIG_SPI_FSL_ESPI=y CONFIG_GPIO_MPC8XXX=y -# CONFIG_HWMON is not set +CONFIG_HWMON=m +CONFIG_SENSORS_LM90=m CONFIG_FB=y CONFIG_FB_FSL_DIU=y # CONFIG_VGA_CONSOLE is not set @@ -185,6 +194,7 @@ CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_OF_ESDHC=y CONFIG_EDAC=y CONFIG_EDAC_MM_EDAC=y +CONFIG_EDAC_MPC85XX=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y CONFIG_RTC_DRV_DS1374=y diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index f22635a71d0117..2fb4b24368a6e6 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -241,6 +241,12 @@ config SGY_CTS1000 help Enable this to support functionality in Servergy's CTS-1000 systems. +config MVME2500 + bool "Artesyn MVME2500" + select DEFAULT_UIMAGE + help + This option enables support for the Emerson/Artesyn MVME2500 board. + endif # PPC32 config PPC_QEMU_E500 diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile index 73032604662556..1fe7fb95175a50 100644 --- a/arch/powerpc/platforms/85xx/Makefile +++ b/arch/powerpc/platforms/85xx/Makefile @@ -31,3 +31,4 @@ obj-$(CONFIG_XES_MPC85xx) += xes_mpc85xx.o obj-$(CONFIG_GE_IMP3A) += ge_imp3a.o obj-$(CONFIG_PPC_QEMU_E500) += qemu_e500.o obj-$(CONFIG_SGY_CTS1000) += sgy_cts1000.o +obj-$(CONFIG_MVME2500) += mvme2500.o diff --git a/arch/powerpc/platforms/85xx/mvme2500.c b/arch/powerpc/platforms/85xx/mvme2500.c new file mode 100644 index 00000000000000..1233050560ae75 --- /dev/null +++ b/arch/powerpc/platforms/85xx/mvme2500.c @@ -0,0 +1,74 @@ +/* + * Board setup routines for the Emerson/Artesyn MVME2500 + * + * Copyright 2014 Elettra-Sincrotrone Trieste S.C.p.A. + * + * Based on earlier code by: + * + * Xianghua Xiao (x.xiao@freescale.com) + * Tom Armistead (tom.armistead@emerson.com) + * Copyright 2012 Emerson + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * Author Alessio Igor Bogani + * + */ + +#include +#include +#include +#include +#include + +#include "mpc85xx.h" + +void __init mvme2500_pic_init(void) +{ + struct mpic *mpic = mpic_alloc(NULL, 0, + MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU, + 0, 256, " OpenPIC "); + BUG_ON(mpic == NULL); + mpic_init(mpic); +} + +/* + * Setup the architecture + */ +static void __init mvme2500_setup_arch(void) +{ + if (ppc_md.progress) + ppc_md.progress("mvme2500_setup_arch()", 0); + fsl_pci_assign_primary(); + pr_info("MVME2500 board from Artesyn\n"); +} + +machine_arch_initcall(mvme2500, mpc85xx_common_publish_devices); + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init mvme2500_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "artesyn,MVME2500"); +} + +define_machine(mvme2500) { + .name = "MVME2500", + .probe = mvme2500_probe, + .setup_arch = mvme2500_setup_arch, + .init_IRQ = mvme2500_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, + .pcibios_fixup_phb = fsl_pcibios_fixup_phb, +#endif + .get_irq = mpic_get_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; From c3e09b3a90850b299aa805c32945163e5101ddcd Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 10 Dec 2014 18:56:22 -0600 Subject: [PATCH 330/601] memory/fsl-corenet-cf: Add t1040 support T1040 has a different version of corenet-cf, despite being incorrectly labelled with a fsl,corenet2-cf compatible. The t1040 version of corenet-cf has a version register that can be read to distinguish. The t4240/b4860 version officially does not, but testing shows that it does and has a different value, so use that. If somehow this ends up not being reliable and we treat a t4240/b4860 as a t1040 (the reverse should not happen, as t1040's version register is official), currently the worst that should happen is writing to reserved bits to enable events that don't exist. The changes to the t1040 version of corenet-cf that this driver cares about are the addition of two new error events. There are also changes to the format of cecar2, which is printed, but not interpreted, by this driver. Signed-off-by: Scott Wood --- drivers/memory/fsl-corenet-cf.c | 36 +++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/drivers/memory/fsl-corenet-cf.c b/drivers/memory/fsl-corenet-cf.c index fc7ab5a3561e70..d708ded5457bb8 100644 --- a/drivers/memory/fsl-corenet-cf.c +++ b/drivers/memory/fsl-corenet-cf.c @@ -27,18 +27,29 @@ enum ccf_version { struct ccf_info { enum ccf_version version; int err_reg_offs; + bool has_brr; }; static const struct ccf_info ccf1_info = { .version = CCF1, .err_reg_offs = 0xa00, + .has_brr = false, }; static const struct ccf_info ccf2_info = { .version = CCF2, .err_reg_offs = 0xe40, + .has_brr = true, }; +/* + * This register is present but not documented, with different values for + * IP_ID, on other chips with fsl,corenet2-cf such as t4240 and b4860. + */ +#define CCF_BRR 0xbf8 +#define CCF_BRR_IPID 0xffff0000 +#define CCF_BRR_IPID_T1040 0x09310000 + static const struct of_device_id ccf_matches[] = { { .compatible = "fsl,corenet1-cf", @@ -66,6 +77,8 @@ struct ccf_err_regs { /* LAE/CV also valid for errdis and errinten */ #define ERRDET_LAE (1 << 0) /* Local Access Error */ #define ERRDET_CV (1 << 1) /* Coherency Violation */ +#define ERRDET_UTID (1 << 2) /* Unavailable Target ID (t1040) */ +#define ERRDET_MCST (1 << 3) /* Multicast Stash (t1040) */ #define ERRDET_CTYPE_SHIFT 26 /* Capture Type (ccf2 only) */ #define ERRDET_CTYPE_MASK (0x1f << ERRDET_CTYPE_SHIFT) #define ERRDET_CAP (1 << 31) /* Capture Valid (ccf2 only) */ @@ -84,6 +97,7 @@ struct ccf_private { struct device *dev; void __iomem *regs; struct ccf_err_regs __iomem *err_regs; + bool t1040; }; static irqreturn_t ccf_irq(int irq, void *dev_id) @@ -142,6 +156,12 @@ static irqreturn_t ccf_irq(int irq, void *dev_id) if (errdet & ERRDET_CV) dev_crit(ccf->dev, "Coherency Violation\n"); + if (errdet & ERRDET_UTID) + dev_crit(ccf->dev, "Unavailable Target ID\n"); + + if (errdet & ERRDET_MCST) + dev_crit(ccf->dev, "Multicast Stash\n"); + if (cap_valid) { dev_crit(ccf->dev, "address 0x%09llx, src id 0x%x\n", addr, src_id); @@ -157,6 +177,7 @@ static int ccf_probe(struct platform_device *pdev) struct ccf_private *ccf; struct resource *r; const struct of_device_id *match; + u32 errinten; int ret, irq; match = of_match_device(ccf_matches, &pdev->dev); @@ -183,6 +204,13 @@ static int ccf_probe(struct platform_device *pdev) ccf->info = match->data; ccf->err_regs = ccf->regs + ccf->info->err_reg_offs; + if (ccf->info->has_brr) { + u32 brr = ioread32be(ccf->regs + CCF_BRR); + + if ((brr & CCF_BRR_IPID) == CCF_BRR_IPID_T1040) + ccf->t1040 = true; + } + dev_set_drvdata(&pdev->dev, ccf); irq = platform_get_irq(pdev, 0); @@ -197,15 +225,19 @@ static int ccf_probe(struct platform_device *pdev) return ret; } + errinten = ERRDET_LAE | ERRDET_CV; + if (ccf->t1040) + errinten |= ERRDET_UTID | ERRDET_MCST; + switch (ccf->info->version) { case CCF1: /* On CCF1 this register enables rather than disables. */ - iowrite32be(ERRDET_LAE | ERRDET_CV, &ccf->err_regs->errdis); + iowrite32be(errinten, &ccf->err_regs->errdis); break; case CCF2: iowrite32be(0, &ccf->err_regs->errdis); - iowrite32be(ERRDET_LAE | ERRDET_CV, &ccf->err_regs->errinten); + iowrite32be(errinten, &ccf->err_regs->errinten); break; } From 1ee9df499b270f259adc8df9cca396d5ef6e9e46 Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Mon, 8 Dec 2014 04:29:16 -0600 Subject: [PATCH 331/601] dt/bindings: b/qman: Fix the alloc-ranges in the example(s) 'ranges' are specified as not as Signed-off-by: Emil Medve Signed-off-by: Scott Wood --- Documentation/devicetree/bindings/soc/fsl/bman.txt | 2 +- Documentation/devicetree/bindings/soc/fsl/qman.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/soc/fsl/bman.txt b/Documentation/devicetree/bindings/soc/fsl/bman.txt index 9f80bf8709acec..ee03ef2e265c2e 100644 --- a/Documentation/devicetree/bindings/soc/fsl/bman.txt +++ b/Documentation/devicetree/bindings/soc/fsl/bman.txt @@ -96,7 +96,7 @@ The example below shows a BMan FBPR dynamic allocation memory node bman_fbpr: bman-fbpr { compatible = "fsl,bman-fbpr"; - alloc-ranges = <0 0 0xf 0xffffffff>; + alloc-ranges = <0 0 0x10 0>; size = <0 0x1000000>; alignment = <0 0x1000000>; }; diff --git a/Documentation/devicetree/bindings/soc/fsl/qman.txt b/Documentation/devicetree/bindings/soc/fsl/qman.txt index 063e3a0b9d045e..f3da91ef4287a7 100644 --- a/Documentation/devicetree/bindings/soc/fsl/qman.txt +++ b/Documentation/devicetree/bindings/soc/fsl/qman.txt @@ -113,13 +113,13 @@ The example below shows a QMan FQD and a PFDR dynamic allocation memory nodes qman_fqd: qman-fqd { compatible = "fsl,qman-fqd"; - alloc-ranges = <0 0 0xf 0xffffffff>; + alloc-ranges = <0 0 0x10 0>; size = <0 0x400000>; alignment = <0 0x400000>; }; qman_pfdr: qman-pfdr { compatible = "fsl,qman-pfdr"; - alloc-ranges = <0 0 0xf 0xffffffff>; + alloc-ranges = <0 0 0x10 0>; size = <0 0x2000000>; alignment = <0 0x2000000>; }; From 7af98c7c4dffaf5b0a082fa9c42785d7807e6235 Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Mon, 8 Dec 2014 04:29:17 -0600 Subject: [PATCH 332/601] dt/bindings: b/qman: Add phandle to the portals This supports SoC(s) with multiple B/QMan instances Signed-off-by: Emil Medve Signed-off-by: Scott Wood --- Documentation/devicetree/bindings/soc/fsl/bman.txt | 10 ++++++++++ Documentation/devicetree/bindings/soc/fsl/qman.txt | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/Documentation/devicetree/bindings/soc/fsl/bman.txt b/Documentation/devicetree/bindings/soc/fsl/bman.txt index ee03ef2e265c2e..47ac834414d848 100644 --- a/Documentation/devicetree/bindings/soc/fsl/bman.txt +++ b/Documentation/devicetree/bindings/soc/fsl/bman.txt @@ -36,6 +36,11 @@ are located at offsets 0xbf8 and 0xbfc Value type: Definition: Standard property. The error interrupt +- fsl,bman-portals + Usage: Required + Value type: + Definition: Phandle to this BMan instance's portals + - fsl,liodn Usage: See pamu.txt Value type: @@ -104,6 +109,10 @@ The example below shows a BMan FBPR dynamic allocation memory node The example below shows a (P4080) BMan CCSR-space node + bportals: bman-portals@ff4000000 { + ... + }; + crypto@300000 { ... fsl,bman = <&bman, 2>; @@ -115,6 +124,7 @@ The example below shows a (P4080) BMan CCSR-space node reg = <0x31a000 0x1000>; interrupts = <16 2 1 2>; fsl,liodn = <0x17>; + fsl,bman-portals = <&bportals>; memory-region = <&bman_fbpr>; }; diff --git a/Documentation/devicetree/bindings/soc/fsl/qman.txt b/Documentation/devicetree/bindings/soc/fsl/qman.txt index f3da91ef4287a7..556ebb8be75d9c 100644 --- a/Documentation/devicetree/bindings/soc/fsl/qman.txt +++ b/Documentation/devicetree/bindings/soc/fsl/qman.txt @@ -38,6 +38,11 @@ are located at offsets 0xbf8 and 0xbfc Value type: Definition: Standard property. The error interrupt +- fsl,qman-portals + Usage: Required + Value type: + Definition: Phandle to this QMan instance's portals + - fsl,liodn Usage: See pamu.txt Value type: @@ -127,6 +132,10 @@ The example below shows a QMan FQD and a PFDR dynamic allocation memory nodes The example below shows a (P4080) QMan CCSR-space node + qportals: qman-portals@ff4200000 { + ... + }; + clockgen: global-utilities@e1000 { ... sysclk: sysclk { @@ -154,6 +163,7 @@ The example below shows a (P4080) QMan CCSR-space node reg = <0x318000 0x1000>; interrupts = <16 2 1 3> fsl,liodn = <0x16>; + fsl,qman-portals = <&qportals>; memory-region = <&qman_fqd &qman_pfdr>; clocks = <&platform_pll 1>; }; From 39b55b53b924cbee8cb6efd54ab4ccc9158eb8b5 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Mon, 8 Dec 2014 04:29:18 -0600 Subject: [PATCH 333/601] powerpc/mpc85xx: Create dts components for the FSL QorIQ DPAA BMan Signed-off-by: Kumar Gala Signed-off-by: Geoff Thorpe Signed-off-by: Hai-Ying Wang [Emil Medve: Sync with the upstream binding] Signed-off-by: Emil Medve Signed-off-by: Scott Wood --- .../boot/dts/fsl/qoriq-bman1-portals.dtsi | 90 +++++++++++++++++++ arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi | 41 +++++++++ 2 files changed, 131 insertions(+) create mode 100644 arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi create mode 100644 arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi diff --git a/arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi new file mode 100644 index 00000000000000..5022432ebaa9b2 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi @@ -0,0 +1,90 @@ +/* + * QorIQ BMan Portal device tree stub for 10 portals + * + * Copyright 2011 - 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +&bportals { + #address-cells = <1>; + #size-cells = <1>; + compatible = "simple-bus"; + + bman-portal@0 { + compatible = "fsl,bman-portal"; + reg = <0x0 0x4000>, <0x100000 0x1000>; + interrupts = <105 2 0 0>; + }; + bman-portal@4000 { + compatible = "fsl,bman-portal"; + reg = <0x4000 0x4000>, <0x101000 0x1000>; + interrupts = <107 2 0 0>; + }; + bman-portal@8000 { + compatible = "fsl,bman-portal"; + reg = <0x8000 0x4000>, <0x102000 0x1000>; + interrupts = <109 2 0 0>; + }; + bman-portal@c000 { + compatible = "fsl,bman-portal"; + reg = <0xc000 0x4000>, <0x103000 0x1000>; + interrupts = <111 2 0 0>; + }; + bman-portal@10000 { + compatible = "fsl,bman-portal"; + reg = <0x10000 0x4000>, <0x104000 0x1000>; + interrupts = <113 2 0 0>; + }; + bman-portal@14000 { + compatible = "fsl,bman-portal"; + reg = <0x14000 0x4000>, <0x105000 0x1000>; + interrupts = <115 2 0 0>; + }; + bman-portal@18000 { + compatible = "fsl,bman-portal"; + reg = <0x18000 0x4000>, <0x106000 0x1000>; + interrupts = <117 2 0 0>; + }; + bman-portal@1c000 { + compatible = "fsl,bman-portal"; + reg = <0x1c000 0x4000>, <0x107000 0x1000>; + interrupts = <119 2 0 0>; + }; + bman-portal@20000 { + compatible = "fsl,bman-portal"; + reg = <0x20000 0x4000>, <0x108000 0x1000>; + interrupts = <121 2 0 0>; + }; + bman-portal@24000 { + compatible = "fsl,bman-portal"; + reg = <0x24000 0x4000>, <0x109000 0x1000>; + interrupts = <123 2 0 0>; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi new file mode 100644 index 00000000000000..3b5e3504acb7aa --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi @@ -0,0 +1,41 @@ +/* + * QorIQ BMan device tree stub [ controller @ offset 0x31a000 ] + * + * Copyright 2011 - 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +bman: bman@31a000 { + compatible = "fsl,bman"; + reg = <0x31a000 0x1000>; + interrupts = <16 2 1 2>; + fsl,bman-portals = <&bportals>; + memory-region = <&bman_fbpr>; +}; From de58824fc41ff0c266f4d8926271007fd121d9b4 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Mon, 8 Dec 2014 04:29:19 -0600 Subject: [PATCH 334/601] powerpc/mpc85xx: Create dts components for the FSL QorIQ DPAA QMan Change-Id: I16e63db731e55a3d60d4e147573c1af8718082d3 Signed-off-by: Kumar Gala Signed-off-by: Geoff Thorpe Signed-off-by: Hai-Ying Wang [Emil Medve: Sync with the upstream binding] Signed-off-by: Emil Medve Signed-off-by: Scott Wood --- .../boot/dts/fsl/qoriq-qman1-portals.dtsi | 101 ++++++++++++++++++ arch/powerpc/boot/dts/fsl/qoriq-qman1.dtsi | 41 +++++++ arch/powerpc/boot/dts/fsl/qoriq-qman3.dtsi | 41 +++++++ 3 files changed, 183 insertions(+) create mode 100644 arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi create mode 100644 arch/powerpc/boot/dts/fsl/qoriq-qman1.dtsi create mode 100644 arch/powerpc/boot/dts/fsl/qoriq-qman3.dtsi diff --git a/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi new file mode 100644 index 00000000000000..05d51acafa67a9 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi @@ -0,0 +1,101 @@ +/* + * QorIQ QMan Portal device tree stub for 10 portals & 15 pool channels + * + * Copyright 2011 - 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +&qportals { + #address-cells = <1>; + #size-cells = <1>; + compatible = "simple-bus"; + + qportal0: qman-portal@0 { + compatible = "fsl,qman-portal"; + reg = <0x0 0x4000>, <0x100000 0x1000>; + interrupts = <104 2 0 0>; + fsl,qman-channel-id = <0x0>; + }; + qportal1: qman-portal@4000 { + compatible = "fsl,qman-portal"; + reg = <0x4000 0x4000>, <0x101000 0x1000>; + interrupts = <106 2 0 0>; + fsl,qman-channel-id = <1>; + }; + qportal2: qman-portal@8000 { + compatible = "fsl,qman-portal"; + reg = <0x8000 0x4000>, <0x102000 0x1000>; + interrupts = <108 2 0 0>; + fsl,qman-channel-id = <2>; + }; + qportal3: qman-portal@c000 { + compatible = "fsl,qman-portal"; + reg = <0xc000 0x4000>, <0x103000 0x1000>; + interrupts = <110 2 0 0>; + fsl,qman-channel-id = <3>; + }; + qportal4: qman-portal@10000 { + compatible = "fsl,qman-portal"; + reg = <0x10000 0x4000>, <0x104000 0x1000>; + interrupts = <112 2 0 0>; + fsl,qman-channel-id = <4>; + }; + qportal5: qman-portal@14000 { + compatible = "fsl,qman-portal"; + reg = <0x14000 0x4000>, <0x105000 0x1000>; + interrupts = <114 2 0 0>; + fsl,qman-channel-id = <5>; + }; + qportal6: qman-portal@18000 { + compatible = "fsl,qman-portal"; + reg = <0x18000 0x4000>, <0x106000 0x1000>; + interrupts = <116 2 0 0>; + fsl,qman-channel-id = <6>; + }; + + qportal7: qman-portal@1c000 { + compatible = "fsl,qman-portal"; + reg = <0x1c000 0x4000>, <0x107000 0x1000>; + interrupts = <118 2 0 0>; + fsl,qman-channel-id = <7>; + }; + qportal8: qman-portal@20000 { + compatible = "fsl,qman-portal"; + reg = <0x20000 0x4000>, <0x108000 0x1000>; + interrupts = <120 2 0 0>; + fsl,qman-channel-id = <8>; + }; + qportal9: qman-portal@24000 { + compatible = "fsl,qman-portal"; + reg = <0x24000 0x4000>, <0x109000 0x1000>; + interrupts = <122 2 0 0>; + fsl,qman-channel-id = <9>; + }; +}; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-qman1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-qman1.dtsi new file mode 100644 index 00000000000000..0695778c43867a --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/qoriq-qman1.dtsi @@ -0,0 +1,41 @@ +/* + * QorIQ QMan device tree stub [ controller @ offset 0x318000 ] + * + * Copyright 2011 - 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +qman: qman@318000 { + compatible = "fsl,qman"; + reg = <0x318000 0x1000>; + interrupts = <16 2 1 3>; + fsl,qman-portals = <&qportals>; + memory-region = <&qman_fqd &qman_pfdr>; +}; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-qman3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-qman3.dtsi new file mode 100644 index 00000000000000..b379abd1439d86 --- /dev/null +++ b/arch/powerpc/boot/dts/fsl/qoriq-qman3.dtsi @@ -0,0 +1,41 @@ +/* + * QorIQ QMan rev3 device tree stub [ controller @ offset 0x318000 ] + * + * Copyright 2014 Freescale Semiconductor Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Freescale Semiconductor nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * + * ALTERNATIVELY, this software may be distributed under the terms of the + * GNU General Public License ("GPL") as published by the Free Software + * Foundation, either version 2 of that License or (at your option) any + * later version. + * + * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +qman: qman@318000 { + compatible = "fsl,qman"; + reg = <0x318000 0x2000>; + interrupts = <16 2 1 3>; + fsl,qman-portals = <&qportals>; + memory-region = <&qman_fqd &qman_pfdr>; +}; From fd5f4914b6053fc91ef2c278c54254a660427bf1 Mon Sep 17 00:00:00 2001 From: Alessio Igor Bogani Date: Thu, 4 Dec 2014 10:23:11 +0100 Subject: [PATCH 335/601] powerpc: dts: pq3/85xx: Fix GPIO address Fix the GPIO address in the device tree to match the documented location. Signed-off-by: Alessio Igor Bogani Signed-off-by: Scott Wood --- arch/powerpc/boot/dts/fsl/pq3-gpio-0.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/boot/dts/fsl/pq3-gpio-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-gpio-0.dtsi index 72a3ef5945c1c2..a1b48546b02d12 100644 --- a/arch/powerpc/boot/dts/fsl/pq3-gpio-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/pq3-gpio-0.dtsi @@ -1,5 +1,5 @@ /* - * PQ3 GPIO device tree stub [ controller @ offset 0xf000 ] + * PQ3 GPIO device tree stub [ controller @ offset 0xfc00 ] * * Copyright 2011 Freescale Semiconductor Inc. * @@ -32,10 +32,10 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -gpio-controller@f000 { +gpio-controller@fc00 { #gpio-cells = <2>; compatible = "fsl,pq3-gpio"; - reg = <0xf000 0x100>; + reg = <0xfc00 0x100>; interrupts = <47 0x2 0 0>; gpio-controller; }; From 8ac6e995ac17f33a5cc6796c5069488eb9f4abd7 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sat, 22 Nov 2014 16:18:20 +0100 Subject: [PATCH 336/601] PowerPC-83xx: Deletion of an unnecessary check before the function call "of_node_put" The of_node_put() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Scott Wood --- arch/powerpc/platforms/83xx/usb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb.c index 1ad748bb39b4b5..5c31d8292d3bb8 100644 --- a/arch/powerpc/platforms/83xx/usb.c +++ b/arch/powerpc/platforms/83xx/usb.c @@ -162,8 +162,7 @@ int mpc831x_usb_cfg(void) iounmap(immap); - if (immr_node) - of_node_put(immr_node); + of_node_put(immr_node); /* Map USB SOC space */ ret = of_address_to_resource(np, 0, &res); From 5db431285d832156852d6f0013089b5d2305a28d Mon Sep 17 00:00:00 2001 From: Rickard Strandqvist Date: Sat, 26 Jul 2014 23:26:51 +0200 Subject: [PATCH 337/601] powerpc/qe: Use strlcpy() Replace strcpy and strncpy with strlcpy to avoid strings that are too big, or lack null termination. Signed-off-by: Rickard Strandqvist [scottwood@freescale.com: cleaned up commit message] Signed-off-by: Scott Wood --- arch/powerpc/sysdev/qe_lib/qe.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c index 1f29cee8da7b0a..c2518cdb7ddb6e 100644 --- a/arch/powerpc/sysdev/qe_lib/qe.c +++ b/arch/powerpc/sysdev/qe_lib/qe.c @@ -497,7 +497,7 @@ int qe_upload_firmware(const struct qe_firmware *firmware) * saved microcode information and put in the new. */ memset(&qe_firmware_info, 0, sizeof(qe_firmware_info)); - strcpy(qe_firmware_info.id, firmware->id); + strlcpy(qe_firmware_info.id, firmware->id, sizeof(qe_firmware_info.id)); qe_firmware_info.extended_modes = firmware->extended_modes; memcpy(qe_firmware_info.vtraps, firmware->vtraps, sizeof(firmware->vtraps)); @@ -583,8 +583,8 @@ struct qe_firmware_info *qe_get_firmware_info(void) /* Copy the data into qe_firmware_info*/ sprop = of_get_property(fw, "id", NULL); if (sprop) - strncpy(qe_firmware_info.id, sprop, - sizeof(qe_firmware_info.id) - 1); + strlcpy(qe_firmware_info.id, sprop, + sizeof(qe_firmware_info.id)); prop = of_find_property(fw, "extended-modes", NULL); if (prop && (prop->length == sizeof(u64))) { From 0d7d9b3a456aa8670cd9552f69529d57716f3122 Mon Sep 17 00:00:00 2001 From: Alexandru-Cezar Sardan Date: Thu, 26 Jun 2014 11:58:58 +0300 Subject: [PATCH 338/601] perf/powerpc: reset event hw state when adding it to the PMU When adding an event to the PMU with PERF_EF_START the STOPPED and UPTODATE flags need to be cleared in the hw.event status variable because they are preventing the update of the event count on overflow interrupt. Signed-off-by: Alexandru-Cezar Sardan Signed-off-by: Scott Wood --- arch/powerpc/perf/core-fsl-emb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index e9fe904b653804..5d747b4cb8ee18 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -330,9 +330,11 @@ static int fsl_emb_pmu_add(struct perf_event *event, int flags) } local64_set(&event->hw.prev_count, val); - if (!(flags & PERF_EF_START)) { + if (unlikely(!(flags & PERF_EF_START))) { event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; val = 0; + } else { + event->hw.state &= ~(PERF_HES_STOPPED | PERF_HES_UPTODATE); } write_pmc(i, val); From c9111a41dc27db8a9b6c0429f4965ddd8766f620 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 30 Apr 2014 23:26:46 -0700 Subject: [PATCH 339/601] powerpc: defconfigs: add MTD_SPI_NOR (new dependency for M25P80) These defconfigs contain the CONFIG_M25P80 symbol, which is now dependent on the MTD_SPI_NOR symbol. Add CONFIG_MTD_SPI_NOR to satisfy the new dependency. At the same time, drop the now-nonexistent CONFIG_MTD_CHAR symbol. Signed-off-by: Brian Norris Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Scott Wood --- arch/powerpc/configs/corenet32_smp_defconfig | 1 + arch/powerpc/configs/corenet64_smp_defconfig | 1 + arch/powerpc/configs/mpc85xx_smp_defconfig | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig index 4d0771aad9aa11..51866f17068457 100644 --- a/arch/powerpc/configs/corenet32_smp_defconfig +++ b/arch/powerpc/configs/corenet32_smp_defconfig @@ -75,6 +75,7 @@ CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_MTD_NAND_FSL_IFC=y +CONFIG_MTD_SPI_NOR=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=131072 diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig index 88b475185611ae..d6c0c819895288 100644 --- a/arch/powerpc/configs/corenet64_smp_defconfig +++ b/arch/powerpc/configs/corenet64_smp_defconfig @@ -69,6 +69,7 @@ CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_MTD_NAND_FSL_IFC=y +CONFIG_MTD_SPI_NOR=y CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig index 5b79adb64ce2e5..c45ad2e01b0ce1 100644 --- a/arch/powerpc/configs/mpc85xx_smp_defconfig +++ b/arch/powerpc/configs/mpc85xx_smp_defconfig @@ -91,6 +91,7 @@ CONFIG_MTD_PHYSMAP_OF=y CONFIG_MTD_NAND=y CONFIG_MTD_NAND_FSL_ELBC=y CONFIG_MTD_NAND_FSL_IFC=y +CONFIG_MTD_SPI_NOR=y CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_NBD=y From 5c75a0dac3099f33a500b8b31f00541374752049 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 28 Jan 2015 16:52:01 +0100 Subject: [PATCH 340/601] s390/smp: increase maximum value of NR_CPUS to 512 With SMT we can have more than 256 CPUs. Let's make them available. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8d11babf9aa505..373cd5badf1cf8 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -373,14 +373,14 @@ config SMP Even if you don't know what to do here, say Y. config NR_CPUS - int "Maximum number of CPUs (2-256)" - range 2 256 + int "Maximum number of CPUs (2-512)" + range 2 512 depends on SMP default "32" if !64BIT default "64" if 64BIT help This allows you to specify the maximum number of CPUs which this - kernel will support. The maximum supported value is 256 and the + kernel will support. The maximum supported value is 512 and the minimum value which makes sense is 2. This is purely to save memory - each supported CPU adds From e5144c9375851948f50e13db80c3150e3a7521b8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 28 Jan 2015 15:45:53 +0100 Subject: [PATCH 341/601] iommu: Disable on !MMU builds A lot of the IOMMU support code does not build if the CPU does not have an MMU itself, and it's not clear if there is any use case for it, so let's just disable it and wait for anybody to need it. This avoids randconfig errors like ../arch/arm/mm/dma-mapping.c: In function '__iommu_alloc_remap': ../arch/arm/mm/dma-mapping.c:1278:34: error: 'VM_ARM_DMA_CONSISTENT' undeclared (first use in this function) area = get_vm_area_caller(size, VM_ARM_DMA_CONSISTENT | VM_USERMAP, ^ ../arch/arm/mm/dma-mapping.c:1278:34: note: each undeclared identifier is reported only once for each function it appears in ../arch/arm/mm/dma-mapping.c: In function '__atomic_get_pages': ../arch/arm/mm/dma-mapping.c:1358:27: error: 'atomic_pool' undeclared (first use in this function) struct dma_pool *pool = &atomic_pool; Signed-off-by: Arnd Bergmann Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index a839ca93376b85..5189cc12c5a365 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -4,6 +4,7 @@ config IOMMU_API menuconfig IOMMU_SUPPORT bool "IOMMU Hardware Support" + depends on MMU default y ---help--- Say Y here if you want to compile device drivers for IO Memory @@ -144,7 +145,7 @@ config IRQ_REMAP # OMAP IOMMU support config OMAP_IOMMU bool "OMAP IOMMU Support" - depends on ARCH_OMAP2PLUS + depends on ARCH_OMAP2PLUS && MMU select IOMMU_API config OMAP_IOMMU_DEBUG @@ -191,7 +192,7 @@ config TEGRA_IOMMU_SMMU config EXYNOS_IOMMU bool "Exynos IOMMU Support" - depends on ARCH_EXYNOS && ARM + depends on ARCH_EXYNOS && ARM && MMU select IOMMU_API select ARM_DMA_USE_IOMMU help @@ -220,7 +221,7 @@ config SHMOBILE_IPMMU_TLB config SHMOBILE_IOMMU bool "IOMMU for Renesas IPMMU/IPMMUI" default n - depends on ARM + depends on ARM && MMU depends on ARCH_SHMOBILE || COMPILE_TEST select IOMMU_API select ARM_DMA_USE_IOMMU @@ -309,6 +310,7 @@ config SPAPR_TCE_IOMMU config ARM_SMMU bool "ARM Ltd. System MMU (SMMU) Support" depends on ARM64 || (ARM_LPAE && OF) + depends on MMU select IOMMU_API select ARM_DMA_USE_IOMMU if ARM help From bb590c9011a7c4e195af486438ead666023f96bd Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sat, 24 Jan 2015 23:13:50 +0200 Subject: [PATCH 342/601] iommu/ipmmu-vmsa: Fix IOMMU lookup when multiple IOMMUs are registered When adding a new device the driver loops over all registered IOMMUs and calls the ipmmu_find_utlbs() function to parse the DT iommus attribute. The function returns an error when the IOMMU referenced in DT doesn't match the current IOMMU. The caller incorrectly breaks from the loop immediately when the error is reported, resulting in only the first IOMMU being considered. Fix this, and while at it move code that isn't specific to an IOMMU instance out of the loop. Signed-off-by: Laurent Pinchart Signed-off-by: Joerg Roedel --- drivers/iommu/ipmmu-vmsa.c | 49 ++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 3d7e7325a1e5f0..10186cac7716e2 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -573,45 +573,28 @@ static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, } static int ipmmu_find_utlbs(struct ipmmu_vmsa_device *mmu, struct device *dev, - unsigned int **_utlbs) + unsigned int *utlbs, unsigned int num_utlbs) { - unsigned int *utlbs; unsigned int i; - int count; - - count = of_count_phandle_with_args(dev->of_node, "iommus", - "#iommu-cells"); - if (count < 0) - return -EINVAL; - - utlbs = kcalloc(count, sizeof(*utlbs), GFP_KERNEL); - if (!utlbs) - return -ENOMEM; - for (i = 0; i < count; ++i) { + for (i = 0; i < num_utlbs; ++i) { struct of_phandle_args args; int ret; ret = of_parse_phandle_with_args(dev->of_node, "iommus", "#iommu-cells", i, &args); if (ret < 0) - goto error; + return ret; of_node_put(args.np); if (args.np != mmu->dev->of_node || args.args_count != 1) - goto error; + return -EINVAL; utlbs[i] = args.args[0]; } - *_utlbs = utlbs; - - return count; - -error: - kfree(utlbs); - return -EINVAL; + return 0; } static int ipmmu_add_device(struct device *dev) @@ -619,10 +602,10 @@ static int ipmmu_add_device(struct device *dev) struct ipmmu_vmsa_archdata *archdata; struct ipmmu_vmsa_device *mmu; struct iommu_group *group = NULL; - unsigned int *utlbs = NULL; + unsigned int *utlbs; unsigned int i; - int num_utlbs = 0; - int ret; + int num_utlbs; + int ret = -ENODEV; if (dev->archdata.iommu) { dev_warn(dev, "IOMMU driver already assigned to device %s\n", @@ -631,11 +614,21 @@ static int ipmmu_add_device(struct device *dev) } /* Find the master corresponding to the device. */ + + num_utlbs = of_count_phandle_with_args(dev->of_node, "iommus", + "#iommu-cells"); + if (num_utlbs < 0) + return -ENODEV; + + utlbs = kcalloc(num_utlbs, sizeof(*utlbs), GFP_KERNEL); + if (!utlbs) + return -ENOMEM; + spin_lock(&ipmmu_devices_lock); list_for_each_entry(mmu, &ipmmu_devices, list) { - num_utlbs = ipmmu_find_utlbs(mmu, dev, &utlbs); - if (num_utlbs) { + ret = ipmmu_find_utlbs(mmu, dev, utlbs, num_utlbs); + if (!ret) { /* * TODO Take a reference to the MMU to protect * against device removal. @@ -646,7 +639,7 @@ static int ipmmu_add_device(struct device *dev) spin_unlock(&ipmmu_devices_lock); - if (num_utlbs <= 0) + if (ret < 0) return -ENODEV; for (i = 0; i < num_utlbs; ++i) { From 477ab7a19cec8409e4e2dd10e7348e4cac3c06e5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 20 Jan 2015 16:13:33 +0100 Subject: [PATCH 343/601] iommu: Make more drivers depend on COMPILE_TEST For easier compile testing of these iommu drivers. Acked-by: Laurent Pinchart Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 5189cc12c5a365..db98eb70646636 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -23,7 +23,8 @@ config OF_IOMMU config FSL_PAMU bool "Freescale IOMMU support" - depends on PPC_E500MC + depends on PPC32 + depends on PPC_E500MC || COMPILE_TEST select IOMMU_API select GENERIC_ALLOCATOR help @@ -34,7 +35,8 @@ config FSL_PAMU # MSM IOMMU support config MSM_IOMMU bool "MSM IOMMU Support" - depends on ARCH_MSM8X60 || ARCH_MSM8960 + depends on ARM + depends on ARCH_MSM8X60 || ARCH_MSM8960 || COMPILE_TEST select IOMMU_API help Support for the IOMMUs found on certain Qualcomm SOCs. @@ -145,7 +147,8 @@ config IRQ_REMAP # OMAP IOMMU support config OMAP_IOMMU bool "OMAP IOMMU Support" - depends on ARCH_OMAP2PLUS && MMU + depends on ARM && MMU + depends on ARCH_OMAP2PLUS || COMPILE_TEST select IOMMU_API config OMAP_IOMMU_DEBUG From 1d7f449c898b4180b9db79007f23f599b5bb879e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 22 Jan 2015 14:42:06 +0100 Subject: [PATCH 344/601] iommu/omap: Print phys_addr_t using %pa Fixes this compile warning: drivers/iommu/omap-iommu.c: In function 'omap_iommu_map': drivers/iommu/omap-iommu.c:1139:2: warning: format '%x' expects argument of type 'unsigned int', but argument 5 has type 'phys_addr_t' [-Wformat=] dev_dbg(dev, "mapping da 0x%lx to pa 0x%x size 0x%x\n", da, pa, bytes); Acked-by: Laurent Pinchart Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index bbb7dcef02d382..f59f857b702e8e 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1126,7 +1126,7 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, return -EINVAL; } - dev_dbg(dev, "mapping da 0x%lx to pa 0x%x size 0x%x\n", da, pa, bytes); + dev_dbg(dev, "mapping da 0x%lx to pa %pa size 0x%x\n", da, &pa, bytes); iotlb_init_entry(&e, da, pa, omap_pgsz); From 8cd4f75183446d00432235fedceffae03a6332db Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 22 Jan 2015 14:50:39 +0100 Subject: [PATCH 345/601] iommu/fsl: Use %pa to print phys_addr_t Fix compile warnings. Acked-by: Laurent Pinchart Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index 113fb7be8cfde9..6ee947e41870f5 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -352,7 +352,7 @@ int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size, unsigned long fspi; if ((win_size & (win_size - 1)) || win_size < PAMU_PAGE_SIZE) { - pr_debug("window size too small or not a power of two %llx\n", win_size); + pr_debug("window size too small or not a power of two %pa\n", &win_size); return -EINVAL; } @@ -1121,8 +1121,7 @@ static int __init fsl_pamu_probe(struct platform_device *pdev) spaact = (void *)ppaact + (PAGE_SIZE << get_order(PAACT_SIZE)); omt = (void *)spaact + (PAGE_SIZE << get_order(SPAACT_SIZE)); - dev_dbg(&pdev->dev, "ppaact virt=%p phys=0x%llx\n", ppaact, - (unsigned long long) ppaact_phys); + dev_dbg(&pdev->dev, "ppaact virt=%p phys=%pa\n", ppaact, &ppaact_phys); /* Check to see if we need to implement the work-around on this SOC */ @@ -1138,7 +1137,7 @@ static int __init fsl_pamu_probe(struct platform_device *pdev) if (csd_port_id) { dev_dbg(&pdev->dev, "creating coherency subdomain at address " - "0x%llx, size %zu, port id 0x%08x", ppaact_phys, + "%pa, size %zu, port id 0x%08x", &ppaact_phys, mem_size, csd_port_id); ret = create_csd(ppaact_phys, mem_size, csd_port_id); From 8502427ccd9500cefc1ad47655371f9121934845 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:02:04 -0500 Subject: [PATCH 346/601] xprtrdma: human-readable completion status Make it easier to grep the system log for specific error conditions. The wc.opcode field is not included because opcode numbers are sparse, and because wc.opcode is not necessarily valid when completion reports an error. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 70 ++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 13 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index c98e4064391032..56f705d63d5ced 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -173,18 +173,54 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) } } +static const char * const wc_status[] = { + "success", + "local length error", + "local QP operation error", + "local EE context operation error", + "local protection error", + "WR flushed", + "memory management operation error", + "bad response error", + "local access error", + "remote invalid request error", + "remote access error", + "remote operation error", + "transport retry counter exceeded", + "RNR retrycounter exceeded", + "local RDD violation error", + "remove invalid RD request", + "operation aborted", + "invalid EE context number", + "invalid EE context state", + "fatal error", + "response timeout error", + "general error", +}; + +#define COMPLETION_MSG(status) \ + ((status) < ARRAY_SIZE(wc_status) ? \ + wc_status[(status)] : "unexpected completion error") + static void rpcrdma_sendcq_process_wc(struct ib_wc *wc) { - struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; + if (likely(wc->status == IB_WC_SUCCESS)) + return; - dprintk("RPC: %s: frmr %p status %X opcode %d\n", - __func__, frmr, wc->status, wc->opcode); + /* WARNING: Only wr_id and status are reliable at this point */ + if (wc->wr_id == 0ULL) { + if (wc->status != IB_WC_WR_FLUSH_ERR) + pr_err("RPC: %s: SEND: %s\n", + __func__, COMPLETION_MSG(wc->status)); + } else { + struct rpcrdma_mw *r; - if (wc->wr_id == 0ULL) - return; - if (wc->status != IB_WC_SUCCESS) - frmr->r.frmr.fr_state = FRMR_IS_STALE; + r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id; + r->r.frmr.fr_state = FRMR_IS_STALE; + pr_err("RPC: %s: frmr %p (stale): %s\n", + __func__, r, COMPLETION_MSG(wc->status)); + } } static int @@ -248,16 +284,17 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) struct rpcrdma_rep *rep = (struct rpcrdma_rep *)(unsigned long)wc->wr_id; - dprintk("RPC: %s: rep %p status %X opcode %X length %u\n", - __func__, rep, wc->status, wc->opcode, wc->byte_len); + /* WARNING: Only wr_id and status are reliable at this point */ + if (wc->status != IB_WC_SUCCESS) + goto out_fail; - if (wc->status != IB_WC_SUCCESS) { - rep->rr_len = ~0U; - goto out_schedule; - } + /* status == SUCCESS means all fields in wc are trustworthy */ if (wc->opcode != IB_WC_RECV) return; + dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n", + __func__, rep, wc->byte_len); + rep->rr_len = wc->byte_len; ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); @@ -275,6 +312,13 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) out_schedule: list_add_tail(&rep->rr_list, sched_list); + return; +out_fail: + if (wc->status != IB_WC_WR_FLUSH_ERR) + pr_err("RPC: %s: rep %p: %s\n", + __func__, rep, COMPLETION_MSG(wc->status)); + rep->rr_len = ~0U; + goto out_schedule; } static int From 284f4902a632584e8d73cf7d9363f819adf7240c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:02:13 -0500 Subject: [PATCH 347/601] xprtrdma: Modernize htonl and ntohl Clean up: Replace htonl and ntohl with the be32 equivalents. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- include/linux/sunrpc/rpc_rdma.h | 9 +++++++ include/linux/sunrpc/svc_rdma.h | 2 -- net/sunrpc/xprtrdma/rpc_rdma.c | 48 ++++++++++++++++++--------------- 3 files changed, 35 insertions(+), 24 deletions(-) diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index b78f16b1dea3a0..1578ed241c192f 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -42,6 +42,9 @@ #include +#define RPCRDMA_VERSION 1 +#define rpcrdma_version cpu_to_be32(RPCRDMA_VERSION) + struct rpcrdma_segment { __be32 rs_handle; /* Registered memory handle */ __be32 rs_length; /* Length of the chunk in bytes */ @@ -115,4 +118,10 @@ enum rpcrdma_proc { RDMA_ERROR = 4 /* An RPC RDMA encoding error */ }; +#define rdma_msg cpu_to_be32(RDMA_MSG) +#define rdma_nomsg cpu_to_be32(RDMA_NOMSG) +#define rdma_msgp cpu_to_be32(RDMA_MSGP) +#define rdma_done cpu_to_be32(RDMA_DONE) +#define rdma_error cpu_to_be32(RDMA_ERROR) + #endif /* _LINUX_SUNRPC_RPC_RDMA_H */ diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 975da754c778d3..ddfe88f522199c 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -63,8 +63,6 @@ extern atomic_t rdma_stat_rq_prod; extern atomic_t rdma_stat_sq_poll; extern atomic_t rdma_stat_sq_prod; -#define RPCRDMA_VERSION 1 - /* * Contexts are built when an RDMA request is created and are a * record of the resources that can be recovered when the request diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index df01d124936c32..a6fb30b0a8cc4a 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -209,9 +209,11 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, if (cur_rchunk) { /* read */ cur_rchunk->rc_discrim = xdr_one; /* all read chunks have the same "position" */ - cur_rchunk->rc_position = htonl(pos); - cur_rchunk->rc_target.rs_handle = htonl(seg->mr_rkey); - cur_rchunk->rc_target.rs_length = htonl(seg->mr_len); + cur_rchunk->rc_position = cpu_to_be32(pos); + cur_rchunk->rc_target.rs_handle = + cpu_to_be32(seg->mr_rkey); + cur_rchunk->rc_target.rs_length = + cpu_to_be32(seg->mr_len); xdr_encode_hyper( (__be32 *)&cur_rchunk->rc_target.rs_offset, seg->mr_base); @@ -222,8 +224,10 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, cur_rchunk++; r_xprt->rx_stats.read_chunk_count++; } else { /* write/reply */ - cur_wchunk->wc_target.rs_handle = htonl(seg->mr_rkey); - cur_wchunk->wc_target.rs_length = htonl(seg->mr_len); + cur_wchunk->wc_target.rs_handle = + cpu_to_be32(seg->mr_rkey); + cur_wchunk->wc_target.rs_length = + cpu_to_be32(seg->mr_len); xdr_encode_hyper( (__be32 *)&cur_wchunk->wc_target.rs_offset, seg->mr_base); @@ -257,7 +261,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, *iptr++ = xdr_zero; /* encode a NULL reply chunk */ } else { warray->wc_discrim = xdr_one; - warray->wc_nchunks = htonl(nchunks); + warray->wc_nchunks = cpu_to_be32(nchunks); iptr = (__be32 *) cur_wchunk; if (type == rpcrdma_writech) { *iptr++ = xdr_zero; /* finish the write chunk list */ @@ -404,11 +408,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) /* build RDMA header in private area at front */ headerp = (struct rpcrdma_msg *) req->rl_base; - /* don't htonl XID, it's already done in request */ + /* don't byte-swap XID, it's already done in request */ headerp->rm_xid = rqst->rq_xid; - headerp->rm_vers = xdr_one; - headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests); - headerp->rm_type = htonl(RDMA_MSG); + headerp->rm_vers = rpcrdma_version; + headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_max_requests); + headerp->rm_type = rdma_msg; /* * Chunks needed for results? @@ -482,11 +486,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) RPCRDMA_INLINE_PAD_VALUE(rqst)); if (padlen) { - headerp->rm_type = htonl(RDMA_MSGP); + headerp->rm_type = rdma_msgp; headerp->rm_body.rm_padded.rm_align = - htonl(RPCRDMA_INLINE_PAD_VALUE(rqst)); + cpu_to_be32(RPCRDMA_INLINE_PAD_VALUE(rqst)); headerp->rm_body.rm_padded.rm_thresh = - htonl(RPCRDMA_INLINE_PAD_THRESH); + cpu_to_be32(RPCRDMA_INLINE_PAD_THRESH); headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero; headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; @@ -570,7 +574,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b unsigned int i, total_len; struct rpcrdma_write_chunk *cur_wchunk; - i = ntohl(**iptrp); /* get array count */ + i = be32_to_cpu(**iptrp); if (i > max) return -1; cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1); @@ -582,11 +586,11 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b xdr_decode_hyper((__be32 *)&seg->rs_offset, &off); dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n", __func__, - ntohl(seg->rs_length), + be32_to_cpu(seg->rs_length), (unsigned long long)off, - ntohl(seg->rs_handle)); + be32_to_cpu(seg->rs_handle)); } - total_len += ntohl(seg->rs_length); + total_len += be32_to_cpu(seg->rs_length); ++cur_wchunk; } /* check and adjust for properly terminated write chunk */ @@ -749,9 +753,9 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) goto repost; } headerp = (struct rpcrdma_msg *) rep->rr_base; - if (headerp->rm_vers != xdr_one) { + if (headerp->rm_vers != rpcrdma_version) { dprintk("RPC: %s: invalid version %d\n", - __func__, ntohl(headerp->rm_vers)); + __func__, be32_to_cpu(headerp->rm_vers)); goto repost; } @@ -793,7 +797,7 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) /* check for expected message types */ /* The order of some of these tests is important. */ switch (headerp->rm_type) { - case htonl(RDMA_MSG): + case rdma_msg: /* never expect read chunks */ /* never expect reply chunks (two ways to check) */ /* never expect write chunks without having offered RDMA */ @@ -832,7 +836,7 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen); break; - case htonl(RDMA_NOMSG): + case rdma_nomsg: /* never expect read or write chunks, always reply chunks */ if (headerp->rm_body.rm_chunks[0] != xdr_zero || headerp->rm_body.rm_chunks[1] != xdr_zero || @@ -853,7 +857,7 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) dprintk("%s: invalid rpcrdma reply header (type %d):" " chunks[012] == %d %d %d" " expected chunks <= %d\n", - __func__, ntohl(headerp->rm_type), + __func__, be32_to_cpu(headerp->rm_type), headerp->rm_body.rm_chunks[0], headerp->rm_body.rm_chunks[1], headerp->rm_body.rm_chunks[2], From 052151a9798ef7a79372fdc688018dc405a6063c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:02:21 -0500 Subject: [PATCH 348/601] xprtrdma: Display XIDs in host byte order xprtsock.c and the backchannel code display XIDs in host byte order. Follow suit in xprtrdma. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index a6fb30b0a8cc4a..150dd764180386 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -766,7 +766,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) spin_unlock(&xprt->transport_lock); dprintk("RPC: %s: reply 0x%p failed " "to match any request xid 0x%08x len %d\n", - __func__, rep, headerp->rm_xid, rep->rr_len); + __func__, rep, be32_to_cpu(headerp->rm_xid), + rep->rr_len); repost: r_xprt->rx_stats.bad_reply_count++; rep->rr_func = rpcrdma_reply_handler; @@ -782,13 +783,14 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) spin_unlock(&xprt->transport_lock); dprintk("RPC: %s: duplicate reply 0x%p to RPC " "request 0x%p: xid 0x%08x\n", __func__, rep, req, - headerp->rm_xid); + be32_to_cpu(headerp->rm_xid)); goto repost; } dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" " RPC request 0x%p xid 0x%08x\n", - __func__, rep, req, rqst, headerp->rm_xid); + __func__, rep, req, rqst, + be32_to_cpu(headerp->rm_xid)); /* from here on, the reply is no longer an orphan */ req->rl_reply = rep; From f2846481b4bf758cf7c3fe8f24b35950306f1db2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:02:29 -0500 Subject: [PATCH 349/601] xprtrdma: Clean up hdrlen Clean up: Replace naked integers with a documenting macro. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- include/linux/sunrpc/rpc_rdma.h | 5 ++++- net/sunrpc/xprtrdma/rpc_rdma.c | 12 +++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index 1578ed241c192f..f33c5a4d6fe47f 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -98,7 +98,10 @@ struct rpcrdma_msg { } rm_body; }; -#define RPCRDMA_HDRLEN_MIN 28 +/* + * Smallest RPC/RDMA header: rm_xid through rm_type, then rm_nochunks + */ +#define RPCRDMA_HDRLEN_MIN (sizeof(__be32) * 7) enum rpcrdma_errcode { ERR_VERS = 1, diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 150dd764180386..dcf5ebc3d373b9 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -472,7 +472,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) return -EIO; } - hdrlen = 28; /*sizeof *headerp;*/ + hdrlen = RPCRDMA_HDRLEN_MIN; padlen = 0; /* @@ -748,7 +748,7 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) } return; } - if (rep->rr_len < 28) { + if (rep->rr_len < RPCRDMA_HDRLEN_MIN) { dprintk("RPC: %s: short/invalid reply\n", __func__); goto repost; } @@ -830,8 +830,9 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) } else { /* else ordinary inline */ rdmalen = 0; - iptr = (__be32 *)((unsigned char *)headerp + 28); - rep->rr_len -= 28; /*sizeof *headerp;*/ + iptr = (__be32 *)((unsigned char *)headerp + + RPCRDMA_HDRLEN_MIN); + rep->rr_len -= RPCRDMA_HDRLEN_MIN; status = rep->rr_len; } /* Fix up the rpc results for upper layer */ @@ -845,7 +846,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) headerp->rm_body.rm_chunks[2] != xdr_one || req->rl_nchunks == 0) goto badheader; - iptr = (__be32 *)((unsigned char *)headerp + 28); + iptr = (__be32 *)((unsigned char *)headerp + + RPCRDMA_HDRLEN_MIN); rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr); if (rdmalen < 0) goto badheader; From 5abefb861fd4306467813380cf21ce21d4b274ce Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:02:37 -0500 Subject: [PATCH 350/601] xprtrdma: Rename "xprt" and "rdma_connect" fields in struct rpcrdma_xprt Clean up: Use consistent field names in struct rpcrdma_xprt. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 19 ++++++++++--------- net/sunrpc/xprtrdma/xprt_rdma.h | 6 +++--- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index bbd6155d3e3454..ee57513263398b 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -200,9 +200,9 @@ xprt_rdma_free_addresses(struct rpc_xprt *xprt) static void xprt_rdma_connect_worker(struct work_struct *work) { - struct rpcrdma_xprt *r_xprt = - container_of(work, struct rpcrdma_xprt, rdma_connect.work); - struct rpc_xprt *xprt = &r_xprt->xprt; + struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt, + rx_connect_worker.work); + struct rpc_xprt *xprt = &r_xprt->rx_xprt; int rc = 0; xprt_clear_connected(xprt); @@ -235,7 +235,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) dprintk("RPC: %s: called\n", __func__); - cancel_delayed_work_sync(&r_xprt->rdma_connect); + cancel_delayed_work_sync(&r_xprt->rx_connect_worker); xprt_clear_connected(xprt); @@ -374,7 +374,8 @@ xprt_setup_rdma(struct xprt_create *args) * connection loss notification is async. We also catch connection loss * when reaping receives. */ - INIT_DELAYED_WORK(&new_xprt->rdma_connect, xprt_rdma_connect_worker); + INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, + xprt_rdma_connect_worker); new_ep->rep_func = rpcrdma_conn_func; new_ep->rep_xprt = xprt; @@ -434,17 +435,17 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) if (r_xprt->rx_ep.rep_connected != 0) { /* Reconnect */ - schedule_delayed_work(&r_xprt->rdma_connect, - xprt->reestablish_timeout); + schedule_delayed_work(&r_xprt->rx_connect_worker, + xprt->reestablish_timeout); xprt->reestablish_timeout <<= 1; if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO) xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO; else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; } else { - schedule_delayed_work(&r_xprt->rdma_connect, 0); + schedule_delayed_work(&r_xprt->rx_connect_worker, 0); if (!RPC_IS_ASYNC(task)) - flush_delayed_work(&r_xprt->rdma_connect); + flush_delayed_work(&r_xprt->rx_connect_worker); } } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index b799041b75bf9e..9a7aab31bf6e76 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -318,16 +318,16 @@ struct rpcrdma_stats { * during unmount. */ struct rpcrdma_xprt { - struct rpc_xprt xprt; + struct rpc_xprt rx_xprt; struct rpcrdma_ia rx_ia; struct rpcrdma_ep rx_ep; struct rpcrdma_buffer rx_buf; struct rpcrdma_create_data_internal rx_data; - struct delayed_work rdma_connect; + struct delayed_work rx_connect_worker; struct rpcrdma_stats rx_stats; }; -#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) +#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) /* Setting this to 0 ensures interoperability with early servers. From 5d410ba061c1e4bc0068ce91f2cf349998cde46c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:02:46 -0500 Subject: [PATCH 351/601] xprtrdma: Remove rpcrdma_ep::rep_ia Clean up: This field is not used. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 1 - net/sunrpc/xprtrdma/xprt_rdma.h | 1 - 2 files changed, 2 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 56f705d63d5ced..56e14b369d42cc 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -825,7 +825,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, else if (ep->rep_cqinit <= 2) ep->rep_cqinit = 0; INIT_CQCOUNT(ep); - ep->rep_ia = ia; init_waitqueue_head(&ep->rep_connect_wait); INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 9a7aab31bf6e76..5160a84fdb72ef 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -83,7 +83,6 @@ struct rpcrdma_ep { atomic_t rep_cqcount; int rep_cqinit; int rep_connected; - struct rpcrdma_ia *rep_ia; struct ib_qp_init_attr rep_attr; wait_queue_head_t rep_connect_wait; struct ib_sge rep_pad; /* holds zeroed pad */ From 3eb358106660195948f4e95822039c5799fc41f8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:02:54 -0500 Subject: [PATCH 352/601] xprtrdma: Remove rl_mr field, and the mr_chunk union Clean up: Since commit 0ac531c18323 ("xprtrdma: Remove REGISTER memory registration mode"), the rl_mr pointer is no longer used anywhere. After removal, there's only a single member of the mr_chunk union, so mr_chunk can be removed as well, in favor of a single pointer field. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 25 ++++++++++++------------- net/sunrpc/xprtrdma/xprt_rdma.h | 5 +---- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 56e14b369d42cc..1000f637edeeb0 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1493,8 +1493,8 @@ rpcrdma_buffer_put_mrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) int i; for (i = 1, seg++; i < RPCRDMA_MAX_SEGS; seg++, i++) - rpcrdma_buffer_put_mr(&seg->mr_chunk.rl_mw, buf); - rpcrdma_buffer_put_mr(&seg1->mr_chunk.rl_mw, buf); + rpcrdma_buffer_put_mr(&seg->rl_mw, buf); + rpcrdma_buffer_put_mr(&seg1->rl_mw, buf); } static void @@ -1580,7 +1580,7 @@ rpcrdma_buffer_get_frmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf, list_add(&r->mw_list, stale); continue; } - req->rl_segments[i].mr_chunk.rl_mw = r; + req->rl_segments[i].rl_mw = r; if (unlikely(i-- == 0)) return req; /* Success */ } @@ -1602,7 +1602,7 @@ rpcrdma_buffer_get_fmrs(struct rpcrdma_req *req, struct rpcrdma_buffer *buf) r = list_entry(buf->rb_mws.next, struct rpcrdma_mw, mw_list); list_del(&r->mw_list); - req->rl_segments[i].mr_chunk.rl_mw = r; + req->rl_segments[i].rl_mw = r; if (unlikely(i-- == 0)) return req; /* Success */ } @@ -1842,7 +1842,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, struct rpcrdma_xprt *r_xprt) { struct rpcrdma_mr_seg *seg1 = seg; - struct rpcrdma_mw *mw = seg1->mr_chunk.rl_mw; + struct rpcrdma_mw *mw = seg1->rl_mw; struct rpcrdma_frmr *frmr = &mw->r.frmr; struct ib_mr *mr = frmr->fr_mr; struct ib_send_wr fastreg_wr, *bad_wr; @@ -1931,12 +1931,12 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, struct ib_send_wr invalidate_wr, *bad_wr; int rc; - seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; + seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID; memset(&invalidate_wr, 0, sizeof invalidate_wr); - invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw; + invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw; invalidate_wr.opcode = IB_WR_LOCAL_INV; - invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; + invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); read_lock(&ia->ri_qplock); @@ -1946,7 +1946,7 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, read_unlock(&ia->ri_qplock); if (rc) { /* Force rpcrdma_buffer_get() to retry */ - seg1->mr_chunk.rl_mw->r.frmr.fr_state = FRMR_IS_STALE; + seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE; dprintk("RPC: %s: failed ib_post_send for invalidate," " status %i\n", __func__, rc); } @@ -1978,8 +1978,7 @@ rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) break; } - rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, - physaddrs, i, seg1->mr_dma); + rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma); if (rc) { dprintk("RPC: %s: failed ib_map_phys_fmr " "%u@0x%llx+%i (%d)... status %i\n", __func__, @@ -1988,7 +1987,7 @@ rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, while (i--) rpcrdma_unmap_one(ia, --seg); } else { - seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; + seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey; seg1->mr_base = seg1->mr_dma + pageoff; seg1->mr_nsegs = i; seg1->mr_len = len; @@ -2005,7 +2004,7 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, LIST_HEAD(l); int rc; - list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l); + list_add(&seg1->rl_mw->r.fmr->list, &l); rc = ib_unmap_fmr(&l); read_lock(&ia->ri_qplock); while (seg1->mr_nsegs--) diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 5160a84fdb72ef..532d58667b9d11 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -210,10 +210,7 @@ struct rpcrdma_mw { */ struct rpcrdma_mr_seg { /* chunk descriptors */ - union { /* chunk memory handles */ - struct ib_mr *rl_mr; /* if registered directly */ - struct rpcrdma_mw *rl_mw; /* if registered from region */ - } mr_chunk; + struct rpcrdma_mw *rl_mw; /* registered MR */ u64 mr_base; /* registration result */ u32 mr_rkey; /* registration result */ u32 mr_len; /* length of chunk or segment */ From eba8ff660b2d8b7fcd6669fcab2c025b59f66d26 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:03:02 -0500 Subject: [PATCH 353/601] xprtrdma: Move credit update to RPC reply handler Reduce work in the receive CQ handler, which can be run at hardware interrupt level, by moving the RPC/RDMA credit update logic to the RPC reply handler. This has some additional benefits: More header sanity checking is done before trusting the incoming credit value, and the receive CQ handler no longer touches the RPC/RDMA header (the CPU stalls while waiting for the header contents to be brought into the cache). This further extends work begun by commit e7ce710a8802 ("xprtrdma: Avoid deadlock when credit window is reset"). Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 10 ++++++++-- net/sunrpc/xprtrdma/verbs.c | 15 ++------------- net/sunrpc/xprtrdma/xprt_rdma.h | 1 - 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index dcf5ebc3d373b9..d7310109b6014b 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -736,7 +736,7 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) struct rpc_xprt *xprt = rep->rr_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); __be32 *iptr; - int rdmalen, status; + int credits, rdmalen, status; unsigned long cwnd; /* Check status. If bad, signal disconnect and return rep to pool */ @@ -871,8 +871,14 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) break; } + credits = be32_to_cpu(headerp->rm_credit); + if (credits == 0) + credits = 1; /* don't deadlock */ + else if (credits > r_xprt->rx_buf.rb_max_requests) + credits = r_xprt->rx_buf.rb_max_requests; + cwnd = xprt->cwnd; - xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT; + xprt->cwnd = credits << RPC_CWNDSHIFT; if (xprt->cwnd > cwnd) xprt_release_rqst_cong(rqst->rq_task); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 1000f637edeeb0..71a071aaf0ab68 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -49,6 +49,7 @@ #include #include +#include #include #include "xprt_rdma.h" @@ -298,17 +299,7 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) rep->rr_len = wc->byte_len; ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); - - if (rep->rr_len >= 16) { - struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base; - unsigned int credits = ntohl(p->rm_credit); - - if (credits == 0) - credits = 1; /* don't deadlock */ - else if (credits > rep->rr_buffer->rb_max_requests) - credits = rep->rr_buffer->rb_max_requests; - atomic_set(&rep->rr_buffer->rb_credits, credits); - } + prefetch(rep->rr_base); out_schedule: list_add_tail(&rep->rr_list, sched_list); @@ -480,7 +471,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) case RDMA_CM_EVENT_DEVICE_REMOVAL: connstate = -ENODEV; connected: - atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1); dprintk("RPC: %s: %sconnected\n", __func__, connstate > 0 ? "" : "dis"); ep->rep_connected = connstate; @@ -1186,7 +1176,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, buf->rb_max_requests = cdata->max_requests; spin_lock_init(&buf->rb_lock); - atomic_set(&buf->rb_credits, 1); /* Need to allocate: * 1. arrays for send and recv pointers diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 532d58667b9d11..3fcc92b0e3cafe 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -248,7 +248,6 @@ struct rpcrdma_req { */ struct rpcrdma_buffer { spinlock_t rb_lock; /* protects indexes */ - atomic_t rb_credits; /* most recent server credits */ int rb_max_requests;/* client max requests */ struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ struct list_head rb_all; From afadc468eb309b7c48ffdc8fa4c72acbb9991613 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:03:11 -0500 Subject: [PATCH 354/601] xprtrdma: Remove rpcrdma_ep::rep_func and ::rep_xprt Clean up: The rep_func field always refers to rpcrdma_conn_func(). rep_func should have been removed by commit b45ccfd25d50 ("xprtrdma: Remove MEMWINDOWS registration modes"). Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 4 +++- net/sunrpc/xprtrdma/transport.c | 2 -- net/sunrpc/xprtrdma/verbs.c | 6 +++--- net/sunrpc/xprtrdma/xprt_rdma.h | 2 -- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index d7310109b6014b..f2eda155299a87 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -695,7 +695,9 @@ rpcrdma_connect_worker(struct work_struct *work) { struct rpcrdma_ep *ep = container_of(work, struct rpcrdma_ep, rep_connect_worker.work); - struct rpc_xprt *xprt = ep->rep_xprt; + struct rpcrdma_xprt *r_xprt = + container_of(ep, struct rpcrdma_xprt, rx_ep); + struct rpc_xprt *xprt = &r_xprt->rx_xprt; spin_lock_bh(&xprt->transport_lock); if (++xprt->connect_cookie == 0) /* maintain a reserved value */ diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index ee57513263398b..a487bde71b4a1a 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -376,8 +376,6 @@ xprt_setup_rdma(struct xprt_create *args) */ INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, xprt_rdma_connect_worker); - new_ep->rep_func = rpcrdma_conn_func; - new_ep->rep_xprt = xprt; xprt_rdma_format_addresses(xprt); xprt->max_payload = rpcrdma_max_payload(new_xprt); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 71a071aaf0ab68..c61bb61c4d1338 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -154,7 +154,7 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) event->device->name, context); if (ep->rep_connected == 1) { ep->rep_connected = -EIO; - ep->rep_func(ep); + rpcrdma_conn_func(ep); wake_up_all(&ep->rep_connect_wait); } } @@ -169,7 +169,7 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) event->device->name, context); if (ep->rep_connected == 1) { ep->rep_connected = -EIO; - ep->rep_func(ep); + rpcrdma_conn_func(ep); wake_up_all(&ep->rep_connect_wait); } } @@ -474,7 +474,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) dprintk("RPC: %s: %sconnected\n", __func__, connstate > 0 ? "" : "dis"); ep->rep_connected = connstate; - ep->rep_func(ep); + rpcrdma_conn_func(ep); wake_up_all(&ep->rep_connect_wait); /*FALLTHROUGH*/ default: diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 3fcc92b0e3cafe..657c370e48b9e7 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -87,8 +87,6 @@ struct rpcrdma_ep { wait_queue_head_t rep_connect_wait; struct ib_sge rep_pad; /* holds zeroed pad */ struct ib_mr *rep_pad_mr; /* holds zeroed pad */ - void (*rep_func)(struct rpcrdma_ep *); - struct rpc_xprt *rep_xprt; /* for rep_func */ struct rdma_conn_param rep_remote_cma; struct sockaddr_storage rep_remote_addr; struct delayed_work rep_connect_worker; From 5ae711a24601257f395c1f8746ac95be0cbd75e5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:03:19 -0500 Subject: [PATCH 355/601] xprtrdma: Free the pd if ib_query_qp() fails If ib_query_qp() fails or the memory registration mode isn't supported, don't leak the PD. An orphaned IB/core resource will cause IB module removal to hang. Fixes: bd7ed1d13304 ("RPC/RDMA: check selected memory registration ...") Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index c61bb61c4d1338..aa012a393448a8 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -614,7 +614,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) if (rc) { dprintk("RPC: %s: ib_query_device failed %d\n", __func__, rc); - goto out2; + goto out3; } if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { @@ -672,14 +672,14 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) "phys register failed with %lX\n", __func__, PTR_ERR(ia->ri_bind_mem)); rc = -ENOMEM; - goto out2; + goto out3; } break; default: printk(KERN_ERR "RPC: Unsupported memory " "registration mode: %d\n", memreg); rc = -ENOMEM; - goto out2; + goto out3; } dprintk("RPC: %s: memory registration strategy is %d\n", __func__, memreg); @@ -689,6 +689,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) rwlock_init(&ia->ri_qplock); return 0; + +out3: + ib_dealloc_pd(ia->ri_pd); + ia->ri_pd = NULL; out2: rdma_destroy_id(ia->ri_id); ia->ri_id = NULL; From 7bc7972cdd1f137552ca979caa11c8acbe119ae8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:03:27 -0500 Subject: [PATCH 356/601] xprtrdma: Take struct ib_device_attr off the stack Device attributes are large, and are used in more than one place. Stash a copy in dynamically allocated memory. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 37 ++++++++++++--------------------- net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index aa012a393448a8..123bb04dd823d9 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -588,8 +588,8 @@ int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) { int rc, mem_priv; - struct ib_device_attr devattr; struct rpcrdma_ia *ia = &xprt->rx_ia; + struct ib_device_attr *devattr = &ia->ri_devattr; ia->ri_id = rpcrdma_create_id(xprt, ia, addr); if (IS_ERR(ia->ri_id)) { @@ -605,26 +605,21 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) goto out2; } - /* - * Query the device to determine if the requested memory - * registration strategy is supported. If it isn't, set the - * strategy to a globally supported model. - */ - rc = ib_query_device(ia->ri_id->device, &devattr); + rc = ib_query_device(ia->ri_id->device, devattr); if (rc) { dprintk("RPC: %s: ib_query_device failed %d\n", __func__, rc); goto out3; } - if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { + if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { ia->ri_have_dma_lkey = 1; ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; } if (memreg == RPCRDMA_FRMR) { /* Requires both frmr reg and local dma lkey */ - if ((devattr.device_cap_flags & + if ((devattr->device_cap_flags & (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { dprintk("RPC: %s: FRMR registration " @@ -634,7 +629,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) /* Mind the ia limit on FRMR page list depth */ ia->ri_max_frmr_depth = min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, - devattr.max_fast_reg_page_list_len); + devattr->max_fast_reg_page_list_len); } } if (memreg == RPCRDMA_MTHCAFMR) { @@ -736,20 +731,13 @@ int rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) { - struct ib_device_attr devattr; + struct ib_device_attr *devattr = &ia->ri_devattr; struct ib_cq *sendcq, *recvcq; int rc, err; - rc = ib_query_device(ia->ri_id->device, &devattr); - if (rc) { - dprintk("RPC: %s: ib_query_device failed %d\n", - __func__, rc); - return rc; - } - /* check provider's send/recv wr limits */ - if (cdata->max_requests > devattr.max_qp_wr) - cdata->max_requests = devattr.max_qp_wr; + if (cdata->max_requests > devattr->max_qp_wr) + cdata->max_requests = devattr->max_qp_wr; ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; ep->rep_attr.qp_context = ep; @@ -784,8 +772,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, } ep->rep_attr.cap.max_send_wr *= depth; - if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { - cdata->max_requests = devattr.max_qp_wr / depth; + if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) { + cdata->max_requests = devattr->max_qp_wr / depth; if (!cdata->max_requests) return -EINVAL; ep->rep_attr.cap.max_send_wr = cdata->max_requests * @@ -868,10 +856,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, /* Client offers RDMA Read but does not initiate */ ep->rep_remote_cma.initiator_depth = 0; - if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ + if (devattr->max_qp_rd_atom > 32) /* arbitrary but <= 255 */ ep->rep_remote_cma.responder_resources = 32; else - ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; + ep->rep_remote_cma.responder_resources = + devattr->max_qp_rd_atom; ep->rep_remote_cma.retry_count = 7; ep->rep_remote_cma.flow_control = 0; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 657c370e48b9e7..ec596cebc96623 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -70,6 +70,7 @@ struct rpcrdma_ia { int ri_async_rc; enum rpcrdma_memreg ri_memreg_strategy; unsigned int ri_max_frmr_depth; + struct ib_device_attr ri_devattr; }; /* From ce1ab9ab47973dcff7548abda20e49add2c4ca95 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:03:35 -0500 Subject: [PATCH 357/601] xprtrdma: Take struct ib_qp_attr and ib_qp_init_attr off the stack Reduce stack footprint of the connection upcall handler function. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 15 ++++++++------- net/sunrpc/xprtrdma/xprt_rdma.h | 2 ++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 123bb04dd823d9..958b372cb919ca 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -425,8 +425,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; #endif - struct ib_qp_attr attr; - struct ib_qp_init_attr iattr; + struct ib_qp_attr *attr = &ia->ri_qp_attr; + struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr; int connstate = 0; switch (event->event) { @@ -449,12 +449,13 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) break; case RDMA_CM_EVENT_ESTABLISHED: connstate = 1; - ib_query_qp(ia->ri_id->qp, &attr, - IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC, - &iattr); + ib_query_qp(ia->ri_id->qp, attr, + IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC, + iattr); dprintk("RPC: %s: %d responder resources" " (%d initiator)\n", - __func__, attr.max_dest_rd_atomic, attr.max_rd_atomic); + __func__, attr->max_dest_rd_atomic, + attr->max_rd_atomic); goto connected; case RDMA_CM_EVENT_CONNECT_ERROR: connstate = -ENOTCONN; @@ -487,7 +488,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (connstate == 1) { - int ird = attr.max_dest_rd_atomic; + int ird = attr->max_dest_rd_atomic; int tird = ep->rep_remote_cma.responder_resources; printk(KERN_INFO "rpcrdma: connection to %pI4:%u " "on %s, memreg %d slots %d ird %d%s\n", diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index ec596cebc96623..2b4e7787734d51 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -71,6 +71,8 @@ struct rpcrdma_ia { enum rpcrdma_memreg ri_memreg_strategy; unsigned int ri_max_frmr_depth; struct ib_device_attr ri_devattr; + struct ib_qp_attr ri_qp_attr; + struct ib_qp_init_attr ri_qp_init_attr; }; /* From ac920d04a7f307bfd7633f60abe33fb626f6ec83 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:03:44 -0500 Subject: [PATCH 358/601] xprtrdma: Simplify synopsis of rpcrdma_buffer_create() Clean up: There is one call site for rpcrdma_buffer_create(). All of the arguments there are fields of an rpcrdma_xprt. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 3 +-- net/sunrpc/xprtrdma/verbs.c | 7 +++++-- net/sunrpc/xprtrdma/xprt_rdma.h | 4 +--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index a487bde71b4a1a..808b3c52427a28 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -364,8 +364,7 @@ xprt_setup_rdma(struct xprt_create *args) * any inline data. Also specify any padding which will be provided * from a preregistered zero buffer. */ - rc = rpcrdma_buffer_create(&new_xprt->rx_buf, new_ep, &new_xprt->rx_ia, - &new_xprt->rx_data); + rc = rpcrdma_buffer_create(new_xprt); if (rc) goto out3; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 958b372cb919ca..fd71501403fd90 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1161,9 +1161,11 @@ rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) } int -rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, - struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) +rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) { + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; char *p; size_t len, rlen, wlen; int i, rc; @@ -1200,6 +1202,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, * Register the zeroed pad buffer, if any. */ if (cdata->padding) { + struct rpcrdma_ep *ep = &r_xprt->rx_ep; rc = rpcrdma_register_internal(ia, p, cdata->padding, &ep->rep_pad_mr, &ep->rep_pad); if (rc) diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 2b4e7787734d51..5c2fac3f30b646 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -354,9 +354,7 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *, /* * Buffer calls - xprtrdma/verbs.c */ -int rpcrdma_buffer_create(struct rpcrdma_buffer *, struct rpcrdma_ep *, - struct rpcrdma_ia *, - struct rpcrdma_create_data_internal *); +int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); From 1392402c405a75de1cdc658d36c6007ea1c037de Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:03:52 -0500 Subject: [PATCH 359/601] xprtrdma: Refactor rpcrdma_buffer_create() and rpcrdma_buffer_destroy() Move the details of how to create and destroy rpcrdma_req and rpcrdma_rep structures into helper functions. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 148 +++++++++++++++++++++++------------- 1 file changed, 95 insertions(+), 53 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index fd71501403fd90..24ea6dd184e43a 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1075,6 +1075,69 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) } } +static struct rpcrdma_req * +rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; + size_t wlen = 1 << fls(cdata->inline_wsize + + sizeof(struct rpcrdma_req)); + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_req *req; + int rc; + + rc = -ENOMEM; + req = kmalloc(wlen, GFP_KERNEL); + if (req == NULL) + goto out; + memset(req, 0, sizeof(struct rpcrdma_req)); + + rc = rpcrdma_register_internal(ia, req->rl_base, wlen - + offsetof(struct rpcrdma_req, rl_base), + &req->rl_handle, &req->rl_iov); + if (rc) + goto out_free; + + req->rl_size = wlen - sizeof(struct rpcrdma_req); + req->rl_buffer = &r_xprt->rx_buf; + return req; + +out_free: + kfree(req); +out: + return ERR_PTR(rc); +} + +static struct rpcrdma_rep * +rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; + size_t rlen = 1 << fls(cdata->inline_rsize + + sizeof(struct rpcrdma_rep)); + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_rep *rep; + int rc; + + rc = -ENOMEM; + rep = kmalloc(rlen, GFP_KERNEL); + if (rep == NULL) + goto out; + memset(rep, 0, sizeof(struct rpcrdma_rep)); + + rc = rpcrdma_register_internal(ia, rep->rr_base, rlen - + offsetof(struct rpcrdma_rep, rr_base), + &rep->rr_handle, &rep->rr_iov); + if (rc) + goto out_free; + + rep->rr_buffer = &r_xprt->rx_buf; + return rep; + +out_free: + kfree(rep); +out: + return ERR_PTR(rc); +} + static int rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) { @@ -1167,7 +1230,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; char *p; - size_t len, rlen, wlen; + size_t len; int i, rc; buf->rb_max_requests = cdata->max_requests; @@ -1227,68 +1290,55 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) break; } - /* - * Allocate/init the request/reply buffers. Doing this - * using kmalloc for now -- one for each buf. - */ - wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req)); - rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep)); - dprintk("RPC: %s: wlen = %zu, rlen = %zu\n", - __func__, wlen, rlen); - for (i = 0; i < buf->rb_max_requests; i++) { struct rpcrdma_req *req; struct rpcrdma_rep *rep; - req = kmalloc(wlen, GFP_KERNEL); - if (req == NULL) { + req = rpcrdma_create_req(r_xprt); + if (IS_ERR(req)) { dprintk("RPC: %s: request buffer %d alloc" " failed\n", __func__, i); - rc = -ENOMEM; + rc = PTR_ERR(req); goto out; } - memset(req, 0, sizeof(struct rpcrdma_req)); buf->rb_send_bufs[i] = req; - buf->rb_send_bufs[i]->rl_buffer = buf; - - rc = rpcrdma_register_internal(ia, req->rl_base, - wlen - offsetof(struct rpcrdma_req, rl_base), - &buf->rb_send_bufs[i]->rl_handle, - &buf->rb_send_bufs[i]->rl_iov); - if (rc) - goto out; - buf->rb_send_bufs[i]->rl_size = wlen - - sizeof(struct rpcrdma_req); - - rep = kmalloc(rlen, GFP_KERNEL); - if (rep == NULL) { + rep = rpcrdma_create_rep(r_xprt); + if (IS_ERR(rep)) { dprintk("RPC: %s: reply buffer %d alloc failed\n", __func__, i); - rc = -ENOMEM; + rc = PTR_ERR(rep); goto out; } - memset(rep, 0, sizeof(struct rpcrdma_rep)); buf->rb_recv_bufs[i] = rep; - buf->rb_recv_bufs[i]->rr_buffer = buf; - - rc = rpcrdma_register_internal(ia, rep->rr_base, - rlen - offsetof(struct rpcrdma_rep, rr_base), - &buf->rb_recv_bufs[i]->rr_handle, - &buf->rb_recv_bufs[i]->rr_iov); - if (rc) - goto out; - } - dprintk("RPC: %s: max_requests %d\n", - __func__, buf->rb_max_requests); - /* done */ + return 0; out: rpcrdma_buffer_destroy(buf); return rc; } +static void +rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep) +{ + if (!rep) + return; + + rpcrdma_deregister_internal(ia, rep->rr_handle, &rep->rr_iov); + kfree(rep); +} + +static void +rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req) +{ + if (!req) + return; + + rpcrdma_deregister_internal(ia, req->rl_handle, &req->rl_iov); + kfree(req); +} + static void rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf) { @@ -1344,18 +1394,10 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) dprintk("RPC: %s: entering\n", __func__); for (i = 0; i < buf->rb_max_requests; i++) { - if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) { - rpcrdma_deregister_internal(ia, - buf->rb_recv_bufs[i]->rr_handle, - &buf->rb_recv_bufs[i]->rr_iov); - kfree(buf->rb_recv_bufs[i]); - } - if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { - rpcrdma_deregister_internal(ia, - buf->rb_send_bufs[i]->rl_handle, - &buf->rb_send_bufs[i]->rl_iov); - kfree(buf->rb_send_bufs[i]); - } + if (buf->rb_recv_bufs) + rpcrdma_destroy_rep(ia, buf->rb_recv_bufs[i]); + if (buf->rb_send_bufs) + rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]); } switch (ia->ri_memreg_strategy) { From 9128c3e794a77917a86dd5490ca2c5233a8c6fde Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:04:00 -0500 Subject: [PATCH 360/601] xprtrdma: Add struct rpcrdma_regbuf and helpers There are several spots that allocate a buffer via kmalloc (usually contiguously with another data structure) and then register that buffer internally. I'd like to split the buffers out of these data structures to allow the data structures to scale. Start by adding functions that can kmalloc and register a buffer, and can manage/preserve the buffer's associated ib_sge and ib_mr fields. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 55 +++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/xprt_rdma.h | 43 ++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 24ea6dd184e43a..cdd6aacc91682a 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1828,6 +1828,61 @@ rpcrdma_deregister_internal(struct rpcrdma_ia *ia, return rc; } +/** + * rpcrdma_alloc_regbuf - kmalloc and register memory for SEND/RECV buffers + * @ia: controlling rpcrdma_ia + * @size: size of buffer to be allocated, in bytes + * @flags: GFP flags + * + * Returns pointer to private header of an area of internally + * registered memory, or an ERR_PTR. The registered buffer follows + * the end of the private header. + * + * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for + * receiving the payload of RDMA RECV operations. regbufs are not + * used for RDMA READ/WRITE operations, thus are registered only for + * LOCAL access. + */ +struct rpcrdma_regbuf * +rpcrdma_alloc_regbuf(struct rpcrdma_ia *ia, size_t size, gfp_t flags) +{ + struct rpcrdma_regbuf *rb; + int rc; + + rc = -ENOMEM; + rb = kmalloc(sizeof(*rb) + size, flags); + if (rb == NULL) + goto out; + + rb->rg_size = size; + rb->rg_owner = NULL; + rc = rpcrdma_register_internal(ia, rb->rg_base, size, + &rb->rg_mr, &rb->rg_iov); + if (rc) + goto out_free; + + return rb; + +out_free: + kfree(rb); +out: + return ERR_PTR(rc); +} + +/** + * rpcrdma_free_regbuf - deregister and free registered buffer + * @ia: controlling rpcrdma_ia + * @rb: regbuf to be deregistered and freed + */ +void +rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) +{ + if (rb) { + rpcrdma_deregister_internal(ia, rb->rg_mr, &rb->rg_iov); + kfree(rb); + } +} + /* * Wrappers for chunk registration, shared by read/write chunk code. */ diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 5c2fac3f30b646..36c37c60f1feb3 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -106,6 +106,44 @@ struct rpcrdma_ep { #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) +/* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV + * + * The below structure appears at the front of a large region of kmalloc'd + * memory, which always starts on a good alignment boundary. + */ + +struct rpcrdma_regbuf { + size_t rg_size; + struct rpcrdma_req *rg_owner; + struct ib_mr *rg_mr; + struct ib_sge rg_iov; + __be32 rg_base[0] __attribute__ ((aligned(256))); +}; + +static inline u64 +rdmab_addr(struct rpcrdma_regbuf *rb) +{ + return rb->rg_iov.addr; +} + +static inline u32 +rdmab_length(struct rpcrdma_regbuf *rb) +{ + return rb->rg_iov.length; +} + +static inline u32 +rdmab_lkey(struct rpcrdma_regbuf *rb) +{ + return rb->rg_iov.lkey; +} + +static inline struct rpcrdma_msg * +rdmab_to_msg(struct rpcrdma_regbuf *rb) +{ + return (struct rpcrdma_msg *)rb->rg_base; +} + enum rpcrdma_chunktype { rpcrdma_noch = 0, rpcrdma_readch, @@ -372,6 +410,11 @@ int rpcrdma_register_external(struct rpcrdma_mr_seg *, int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, struct rpcrdma_xprt *); +struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *, + size_t, gfp_t); +void rpcrdma_free_regbuf(struct rpcrdma_ia *, + struct rpcrdma_regbuf *); + /* * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c */ From 0ca77dc372110cbed4dbac5e867ffdc60ebccf6a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:04:08 -0500 Subject: [PATCH 361/601] xprtrdma: Allocate RPC send buffer separately from struct rpcrdma_req Because internal memory registration is an expensive and synchronous operation, xprtrdma pre-registers send and receive buffers at mount time, and then re-uses them for each RPC. A "hardway" allocation is a memory allocation and registration that replaces a send buffer during the processing of an RPC. Hardway must be done if the RPC send buffer is too small to accommodate an RPC's call and reply headers. For xprtrdma, each RPC send buffer is currently part of struct rpcrdma_req so that xprt_rdma_free(), which is passed nothing but the address of an RPC send buffer, can find its matching struct rpcrdma_req and rpcrdma_rep quickly via container_of / offsetof. That means that hardway currently has to replace a whole rpcrmda_req when it replaces an RPC send buffer. This is often a fairly hefty chunk of contiguous memory due to the size of the rl_segments array and the fact that both the send and receive buffers are part of struct rpcrdma_req. Some obscure re-use of fields in rpcrdma_req is done so that xprt_rdma_free() can detect replaced rpcrdma_req structs, and restore the original. This commit breaks apart the RPC send buffer and struct rpcrdma_req so that increasing the size of the rl_segments array does not change the alignment of each RPC send buffer. (Increasing rl_segments is needed to bump up the maximum r/wsize for NFS/RDMA). This change opens up some interesting possibilities for improving the design of xprt_rdma_allocate(). xprt_rdma_allocate() is now the one place where RPC send buffers are allocated or re-allocated, and they are now always left in place by xprt_rdma_free(). A large re-allocation that includes both the rl_segments array and the RPC send buffer is no longer needed. Send buffer re-allocation becomes quite rare. Good send buffer alignment is guaranteed no matter what the size of the rl_segments array is. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 6 +- net/sunrpc/xprtrdma/transport.c | 146 +++++++++++++------------------- net/sunrpc/xprtrdma/verbs.c | 16 ++-- net/sunrpc/xprtrdma/xprt_rdma.h | 14 ++- 4 files changed, 78 insertions(+), 104 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index f2eda155299a87..8a6bdbd3e93600 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -541,9 +541,9 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) req->rl_send_iov[0].length = hdrlen; req->rl_send_iov[0].lkey = req->rl_iov.lkey; - req->rl_send_iov[1].addr = req->rl_iov.addr + (base - req->rl_base); + req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf); req->rl_send_iov[1].length = rpclen; - req->rl_send_iov[1].lkey = req->rl_iov.lkey; + req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf); req->rl_niovs = 2; @@ -556,7 +556,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen; req->rl_send_iov[3].length = rqst->rq_slen - rpclen; - req->rl_send_iov[3].lkey = req->rl_iov.lkey; + req->rl_send_iov[3].lkey = rdmab_lkey(req->rl_sendbuf); req->rl_niovs = 4; } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 808b3c52427a28..a9d566227e7e52 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -449,77 +449,72 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) /* * The RDMA allocate/free functions need the task structure as a place * to hide the struct rpcrdma_req, which is necessary for the actual send/recv - * sequence. For this reason, the recv buffers are attached to send - * buffers for portions of the RPC. Note that the RPC layer allocates - * both send and receive buffers in the same call. We may register - * the receive buffer portion when using reply chunks. + * sequence. + * + * The RPC layer allocates both send and receive buffers in the same call + * (rq_send_buf and rq_rcv_buf are both part of a single contiguous buffer). + * We may register rq_rcv_buf when using reply chunks. */ static void * xprt_rdma_allocate(struct rpc_task *task, size_t size) { struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; - struct rpcrdma_req *req, *nreq; + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_regbuf *rb; + struct rpcrdma_req *req; + size_t min_size; + gfp_t flags = task->tk_flags & RPC_TASK_SWAPPER ? + GFP_ATOMIC : GFP_NOFS; - req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); + req = rpcrdma_buffer_get(&r_xprt->rx_buf); if (req == NULL) return NULL; - if (size > req->rl_size) { - dprintk("RPC: %s: size %zd too large for buffer[%zd]: " - "prog %d vers %d proc %d\n", - __func__, size, req->rl_size, - task->tk_client->cl_prog, task->tk_client->cl_vers, - task->tk_msg.rpc_proc->p_proc); - /* - * Outgoing length shortage. Our inline write max must have - * been configured to perform direct i/o. - * - * This is therefore a large metadata operation, and the - * allocate call was made on the maximum possible message, - * e.g. containing long filename(s) or symlink data. In - * fact, while these metadata operations *might* carry - * large outgoing payloads, they rarely *do*. However, we - * have to commit to the request here, so reallocate and - * register it now. The data path will never require this - * reallocation. - * - * If the allocation or registration fails, the RPC framework - * will (doggedly) retry. - */ - if (task->tk_flags & RPC_TASK_SWAPPER) - nreq = kmalloc(sizeof *req + size, GFP_ATOMIC); - else - nreq = kmalloc(sizeof *req + size, GFP_NOFS); - if (nreq == NULL) - goto outfail; - - if (rpcrdma_register_internal(&rpcx_to_rdmax(xprt)->rx_ia, - nreq->rl_base, size + sizeof(struct rpcrdma_req) - - offsetof(struct rpcrdma_req, rl_base), - &nreq->rl_handle, &nreq->rl_iov)) { - kfree(nreq); - goto outfail; - } - rpcx_to_rdmax(xprt)->rx_stats.hardway_register_count += size; - nreq->rl_size = size; - nreq->rl_niovs = 0; - nreq->rl_nchunks = 0; - nreq->rl_buffer = (struct rpcrdma_buffer *)req; - nreq->rl_reply = req->rl_reply; - memcpy(nreq->rl_segments, - req->rl_segments, sizeof nreq->rl_segments); - /* flag the swap with an unused field */ - nreq->rl_iov.length = 0; - req->rl_reply = NULL; - req = nreq; - } + if (req->rl_sendbuf == NULL) + goto out_sendbuf; + if (size > req->rl_sendbuf->rg_size) + goto out_sendbuf; + +out: dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); req->rl_connect_cookie = 0; /* our reserved value */ - return req->rl_xdr_buf; - -outfail: + return req->rl_sendbuf->rg_base; + +out_sendbuf: + /* XDR encoding and RPC/RDMA marshaling of this request has not + * yet occurred. Thus a lower bound is needed to prevent buffer + * overrun during marshaling. + * + * RPC/RDMA marshaling may choose to send payload bearing ops + * inline, if the result is smaller than the inline threshold. + * The value of the "size" argument accounts for header + * requirements but not for the payload in these cases. + * + * Likewise, allocate enough space to receive a reply up to the + * size of the inline threshold. + * + * It's unlikely that both the send header and the received + * reply will be large, but slush is provided here to allow + * flexibility when marshaling. + */ + min_size = RPCRDMA_INLINE_READ_THRESHOLD(task->tk_rqstp); + min_size += RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp); + if (size < min_size) + size = min_size; + + rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, size, flags); + if (IS_ERR(rb)) + goto out_fail; + rb->rg_owner = req; + + r_xprt->rx_stats.hardway_register_count += size; + rpcrdma_free_regbuf(&r_xprt->rx_ia, req->rl_sendbuf); + req->rl_sendbuf = rb; + goto out; + +out_fail: rpcrdma_buffer_put(req); - rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; + r_xprt->rx_stats.failed_marshal_count++; return NULL; } @@ -531,47 +526,24 @@ xprt_rdma_free(void *buffer) { struct rpcrdma_req *req; struct rpcrdma_xprt *r_xprt; - struct rpcrdma_rep *rep; + struct rpcrdma_regbuf *rb; int i; if (buffer == NULL) return; - req = container_of(buffer, struct rpcrdma_req, rl_xdr_buf[0]); - if (req->rl_iov.length == 0) { /* see allocate above */ - r_xprt = container_of(((struct rpcrdma_req *) req->rl_buffer)->rl_buffer, - struct rpcrdma_xprt, rx_buf); - } else - r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); - rep = req->rl_reply; + rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]); + req = rb->rg_owner; + r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); - dprintk("RPC: %s: called on 0x%p%s\n", - __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : ""); + dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); - /* - * Finish the deregistration. The process is considered - * complete when the rr_func vector becomes NULL - this - * was put in place during rpcrdma_reply_handler() - the wait - * call below will not block if the dereg is "done". If - * interrupted, our framework will clean up. - */ for (i = 0; req->rl_nchunks;) { --req->rl_nchunks; i += rpcrdma_deregister_external( &req->rl_segments[i], r_xprt); } - if (req->rl_iov.length == 0) { /* see allocate above */ - struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer; - oreq->rl_reply = req->rl_reply; - (void) rpcrdma_deregister_internal(&r_xprt->rx_ia, - req->rl_handle, - &req->rl_iov); - kfree(req); - req = oreq; - } - - /* Put back request+reply buffers */ rpcrdma_buffer_put(req); } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index cdd6aacc91682a..40894403db8153 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1079,25 +1079,22 @@ static struct rpcrdma_req * rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; - size_t wlen = 1 << fls(cdata->inline_wsize + - sizeof(struct rpcrdma_req)); + size_t wlen = cdata->inline_wsize; struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_req *req; int rc; rc = -ENOMEM; - req = kmalloc(wlen, GFP_KERNEL); + req = kmalloc(sizeof(*req) + wlen, GFP_KERNEL); if (req == NULL) goto out; - memset(req, 0, sizeof(struct rpcrdma_req)); + memset(req, 0, sizeof(*req)); - rc = rpcrdma_register_internal(ia, req->rl_base, wlen - - offsetof(struct rpcrdma_req, rl_base), + rc = rpcrdma_register_internal(ia, req->rl_base, wlen, &req->rl_handle, &req->rl_iov); if (rc) goto out_free; - req->rl_size = wlen - sizeof(struct rpcrdma_req); req->rl_buffer = &r_xprt->rx_buf; return req; @@ -1121,7 +1118,7 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) rep = kmalloc(rlen, GFP_KERNEL); if (rep == NULL) goto out; - memset(rep, 0, sizeof(struct rpcrdma_rep)); + memset(rep, 0, sizeof(*rep)); rc = rpcrdma_register_internal(ia, rep->rr_base, rlen - offsetof(struct rpcrdma_rep, rr_base), @@ -1335,6 +1332,7 @@ rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req) if (!req) return; + rpcrdma_free_regbuf(ia, req->rl_sendbuf); rpcrdma_deregister_internal(ia, req->rl_handle, &req->rl_iov); kfree(req); } @@ -1729,8 +1727,6 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req) struct rpcrdma_buffer *buffers = req->rl_buffer; unsigned long flags; - if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */ - buffers = ((struct rpcrdma_req *) buffers)->rl_buffer; spin_lock_irqsave(&buffers->rb_lock, flags); if (buffers->rb_recv_index < buffers->rb_max_requests) { req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index]; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 36c37c60f1feb3..aa82f8d1c5b4d9 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -262,7 +262,6 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ }; struct rpcrdma_req { - size_t rl_size; /* actual length of buffer */ unsigned int rl_niovs; /* 0, 2 or 4 */ unsigned int rl_nchunks; /* non-zero if chunks */ unsigned int rl_connect_cookie; /* retry detection */ @@ -271,13 +270,20 @@ struct rpcrdma_req { struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ struct ib_sge rl_send_iov[4]; /* for active requests */ + struct rpcrdma_regbuf *rl_sendbuf; struct ib_sge rl_iov; /* for posting */ struct ib_mr *rl_handle; /* handle for mem in rl_iov */ char rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */ - __u32 rl_xdr_buf[0]; /* start of returned rpc rq_buffer */ }; -#define rpcr_to_rdmar(r) \ - container_of((r)->rq_buffer, struct rpcrdma_req, rl_xdr_buf[0]) + +static inline struct rpcrdma_req * +rpcr_to_rdmar(struct rpc_rqst *rqst) +{ + struct rpcrdma_regbuf *rb = container_of(rqst->rq_buffer, + struct rpcrdma_regbuf, + rg_base[0]); + return rb->rg_owner; +} /* * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for From 85275c874eaeb92fb2a78a1d4ebb1ff4b0f7b732 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:04:16 -0500 Subject: [PATCH 362/601] xprtrdma: Allocate RPC/RDMA send buffer separately from struct rpcrdma_req The rl_base field is currently the buffer where each RPC/RDMA call header is built. The inline threshold is an agreed-on size limit to for RDMA SEND operations that pass between client and server. The sum of the RPC/RDMA header size and the RPC header size must be less than or equal to this threshold. Increasing the r/wsize maximum will require MAX_SEGS to grow significantly, but the inline threshold size won't change (both sides agree on it). The server's inline threshold doesn't change. Since an RPC/RDMA header can never be larger than the inline threshold, make all RPC/RDMA header buffers the size of the inline threshold. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 11 +++++------ net/sunrpc/xprtrdma/transport.c | 9 +++++++++ net/sunrpc/xprtrdma/verbs.c | 22 +++------------------- net/sunrpc/xprtrdma/xprt_rdma.h | 6 ++---- 4 files changed, 19 insertions(+), 29 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 8a6bdbd3e93600..c1d4a093b8f11d 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -294,7 +294,7 @@ ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result) { struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)req->rl_base; + struct rpcrdma_msg *headerp = rdmab_to_msg(req->rl_rdmabuf); if (req->rl_rtype != rpcrdma_noch) result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf, @@ -406,8 +406,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) base = rqst->rq_svec[0].iov_base; rpclen = rqst->rq_svec[0].iov_len; - /* build RDMA header in private area at front */ - headerp = (struct rpcrdma_msg *) req->rl_base; + headerp = rdmab_to_msg(req->rl_rdmabuf); /* don't byte-swap XID, it's already done in request */ headerp->rm_xid = rqst->rq_xid; headerp->rm_vers = rpcrdma_version; @@ -528,7 +527,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" " headerp 0x%p base 0x%p lkey 0x%x\n", __func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen, - headerp, base, req->rl_iov.lkey); + headerp, base, rdmab_lkey(req->rl_rdmabuf)); /* * initialize send_iov's - normally only two: rdma chunk header and @@ -537,9 +536,9 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) * header and any write data. In all non-rdma cases, any following * data has been copied into the RPC header buffer. */ - req->rl_send_iov[0].addr = req->rl_iov.addr; + req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); req->rl_send_iov[0].length = hdrlen; - req->rl_send_iov[0].lkey = req->rl_iov.lkey; + req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf); req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf); req->rl_send_iov[1].length = rpclen; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index a9d566227e7e52..2c2fabe99d840a 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -470,6 +470,8 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) if (req == NULL) return NULL; + if (req->rl_rdmabuf == NULL) + goto out_rdmabuf; if (req->rl_sendbuf == NULL) goto out_sendbuf; if (size > req->rl_sendbuf->rg_size) @@ -480,6 +482,13 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) req->rl_connect_cookie = 0; /* our reserved value */ return req->rl_sendbuf->rg_base; +out_rdmabuf: + min_size = RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp); + rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, min_size, flags); + if (IS_ERR(rb)) + goto out_fail; + req->rl_rdmabuf = rb; + out_sendbuf: /* XDR encoding and RPC/RDMA marshaling of this request has not * yet occurred. Thus a lower bound is needed to prevent buffer diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 40894403db8153..c81749b9a0de97 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1078,30 +1078,14 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) static struct rpcrdma_req * rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) { - struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; - size_t wlen = cdata->inline_wsize; - struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_req *req; - int rc; - rc = -ENOMEM; - req = kmalloc(sizeof(*req) + wlen, GFP_KERNEL); + req = kzalloc(sizeof(*req), GFP_KERNEL); if (req == NULL) - goto out; - memset(req, 0, sizeof(*req)); - - rc = rpcrdma_register_internal(ia, req->rl_base, wlen, - &req->rl_handle, &req->rl_iov); - if (rc) - goto out_free; + return ERR_PTR(-ENOMEM); req->rl_buffer = &r_xprt->rx_buf; return req; - -out_free: - kfree(req); -out: - return ERR_PTR(rc); } static struct rpcrdma_rep * @@ -1333,7 +1317,7 @@ rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req) return; rpcrdma_free_regbuf(ia, req->rl_sendbuf); - rpcrdma_deregister_internal(ia, req->rl_handle, &req->rl_iov); + rpcrdma_free_regbuf(ia, req->rl_rdmabuf); kfree(req); } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index aa82f8d1c5b4d9..84ad863fe637af 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -268,12 +268,10 @@ struct rpcrdma_req { enum rpcrdma_chunktype rl_rtype, rl_wtype; struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ - struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ struct ib_sge rl_send_iov[4]; /* for active requests */ + struct rpcrdma_regbuf *rl_rdmabuf; struct rpcrdma_regbuf *rl_sendbuf; - struct ib_sge rl_iov; /* for posting */ - struct ib_mr *rl_handle; /* handle for mem in rl_iov */ - char rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */ + struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; }; static inline struct rpcrdma_req * From 6b1184cd4fb086a826f658b02d9d9912dd0dde08 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:04:25 -0500 Subject: [PATCH 363/601] xprtrdma: Allocate RPC/RDMA receive buffer separately from struct rpcrdma_rep The rr_base field is currently the buffer where RPC replies land. An RPC/RDMA reply header lands in this buffer. In some cases an RPC reply header also lands in this buffer, just after the RPC/RDMA header. The inline threshold is an agreed-on size limit for RDMA SEND operations that pass from server and client. The sum of the RPC/RDMA reply header size and the RPC reply header size must be less than this threshold. The largest RDMA RECV that the client should have to handle is the size of the inline threshold. The receive buffer should thus be the size of the inline threshold, and not related to RPCRDMA_MAX_SEGS. RPC replies received via RDMA WRITE (long replies) are caught in rq_rcv_buf, which is the second half of the RPC send buffer. Ie, such replies are not involved in any way with rr_base. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 5 +++-- net/sunrpc/xprtrdma/verbs.c | 27 ++++++++++++++------------- net/sunrpc/xprtrdma/xprt_rdma.h | 14 ++++++-------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index c1d4a093b8f11d..02efcaa1bbacb1 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -572,6 +572,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b { unsigned int i, total_len; struct rpcrdma_write_chunk *cur_wchunk; + char *base = (char *)rdmab_to_msg(rep->rr_rdmabuf); i = be32_to_cpu(**iptrp); if (i > max) @@ -599,7 +600,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b return -1; cur_wchunk = (struct rpcrdma_write_chunk *) w; } - if ((char *) cur_wchunk > rep->rr_base + rep->rr_len) + if ((char *)cur_wchunk > base + rep->rr_len) return -1; *iptrp = (__be32 *) cur_wchunk; @@ -753,7 +754,7 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) dprintk("RPC: %s: short/invalid reply\n", __func__); goto repost; } - headerp = (struct rpcrdma_msg *) rep->rr_base; + headerp = rdmab_to_msg(rep->rr_rdmabuf); if (headerp->rm_vers != rpcrdma_version) { dprintk("RPC: %s: invalid version %d\n", __func__, be32_to_cpu(headerp->rm_vers)); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index c81749b9a0de97..f58521dd88e21c 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -298,8 +298,9 @@ rpcrdma_recvcq_process_wc(struct ib_wc *wc, struct list_head *sched_list) rep->rr_len = wc->byte_len; ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device, - rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); - prefetch(rep->rr_base); + rdmab_addr(rep->rr_rdmabuf), + rep->rr_len, DMA_FROM_DEVICE); + prefetch(rdmab_to_msg(rep->rr_rdmabuf)); out_schedule: list_add_tail(&rep->rr_list, sched_list); @@ -1092,23 +1093,21 @@ static struct rpcrdma_rep * rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; - size_t rlen = 1 << fls(cdata->inline_rsize + - sizeof(struct rpcrdma_rep)); struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_rep *rep; int rc; rc = -ENOMEM; - rep = kmalloc(rlen, GFP_KERNEL); + rep = kzalloc(sizeof(*rep), GFP_KERNEL); if (rep == NULL) goto out; - memset(rep, 0, sizeof(*rep)); - rc = rpcrdma_register_internal(ia, rep->rr_base, rlen - - offsetof(struct rpcrdma_rep, rr_base), - &rep->rr_handle, &rep->rr_iov); - if (rc) + rep->rr_rdmabuf = rpcrdma_alloc_regbuf(ia, cdata->inline_rsize, + GFP_KERNEL); + if (IS_ERR(rep->rr_rdmabuf)) { + rc = PTR_ERR(rep->rr_rdmabuf); goto out_free; + } rep->rr_buffer = &r_xprt->rx_buf; return rep; @@ -1306,7 +1305,7 @@ rpcrdma_destroy_rep(struct rpcrdma_ia *ia, struct rpcrdma_rep *rep) if (!rep) return; - rpcrdma_deregister_internal(ia, rep->rr_handle, &rep->rr_iov); + rpcrdma_free_regbuf(ia, rep->rr_rdmabuf); kfree(rep); } @@ -2209,11 +2208,13 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, recv_wr.next = NULL; recv_wr.wr_id = (u64) (unsigned long) rep; - recv_wr.sg_list = &rep->rr_iov; + recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; recv_wr.num_sge = 1; ib_dma_sync_single_for_cpu(ia->ri_id->device, - rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); + rdmab_addr(rep->rr_rdmabuf), + rdmab_length(rep->rr_rdmabuf), + DMA_BIDIRECTIONAL); rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 84ad863fe637af..2b69316dfd1148 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -180,14 +180,12 @@ enum rpcrdma_chunktype { struct rpcrdma_buffer; struct rpcrdma_rep { - unsigned int rr_len; /* actual received reply length */ - struct rpcrdma_buffer *rr_buffer; /* home base for this structure */ - struct rpc_xprt *rr_xprt; /* needed for request/reply matching */ - void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ - struct list_head rr_list; /* tasklet list */ - struct ib_sge rr_iov; /* for posting */ - struct ib_mr *rr_handle; /* handle for mem in rr_iov */ - char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ + unsigned int rr_len; + struct rpcrdma_buffer *rr_buffer; + struct rpc_xprt *rr_xprt; + void (*rr_func)(struct rpcrdma_rep *); + struct list_head rr_list; + struct rpcrdma_regbuf *rr_rdmabuf; }; /* From c05fbb5a593571961fdb4ba06a2bff49aed9dcee Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:04:33 -0500 Subject: [PATCH 364/601] xprtrdma: Allocate zero pad separately from rpcrdma_buffer Use the new rpcrdma_alloc_regbuf() API to shrink the amount of contiguous memory needed for a buffer pool by moving the zero pad buffer into a regbuf. This is for consistency with the other uses of internally registered memory. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/rpc_rdma.c | 4 ++-- net/sunrpc/xprtrdma/verbs.c | 29 ++++++++++------------------- net/sunrpc/xprtrdma/xprt_rdma.h | 3 +-- 3 files changed, 13 insertions(+), 23 deletions(-) diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 02efcaa1bbacb1..7e9acd9361c55b 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -549,9 +549,9 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) if (padlen) { struct rpcrdma_ep *ep = &r_xprt->rx_ep; - req->rl_send_iov[2].addr = ep->rep_pad.addr; + req->rl_send_iov[2].addr = rdmab_addr(ep->rep_padbuf); req->rl_send_iov[2].length = padlen; - req->rl_send_iov[2].lkey = ep->rep_pad.lkey; + req->rl_send_iov[2].lkey = rdmab_lkey(ep->rep_padbuf); req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen; req->rl_send_iov[3].length = rqst->rq_slen - rpclen; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index f58521dd88e21c..8a05f45d1a1182 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -794,6 +794,14 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.qp_type = IB_QPT_RC; ep->rep_attr.port_num = ~0; + if (cdata->padding) { + ep->rep_padbuf = rpcrdma_alloc_regbuf(ia, cdata->padding, + GFP_KERNEL); + if (IS_ERR(ep->rep_padbuf)) + return PTR_ERR(ep->rep_padbuf); + } else + ep->rep_padbuf = NULL; + dprintk("RPC: %s: requested max: dtos: send %d recv %d; " "iovs: send %d recv %d\n", __func__, @@ -876,6 +884,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, dprintk("RPC: %s: ib_destroy_cq returned %i\n", __func__, err); out1: + rpcrdma_free_regbuf(ia, ep->rep_padbuf); return rc; } @@ -902,11 +911,7 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ia->ri_id->qp = NULL; } - /* padding - could be done in rpcrdma_buffer_destroy... */ - if (ep->rep_pad_mr) { - rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); - ep->rep_pad_mr = NULL; - } + rpcrdma_free_regbuf(ia, ep->rep_padbuf); rpcrdma_clean_cq(ep->rep_attr.recv_cq); rc = ib_destroy_cq(ep->rep_attr.recv_cq); @@ -1220,12 +1225,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) * 1. arrays for send and recv pointers * 2. arrays of struct rpcrdma_req to fill in pointers * 3. array of struct rpcrdma_rep for replies - * 4. padding, if any * Send/recv buffers in req/rep need to be registered */ len = buf->rb_max_requests * (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); - len += cdata->padding; p = kzalloc(len, GFP_KERNEL); if (p == NULL) { @@ -1241,18 +1244,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) buf->rb_recv_bufs = (struct rpcrdma_rep **) p; p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; - /* - * Register the zeroed pad buffer, if any. - */ - if (cdata->padding) { - struct rpcrdma_ep *ep = &r_xprt->rx_ep; - rc = rpcrdma_register_internal(ia, p, cdata->padding, - &ep->rep_pad_mr, &ep->rep_pad); - if (rc) - goto out; - } - p += cdata->padding; - INIT_LIST_HEAD(&buf->rb_mws); INIT_LIST_HEAD(&buf->rb_all); switch (ia->ri_memreg_strategy) { diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 2b69316dfd1148..5630353ed24090 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -88,8 +88,7 @@ struct rpcrdma_ep { int rep_connected; struct ib_qp_init_attr rep_attr; wait_queue_head_t rep_connect_wait; - struct ib_sge rep_pad; /* holds zeroed pad */ - struct ib_mr *rep_pad_mr; /* holds zeroed pad */ + struct rpcrdma_regbuf *rep_padbuf; struct rdma_conn_param rep_remote_cma; struct sockaddr_storage rep_remote_addr; struct delayed_work rep_connect_worker; From df515ca7b3b47bf6fd489fe6fca0d9ab243e1985 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 21 Jan 2015 11:04:41 -0500 Subject: [PATCH 365/601] xprtrdma: Clean up after adding regbuf management rpcrdma_{de}register_internal() are used only in verbs.c now. MAX_RPCRDMAHDR is no longer used and can be removed. Signed-off-by: Chuck Lever Reviewed-by: Steve Wise Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 4 ++-- net/sunrpc/xprtrdma/xprt_rdma.h | 9 --------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 8a05f45d1a1182..124676c1378089 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1729,7 +1729,7 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) * Wrappers for internal-use kmalloc memory registration, used by buffer code. */ -int +static int rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, struct ib_mr **mrp, struct ib_sge *iov) { @@ -1780,7 +1780,7 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, return rc; } -int +static int rpcrdma_deregister_internal(struct rpcrdma_ia *ia, struct ib_mr *mr, struct ib_sge *iov) { diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 5630353ed24090..c9d2a02f631b22 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -171,10 +171,6 @@ enum rpcrdma_chunktype { /* temporary static scatter/gather max */ #define RPCRDMA_MAX_DATA_SEGS (64) /* max scatter/gather */ #define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */ -#define MAX_RPCRDMAHDR (\ - /* max supported RPC/RDMA header */ \ - sizeof(struct rpcrdma_msg) + (2 * sizeof(u32)) + \ - (sizeof(struct rpcrdma_read_chunk) * RPCRDMA_MAX_SEGS) + sizeof(u32)) struct rpcrdma_buffer; @@ -401,11 +397,6 @@ void rpcrdma_buffer_put(struct rpcrdma_req *); void rpcrdma_recv_buffer_get(struct rpcrdma_req *); void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); -int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int, - struct ib_mr **, struct ib_sge *); -int rpcrdma_deregister_internal(struct rpcrdma_ia *, - struct ib_mr *, struct ib_sge *); - int rpcrdma_register_external(struct rpcrdma_mr_seg *, int, int, struct rpcrdma_xprt *); int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, From a0a1d50cd1e80652142af5cddcde500d06c71bdd Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 26 Jan 2015 17:11:47 -0500 Subject: [PATCH 366/601] xprtrdma: Update the GFP flags used in xprt_rdma_allocate() Reflect the more conservative approach used in the socket transport's version of this transport method. An RPC buffer allocation should avoid forcing not just FS activity, but any I/O. In particular, two recent changes missed updating xprtrdma: - Commit c6c8fe79a83e ("net, sunrpc: suppress allocation warning ...") - Commit a564b8f03986 ("nfs: enable swap on NFS") Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 2c2fabe99d840a..2e192baa59f3d8 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -463,13 +463,16 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) struct rpcrdma_regbuf *rb; struct rpcrdma_req *req; size_t min_size; - gfp_t flags = task->tk_flags & RPC_TASK_SWAPPER ? - GFP_ATOMIC : GFP_NOFS; + gfp_t flags; req = rpcrdma_buffer_get(&r_xprt->rx_buf); if (req == NULL) return NULL; + flags = GFP_NOIO | __GFP_NOWARN; + if (RPC_IS_SWAPPER(task)) + flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; + if (req->rl_rdmabuf == NULL) goto out_rdmabuf; if (req->rl_sendbuf == NULL) From 0dc294f717d41bfbafc746a7a96a7bc0f114c20c Mon Sep 17 00:00:00 2001 From: Arseny Solokha Date: Fri, 30 Jan 2015 19:08:27 +0700 Subject: [PATCH 367/601] powerpc/mm: bail out early when flushing TLB page MMU_NO_CONTEXT is conditionally defined as 0 or (unsigned int)-1. However, in __flush_tlb_page() a corresponding variable is only tested for open coded 0, which can cause NULL pointer dereference if `mm' argument was legitimately passed as such. Bail out early in case the first argument is NULL, thus eliminate confusion between different values of MMU_NO_CONTEXT and avoid disabling and then re-enabling preemption unnecessarily. Signed-off-by: Arseny Solokha Signed-off-by: Scott Wood --- arch/powerpc/mm/tlb_nohash.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index f38ea4df6a8556..ab0616b0e6c24e 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -284,8 +284,11 @@ void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, struct cpumask *cpu_mask; unsigned int pid; + if (unlikely(!mm)) + return; + preempt_disable(); - pid = mm ? mm->context.id : 0; + pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) goto bail; cpu_mask = mm_cpumask(mm); From 6ffa30d3f734d4f6b478081dfc09592021028f90 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 14 Jan 2015 13:08:57 -0500 Subject: [PATCH 368/601] nfs: don't call blocking operations while !TASK_RUNNING Bruce reported seeing this warning pop when mounting using v4.1: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 1121 at kernel/sched/core.c:7300 __might_sleep+0xbd/0xd0() do not call blocking ops when !TASK_RUNNING; state=1 set at [] prepare_to_wait+0x2f/0x90 Modules linked in: rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace sunrpc fscache ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw snd_hda_codec_generic snd_hda_intel snd_hda_controller snd_hda_codec snd_hwdep snd_pcm snd_timer ppdev joydev snd virtio_console virtio_balloon pcspkr serio_raw parport_pc parport pvpanic floppy soundcore i2c_piix4 virtio_blk virtio_net qxl drm_kms_helper ttm drm virtio_pci virtio_ring ata_generic virtio pata_acpi CPU: 1 PID: 1121 Comm: nfsv4.1-svc Not tainted 3.19.0-rc4+ #25 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140709_153950- 04/01/2014 0000000000000000 000000004e5e3f73 ffff8800b998fb48 ffffffff8186ac78 0000000000000000 ffff8800b998fba0 ffff8800b998fb88 ffffffff810ac9da ffff8800b998fb68 ffffffff81c923e7 00000000000004d9 0000000000000000 Call Trace: [] dump_stack+0x4c/0x65 [] warn_slowpath_common+0x8a/0xc0 [] warn_slowpath_fmt+0x55/0x70 [] ? prepare_to_wait+0x2f/0x90 [] ? prepare_to_wait+0x2f/0x90 [] __might_sleep+0xbd/0xd0 [] kmem_cache_alloc_trace+0x243/0x430 [] ? groups_alloc+0x3e/0x130 [] groups_alloc+0x3e/0x130 [] svcauth_unix_accept+0x16e/0x290 [sunrpc] [] svc_authenticate+0xe1/0xf0 [sunrpc] [] svc_process_common+0x244/0x6a0 [sunrpc] [] bc_svc_process+0x1c4/0x260 [sunrpc] [] nfs41_callback_svc+0x128/0x1f0 [nfsv4] [] ? wait_woken+0xc0/0xc0 [] ? nfs4_callback_svc+0x60/0x60 [nfsv4] [] kthread+0x11f/0x140 [] ? local_clock+0x15/0x30 [] ? kthread_create_on_node+0x250/0x250 [] ret_from_fork+0x7c/0xb0 [] ? kthread_create_on_node+0x250/0x250 ---[ end trace 675220a11e30f4f2 ]--- nfs41_callback_svc does most of its work while in TASK_INTERRUPTIBLE, which is just wrong. Fix that by finishing the wait immediately if we've found that the list has something on it. Also, we don't expect this kthread to accept signals, so we should be using a TASK_UNINTERRUPTIBLE sleep instead. That however, opens us up hung task warnings from the watchdog, so have the schedule_timeout wake up every 60s if there's no callback activity. Reported-by: "J. Bruce Fields" Signed-off-by: Jeff Layton Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index b8fb3a4ef64955..351be9205bf889 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -128,22 +128,24 @@ nfs41_callback_svc(void *vrqstp) if (try_to_freeze()) continue; - prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); + prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_UNINTERRUPTIBLE); spin_lock_bh(&serv->sv_cb_lock); if (!list_empty(&serv->sv_cb_list)) { req = list_first_entry(&serv->sv_cb_list, struct rpc_rqst, rq_bc_list); list_del(&req->rq_bc_list); spin_unlock_bh(&serv->sv_cb_lock); + finish_wait(&serv->sv_cb_waitq, &wq); dprintk("Invoking bc_svc_process()\n"); error = bc_svc_process(serv, req, rqstp); dprintk("bc_svc_process() returned w/ error code= %d\n", error); } else { spin_unlock_bh(&serv->sv_cb_lock); - schedule(); + /* schedule_timeout to game the hung task watchdog */ + schedule_timeout(60 * HZ); + finish_wait(&serv->sv_cb_waitq, &wq); } - finish_wait(&serv->sv_cb_waitq, &wq); } return 0; } From 3a7ed3fff3bb22828f7d7ba6b75c7d22ee54df38 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 8 Jan 2015 01:18:30 -0800 Subject: [PATCH 369/601] nfs: prevent truncate on active swapfile Most filesystems prevent truncation of an active swapfile by way of inode_newsize_ok, called from inode_change_ok. NFS doesn't call either from nfs_setattr, presumably because most of these checks are expected to be done server-side. However, the IS_SWAPFILE check can only be done client-side, and truncating a swapfile can't possibly be good. Signed-off-by: Omar Sandoval Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2211f6ba873628..d2398c193bdaab 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -507,10 +507,15 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_valid &= ~ATTR_MODE; if (attr->ia_valid & ATTR_SIZE) { + loff_t i_size; + BUG_ON(!S_ISREG(inode->i_mode)); - if (attr->ia_size == i_size_read(inode)) + i_size = i_size_read(inode); + if (attr->ia_size == i_size) attr->ia_valid &= ~ATTR_SIZE; + else if (attr->ia_size < i_size && IS_SWAPFILE(inode)) + return -ETXTBSY; } /* Optimization: if the end result is no change, don't RPC */ From c7c545d4a34872f4a3d710e22f21fb61f7258706 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 17 Dec 2014 02:52:26 +0300 Subject: [PATCH 370/601] NFS: a couple off by ones These tests are off by one because if len == sizeof(nfs_export_path) then we have truncated the name. Signed-off-by: Dan Carpenter Signed-off-by: Trond Myklebust --- fs/nfs/nfsroot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index cd3c910d2d129e..9bc9f04fb7f699 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -261,11 +261,11 @@ static int __init root_nfs_data(char *cmdline) */ len = snprintf(nfs_export_path, sizeof(nfs_export_path), tmp, utsname()->nodename); - if (len > (int)sizeof(nfs_export_path)) + if (len >= (int)sizeof(nfs_export_path)) goto out_devnametoolong; len = snprintf(nfs_root_device, sizeof(nfs_root_device), "%pI4:%s", &servaddr, nfs_export_path); - if (len > (int)sizeof(nfs_root_device)) + if (len >= (int)sizeof(nfs_root_device)) goto out_devnametoolong; retval = 0; From 11cd64a234d5a1a7111627ef947beb0e5fad9e71 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 30 Jan 2015 11:04:12 +0000 Subject: [PATCH 371/601] ima: /proc/keys is now mandatory /proc/keys is now mandatory and its config option no longer exists, so it doesn't need selecting. Reported-by: Paul Bolle Signed-off-by: David Howells Signed-off-by: Mimi Zohar Signed-off-by: James Morris --- security/integrity/ima/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/security/integrity/ima/Kconfig b/security/integrity/ima/Kconfig index 57515bc915c0f2..df303346029ba5 100644 --- a/security/integrity/ima/Kconfig +++ b/security/integrity/ima/Kconfig @@ -126,7 +126,6 @@ config IMA_TRUSTED_KEYRING bool "Require all keys on the .ima keyring be signed" depends on IMA_APPRAISE && SYSTEM_TRUSTED_KEYRING depends on INTEGRITY_ASYMMETRIC_KEYS - select KEYS_DEBUG_PROC_KEYS default y help This option requires that all keys added to the .ima From 6f963ec2d6bf2476a16799eece920acb2100ff1c Mon Sep 17 00:00:00 2001 From: Ryan Grimm Date: Wed, 28 Jan 2015 20:16:04 -0600 Subject: [PATCH 372/601] cxl: Fix device_node reference counting When unbinding and rebinding the driver on a system with a card in PHB0, this error condition is reached after a few attempts: ERROR: Bad of_node_put() on /pciex@3fffe40000000 CPU: 0 PID: 3040 Comm: bash Not tainted 3.18.0-rc3-12545-g3627ffe #152 Call Trace: [c000000721acb5c0] [c00000000086ef94] .dump_stack+0x84/0xb0 (unreliable) [c000000721acb640] [c00000000073a0a8] .of_node_release+0xd8/0xe0 [c000000721acb6d0] [c00000000044bc44] .kobject_release+0x74/0xe0 [c000000721acb760] [c0000000007394fc] .of_node_put+0x1c/0x30 [c000000721acb7d0] [c000000000545cd8] .cxl_probe+0x1a98/0x1d50 [c000000721acb900] [c0000000004845a0] .local_pci_probe+0x40/0xc0 [c000000721acb980] [c000000000484998] .pci_device_probe+0x128/0x170 [c000000721acba30] [c00000000052400c] .driver_probe_device+0xac/0x2a0 [c000000721acbad0] [c000000000522468] .bind_store+0x108/0x160 [c000000721acbb70] [c000000000521448] .drv_attr_store+0x38/0x60 [c000000721acbbe0] [c000000000293840] .sysfs_kf_write+0x60/0xa0 [c000000721acbc50] [c000000000292500] .kernfs_fop_write+0x140/0x1d0 [c000000721acbcf0] [c000000000208648] .vfs_write+0xd8/0x260 [c000000721acbd90] [c000000000208b18] .SyS_write+0x58/0x100 [c000000721acbe30] [c000000000009258] syscall_exit+0x0/0x98 We are missing a call to of_node_get(). pnv_pci_to_phb_node() should call of_node_get() otherwise np's reference count isn't incremented and it might go away. Rename pnv_pci_to_phb_node() to pnv_pci_get_phb_node() so it's clear it calls of_node_get(). Signed-off-by: Ryan Grimm Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/pnv-pci.h | 2 +- arch/powerpc/platforms/powernv/pci-ioda.c | 6 +++--- drivers/misc/cxl/pci.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index 3c00d648336d78..f9b498292a5c1c 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -19,7 +19,7 @@ int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, int pnv_cxl_alloc_hwirqs(struct pci_dev *dev, int num); void pnv_cxl_release_hwirqs(struct pci_dev *dev, int hwirq, int num); int pnv_cxl_get_irq_count(struct pci_dev *dev); -struct device_node *pnv_pci_to_phb_node(struct pci_dev *dev); +struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev); #ifdef CONFIG_CXL_BASE int pnv_cxl_alloc_hwirq_ranges(struct cxl_irq_ranges *irqs, diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 85b473823fdad0..6c9ff2b95119fa 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1487,13 +1487,13 @@ static void set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq) #ifdef CONFIG_CXL_BASE -struct device_node *pnv_pci_to_phb_node(struct pci_dev *dev) +struct device_node *pnv_pci_get_phb_node(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); - return hose->dn; + return of_node_get(hose->dn); } -EXPORT_SYMBOL(pnv_pci_to_phb_node); +EXPORT_SYMBOL(pnv_pci_get_phb_node); int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode) { diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 428ea8ba25fcea..cb250673b5c634 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -317,7 +317,7 @@ static int init_implementation_adapter_regs(struct cxl *adapter, struct pci_dev u64 psl_dsnctl; u64 chipid; - if (!(np = pnv_pci_to_phb_node(dev))) + if (!(np = pnv_pci_get_phb_node(dev))) return -ENODEV; while (np && !(prop = of_get_property(np, "ibm,chip-id", NULL))) From 4c3b21686111e0ac6018469dacbc5549f9915cf8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 5 Dec 2014 21:16:59 +1100 Subject: [PATCH 373/601] powerpc/kernel: Make syscall_exit a local label Currently when we back trace something that is in a syscall we see something like this: [c000000000000000] [c000000000000000] SyS_read+0x6c/0x110 [c000000000000000] [c000000000000000] syscall_exit+0x0/0x98 Although it's entirely correct, seeing syscall_exit at the bottom can be confusing - we were exiting from a syscall and then called SyS_read() ? If we instead change syscall_exit to be a local label we get something more intuitive: [c0000001fa46fde0] [c00000000026719c] SyS_read+0x6c/0x110 [c0000001fa46fe30] [c000000000009264] system_call+0x38/0xd0 ie. we were handling a system call, and it was SyS_read(). Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_64.S | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 3291ed148bdfe2..fefaa1485bd31e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -178,7 +178,7 @@ system_call: /* label this so stack traces look sane */ mtctr r12 bctrl /* Call handler */ -syscall_exit: +.Lsyscall_exit: std r3,RESULT(r1) #ifdef SHOW_SYSCALLS bl do_show_syscall_exit @@ -270,7 +270,7 @@ syscall_dotrace: syscall_enosys: li r3,-ENOSYS - b syscall_exit + b .Lsyscall_exit syscall_exit_work: #ifdef CONFIG_PPC_BOOK3S @@ -347,33 +347,33 @@ _GLOBAL(save_nvgprs) _GLOBAL(ppc_fork) bl save_nvgprs bl sys_fork - b syscall_exit + b .Lsyscall_exit _GLOBAL(ppc_vfork) bl save_nvgprs bl sys_vfork - b syscall_exit + b .Lsyscall_exit _GLOBAL(ppc_clone) bl save_nvgprs bl sys_clone - b syscall_exit + b .Lsyscall_exit _GLOBAL(ppc32_swapcontext) bl save_nvgprs bl compat_sys_swapcontext - b syscall_exit + b .Lsyscall_exit _GLOBAL(ppc64_swapcontext) bl save_nvgprs bl sys_swapcontext - b syscall_exit + b .Lsyscall_exit _GLOBAL(ret_from_fork) bl schedule_tail REST_NVGPRS(r1) li r3,0 - b syscall_exit + b .Lsyscall_exit _GLOBAL(ret_from_kernel_thread) bl schedule_tail @@ -385,7 +385,7 @@ _GLOBAL(ret_from_kernel_thread) #endif blrl li r3,0 - b syscall_exit + b .Lsyscall_exit /* * This routine switches between two different tasks. The process From a4bcbe6a41adcaa5e7f1830a7c1da8691d9d2b1d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 14 Jan 2015 14:47:56 +1100 Subject: [PATCH 374/601] powerpc: Remove old compile time disabled syscall tracing code We have code to do syscall tracing which is disabled at compile time by default. It's not been touched since the dawn of time (ie. v2.6.12). There are now better ways to do syscall tracing, ie. using the raw_syscall, or syscall tracepoints. For the specific case of tracing syscalls at boot on a system that doesn't get to userspace, you can boot with: trace_event=syscalls tp_printk=on Which will trace syscalls from boot, and echo all output to the console. Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/entry_32.S | 77 ---------------------------------- arch/powerpc/kernel/entry_64.S | 13 ------ arch/powerpc/kernel/syscalls.c | 14 ------- 3 files changed, 104 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index da6379550fd216..46fc0f4d8982c6 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -33,9 +33,6 @@ #include #include -#undef SHOW_SYSCALLS -#undef SHOW_SYSCALLS_TASK - /* * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE. */ @@ -307,9 +304,6 @@ _GLOBAL(DoSyscall) lwz r11,_CCR(r1) /* Clear SO bit in CR */ rlwinm r11,r11,0,4,2 stw r11,_CCR(r1) -#ifdef SHOW_SYSCALLS - bl do_show_syscall -#endif /* SHOW_SYSCALLS */ #ifdef CONFIG_TRACE_IRQFLAGS /* Return from syscalls can (and generally will) hard enable * interrupts. You aren't supposed to call a syscall with @@ -352,9 +346,6 @@ syscall_dotrace_cont: blrl /* Call handler */ .globl ret_from_syscall ret_from_syscall: -#ifdef SHOW_SYSCALLS - bl do_show_syscall_exit -#endif mr r6,r3 CURRENT_THREAD_INFO(r12, r1) /* disable interrupts so current_thread_info()->flags can't change */ @@ -523,74 +514,6 @@ syscall_exit_work: bl do_syscall_trace_leave b ret_from_except_full -#ifdef SHOW_SYSCALLS -do_show_syscall: -#ifdef SHOW_SYSCALLS_TASK - lis r11,show_syscalls_task@ha - lwz r11,show_syscalls_task@l(r11) - cmp 0,r2,r11 - bnelr -#endif - stw r31,GPR31(r1) - mflr r31 - lis r3,7f@ha - addi r3,r3,7f@l - lwz r4,GPR0(r1) - lwz r5,GPR3(r1) - lwz r6,GPR4(r1) - lwz r7,GPR5(r1) - lwz r8,GPR6(r1) - lwz r9,GPR7(r1) - bl printk - lis r3,77f@ha - addi r3,r3,77f@l - lwz r4,GPR8(r1) - mr r5,r2 - bl printk - lwz r0,GPR0(r1) - lwz r3,GPR3(r1) - lwz r4,GPR4(r1) - lwz r5,GPR5(r1) - lwz r6,GPR6(r1) - lwz r7,GPR7(r1) - lwz r8,GPR8(r1) - mtlr r31 - lwz r31,GPR31(r1) - blr - -do_show_syscall_exit: -#ifdef SHOW_SYSCALLS_TASK - lis r11,show_syscalls_task@ha - lwz r11,show_syscalls_task@l(r11) - cmp 0,r2,r11 - bnelr -#endif - stw r31,GPR31(r1) - mflr r31 - stw r3,RESULT(r1) /* Save result */ - mr r4,r3 - lis r3,79f@ha - addi r3,r3,79f@l - bl printk - lwz r3,RESULT(r1) - mtlr r31 - lwz r31,GPR31(r1) - blr - -7: .string "syscall %d(%x, %x, %x, %x, %x, " -77: .string "%x), current=%p\n" -79: .string " -> %x\n" - .align 2,0 - -#ifdef SHOW_SYSCALLS_TASK - .data - .globl show_syscalls_task -show_syscalls_task: - .long -1 - .text -#endif -#endif /* SHOW_SYSCALLS */ - /* * The fork/clone functions need to copy the full register set into * the child process. Therefore we need to save all the nonvolatile diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index fefaa1485bd31e..d180caf2d6de74 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -49,8 +49,6 @@ exception_marker: .section ".text" .align 7 -#undef SHOW_SYSCALLS - .globl system_call_common system_call_common: andi. r10,r12,MSR_PR @@ -142,13 +140,6 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) li r10,1 std r10,SOFTE(r1) -#ifdef SHOW_SYSCALLS - bl do_show_syscall - REST_GPR(0,r1) - REST_4GPRS(3,r1) - REST_2GPRS(7,r1) - addi r9,r1,STACK_FRAME_OVERHEAD -#endif CURRENT_THREAD_INFO(r11, r1) ld r10,TI_FLAGS(r11) andi. r11,r10,_TIF_SYSCALL_DOTRACE @@ -180,10 +171,6 @@ system_call: /* label this so stack traces look sane */ .Lsyscall_exit: std r3,RESULT(r1) -#ifdef SHOW_SYSCALLS - bl do_show_syscall_exit - ld r3,RESULT(r1) -#endif CURRENT_THREAD_INFO(r12, r1) ld r8,_MSR(r1) diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index cd9be9aa016d18..b2702e87db0d44 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -121,17 +121,3 @@ long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, return sys_fadvise64(fd, (u64)offset_high << 32 | offset_low, (u64)len_high << 32 | len_low, advice); } - -void do_show_syscall(unsigned long r3, unsigned long r4, unsigned long r5, - unsigned long r6, unsigned long r7, unsigned long r8, - struct pt_regs *regs) -{ - printk("syscall %ld(%lx, %lx, %lx, %lx, %lx, %lx) regs=%p current=%p" - " cpu=%d\n", regs->gpr[0], r3, r4, r5, r6, r7, r8, regs, - current, smp_processor_id()); -} - -void do_show_syscall_exit(unsigned long r3) -{ - printk(" -> %lx, current=%p cpu=%d\n", r3, current, smp_processor_id()); -} From fe12545e7650de5332b5522a62686fab8bafc733 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 8 Jan 2015 16:41:47 +1100 Subject: [PATCH 375/601] powerpc/kernel: Avoid initializing device-tree pointer twice As commit 50ba08f3 ("of/fdt: Don't clear initial_boot_params if fdt_check_header() fails") does, the device-tree pointer "initial_boot_params" is initialized by early_init_dt_verify(), which is called by early_init_devtree(). So we needn't explicitly initialize that again in early_init_devtree(). Signed-off-by: Gavin Shan Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 6a799b3cc6b444..b8e15c678960f3 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -652,9 +652,6 @@ void __init early_init_devtree(void *params) if (!early_init_dt_verify(params)) panic("BUG: Failed verifying flat device tree, bad version?"); - /* Setup flat device-tree pointer */ - initial_boot_params = params; - #ifdef CONFIG_PPC_RTAS /* Some machines might need RTAS info for debugging, grab it now. */ of_scan_flat_dt(early_init_dt_scan_rtas, NULL); From fd979c0132074856975a6e79bc2226b99435ec5b Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 30 Jan 2015 13:45:57 -0800 Subject: [PATCH 376/601] perf: provide sysfs_show for struct perf_pmu_events_attr (struct perf_pmu_events_attr) is defined in include/linux/perf_event.h, but the only "show" for it is in x86 and contains x86 specific stuff. Make a generic one for those of us who are just using the event_str. Signed-off-by: Cody P Schafer Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa Signed-off-by: Michael Ellerman --- include/linux/perf_event.h | 3 +++ kernel/events/core.c | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 486e84ccb1f925..58f59bdb590bea 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -897,6 +897,9 @@ struct perf_pmu_events_attr { const char *event_str; }; +ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page); + #define PMU_EVENT_ATTR(_name, _var, _id, _show) \ static struct perf_pmu_events_attr _var = { \ .attr = __ATTR(_name, 0444, _show, NULL), \ diff --git a/kernel/events/core.c b/kernel/events/core.c index 4c1ee7f2bebc4b..934687f8d51b53 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8276,6 +8276,18 @@ void __init perf_event_init(void) != 1024); } +ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_attr *pmu_attr = + container_of(attr, struct perf_pmu_events_attr, attr); + + if (pmu_attr->event_str) + return sprintf(page, "%s\n", pmu_attr->event_str); + + return 0; +} + static int __init perf_event_sysfs_init(void) { struct pmu *pmu; From f0405b816149665393cc62b9e5082fc2d79714df Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 30 Jan 2015 13:45:58 -0800 Subject: [PATCH 377/601] perf: add PMU_EVENT_ATTR_STRING() helper Helper for constructing static struct perf_pmu_events_attr s. Signed-off-by: Cody P Schafer Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa Signed-off-by: Michael Ellerman --- include/linux/perf_event.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 58f59bdb590bea..1d3631448b910e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -906,6 +906,13 @@ static struct perf_pmu_events_attr _var = { \ .id = _id, \ }; +#define PMU_EVENT_ATTR_STRING(_name, _var, _str) \ +static struct perf_pmu_events_attr _var = { \ + .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \ + .id = 0, \ + .event_str = _str, \ +}; + #define PMU_FORMAT_ATTR(_name, _format) \ static ssize_t \ _name##_show(struct device *dev, \ From e08e52824e41fb42e46593450d378ad1b33caedb Mon Sep 17 00:00:00 2001 From: "sukadev@linux.vnet.ibm.com" Date: Fri, 30 Jan 2015 13:45:59 -0800 Subject: [PATCH 378/601] perf: define EVENT_DEFINE_RANGE_FORMAT_LITE helper Define a lite version of the EVENT_DEFINE_RANGE_FORMAT() that avoids defining helper functions for the bit-field ranges. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/perf/hv-common.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/powerpc/perf/hv-common.h b/arch/powerpc/perf/hv-common.h index 5d79cecbd73da1..349aaba4d2d110 100644 --- a/arch/powerpc/perf/hv-common.h +++ b/arch/powerpc/perf/hv-common.h @@ -20,6 +20,16 @@ unsigned long hv_perf_caps_get(struct hv_perf_caps *caps); PMU_FORMAT_ATTR(name, #attr_var ":" #bit_start "-" #bit_end); \ EVENT_DEFINE_RANGE(name, attr_var, bit_start, bit_end) +/* + * The EVENT_DEFINE_RANGE_FORMAT() macro above includes helper functions + * for the fields (eg: event_get_starting_index()). For some fields we + * need the bit-range definition, but no the helper functions. Define a + * lite version of the above macro without the helpers and silence + * compiler warnings unused static functions. + */ +#define EVENT_DEFINE_RANGE_FORMAT_LITE(name, attr_var, bit_start, bit_end) \ +PMU_FORMAT_ATTR(name, #attr_var ":" #bit_start "-" #bit_end); + #define EVENT_DEFINE_RANGE(name, attr_var, bit_start, bit_end) \ static u64 event_get_##name##_max(void) \ { \ From 5c5cd7b502595f6b90509b8aa4bba6f81b69315c Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 30 Jan 2015 13:46:00 -0800 Subject: [PATCH 379/601] powerpc/perf/hv-24x7: parse catalog and populate sysfs with events Retrieves and parses the 24x7 catalog on POWER systems that supply it (right now, only POWER 8). Events are exposed via sysfs in the standard fashion, and are all parameterized. $ cd /sys/bus/event_source/devices/hv_24x7/events $ cat HPM_CS_FROM_L4_LDATA__PHYS_CORE domain=0x2,offset=0xd58,core=?,lpar=0x0 $ cat HPM_TLBIE__VCPU_HOME_CHIP domain=0x4,offset=0x358,vcpu=?,lpar=? where user is required to specify values for the fields with '?' (like core, vcpu, lpar above), when specifying the event with the perf tool. Catalog is (at the moment) only parsed on boot. It needs re-parsing when a some hypervisor events occur. At that point we'll also need to prevent old events from continuing to function (counter that is passed in via spare space in the config values?). Signed-off-by: Cody P Schafer Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/perf/hv-24x7-catalog.h | 25 + arch/powerpc/perf/hv-24x7-domains.h | 28 + arch/powerpc/perf/hv-24x7.c | 793 +++++++++++++++++++++++++++- arch/powerpc/perf/hv-24x7.h | 12 +- 4 files changed, 841 insertions(+), 17 deletions(-) create mode 100644 arch/powerpc/perf/hv-24x7-domains.h diff --git a/arch/powerpc/perf/hv-24x7-catalog.h b/arch/powerpc/perf/hv-24x7-catalog.h index 21b19dd86d9c0f..69e2e1faf902c7 100644 --- a/arch/powerpc/perf/hv-24x7-catalog.h +++ b/arch/powerpc/perf/hv-24x7-catalog.h @@ -30,4 +30,29 @@ struct hv_24x7_catalog_page_0 { __u8 reserved6[2]; } __packed; +struct hv_24x7_event_data { + __be16 length; /* in bytes, must be a multiple of 16 */ + __u8 reserved1[2]; + __u8 domain; /* Chip = 1, Core = 2 */ + __u8 reserved2[1]; + __be16 event_group_record_offs; /* in bytes, must be 8 byte aligned */ + __be16 event_group_record_len; /* in bytes */ + + /* in bytes, offset from event_group_record */ + __be16 event_counter_offs; + + /* verified_state, unverified_state, caveat_state, broken_state, ... */ + __be32 flags; + + __be16 primary_group_ix; + __be16 group_count; + __be16 event_name_len; + __u8 remainder[]; + /* __u8 event_name[event_name_len - 2]; */ + /* __be16 event_description_len; */ + /* __u8 event_desc[event_description_len - 2]; */ + /* __be16 detailed_desc_len; */ + /* __u8 detailed_desc[detailed_desc_len - 2]; */ +} __packed; + #endif diff --git a/arch/powerpc/perf/hv-24x7-domains.h b/arch/powerpc/perf/hv-24x7-domains.h new file mode 100644 index 00000000000000..49c1efd500459b --- /dev/null +++ b/arch/powerpc/perf/hv-24x7-domains.h @@ -0,0 +1,28 @@ + +/* + * DOMAIN(name, num, index_kind, is_physical) + * + * @name: An all caps token, suitable for use in generating an enum + * member and appending to an event name in sysfs. + * + * @num: The number corresponding to the domain as given in + * documentation. We assume the catalog domain and the hcall + * domain have the same numbering (so far they do), but this + * may need to be changed in the future. + * + * @index_kind: A stringifiable token describing the meaning of the index + * within the given domain. Must fit the parsing rules of the + * perf sysfs api. + * + * @is_physical: True if the domain is physical, false otherwise (if virtual). + * + * Note: The terms PHYS_CHIP, PHYS_CORE, VCPU correspond to physical chip, + * physical core and virtual processor in 24x7 Counters specifications. + */ + +DOMAIN(PHYS_CHIP, 0x01, chip, true) +DOMAIN(PHYS_CORE, 0x02, core, true) +DOMAIN(VCPU_HOME_CORE, 0x03, vcpu, false) +DOMAIN(VCPU_HOME_CHIP, 0x04, vcpu, false) +DOMAIN(VCPU_HOME_NODE, 0x05, vcpu, false) +DOMAIN(VCPU_REMOTE_NODE, 0x06, vcpu, false) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index f162d0b8eea380..9445a824819ea7 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -13,16 +13,66 @@ #define pr_fmt(fmt) "hv-24x7: " fmt #include +#include #include #include +#include + #include #include #include +#include #include "hv-24x7.h" #include "hv-24x7-catalog.h" #include "hv-common.h" +static const char *event_domain_suffix(unsigned domain) +{ + switch (domain) { +#define DOMAIN(n, v, x, c) \ + case HV_PERF_DOMAIN_##n: \ + return "__" #n; +#include "hv-24x7-domains.h" +#undef DOMAIN + default: + WARN(1, "unknown domain %d\n", domain); + return "__UNKNOWN_DOMAIN_SUFFIX"; + } +} + +static bool domain_is_valid(unsigned domain) +{ + switch (domain) { +#define DOMAIN(n, v, x, c) \ + case HV_PERF_DOMAIN_##n: \ + /* fall through */ +#include "hv-24x7-domains.h" +#undef DOMAIN + return true; + default: + return false; + } +} + +static bool is_physical_domain(unsigned domain) +{ + switch (domain) { +#define DOMAIN(n, v, x, c) \ + case HV_PERF_DOMAIN_##n: \ + return c; +#include "hv-24x7-domains.h" +#undef DOMAIN + default: + return false; + } +} + +static bool catalog_entry_domain_is_valid(unsigned domain) +{ + return is_physical_domain(domain); +} + /* * TODO: Merging events: * - Think of the hcall as an interface to a 4d array of counters: @@ -44,13 +94,14 @@ /* * Example usage: - * perf stat -e 'hv_24x7/domain=2,offset=8,starting_index=0,lpar=0xffffffff/' + * perf stat -e 'hv_24x7/domain=2,offset=8,vcpu=0,lpar=0xffffffff/' */ /* u3 0-6, one of HV_24X7_PERF_DOMAIN */ EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3); /* u16 */ -EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 16, 31); +EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31); +EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31); /* u32, see "data_offset" */ EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63); /* u16 */ @@ -63,7 +114,8 @@ EVENT_DEFINE_RANGE(reserved3, config2, 0, 63); static struct attribute *format_attrs[] = { &format_attr_domain.attr, &format_attr_offset.attr, - &format_attr_starting_index.attr, + &format_attr_core.attr, + &format_attr_vcpu.attr, &format_attr_lpar.attr, NULL, }; @@ -73,8 +125,115 @@ static struct attribute_group format_group = { .attrs = format_attrs, }; +static struct attribute_group event_group = { + .name = "events", + /* .attrs is set in init */ +}; + +static struct attribute_group event_desc_group = { + .name = "event_descs", + /* .attrs is set in init */ +}; + +static struct attribute_group event_long_desc_group = { + .name = "event_long_descs", + /* .attrs is set in init */ +}; + static struct kmem_cache *hv_page_cache; +static char *event_name(struct hv_24x7_event_data *ev, int *len) +{ + *len = be16_to_cpu(ev->event_name_len) - 2; + return (char *)ev->remainder; +} + +static char *event_desc(struct hv_24x7_event_data *ev, int *len) +{ + unsigned nl = be16_to_cpu(ev->event_name_len); + __be16 *desc_len = (__be16 *)(ev->remainder + nl - 2); + *len = be16_to_cpu(*desc_len) - 2; + return (char *)ev->remainder + nl; +} + +static char *event_long_desc(struct hv_24x7_event_data *ev, int *len) +{ + unsigned nl = be16_to_cpu(ev->event_name_len); + __be16 *desc_len_ = (__be16 *)(ev->remainder + nl - 2); + unsigned desc_len = be16_to_cpu(*desc_len_); + __be16 *long_desc_len = (__be16 *)(ev->remainder + nl + desc_len - 2); + *len = be16_to_cpu(*long_desc_len) - 2; + return (char *)ev->remainder + nl + desc_len; +} + +static bool event_fixed_portion_is_within(struct hv_24x7_event_data *ev, + void *end) +{ + void *start = ev; + + return (start + offsetof(struct hv_24x7_event_data, remainder)) < end; +} + +/* + * Things we don't check: + * - padding for desc, name, and long/detailed desc is required to be '\0' + * bytes. + * + * Return NULL if we pass end, + * Otherwise return the address of the byte just following the event. + */ +static void *event_end(struct hv_24x7_event_data *ev, void *end) +{ + void *start = ev; + __be16 *dl_, *ldl_; + unsigned dl, ldl; + unsigned nl = be16_to_cpu(ev->event_name_len); + + if (nl < 2) { + pr_debug("%s: name length too short: %d", __func__, nl); + return NULL; + } + + if (start + nl > end) { + pr_debug("%s: start=%p + nl=%u > end=%p", + __func__, start, nl, end); + return NULL; + } + + dl_ = (__be16 *)(ev->remainder + nl - 2); + if (!IS_ALIGNED((uintptr_t)dl_, 2)) + pr_warn("desc len not aligned %p", dl_); + dl = be16_to_cpu(*dl_); + if (dl < 2) { + pr_debug("%s: desc len too short: %d", __func__, dl); + return NULL; + } + + if (start + nl + dl > end) { + pr_debug("%s: (start=%p + nl=%u + dl=%u)=%p > end=%p", + __func__, start, nl, dl, start + nl + dl, end); + return NULL; + } + + ldl_ = (__be16 *)(ev->remainder + nl + dl - 2); + if (!IS_ALIGNED((uintptr_t)ldl_, 2)) + pr_warn("long desc len not aligned %p", ldl_); + ldl = be16_to_cpu(*ldl_); + if (ldl < 2) { + pr_debug("%s: long desc len too short (ldl=%u)", + __func__, ldl); + return NULL; + } + + if (start + nl + dl + ldl > end) { + pr_debug("%s: start=%p + nl=%u + dl=%u + ldl=%u > end=%p", + __func__, start, nl, dl, ldl, end); + return NULL; + } + + return start + nl + dl + ldl; +} + static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096, unsigned long version, unsigned long index) @@ -97,6 +256,609 @@ static unsigned long h_get_24x7_catalog_page(char page[], version, index); } +static unsigned core_domains[] = { + HV_PERF_DOMAIN_PHYS_CORE, + HV_PERF_DOMAIN_VCPU_HOME_CORE, + HV_PERF_DOMAIN_VCPU_HOME_CHIP, + HV_PERF_DOMAIN_VCPU_HOME_NODE, + HV_PERF_DOMAIN_VCPU_REMOTE_NODE, +}; +/* chip event data always yeilds a single event, core yeilds multiple */ +#define MAX_EVENTS_PER_EVENT_DATA ARRAY_SIZE(core_domains) + +static char *event_fmt(struct hv_24x7_event_data *event, unsigned domain) +{ + const char *sindex; + const char *lpar; + + if (is_physical_domain(domain)) { + lpar = "0x0"; + sindex = "core"; + } else { + lpar = "?"; + sindex = "vcpu"; + } + + return kasprintf(GFP_KERNEL, + "domain=0x%x,offset=0x%x,%s=?,lpar=%s", + domain, + be16_to_cpu(event->event_counter_offs) + + be16_to_cpu(event->event_group_record_offs), + sindex, + lpar); +} + +/* Avoid trusting fw to NUL terminate strings */ +static char *memdup_to_str(char *maybe_str, int max_len, gfp_t gfp) +{ + return kasprintf(gfp, "%.*s", max_len, maybe_str); +} + +static ssize_t device_show_string(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct dev_ext_attribute *d; + + d = container_of(attr, struct dev_ext_attribute, attr); + return sprintf(buf, "%s\n", (char *)d->var); +} + +static struct attribute *device_str_attr_create_(char *name, char *str) +{ + struct dev_ext_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL); + + if (!attr) + return NULL; + + attr->var = str; + attr->attr.attr.name = name; + attr->attr.attr.mode = 0444; + attr->attr.show = device_show_string; + return &attr->attr.attr; +} + +static struct attribute *device_str_attr_create(char *name, int name_max, + int name_nonce, + char *str, size_t str_max) +{ + char *n; + char *s = memdup_to_str(str, str_max, GFP_KERNEL); + struct attribute *a; + + if (!s) + return NULL; + + if (!name_nonce) + n = kasprintf(GFP_KERNEL, "%.*s", name_max, name); + else + n = kasprintf(GFP_KERNEL, "%.*s__%d", name_max, name, + name_nonce); + if (!n) + goto out_s; + + a = device_str_attr_create_(n, s); + if (!a) + goto out_n; + + return a; +out_n: + kfree(n); +out_s: + kfree(s); + return NULL; +} + +static void device_str_attr_destroy(struct attribute *attr) +{ + struct dev_ext_attribute *d; + + d = container_of(attr, struct dev_ext_attribute, attr.attr); + kfree(d->var); + kfree(d->attr.attr.name); + kfree(d); +} + +static struct attribute *event_to_attr(unsigned ix, + struct hv_24x7_event_data *event, + unsigned domain, + int nonce) +{ + int event_name_len; + char *ev_name, *a_ev_name, *val; + const char *ev_suffix; + struct attribute *attr; + + if (!domain_is_valid(domain)) { + pr_warn("catalog event %u has invalid domain %u\n", + ix, domain); + return NULL; + } + + val = event_fmt(event, domain); + if (!val) + return NULL; + + ev_suffix = event_domain_suffix(domain); + ev_name = event_name(event, &event_name_len); + if (!nonce) + a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s", + (int)event_name_len, ev_name, ev_suffix); + else + a_ev_name = kasprintf(GFP_KERNEL, "%.*s%s__%d", + (int)event_name_len, ev_name, ev_suffix, nonce); + + + if (!a_ev_name) + goto out_val; + + attr = device_str_attr_create_(a_ev_name, val); + if (!attr) + goto out_name; + + return attr; +out_name: + kfree(a_ev_name); +out_val: + kfree(val); + return NULL; +} + +static struct attribute *event_to_desc_attr(struct hv_24x7_event_data *event, + int nonce) +{ + int nl, dl; + char *name = event_name(event, &nl); + char *desc = event_desc(event, &dl); + + /* If there isn't a description, don't create the sysfs file */ + if (!dl) + return NULL; + + return device_str_attr_create(name, nl, nonce, desc, dl); +} + +static struct attribute * +event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce) +{ + int nl, dl; + char *name = event_name(event, &nl); + char *desc = event_long_desc(event, &dl); + + /* If there isn't a description, don't create the sysfs file */ + if (!dl) + return NULL; + + return device_str_attr_create(name, nl, nonce, desc, dl); +} + +static ssize_t event_data_to_attrs(unsigned ix, struct attribute **attrs, + struct hv_24x7_event_data *event, int nonce) +{ + unsigned i; + + switch (event->domain) { + case HV_PERF_DOMAIN_PHYS_CHIP: + *attrs = event_to_attr(ix, event, event->domain, nonce); + return 1; + case HV_PERF_DOMAIN_PHYS_CORE: + for (i = 0; i < ARRAY_SIZE(core_domains); i++) { + attrs[i] = event_to_attr(ix, event, core_domains[i], + nonce); + if (!attrs[i]) { + pr_warn("catalog event %u: individual attr %u " + "creation failure\n", ix, i); + for (; i; i--) + device_str_attr_destroy(attrs[i - 1]); + return -1; + } + } + return i; + default: + pr_warn("catalog event %u: domain %u is not allowed in the " + "catalog\n", ix, event->domain); + return -1; + } +} + +static size_t event_to_attr_ct(struct hv_24x7_event_data *event) +{ + switch (event->domain) { + case HV_PERF_DOMAIN_PHYS_CHIP: + return 1; + case HV_PERF_DOMAIN_PHYS_CORE: + return ARRAY_SIZE(core_domains); + default: + return 0; + } +} + +static unsigned long vmalloc_to_phys(void *v) +{ + struct page *p = vmalloc_to_page(v); + + BUG_ON(!p); + return page_to_phys(p) + offset_in_page(v); +} + +/* */ +struct event_uniq { + struct rb_node node; + const char *name; + int nl; + unsigned ct; + unsigned domain; +}; + +static int memord(const void *d1, size_t s1, const void *d2, size_t s2) +{ + if (s1 < s2) + return 1; + if (s2 > s1) + return -1; + + return memcmp(d1, d2, s1); +} + +static int ev_uniq_ord(const void *v1, size_t s1, unsigned d1, const void *v2, + size_t s2, unsigned d2) +{ + int r = memord(v1, s1, v2, s2); + + if (r) + return r; + if (d1 > d2) + return 1; + if (d2 > d1) + return -1; + return 0; +} + +static int event_uniq_add(struct rb_root *root, const char *name, int nl, + unsigned domain) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + struct event_uniq *data; + + /* Figure out where to put new node */ + while (*new) { + struct event_uniq *it; + int result; + + it = container_of(*new, struct event_uniq, node); + result = ev_uniq_ord(name, nl, domain, it->name, it->nl, + it->domain); + + parent = *new; + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else { + it->ct++; + pr_info("found a duplicate event %.*s, ct=%u\n", nl, + name, it->ct); + return it->ct; + } + } + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + *data = (struct event_uniq) { + .name = name, + .nl = nl, + .ct = 0, + .domain = domain, + }; + + /* Add new node and rebalance tree. */ + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); + + /* data->ct */ + return 0; +} + +static void event_uniq_destroy(struct rb_root *root) +{ + /* + * the strings we point to are in the giant block of memory filled by + * the catalog, and are freed separately. + */ + struct event_uniq *pos, *n; + + rbtree_postorder_for_each_entry_safe(pos, n, root, node) + kfree(pos); +} + + +/* + * ensure the event structure's sizes are self consistent and don't cause us to + * read outside of the event + * + * On success, return the event length in bytes. + * Otherwise, return -1 (and print as appropriate). + */ +static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event, + size_t event_idx, + size_t event_data_bytes, + size_t event_entry_count, + size_t offset, void *end) +{ + ssize_t ev_len; + void *ev_end, *calc_ev_end; + + if (offset >= event_data_bytes) + return -1; + + if (event_idx >= event_entry_count) { + pr_devel("catalog event data has %zu bytes of padding after last event\n", + event_data_bytes - offset); + return -1; + } + + if (!event_fixed_portion_is_within(event, end)) { + pr_warn("event %zu fixed portion is not within range\n", + event_idx); + return -1; + } + + ev_len = be16_to_cpu(event->length); + + if (ev_len % 16) + pr_info("event %zu has length %zu not divisible by 16: event=%pK\n", + event_idx, ev_len, event); + + ev_end = (__u8 *)event + ev_len; + if (ev_end > end) { + pr_warn("event %zu has .length=%zu, ends after buffer end: ev_end=%pK > end=%pK, offset=%zu\n", + event_idx, ev_len, ev_end, end, + offset); + return -1; + } + + calc_ev_end = event_end(event, end); + if (!calc_ev_end) { + pr_warn("event %zu has a calculated length which exceeds buffer length %zu: event=%pK end=%pK, offset=%zu\n", + event_idx, event_data_bytes, event, end, + offset); + return -1; + } + + if (calc_ev_end > ev_end) { + pr_warn("event %zu exceeds it's own length: event=%pK, end=%pK, offset=%zu, calc_ev_end=%pK\n", + event_idx, event, ev_end, offset, calc_ev_end); + return -1; + } + + return ev_len; +} + +#define MAX_4K (SIZE_MAX / 4096) + +static void create_events_from_catalog(struct attribute ***events_, + struct attribute ***event_descs_, + struct attribute ***event_long_descs_) +{ + unsigned long hret; + size_t catalog_len, catalog_page_len, event_entry_count, + event_data_len, event_data_offs, + event_data_bytes, junk_events, event_idx, event_attr_ct, i, + attr_max, event_idx_last, desc_ct, long_desc_ct; + ssize_t ct, ev_len; + uint32_t catalog_version_num; + struct attribute **events, **event_descs, **event_long_descs; + struct hv_24x7_catalog_page_0 *page_0 = + kmem_cache_alloc(hv_page_cache, GFP_KERNEL); + void *page = page_0; + void *event_data, *end; + struct hv_24x7_event_data *event; + struct rb_root ev_uniq = RB_ROOT; + + if (!page) + goto e_out; + + hret = h_get_24x7_catalog_page(page, 0, 0); + if (hret) + goto e_free; + + catalog_version_num = be64_to_cpu(page_0->version); + catalog_page_len = be32_to_cpu(page_0->length); + + if (MAX_4K < catalog_page_len) { + pr_err("invalid page count: %zu\n", catalog_page_len); + goto e_free; + } + + catalog_len = catalog_page_len * 4096; + + event_entry_count = be16_to_cpu(page_0->event_entry_count); + event_data_offs = be16_to_cpu(page_0->event_data_offs); + event_data_len = be16_to_cpu(page_0->event_data_len); + + pr_devel("cv %zu cl %zu eec %zu edo %zu edl %zu\n", + (size_t)catalog_version_num, catalog_len, + event_entry_count, event_data_offs, event_data_len); + + if ((MAX_4K < event_data_len) + || (MAX_4K < event_data_offs) + || (MAX_4K - event_data_offs < event_data_len)) { + pr_err("invalid event data offs %zu and/or len %zu\n", + event_data_offs, event_data_len); + goto e_free; + } + + if ((event_data_offs + event_data_len) > catalog_page_len) { + pr_err("event data %zu-%zu does not fit inside catalog 0-%zu\n", + event_data_offs, + event_data_offs + event_data_len, + catalog_page_len); + goto e_free; + } + + if (SIZE_MAX / MAX_EVENTS_PER_EVENT_DATA - 1 < event_entry_count) { + pr_err("event_entry_count %zu is invalid\n", + event_entry_count); + goto e_free; + } + + event_data_bytes = event_data_len * 4096; + + /* + * event data can span several pages, events can cross between these + * pages. Use vmalloc to make this easier. + */ + event_data = vmalloc(event_data_bytes); + if (!event_data) { + pr_err("could not allocate event data\n"); + goto e_free; + } + + end = event_data + event_data_bytes; + + /* + * using vmalloc_to_phys() like this only works if PAGE_SIZE is + * divisible by 4096 + */ + BUILD_BUG_ON(PAGE_SIZE % 4096); + + for (i = 0; i < event_data_len; i++) { + hret = h_get_24x7_catalog_page_( + vmalloc_to_phys(event_data + i * 4096), + catalog_version_num, + i + event_data_offs); + if (hret) { + pr_err("failed to get event data in page %zu\n", + i + event_data_offs); + goto e_event_data; + } + } + + /* + * scan the catalog to determine the number of attributes we need, and + * verify it at the same time. + */ + for (junk_events = 0, event = event_data, event_idx = 0, attr_max = 0; + ; + event_idx++, event = (void *)event + ev_len) { + size_t offset = (void *)event - (void *)event_data; + char *name; + int nl; + + ev_len = catalog_event_len_validate(event, event_idx, + event_data_bytes, + event_entry_count, + offset, end); + if (ev_len < 0) + break; + + name = event_name(event, &nl); + + if (event->event_group_record_len == 0) { + pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n", + event_idx, nl, name); + junk_events++; + continue; + } + + if (!catalog_entry_domain_is_valid(event->domain)) { + pr_info("event %zu (%.*s) has invalid domain %d\n", + event_idx, nl, name, event->domain); + junk_events++; + continue; + } + + attr_max += event_to_attr_ct(event); + } + + event_idx_last = event_idx; + if (event_idx_last != event_entry_count) + pr_warn("event buffer ended before listed # of events were parsed (got %zu, wanted %zu, junk %zu)\n", + event_idx_last, event_entry_count, junk_events); + + events = kmalloc_array(attr_max + 1, sizeof(*events), GFP_KERNEL); + if (!events) + goto e_event_data; + + event_descs = kmalloc_array(event_idx + 1, sizeof(*event_descs), + GFP_KERNEL); + if (!event_descs) + goto e_event_attrs; + + event_long_descs = kmalloc_array(event_idx + 1, + sizeof(*event_long_descs), GFP_KERNEL); + if (!event_long_descs) + goto e_event_descs; + + /* Iterate over the catalog filling in the attribute vector */ + for (junk_events = 0, event_attr_ct = 0, desc_ct = 0, long_desc_ct = 0, + event = event_data, event_idx = 0; + event_idx < event_idx_last; + event_idx++, ev_len = be16_to_cpu(event->length), + event = (void *)event + ev_len) { + char *name; + int nl; + int nonce; + /* + * these are the only "bad" events that are intermixed and that + * we can ignore without issue. make sure to skip them here + */ + if (event->event_group_record_len == 0) + continue; + if (!catalog_entry_domain_is_valid(event->domain)) + continue; + + name = event_name(event, &nl); + nonce = event_uniq_add(&ev_uniq, name, nl, event->domain); + ct = event_data_to_attrs(event_idx, events + event_attr_ct, + event, nonce); + if (ct <= 0) { + pr_warn("event %zu (%.*s) creation failure, skipping\n", + event_idx, nl, name); + junk_events++; + } else { + event_attr_ct += ct; + event_descs[desc_ct] = event_to_desc_attr(event, nonce); + if (event_descs[desc_ct]) + desc_ct++; + event_long_descs[long_desc_ct] = + event_to_long_desc_attr(event, nonce); + if (event_long_descs[long_desc_ct]) + long_desc_ct++; + } + } + + pr_info("read %zu catalog entries, created %zu event attrs (%zu failures), %zu descs\n", + event_idx, event_attr_ct, junk_events, desc_ct); + + events[event_attr_ct] = NULL; + event_descs[desc_ct] = NULL; + event_long_descs[long_desc_ct] = NULL; + + event_uniq_destroy(&ev_uniq); + vfree(event_data); + kmem_cache_free(hv_page_cache, page); + + *events_ = events; + *event_descs_ = event_descs; + *event_long_descs_ = event_long_descs; + return; + +e_event_descs: + kfree(event_descs); +e_event_attrs: + kfree(events); +e_event_data: + vfree(event_data); +e_free: + kmem_cache_free(hv_page_cache, page); +e_out: + *events_ = NULL; + *event_descs_ = NULL; + *event_long_descs_ = NULL; +} + static ssize_t catalog_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t offset, size_t count) @@ -207,16 +969,13 @@ static struct attribute_group if_group = { static const struct attribute_group *attr_groups[] = { &format_group, + &event_group, + &event_desc_group, + &event_long_desc_group, &if_group, NULL, }; -static bool is_physical_domain(int domain) -{ - return domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CHIP || - domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE; -} - DEFINE_PER_CPU(char, hv_24x7_reqb[4096]) __aligned(4096); DEFINE_PER_CPU(char, hv_24x7_resb[4096]) __aligned(4096); @@ -291,9 +1050,17 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix, static unsigned long event_24x7_request(struct perf_event *event, u64 *res, bool success_expected) { + u16 idx; + unsigned domain = event_get_domain(event); + + if (is_physical_domain(domain)) + idx = event_get_core(event); + else + idx = event_get_vcpu(event); + return single_24x7_request(event_get_domain(event), event_get_offset(event), - event_get_starting_index(event), + idx, event_get_lpar(event), res, success_expected); @@ -356,7 +1123,7 @@ static int h_24x7_event_init(struct perf_event *event) return -EIO; } - /* PHYSICAL domains & other lpars require extra capabilities */ + /* Physical domains & other lpars require extra capabilities */ if (!caps.collect_privileged && (is_physical_domain(domain) || (event_get_lpar(event) != event_get_lpar_max()))) { pr_devel("hv permisions disallow: is_physical_domain:%d, lpar=0x%llx\n", @@ -452,6 +1219,10 @@ static int hv_24x7_init(void) /* sampling not supported */ h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; + create_events_from_catalog(&event_group.attrs, + &event_desc_group.attrs, + &event_long_desc_group.attrs); + r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1); if (r) return r; diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h index 720ebce4b43505..69cd4e690f5877 100644 --- a/arch/powerpc/perf/hv-24x7.h +++ b/arch/powerpc/perf/hv-24x7.h @@ -3,14 +3,14 @@ #include +enum hv_perf_domains { +#define DOMAIN(n, v, x, c) HV_PERF_DOMAIN_##n = v, +#include "hv-24x7-domains.h" +#undef DOMAIN +}; + struct hv_24x7_request { /* PHYSICAL domains require enabling via phyp/hmc. */ -#define HV_24X7_PERF_DOMAIN_PHYSICAL_CHIP 0x01 -#define HV_24X7_PERF_DOMAIN_PHYSICAL_CORE 0x02 -#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_CORE 0x03 -#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_CHIP 0x04 -#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_NODE 0x05 -#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_REMOTE_NODE 0x06 __u8 performance_domain; __u8 reserved[0x1]; From 9e9f60108423f18a99c9cc93ef7f23490ecc709b Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 30 Jan 2015 13:46:01 -0800 Subject: [PATCH 380/601] powerpc/perf/{hv-gpci, hv-common}: generate requests with counters annotated This adds (in req-gen/) a framework for defining gpci counter requests. It uses macro magic similar to ftrace. Also convert the existing hv-gpci request structures and enum values to use the new framework (and adjust old users of the structs and enum values to cope with changes in naming). In exchange for this macro disaster, we get autogenerated event listing for GPCI in sysfs, build time field offset checking, and zero duplication of information about GPCI requests. Signed-off-by: Cody P Schafer Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/perf/hv-common.c | 10 +- arch/powerpc/perf/hv-gpci-requests.h | 76 ++++++++++ arch/powerpc/perf/hv-gpci.c | 23 +++ arch/powerpc/perf/hv-gpci.h | 37 ++--- arch/powerpc/perf/req-gen/_begin.h | 13 ++ arch/powerpc/perf/req-gen/_clear.h | 5 + arch/powerpc/perf/req-gen/_end.h | 4 + arch/powerpc/perf/req-gen/_request-begin.h | 15 ++ arch/powerpc/perf/req-gen/_request-end.h | 8 ++ arch/powerpc/perf/req-gen/perf.h | 155 +++++++++++++++++++++ 10 files changed, 316 insertions(+), 30 deletions(-) create mode 100644 arch/powerpc/perf/hv-gpci-requests.h create mode 100644 arch/powerpc/perf/req-gen/_begin.h create mode 100644 arch/powerpc/perf/req-gen/_clear.h create mode 100644 arch/powerpc/perf/req-gen/_end.h create mode 100644 arch/powerpc/perf/req-gen/_request-begin.h create mode 100644 arch/powerpc/perf/req-gen/_request-end.h create mode 100644 arch/powerpc/perf/req-gen/perf.h diff --git a/arch/powerpc/perf/hv-common.c b/arch/powerpc/perf/hv-common.c index 47e02b366f58f4..7dce8f1099675d 100644 --- a/arch/powerpc/perf/hv-common.c +++ b/arch/powerpc/perf/hv-common.c @@ -9,13 +9,13 @@ unsigned long hv_perf_caps_get(struct hv_perf_caps *caps) unsigned long r; struct p { struct hv_get_perf_counter_info_params params; - struct cv_system_performance_capabilities caps; + struct hv_gpci_system_performance_capabilities caps; } __packed __aligned(sizeof(uint64_t)); struct p arg = { .params = { .counter_request = cpu_to_be32( - CIR_SYSTEM_PERFORMANCE_CAPABILITIES), + HV_GPCI_system_performance_capabilities), .starting_index = cpu_to_be32(-1), .counter_info_version_in = 0, } @@ -31,9 +31,9 @@ unsigned long hv_perf_caps_get(struct hv_perf_caps *caps) caps->version = arg.params.counter_info_version_out; caps->collect_privileged = !!arg.caps.perf_collect_privileged; - caps->ga = !!(arg.caps.capability_mask & CV_CM_GA); - caps->expanded = !!(arg.caps.capability_mask & CV_CM_EXPANDED); - caps->lab = !!(arg.caps.capability_mask & CV_CM_LAB); + caps->ga = !!(arg.caps.capability_mask & HV_GPCI_CM_GA); + caps->expanded = !!(arg.caps.capability_mask & HV_GPCI_CM_EXPANDED); + caps->lab = !!(arg.caps.capability_mask & HV_GPCI_CM_LAB); return r; } diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h new file mode 100644 index 00000000000000..80085444b1a0ea --- /dev/null +++ b/arch/powerpc/perf/hv-gpci-requests.h @@ -0,0 +1,76 @@ + +#include "req-gen/_begin.h" + +/* + * Based on the document "getPerfCountInfo v1.07" + */ + +/* + * #define REQUEST_NAME counter_request_name + * #define REQUEST_NUM r_num + * #define REQUEST_IDX_KIND starting_index_kind + * #include I(REQUEST_BEGIN) + * REQUEST( + * __field(...) + * __field(...) + * __array(...) + * __count(...) + * ) + * #include I(REQUEST_END) + * + * - starting_index_kind is one of the following, depending on the event: + * + * chip_id: hardware chip id or -1 for current hw chip + * phys_processor_idx: + * 0xffffffffffffffff: or -1, which means it is irrelavant for the event + * + * __count(offset, bytes, name): + * a counter that should be exposed via perf + * __field(offset, bytes, name) + * a normal field + * __array(offset, bytes, name) + * an array of bytes + * + * + * @bytes for __count, and __field _must_ be a numeral token + * in decimal, not an expression and not in hex. + * + * + * TODO: + * - expose secondary index (if any counter ever uses it, only 0xA0 + * appears to use it right now, and it doesn't have any counters) + * - embed versioning info + * - include counter descriptions + */ +#define REQUEST_NAME dispatch_timebase_by_processor +#define REQUEST_NUM 0x10 +#define REQUEST_IDX_KIND "phys_processor_idx=?" +#include I(REQUEST_BEGIN) +REQUEST(__count(0, 8, processor_time_in_timebase_cycles) + __field(0x8, 4, hw_processor_id) + __field(0xC, 2, owning_part_id) + __field(0xE, 1, processor_state) + __field(0xF, 1, version) + __field(0x10, 4, hw_chip_id) + __field(0x14, 4, phys_module_id) + __field(0x18, 4, primary_affinity_domain_idx) + __field(0x1C, 4, secondary_affinity_domain_idx) + __field(0x20, 4, processor_version) + __field(0x24, 2, logical_processor_idx) + __field(0x26, 2, reserved) + __field(0x28, 4, processor_id_register) + __field(0x2C, 4, phys_processor_idx) +) +#include I(REQUEST_END) + +#define REQUEST_NAME system_performance_capabilities +#define REQUEST_NUM 0x40 +#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 1, perf_collect_privileged) + __field(0x1, 1, capability_mask) + __array(0x2, 0xE, reserved) +) +#include I(REQUEST_END) + +#include "req-gen/_end.h" diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index a051fe946c63a2..856fe6e03c2a84 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -31,7 +31,18 @@ /* u32 */ EVENT_DEFINE_RANGE_FORMAT(request, config, 0, 31); /* u32 */ +/* + * Note that starting_index, phys_processor_idx, sibling_part_id, + * hw_chip_id, partition_id all refer to the same bit range. They + * are basically aliases for the starting_index. The specific alias + * used depends on the event. See REQUEST_IDX_KIND in hv-gpci-requests.h + */ EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 32, 63); +EVENT_DEFINE_RANGE_FORMAT_LITE(phys_processor_idx, config, 32, 63); +EVENT_DEFINE_RANGE_FORMAT_LITE(sibling_part_id, config, 32, 63); +EVENT_DEFINE_RANGE_FORMAT_LITE(hw_chip_id, config, 32, 63); +EVENT_DEFINE_RANGE_FORMAT_LITE(partition_id, config, 32, 63); + /* u16 */ EVENT_DEFINE_RANGE_FORMAT(secondary_index, config1, 0, 15); /* u8 */ @@ -44,6 +55,10 @@ EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63); static struct attribute *format_attrs[] = { &format_attr_request.attr, &format_attr_starting_index.attr, + &format_attr_phys_processor_idx.attr, + &format_attr_sibling_part_id.attr, + &format_attr_hw_chip_id.attr, + &format_attr_partition_id.attr, &format_attr_secondary_index.attr, &format_attr_counter_info_version.attr, @@ -57,6 +72,11 @@ static struct attribute_group format_group = { .attrs = format_attrs, }; +static struct attribute_group event_group = { + .name = "events", + .attrs = hv_gpci_event_attrs, +}; + #define HV_CAPS_ATTR(_name, _format) \ static ssize_t _name##_show(struct device *dev, \ struct device_attribute *attr, \ @@ -102,6 +122,7 @@ static struct attribute_group interface_group = { static const struct attribute_group *attr_groups[] = { &format_group, + &event_group, &interface_group, NULL, }; @@ -265,6 +286,8 @@ static int hv_gpci_init(void) unsigned long hret; struct hv_perf_caps caps; + hv_gpci_assert_offsets_correct(); + if (!firmware_has_feature(FW_FEATURE_LPAR)) { pr_debug("not a virtualized system, not enabling\n"); return -ENODEV; diff --git a/arch/powerpc/perf/hv-gpci.h b/arch/powerpc/perf/hv-gpci.h index b25f460c9cce47..86ede827596118 100644 --- a/arch/powerpc/perf/hv-gpci.h +++ b/arch/powerpc/perf/hv-gpci.h @@ -42,32 +42,19 @@ struct hv_get_perf_counter_info_params { */ #define COUNTER_INFO_VERSION_CURRENT 0x8 -/* - * These determine the counter_value[] layout and the meaning of starting_index - * and secondary_index. - * - * Unless otherwise noted, @secondary_index is unused and ignored. - */ -enum counter_info_requests { - - /* GENERAL */ - - /* @starting_index: must be -1 (to refer to the current partition) - */ - CIR_SYSTEM_PERFORMANCE_CAPABILITIES = 0X40, +/* capability mask masks. */ +enum { + HV_GPCI_CM_GA = (1 << 7), + HV_GPCI_CM_EXPANDED = (1 << 6), + HV_GPCI_CM_LAB = (1 << 5) }; -struct cv_system_performance_capabilities { - /* If != 0, allowed to collect data from other partitions */ - __u8 perf_collect_privileged; - - /* These following are only valid if counter_info_version >= 0x3 */ -#define CV_CM_GA (1 << 7) -#define CV_CM_EXPANDED (1 << 6) -#define CV_CM_LAB (1 << 5) - /* remaining bits are reserved */ - __u8 capability_mask; - __u8 reserved[0xE]; -} __packed; +#define REQUEST_FILE "../hv-gpci-requests.h" +#define NAME_LOWER hv_gpci +#define NAME_UPPER HV_GPCI +#include "req-gen/perf.h" +#undef REQUEST_FILE +#undef NAME_LOWER +#undef NAME_UPPER #endif diff --git a/arch/powerpc/perf/req-gen/_begin.h b/arch/powerpc/perf/req-gen/_begin.h new file mode 100644 index 00000000000000..acfb17a55c1617 --- /dev/null +++ b/arch/powerpc/perf/req-gen/_begin.h @@ -0,0 +1,13 @@ +/* Include paths to be used in interface defining headers */ +#ifndef POWERPC_PERF_REQ_GEN_H_ +#define POWERPC_PERF_REQ_GEN_H_ + +#define CAT2_STR_(t, s) __stringify(t/s) +#define CAT2_STR(t, s) CAT2_STR_(t, s) +#define I(...) __VA_ARGS__ + +#endif + +#define REQ_GEN_PREFIX req-gen +#define REQUEST_BEGIN CAT2_STR(REQ_GEN_PREFIX, _request-begin.h) +#define REQUEST_END CAT2_STR(REQ_GEN_PREFIX, _request-end.h) diff --git a/arch/powerpc/perf/req-gen/_clear.h b/arch/powerpc/perf/req-gen/_clear.h new file mode 100644 index 00000000000000..422974f895737b --- /dev/null +++ b/arch/powerpc/perf/req-gen/_clear.h @@ -0,0 +1,5 @@ + +#undef __field_ +#undef __count_ +#undef __array_ +#undef REQUEST_ diff --git a/arch/powerpc/perf/req-gen/_end.h b/arch/powerpc/perf/req-gen/_end.h new file mode 100644 index 00000000000000..8a406980b6bf14 --- /dev/null +++ b/arch/powerpc/perf/req-gen/_end.h @@ -0,0 +1,4 @@ + +#undef REQ_GEN_PREFIX +#undef REQUEST_BEGIN +#undef REQUEST_END diff --git a/arch/powerpc/perf/req-gen/_request-begin.h b/arch/powerpc/perf/req-gen/_request-begin.h new file mode 100644 index 00000000000000..f6d98642cf1dea --- /dev/null +++ b/arch/powerpc/perf/req-gen/_request-begin.h @@ -0,0 +1,15 @@ + +#define REQUEST(r_contents) \ + REQUEST_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, I(r_contents)) + +#define __field(f_offset, f_bytes, f_name) \ + __field_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, \ + f_offset, f_bytes, f_name) + +#define __array(f_offset, f_bytes, f_name) \ + __array_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, \ + f_offset, f_bytes, f_name) + +#define __count(f_offset, f_bytes, f_name) \ + __count_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, \ + f_offset, f_bytes, f_name) diff --git a/arch/powerpc/perf/req-gen/_request-end.h b/arch/powerpc/perf/req-gen/_request-end.h new file mode 100644 index 00000000000000..5573be6c35881c --- /dev/null +++ b/arch/powerpc/perf/req-gen/_request-end.h @@ -0,0 +1,8 @@ +#undef REQUEST +#undef __field +#undef __array +#undef __count + +#undef REQUEST_NAME +#undef REQUEST_NUM +#undef REQUEST_IDX_KIND diff --git a/arch/powerpc/perf/req-gen/perf.h b/arch/powerpc/perf/req-gen/perf.h new file mode 100644 index 00000000000000..1b122469323de8 --- /dev/null +++ b/arch/powerpc/perf/req-gen/perf.h @@ -0,0 +1,155 @@ +#ifndef LINUX_POWERPC_PERF_REQ_GEN_PERF_H_ +#define LINUX_POWERPC_PERF_REQ_GEN_PERF_H_ + +#include + +#ifndef REQUEST_FILE +#error "REQUEST_FILE must be defined before including" +#endif + +#ifndef NAME_LOWER +#error "NAME_LOWER must be defined before including" +#endif + +#ifndef NAME_UPPER +#error "NAME_UPPER must be defined before including" +#endif + +#define BE_TYPE_b1 __u8 +#define BE_TYPE_b2 __be16 +#define BE_TYPE_b4 __be32 +#define BE_TYPE_b8 __be64 + +#define BYTES_TO_BE_TYPE(bytes) \ + BE_TYPE_b##bytes + +#define CAT2_(a, b) a ## b +#define CAT2(a, b) CAT2_(a, b) +#define CAT3_(a, b, c) a ## b ## c +#define CAT3(a, b, c) CAT3_(a, b, c) + +/* + * enumerate the request values as + * _ = + */ +#define REQUEST_VALUE__(name_upper, r_name) name_upper ## _ ## r_name +#define REQUEST_VALUE_(name_upper, r_name) REQUEST_VALUE__(name_upper, r_name) +#define REQUEST_VALUE(r_name) REQUEST_VALUE_(NAME_UPPER, r_name) + +#include "_clear.h" +#define REQUEST_(r_name, r_value, r_idx_1, r_fields) \ + REQUEST_VALUE(r_name) = r_value, +enum CAT2(NAME_LOWER, _requests) { +#include REQUEST_FILE +}; + +/* + * For each request: + * struct _ { + * r_fields + * }; + */ +#include "_clear.h" +#define STRUCT_NAME__(name_lower, r_name) name_lower ## _ ## r_name +#define STRUCT_NAME_(name_lower, r_name) STRUCT_NAME__(name_lower, r_name) +#define STRUCT_NAME(r_name) STRUCT_NAME_(NAME_LOWER, r_name) +#define REQUEST_(r_name, r_value, r_idx_1, r_fields) \ +struct STRUCT_NAME(r_name) { \ + r_fields \ +}; +#define __field_(r_name, r_value, r_idx_1, f_offset, f_bytes, f_name) \ + BYTES_TO_BE_TYPE(f_bytes) f_name; +#define __count_(r_name, r_value, r_idx_1, f_offset, f_bytes, f_name) \ + __field_(r_name, r_value, r_idx_1, f_offset, f_bytes, f_name) +#define __array_(r_name, r_value, r_idx_1, a_offset, a_bytes, a_name) \ + __u8 a_name[a_bytes]; + +#include REQUEST_FILE + +/* + * Generate a check of the field offsets + * _assert_offsets_correct() + */ +#include "_clear.h" +#define REQUEST_(r_name, r_value, index, r_fields) \ +r_fields +#define __field_(r_name, r_value, r_idx_1, f_offset, f_size, f_name) \ + BUILD_BUG_ON(offsetof(struct STRUCT_NAME(r_name), f_name) != f_offset); +#define __count_(r_name, r_value, r_idx_1, c_offset, c_size, c_name) \ + __field_(r_name, r_value, r_idx_1, c_offset, c_size, c_name) +#define __array_(r_name, r_value, r_idx_1, a_offset, a_size, a_name) \ + __field_(r_name, r_value, r_idx_1, a_offset, a_size, a_name) + +static inline void CAT2(NAME_LOWER, _assert_offsets_correct)(void) +{ +#include REQUEST_FILE +} + +/* + * Generate event attributes: + * PMU_EVENT_ATTR_STRING(_, + * _event_attr__, + * "request=" + * "starting_index=" + * "counter_info_version=CURRENT_COUNTER_INFO_VERSION" + * "length=" + * "offset=") + * + * TODO: counter_info_version may need to vary, we should interperate the + * value to some extent + */ +#define EVENT_ATTR_NAME__(name, r_name, c_name) \ + name ## _event_attr_ ## r_name ## _ ## c_name +#define EVENT_ATTR_NAME_(name, r_name, c_name) \ + EVENT_ATTR_NAME__(name, r_name, c_name) +#define EVENT_ATTR_NAME(r_name, c_name) \ + EVENT_ATTR_NAME_(NAME_LOWER, r_name, c_name) + +#include "_clear.h" +#define __field_(r_name, r_value, r_idx_1, f_offset, f_size, f_name) +#define __array_(r_name, r_value, r_idx_1, a_offset, a_size, a_name) +#define __count_(r_name, r_value, r_idx_1, c_offset, c_size, c_name) \ +PMU_EVENT_ATTR_STRING( \ + CAT3(r_name, _, c_name), \ + EVENT_ATTR_NAME(r_name, c_name), \ + "request=" __stringify(r_value) "," \ + r_idx_1 "," \ + "counter_info_version=" \ + __stringify(COUNTER_INFO_VERSION_CURRENT) "," \ + "length=" #c_size "," \ + "offset=" #c_offset) +#define REQUEST_(r_name, r_value, r_idx_1, r_fields) \ + r_fields + +#include REQUEST_FILE + +/* + * Define event attribute array + * static struct attribute *hv_gpci_event_attrs[] = { + * &_event_attr__.attr, + * }; + */ +#include "_clear.h" +#define __field_(r_name, r_value, r_idx_1, f_offset, f_size, f_name) +#define __count_(r_name, r_value, r_idx_1, c_offset, c_size, c_name) \ + &EVENT_ATTR_NAME(r_name, c_name).attr.attr, +#define __array_(r_name, r_value, r_idx_1, a_offset, a_size, a_name) +#define REQUEST_(r_name, r_value, r_idx_1, r_fields) \ + r_fields + +static __maybe_unused struct attribute *hv_gpci_event_attrs[] = { +#include REQUEST_FILE + NULL +}; + +/* cleanup */ +#include "_clear.h" +#undef EVENT_ATTR_NAME +#undef EVENT_ATTR_NAME_ +#undef BIT_NAME +#undef BIT_NAME_ +#undef STRUCT_NAME +#undef REQUEST_VALUE +#undef REQUEST_VALUE_ + +#endif From 97bf2640184f4fb2b2bf2c58ae3112768a6174fa Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 30 Jan 2015 13:46:02 -0800 Subject: [PATCH 381/601] powerpc/perf/hv-gpci: add the remaining gpci requests Add the remaining gpci requests that contain counters suitable for use by perf. Omit those that don't contain any counters (but note their ommision). Signed-off-by: Cody P Schafer Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- arch/powerpc/perf/hv-gpci-requests.h | 187 ++++++++++++++++++++++++++- 1 file changed, 186 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h index 80085444b1a0ea..acd17648cd188f 100644 --- a/arch/powerpc/perf/hv-gpci-requests.h +++ b/arch/powerpc/perf/hv-gpci-requests.h @@ -20,7 +20,9 @@ * * - starting_index_kind is one of the following, depending on the event: * - * chip_id: hardware chip id or -1 for current hw chip + * hw_chip_id: hardware chip id or -1 for current hw chip + * partition_id + * sibling_part_id, * phys_processor_idx: * 0xffffffffffffffff: or -1, which means it is irrelavant for the event * @@ -63,6 +65,33 @@ REQUEST(__count(0, 8, processor_time_in_timebase_cycles) ) #include I(REQUEST_END) +#define REQUEST_NAME entitled_capped_uncapped_donated_idle_timebase_by_partition +#define REQUEST_NUM 0x20 +#define REQUEST_IDX_KIND "sibling_part_id=?" +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 8, partition_id) + __count(0x8, 8, entitled_cycles) + __count(0x10, 8, consumed_capped_cycles) + __count(0x18, 8, consumed_uncapped_cycles) + __count(0x20, 8, cycles_donated) + __count(0x28, 8, purr_idle_cycles) +) +#include I(REQUEST_END) + +/* + * Not available for counter_info_version >= 0x8, use + * run_instruction_cycles_by_partition(0x100) instead. + */ +#define REQUEST_NAME run_instructions_run_cycles_by_partition +#define REQUEST_NUM 0x30 +#define REQUEST_IDX_KIND "sibling_part_id=?" +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 8, partition_id) + __count(0x8, 8, instructions_completed) + __count(0x10, 8, cycles) +) +#include I(REQUEST_END) + #define REQUEST_NAME system_performance_capabilities #define REQUEST_NUM 0x40 #define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" @@ -73,4 +102,160 @@ REQUEST(__field(0, 1, perf_collect_privileged) ) #include I(REQUEST_END) +#define REQUEST_NAME processor_bus_utilization_abc_links +#define REQUEST_NUM 0x50 +#define REQUEST_IDX_KIND "hw_chip_id=?" +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 4, hw_chip_id) + __array(0x4, 0xC, reserved1) + __count(0x10, 8, total_link_cycles) + __count(0x18, 8, idle_cycles_for_a_link) + __count(0x20, 8, idle_cycles_for_b_link) + __count(0x28, 8, idle_cycles_for_c_link) + __array(0x30, 0x20, reserved2) +) +#include I(REQUEST_END) + +#define REQUEST_NAME processor_bus_utilization_wxyz_links +#define REQUEST_NUM 0x60 +#define REQUEST_IDX_KIND "hw_chip_id=?" +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 4, hw_chip_id) + __array(0x4, 0xC, reserved1) + __count(0x10, 8, total_link_cycles) + __count(0x18, 8, idle_cycles_for_w_link) + __count(0x20, 8, idle_cycles_for_x_link) + __count(0x28, 8, idle_cycles_for_y_link) + __count(0x30, 8, idle_cycles_for_z_link) + __array(0x38, 0x28, reserved2) +) +#include I(REQUEST_END) + +#define REQUEST_NAME processor_bus_utilization_gx_links +#define REQUEST_NUM 0x70 +#define REQUEST_IDX_KIND "hw_chip_id=?" +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 4, hw_chip_id) + __array(0x4, 0xC, reserved1) + __count(0x10, 8, gx0_in_address_cycles) + __count(0x18, 8, gx0_in_data_cycles) + __count(0x20, 8, gx0_in_retries) + __count(0x28, 8, gx0_in_bus_cycles) + __count(0x30, 8, gx0_in_cycles_total) + __count(0x38, 8, gx0_out_address_cycles) + __count(0x40, 8, gx0_out_data_cycles) + __count(0x48, 8, gx0_out_retries) + __count(0x50, 8, gx0_out_bus_cycles) + __count(0x58, 8, gx0_out_cycles_total) + __count(0x60, 8, gx1_in_address_cycles) + __count(0x68, 8, gx1_in_data_cycles) + __count(0x70, 8, gx1_in_retries) + __count(0x78, 8, gx1_in_bus_cycles) + __count(0x80, 8, gx1_in_cycles_total) + __count(0x88, 8, gx1_out_address_cycles) + __count(0x90, 8, gx1_out_data_cycles) + __count(0x98, 8, gx1_out_retries) + __count(0xA0, 8, gx1_out_bus_cycles) + __count(0xA8, 8, gx1_out_cycles_total) +) +#include I(REQUEST_END) + +#define REQUEST_NAME processor_bus_utilization_mc_links +#define REQUEST_NUM 0x80 +#define REQUEST_IDX_KIND "hw_chip_id=?" +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 4, hw_chip_id) + __array(0x4, 0xC, reserved1) + __count(0x10, 8, mc0_frames) + __count(0x18, 8, mc0_reads) + __count(0x20, 8, mc0_write) + __count(0x28, 8, mc0_total_cycles) + __count(0x30, 8, mc1_frames) + __count(0x38, 8, mc1_reads) + __count(0x40, 8, mc1_writes) + __count(0x48, 8, mc1_total_cycles) +) +#include I(REQUEST_END) + +/* Processor_config (0x90) skipped, no counters */ +/* Current_processor_frequency (0x91) skipped, no counters */ + +#define REQUEST_NAME processor_core_utilization +#define REQUEST_NUM 0x94 +#define REQUEST_IDX_KIND "phys_processor_idx=?" +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 4, phys_processor_idx) + __field(0x4, 4, hw_processor_id) + __count(0x8, 8, cycles_across_any_thread) + __count(0x10, 8, timebase_at_collection) + __count(0x18, 8, purr_cycles) + __count(0x20, 8, sum_of_cycles_across_all_threads) + __count(0x28, 8, instructions_completed) +) +#include I(REQUEST_END) + +/* Processor_core_power_mode (0x95) skipped, no counters */ +/* Affinity_domain_information_by_virtual_processor (0xA0) skipped, + * no counters */ +/* Affinity_domain_information_by_domain (0xB0) skipped, no counters */ +/* Affinity_domain_information_by_partition (0xB1) skipped, no counters */ +/* Physical_memory_info (0xC0) skipped, no counters */ +/* Processor_bus_topology (0xD0) skipped, no counters */ + +#define REQUEST_NAME partition_hypervisor_queuing_times +#define REQUEST_NUM 0xE0 +#define REQUEST_IDX_KIND "partition_id=?" +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 2, partition_id) + __array(0x2, 6, reserved1) + __count(0x8, 8, time_waiting_for_entitlement) + __count(0x10, 8, times_waited_for_entitlement) + __count(0x18, 8, time_waiting_for_phys_processor) + __count(0x20, 8, times_waited_for_phys_processor) + __count(0x28, 8, dispatches_on_home_core) + __count(0x30, 8, dispatches_on_home_primary_affinity_domain) + __count(0x38, 8, dispatches_on_home_secondary_affinity_domain) + __count(0x40, 8, dispatches_off_home_secondary_affinity_domain) + __count(0x48, 8, dispatches_on_dedicated_processor_donating_cycles) +) +#include I(REQUEST_END) + +#define REQUEST_NAME system_hypervisor_times +#define REQUEST_NUM 0xF0 +#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#include I(REQUEST_BEGIN) +REQUEST(__count(0, 8, time_spent_to_dispatch_virtual_processors) + __count(0x8, 8, time_spent_processing_virtual_processor_timers) + __count(0x10, 8, time_spent_managing_partitions_over_entitlement) + __count(0x18, 8, time_spent_on_system_management) +) +#include I(REQUEST_END) + +#define REQUEST_NAME system_tlbie_count_and_time +#define REQUEST_NUM 0xF4 +#define REQUEST_IDX_KIND "starting_index=0xffffffffffffffff" +#include I(REQUEST_BEGIN) +REQUEST(__count(0, 8, tlbie_instructions_issued) + /* + * FIXME: The spec says the offset here is 0x10, which I suspect + * is wrong. + */ + __count(0x8, 8, time_spent_issuing_tlbies) +) +#include I(REQUEST_END) + +#define REQUEST_NAME partition_instruction_count_and_time +#define REQUEST_NUM 0x100 +#define REQUEST_IDX_KIND "partition_id=?" +#include I(REQUEST_BEGIN) +REQUEST(__field(0, 2, partition_id) + __array(0x2, 0x6, reserved1) + __count(0x8, 8, instructions_performed) + __count(0x10, 8, time_collected) +) +#include I(REQUEST_END) + +/* set_mmcrh (0x80001000) skipped, no counters */ +/* retrieve_hpmcx (0x80002000) skipped, no counters */ + #include "req-gen/_end.h" From 5c65670c9bfa3be1234d29f36b742897c0360ef3 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 30 Jan 2015 13:46:03 -0800 Subject: [PATCH 382/601] powerpc/perf/hv-24x7: Document sysfs event description entries Signed-off-by: Cody P Schafer Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Michael Ellerman --- .../sysfs-bus-event_source-devices-hv_24x7 | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 index 32f3f5f8bba211..f893337570c1b4 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 @@ -21,3 +21,25 @@ Contact: Linux on PowerPC Developer List Description: Exposes the "version" field of the 24x7 catalog. This is also extractable from the provided binary "catalog" sysfs entry. + +What: /sys/bus/event_source/devices/hv_24x7/event_descs/ +Date: February 2014 +Contact: Linux on PowerPC Developer List +Description: + Provides the description of a particular event as provided by + the firmware. If firmware does not provide a description, no + file will be created. + + Note that the event-name lacks the domain suffix appended for + events in the events/ dir. + +What: /sys/bus/event_source/devices/hv_24x7/event_long_descs/ +Date: February 2014 +Contact: Linux on PowerPC Developer List +Description: + Provides the "long" description of a particular event as + provided by the firmware. If firmware does not provide a + description, no file will be created. + + Note that the event-name lacks the domain suffix appended for + events in the events/ dir. From c602894814adc93589dde028182101818c5f938b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 26 Jan 2015 20:38:39 -0500 Subject: [PATCH 383/601] tracing: Make tracing_init_dentry_tr() static tracing_init_dentry_tr() is not used outside of trace.c, it should be static. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- kernel/trace/trace.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f82e53b0e5a7ca..2668a0d742eeda 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5815,7 +5815,7 @@ static __init int register_snapshot_cmd(void) static inline __init int register_snapshot_cmd(void) { return 0; } #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */ -struct dentry *tracing_init_dentry_tr(struct trace_array *tr) +static struct dentry *tracing_init_dentry_tr(struct trace_array *tr) { if (tr->dir) return tr->dir; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 0eddfeb05fee13..dd8205a35760d7 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -542,7 +542,6 @@ struct dentry *trace_create_file(const char *name, void *data, const struct file_operations *fops); -struct dentry *tracing_init_dentry_tr(struct trace_array *tr); struct dentry *tracing_init_dentry(void); struct ring_buffer_event; From 7eeafbcab47fe9860e5106286db83d60e3f35644 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 26 Jan 2015 21:00:48 -0500 Subject: [PATCH 384/601] tracing: Separate out initializing top level dir from instances The top level trace array is treated a little different than the instances, as it has to deal with more of the general tracing. The tr->dir is the tracing directory, which is an immutable dentry, where as the tr->dir of instances are the dentry that was created, and can be destroyed later. These should have different functions accessing them. As only tracing_init_dentry() deals with the top level array, fold the code for it into that function, and remove the trace_init_dentry_tr() that was also used by the instances to get their directory dentry. Add a tracing_get_dentry() to just get the tracing dir entry for instances as well as the top level array. Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 51 ++++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2668a0d742eeda..5afce60e1b68e4 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5815,28 +5815,11 @@ static __init int register_snapshot_cmd(void) static inline __init int register_snapshot_cmd(void) { return 0; } #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */ -static struct dentry *tracing_init_dentry_tr(struct trace_array *tr) +static struct dentry *tracing_get_dentry(struct trace_array *tr) { - if (tr->dir) - return tr->dir; - - if (!debugfs_initialized()) - return ERR_PTR(-ENODEV); - - if (tr->flags & TRACE_ARRAY_FL_GLOBAL) - tr->dir = debugfs_create_dir("tracing", NULL); - - if (!tr->dir) - pr_warn_once("Could not create debugfs directory 'tracing'\n"); - return tr->dir; } -struct dentry *tracing_init_dentry(void) -{ - return tracing_init_dentry_tr(&global_trace); -} - static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) { struct dentry *d_tracer; @@ -5844,7 +5827,7 @@ static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu) if (tr->percpu_dir) return tr->percpu_dir; - d_tracer = tracing_init_dentry_tr(tr); + d_tracer = tracing_get_dentry(tr); if (IS_ERR(d_tracer)) return NULL; @@ -6047,7 +6030,7 @@ static struct dentry *trace_options_init_dentry(struct trace_array *tr) if (tr->options) return tr->options; - d_tracer = tracing_init_dentry_tr(tr); + d_tracer = tracing_get_dentry(tr); if (IS_ERR(d_tracer)) return NULL; @@ -6532,6 +6515,33 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer) } +/** + * tracing_init_dentry - initialize top level trace array + * + * This is called when creating files or directories in the tracing + * directory. It is called via fs_initcall() by any of the boot up code + * and expects to return the dentry of the top level tracing directory. + */ +struct dentry *tracing_init_dentry(void) +{ + struct trace_array *tr = &global_trace; + + if (tr->dir) + return tr->dir; + + if (WARN_ON(!debugfs_initialized())) + return ERR_PTR(-ENODEV); + + tr->dir = debugfs_create_dir("tracing", NULL); + + if (!tr->dir) { + pr_warn_once("Could not create debugfs directory 'tracing'\n"); + return ERR_PTR(-ENOMEM); + } + + return tr->dir; +} + static __init int tracer_init_debugfs(void) { struct dentry *d_tracer; @@ -6772,7 +6782,6 @@ __init static int tracer_alloc_buffers(void) int ring_buf_size; int ret = -ENOMEM; - if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL)) goto out; From 9f2cdcbbb90e70e5e8fe6cd30151b8ac1c8745ac Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 28 Jan 2015 13:38:38 -0600 Subject: [PATCH 385/601] ktest: Give console process a dedicated tty Create a pseudoterminal (pty pair) to give the console a dedicated tty so it doesn't mess with ktest's terminal settings. Link: http://lkml.kernel.org/r/37b0127f9efad09ff4fc994334db998141e4f6ca.1422473610.git.jpoimboe@redhat.com Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt --- tools/testing/ktest/ktest.pl | 66 +++++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 9 deletions(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 6ae890a0d48681..2d319169356723 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1410,23 +1410,71 @@ sub dodie { die @_, "\n"; } -sub open_console { - my ($fp) = @_; +sub create_pty { + my ($ptm, $pts) = @_; + my $tmp; + my $TIOCSPTLCK = 0x40045431; + my $TIOCGPTN = 0x80045430; + + sysopen($ptm, "/dev/ptmx", O_RDWR | O_NONBLOCK) or + dodie "Cant open /dev/ptmx"; + + # unlockpt() + $tmp = pack("i", 0); + ioctl($ptm, $TIOCSPTLCK, $tmp) or + dodie "ioctl TIOCSPTLCK for /dev/ptmx failed"; + + # ptsname() + ioctl($ptm, $TIOCGPTN, $tmp) or + dodie "ioctl TIOCGPTN for /dev/ptmx failed"; + $tmp = unpack("i", $tmp); + + sysopen($pts, "/dev/pts/$tmp", O_RDWR | O_NONBLOCK) or + dodie "Can't open /dev/pts/$tmp"; +} + +sub exec_console { + my ($ptm, $pts) = @_; + + close($ptm); + + close(\*STDIN); + close(\*STDOUT); + close(\*STDERR); - my $flags; + open(\*STDIN, '<&', $pts); + open(\*STDOUT, '>&', $pts); + open(\*STDERR, '>&', $pts); + + close($pts); + + exec $console or + dodie "Can't open console $console"; +} + +sub open_console { + my ($ptm) = @_; + my $pts = \*PTSFD; + my $pid; # save terminal settings $stty = `stty -g`; - my $pid = open($fp, "$console|") or - dodie "Can't open console $console"; + create_pty($ptm, $pts); - $flags = fcntl($fp, F_GETFL, 0) or - dodie "Can't get flags for the socket: $!"; - $flags = fcntl($fp, F_SETFL, $flags | O_NONBLOCK) or - dodie "Can't set flags for the socket: $!"; + $pid = fork; + + if (!$pid) { + # child + exec_console($ptm, $pts) + } + + # parent + close($pts); return $pid; + + open(PTSFD, "Stop perl from warning about single use of PTSFD"); } sub close_console { From 9d2f7f051b8917305ea20ed79ff08254ea73f26d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 28 Jan 2015 13:38:39 -0600 Subject: [PATCH 386/601] ktest: Enable user input to the console Allow the user to send input to the console by putting the terminal in cbreak mode (to allow reading stdin one character at a time) and copying all stdin data to the console's pty. Link: http://lkml.kernel.org/r/bb1bbe7d202c95a3ce7894cfffdd8c725875978e.1422473610.git.jpoimboe@redhat.com Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt --- tools/testing/ktest/ktest.pl | 51 +++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 2d319169356723..ef1d99f3859cf0 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -178,7 +178,7 @@ my $localversion; my $iteration = 0; my $successes = 0; -my $stty; +my $stty_orig; my $bisect_good; my $bisect_bad; @@ -1458,7 +1458,11 @@ sub open_console { my $pid; # save terminal settings - $stty = `stty -g`; + $stty_orig = `stty -g`; + + # place terminal in cbreak mode so that stdin can be read one character at + # a time without having to wait for a newline + system("stty -icanon -echo -icrnl"); create_pty($ptm, $pts); @@ -1487,7 +1491,7 @@ sub close_console { close($fp); # restore terminal settings - system("stty $stty"); + system("stty $stty_orig"); } sub start_monitor { @@ -1827,7 +1831,9 @@ sub wait_for_input { my ($fp, $time) = @_; my $rin; - my $ready; + my $rout; + my $nr; + my $buf; my $line; my $ch; @@ -1837,21 +1843,36 @@ sub wait_for_input $rin = ''; vec($rin, fileno($fp), 1) = 1; - ($ready, $time) = select($rin, undef, undef, $time); + vec($rin, fileno(\*STDIN), 1) = 1; - $line = ""; + while (1) { + $nr = select($rout=$rin, undef, undef, $time); - # try to read one char at a time - while (sysread $fp, $ch, 1) { - $line .= $ch; - last if ($ch eq "\n"); - } + if ($nr <= 0) { + return undef; + } - if (!length($line)) { - return undef; - } + # copy data from stdin to the console + if (vec($rout, fileno(\*STDIN), 1) == 1) { + sysread(\*STDIN, $buf, 1000); + syswrite($fp, $buf, 1000); + next; + } - return $line; + $line = ""; + + # try to read one char at a time + while (sysread $fp, $ch, 1) { + $line .= $ch; + last if ($ch eq "\n"); + } + + if (!length($line)) { + return undef; + } + + return $line; + } } sub reboot_to { From 4bf6e1fc992a19e3a1ce7798df969817467c4360 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 29 Jan 2015 10:06:07 -0500 Subject: [PATCH 387/601] ktest: Print build,install,boot,test times at success and failure Since both success and failure may shortcut and exit ktest, it is better to print the status times there too. Once times are printed, the values for the times are reset, so they will not print more than once. Signed-off-by: Steven Rostedt --- tools/testing/ktest/ktest.pl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index ef1d99f3859cf0..ca20ce2dc8b685 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1643,6 +1643,8 @@ sub fail { $name = " ($test_name)"; } + print_times; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; doprint "KTEST RESULT: TEST $i$name Failed: ", @_, "\n"; @@ -2494,6 +2496,8 @@ sub success { $name = " ($test_name)"; } + print_times; + doprint "\n\n*******************************************\n"; doprint "*******************************************\n"; doprint "KTEST RESULT: TEST $i$name SUCCESS!!!! **\n"; From 1cb9e64298e9af6b6a8962d43abefdf0b8a635f9 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 29 Jan 2015 20:54:53 -0600 Subject: [PATCH 388/601] ktest: Cleanup terminal on dodie() failure If dodie() is called with the console open, restore the terminal's original settings before dying. Link: http://lkml.kernel.org/r/20150130025453.GB20952@treble.redhat.com Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt --- tools/testing/ktest/ktest.pl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index ca20ce2dc8b685..840803b3cd41d3 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1407,6 +1407,11 @@ sub dodie { print " See $opt{LOG_FILE} for more info.\n"; } + if ($monitor_cnt) { + # restore terminal settings + system("stty $stty_orig"); + } + die @_, "\n"; } @@ -1449,7 +1454,7 @@ sub exec_console { close($pts); exec $console or - dodie "Can't open console $console"; + die "Can't open console $console"; } sub open_console { From d476d94f180af3f0fca77394651d4a98f4df1c54 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 2 Feb 2015 16:44:39 +0000 Subject: [PATCH 389/601] arm64: compat: Remove incorrect comment in compat_siginfo The comment was right originally but the _pad array size was wrong. It was fixed in the meantime but the comment not updated. Reported-by: Peter Maydell Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/compat.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index 3fb053fa6e981b..7fbed6919b5404 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -161,7 +161,6 @@ typedef struct compat_siginfo { int si_code; union { - /* The padding is the same size as AArch64. */ int _pad[128/sizeof(int) - 3]; /* kill() */ From cd70d4659ff3ab7e25b29a6f70be5bcfd078db1f Mon Sep 17 00:00:00 2001 From: Emil Medve Date: Wed, 28 Jan 2015 08:34:33 -0600 Subject: [PATCH 390/601] iommu/fsl: Various cleanups Currently a PAMU driver patch is very likely to receive some checkpatch complaints about the code in the context of the patch. This patch is an attempt to fix most of that and make the driver more readable Also fixed a subset of the sparse and coccinelle reported issues. Signed-off-by: Emil Medve Signed-off-by: Joerg Roedel --- arch/powerpc/include/asm/fsl_pamu_stash.h | 4 +- drivers/iommu/fsl_pamu.c | 203 ++++++++++------------ drivers/iommu/fsl_pamu.h | 15 +- drivers/iommu/fsl_pamu_domain.c | 173 ++++++++---------- 4 files changed, 179 insertions(+), 216 deletions(-) diff --git a/arch/powerpc/include/asm/fsl_pamu_stash.h b/arch/powerpc/include/asm/fsl_pamu_stash.h index caa1b21c25cd20..38311c98eed9ea 100644 --- a/arch/powerpc/include/asm/fsl_pamu_stash.h +++ b/arch/powerpc/include/asm/fsl_pamu_stash.h @@ -32,8 +32,8 @@ enum pamu_stash_target { */ struct pamu_stash_attribute { - u32 cpu; /* cpu number */ - u32 cache; /* cache to stash to: L1,L2,L3 */ + u32 cpu; /* cpu number */ + u32 cache; /* cache to stash to: L1,L2,L3 */ }; #endif /* __FSL_PAMU_STASH_H */ diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index 6ee947e41870f5..abeedc9a78c27c 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -18,22 +18,13 @@ #define pr_fmt(fmt) "fsl-pamu: %s: " fmt, __func__ -#include -#include -#include -#include -#include -#include +#include "fsl_pamu.h" + #include -#include -#include -#include #include -#include -#include -#include -#include "fsl_pamu.h" +#include +#include /* define indexes for each operation mapping scenario */ #define OMI_QMAN 0x00 @@ -44,13 +35,13 @@ #define make64(high, low) (((u64)(high) << 32) | (low)) struct pamu_isr_data { - void __iomem *pamu_reg_base; /* Base address of PAMU regs*/ + void __iomem *pamu_reg_base; /* Base address of PAMU regs */ unsigned int count; /* The number of PAMUs */ }; static struct paace *ppaact; static struct paace *spaact; -static struct ome *omt; +static struct ome *omt __initdata; /* * Table for matching compatible strings, for device tree @@ -58,14 +49,13 @@ static struct ome *omt; * "fsl,qoriq-device-config-2.0" corresponds to T4 & B4 * SOCs. For the older SOCs "fsl,qoriq-device-config-1.0" * string would be used. -*/ -static const struct of_device_id guts_device_ids[] = { + */ +static const struct of_device_id guts_device_ids[] __initconst = { { .compatible = "fsl,qoriq-device-config-1.0", }, { .compatible = "fsl,qoriq-device-config-2.0", }, {} }; - /* * Table for matching compatible strings, for device tree * L3 cache controller node. @@ -73,7 +63,7 @@ static const struct of_device_id guts_device_ids[] = { * "fsl,b4860-l3-cache-controller" corresponds to B4 & * "fsl,p4080-l3-cache-controller" corresponds to other, * SOCs. -*/ + */ static const struct of_device_id l3_device_ids[] = { { .compatible = "fsl,t4240-l3-cache-controller", }, { .compatible = "fsl,b4860-l3-cache-controller", }, @@ -85,7 +75,7 @@ static const struct of_device_id l3_device_ids[] = { static u32 max_subwindow_count; /* Pool for fspi allocation */ -struct gen_pool *spaace_pool; +static struct gen_pool *spaace_pool; /** * pamu_get_max_subwin_cnt() - Return the maximum supported @@ -170,7 +160,7 @@ int pamu_disable_liodn(int liodn) static unsigned int map_addrspace_size_to_wse(phys_addr_t addrspace_size) { /* Bug if not a power of 2 */ - BUG_ON((addrspace_size & (addrspace_size - 1))); + BUG_ON(addrspace_size & (addrspace_size - 1)); /* window size is 2^(WSE+1) bytes */ return fls64(addrspace_size) - 2; @@ -179,8 +169,8 @@ static unsigned int map_addrspace_size_to_wse(phys_addr_t addrspace_size) /* Derive the PAACE window count encoding for the subwindow count */ static unsigned int map_subwindow_cnt_to_wce(u32 subwindow_cnt) { - /* window count is 2^(WCE+1) bytes */ - return __ffs(subwindow_cnt) - 1; + /* window count is 2^(WCE+1) bytes */ + return __ffs(subwindow_cnt) - 1; } /* @@ -241,7 +231,7 @@ static struct paace *pamu_get_spaace(struct paace *paace, u32 wnum) * If no SPAACE entry is available or the allocator can not reserve the required * number of contiguous entries function returns ULONG_MAX indicating a failure. * -*/ + */ static unsigned long pamu_get_fspi_and_allocate(u32 subwin_cnt) { unsigned long spaace_addr; @@ -288,9 +278,8 @@ int pamu_update_paace_stash(int liodn, u32 subwin, u32 value) } if (subwin) { paace = pamu_get_spaace(paace, subwin - 1); - if (!paace) { + if (!paace) return -ENOENT; - } } set_bf(paace->impl_attr, PAACE_IA_CID, value); @@ -311,14 +300,12 @@ int pamu_disable_spaace(int liodn, u32 subwin) } if (subwin) { paace = pamu_get_spaace(paace, subwin - 1); - if (!paace) { + if (!paace) return -ENOENT; - } - set_bf(paace->addr_bitfields, PAACE_AF_V, - PAACE_V_INVALID); + set_bf(paace->addr_bitfields, PAACE_AF_V, PAACE_V_INVALID); } else { set_bf(paace->addr_bitfields, PAACE_AF_AP, - PAACE_AP_PERMS_DENIED); + PAACE_AP_PERMS_DENIED); } mb(); @@ -326,7 +313,6 @@ int pamu_disable_spaace(int liodn, u32 subwin) return 0; } - /** * pamu_config_paace() - Sets up PPAACE entry for specified liodn * @@ -352,7 +338,8 @@ int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size, unsigned long fspi; if ((win_size & (win_size - 1)) || win_size < PAMU_PAGE_SIZE) { - pr_debug("window size too small or not a power of two %pa\n", &win_size); + pr_debug("window size too small or not a power of two %pa\n", + &win_size); return -EINVAL; } @@ -362,13 +349,12 @@ int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size, } ppaace = pamu_get_ppaace(liodn); - if (!ppaace) { + if (!ppaace) return -ENOENT; - } /* window size is 2^(WSE+1) bytes */ set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, - map_addrspace_size_to_wse(win_size)); + map_addrspace_size_to_wse(win_size)); pamu_init_ppaace(ppaace); @@ -442,7 +428,6 @@ int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin, { struct paace *paace; - /* setup sub-windows */ if (!subwin_cnt) { pr_debug("Invalid subwindow count\n"); @@ -510,11 +495,11 @@ int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin, } /** -* get_ome_index() - Returns the index in the operation mapping table -* for device. -* @*omi_index: pointer for storing the index value -* -*/ + * get_ome_index() - Returns the index in the operation mapping table + * for device. + * @*omi_index: pointer for storing the index value + * + */ void get_ome_index(u32 *omi_index, struct device *dev) { if (of_device_is_compatible(dev->of_node, "fsl,qman-portal")) @@ -544,9 +529,10 @@ u32 get_stash_id(u32 stash_dest_hint, u32 vcpu) if (stash_dest_hint == PAMU_ATTR_CACHE_L3) { node = of_find_matching_node(NULL, l3_device_ids); if (node) { - prop = of_get_property(node, "cache-stash-id", 0); + prop = of_get_property(node, "cache-stash-id", NULL); if (!prop) { - pr_debug("missing cache-stash-id at %s\n", node->full_name); + pr_debug("missing cache-stash-id at %s\n", + node->full_name); of_node_put(node); return ~(u32)0; } @@ -570,9 +556,10 @@ u32 get_stash_id(u32 stash_dest_hint, u32 vcpu) /* find the hwnode that represents the cache */ for (cache_level = PAMU_ATTR_CACHE_L1; (cache_level < PAMU_ATTR_CACHE_L3) && found; cache_level++) { if (stash_dest_hint == cache_level) { - prop = of_get_property(node, "cache-stash-id", 0); + prop = of_get_property(node, "cache-stash-id", NULL); if (!prop) { - pr_debug("missing cache-stash-id at %s\n", node->full_name); + pr_debug("missing cache-stash-id at %s\n", + node->full_name); of_node_put(node); return ~(u32)0; } @@ -580,10 +567,10 @@ u32 get_stash_id(u32 stash_dest_hint, u32 vcpu) return be32_to_cpup(prop); } - prop = of_get_property(node, "next-level-cache", 0); + prop = of_get_property(node, "next-level-cache", NULL); if (!prop) { pr_debug("can't find next-level-cache at %s\n", - node->full_name); + node->full_name); of_node_put(node); return ~(u32)0; /* can't traverse any further */ } @@ -598,7 +585,7 @@ u32 get_stash_id(u32 stash_dest_hint, u32 vcpu) } pr_debug("stash dest not found for %d on vcpu %d\n", - stash_dest_hint, vcpu); + stash_dest_hint, vcpu); return ~(u32)0; } @@ -612,7 +599,7 @@ u32 get_stash_id(u32 stash_dest_hint, u32 vcpu) * Memory accesses to QMAN and BMAN private memory need not be coherent, so * clear the PAACE entry coherency attribute for them. */ -static void setup_qbman_paace(struct paace *ppaace, int paace_type) +static void __init setup_qbman_paace(struct paace *ppaace, int paace_type) { switch (paace_type) { case QMAN_PAACE: @@ -626,7 +613,7 @@ static void setup_qbman_paace(struct paace *ppaace, int paace_type) case QMAN_PORTAL_PAACE: set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED); ppaace->op_encode.index_ot.omi = OMI_QMAN; - /*Set DQRR and Frame stashing for the L3 cache */ + /* Set DQRR and Frame stashing for the L3 cache */ set_bf(ppaace->impl_attr, PAACE_IA_CID, get_stash_id(PAMU_ATTR_CACHE_L3, 0)); break; case BMAN_PAACE: @@ -679,7 +666,7 @@ static void __init setup_omt(struct ome *omt) * Get the maximum number of PAACT table entries * and subwindows supported by PAMU */ -static void get_pamu_cap_values(unsigned long pamu_reg_base) +static void __init get_pamu_cap_values(unsigned long pamu_reg_base) { u32 pc_val; @@ -689,9 +676,9 @@ static void get_pamu_cap_values(unsigned long pamu_reg_base) } /* Setup PAMU registers pointing to PAACT, SPAACT and OMT */ -int setup_one_pamu(unsigned long pamu_reg_base, unsigned long pamu_reg_size, - phys_addr_t ppaact_phys, phys_addr_t spaact_phys, - phys_addr_t omt_phys) +static int __init setup_one_pamu(unsigned long pamu_reg_base, unsigned long pamu_reg_size, + phys_addr_t ppaact_phys, phys_addr_t spaact_phys, + phys_addr_t omt_phys) { u32 *pc; struct pamu_mmap_regs *pamu_regs; @@ -727,7 +714,7 @@ int setup_one_pamu(unsigned long pamu_reg_base, unsigned long pamu_reg_size, */ out_be32((u32 *)(pamu_reg_base + PAMU_PICS), - PAMU_ACCESS_VIOLATION_ENABLE); + PAMU_ACCESS_VIOLATION_ENABLE); out_be32(pc, PAMU_PC_PE | PAMU_PC_OCE | PAMU_PC_SPCC | PAMU_PC_PPCC); return 0; } @@ -757,9 +744,9 @@ static void __init setup_liodns(void) ppaace->wbah = 0; set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0); set_bf(ppaace->impl_attr, PAACE_IA_ATM, - PAACE_ATM_NO_XLATE); + PAACE_ATM_NO_XLATE); set_bf(ppaace->addr_bitfields, PAACE_AF_AP, - PAACE_AP_PERMS_ALL); + PAACE_AP_PERMS_ALL); if (of_device_is_compatible(node, "fsl,qman-portal")) setup_qbman_paace(ppaace, QMAN_PORTAL_PAACE); if (of_device_is_compatible(node, "fsl,qman")) @@ -772,7 +759,7 @@ static void __init setup_liodns(void) } } -irqreturn_t pamu_av_isr(int irq, void *arg) +static irqreturn_t pamu_av_isr(int irq, void *arg) { struct pamu_isr_data *data = arg; phys_addr_t phys; @@ -792,14 +779,16 @@ irqreturn_t pamu_av_isr(int irq, void *arg) pr_emerg("POES2=%08x\n", in_be32(p + PAMU_POES2)); pr_emerg("AVS1=%08x\n", avs1); pr_emerg("AVS2=%08x\n", in_be32(p + PAMU_AVS2)); - pr_emerg("AVA=%016llx\n", make64(in_be32(p + PAMU_AVAH), - in_be32(p + PAMU_AVAL))); + pr_emerg("AVA=%016llx\n", + make64(in_be32(p + PAMU_AVAH), + in_be32(p + PAMU_AVAL))); pr_emerg("UDAD=%08x\n", in_be32(p + PAMU_UDAD)); - pr_emerg("POEA=%016llx\n", make64(in_be32(p + PAMU_POEAH), - in_be32(p + PAMU_POEAL))); + pr_emerg("POEA=%016llx\n", + make64(in_be32(p + PAMU_POEAH), + in_be32(p + PAMU_POEAL))); phys = make64(in_be32(p + PAMU_POEAH), - in_be32(p + PAMU_POEAL)); + in_be32(p + PAMU_POEAL)); /* Assume that POEA points to a PAACE */ if (phys) { @@ -807,11 +796,12 @@ irqreturn_t pamu_av_isr(int irq, void *arg) /* Only the first four words are relevant */ for (j = 0; j < 4; j++) - pr_emerg("PAACE[%u]=%08x\n", j, in_be32(paace + j)); + pr_emerg("PAACE[%u]=%08x\n", + j, in_be32(paace + j)); } /* clear access violation condition */ - out_be32((p + PAMU_AVS1), avs1 & PAMU_AV_MASK); + out_be32(p + PAMU_AVS1, avs1 & PAMU_AV_MASK); paace = pamu_get_ppaace(avs1 >> PAMU_AVS1_LIODN_SHIFT); BUG_ON(!paace); /* check if we got a violation for a disabled LIODN */ @@ -827,13 +817,13 @@ irqreturn_t pamu_av_isr(int irq, void *arg) /* Disable the LIODN */ ret = pamu_disable_liodn(avs1 >> PAMU_AVS1_LIODN_SHIFT); BUG_ON(ret); - pr_emerg("Disabling liodn %x\n", avs1 >> PAMU_AVS1_LIODN_SHIFT); + pr_emerg("Disabling liodn %x\n", + avs1 >> PAMU_AVS1_LIODN_SHIFT); } out_be32((p + PAMU_PICS), pics); } } - return IRQ_HANDLED; } @@ -952,7 +942,7 @@ static int __init create_csd(phys_addr_t phys, size_t size, u32 csd_port_id) } if (i == 0 || i == num_laws) { - /* This should never happen*/ + /* This should never happen */ ret = -ENOENT; goto error; } @@ -998,26 +988,27 @@ static int __init create_csd(phys_addr_t phys, size_t size, u32 csd_port_id) static const struct { u32 svr; u32 port_id; -} port_id_map[] = { - {0x82100010, 0xFF000000}, /* P2040 1.0 */ - {0x82100011, 0xFF000000}, /* P2040 1.1 */ - {0x82100110, 0xFF000000}, /* P2041 1.0 */ - {0x82100111, 0xFF000000}, /* P2041 1.1 */ - {0x82110310, 0xFF000000}, /* P3041 1.0 */ - {0x82110311, 0xFF000000}, /* P3041 1.1 */ - {0x82010020, 0xFFF80000}, /* P4040 2.0 */ - {0x82000020, 0xFFF80000}, /* P4080 2.0 */ - {0x82210010, 0xFC000000}, /* P5010 1.0 */ - {0x82210020, 0xFC000000}, /* P5010 2.0 */ - {0x82200010, 0xFC000000}, /* P5020 1.0 */ - {0x82050010, 0xFF800000}, /* P5021 1.0 */ - {0x82040010, 0xFF800000}, /* P5040 1.0 */ +} port_id_map[] __initconst = { + {(SVR_P2040 << 8) | 0x10, 0xFF000000}, /* P2040 1.0 */ + {(SVR_P2040 << 8) | 0x11, 0xFF000000}, /* P2040 1.1 */ + {(SVR_P2041 << 8) | 0x10, 0xFF000000}, /* P2041 1.0 */ + {(SVR_P2041 << 8) | 0x11, 0xFF000000}, /* P2041 1.1 */ + {(SVR_P3041 << 8) | 0x10, 0xFF000000}, /* P3041 1.0 */ + {(SVR_P3041 << 8) | 0x11, 0xFF000000}, /* P3041 1.1 */ + {(SVR_P4040 << 8) | 0x20, 0xFFF80000}, /* P4040 2.0 */ + {(SVR_P4080 << 8) | 0x20, 0xFFF80000}, /* P4080 2.0 */ + {(SVR_P5010 << 8) | 0x10, 0xFC000000}, /* P5010 1.0 */ + {(SVR_P5010 << 8) | 0x20, 0xFC000000}, /* P5010 2.0 */ + {(SVR_P5020 << 8) | 0x10, 0xFC000000}, /* P5020 1.0 */ + {(SVR_P5021 << 8) | 0x10, 0xFF800000}, /* P5021 1.0 */ + {(SVR_P5040 << 8) | 0x10, 0xFF800000}, /* P5040 1.0 */ }; #define SVR_SECURITY 0x80000 /* The Security (E) bit */ static int __init fsl_pamu_probe(struct platform_device *pdev) { + struct device *dev = &pdev->dev; void __iomem *pamu_regs = NULL; struct ccsr_guts __iomem *guts_regs = NULL; u32 pamubypenr, pamu_counter; @@ -1042,22 +1033,21 @@ static int __init fsl_pamu_probe(struct platform_device *pdev) * NOTE : All PAMUs share the same LIODN tables. */ - pamu_regs = of_iomap(pdev->dev.of_node, 0); + pamu_regs = of_iomap(dev->of_node, 0); if (!pamu_regs) { - dev_err(&pdev->dev, "ioremap of PAMU node failed\n"); + dev_err(dev, "ioremap of PAMU node failed\n"); return -ENOMEM; } - of_get_address(pdev->dev.of_node, 0, &size, NULL); + of_get_address(dev->of_node, 0, &size, NULL); - irq = irq_of_parse_and_map(pdev->dev.of_node, 0); + irq = irq_of_parse_and_map(dev->of_node, 0); if (irq == NO_IRQ) { - dev_warn(&pdev->dev, "no interrupts listed in PAMU node\n"); + dev_warn(dev, "no interrupts listed in PAMU node\n"); goto error; } - data = kzalloc(sizeof(struct pamu_isr_data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) { - dev_err(&pdev->dev, "PAMU isr data memory allocation failed\n"); ret = -ENOMEM; goto error; } @@ -1067,15 +1057,14 @@ static int __init fsl_pamu_probe(struct platform_device *pdev) /* The ISR needs access to the regs, so we won't iounmap them */ ret = request_irq(irq, pamu_av_isr, 0, "pamu", data); if (ret < 0) { - dev_err(&pdev->dev, "error %i installing ISR for irq %i\n", - ret, irq); + dev_err(dev, "error %i installing ISR for irq %i\n", ret, irq); goto error; } guts_node = of_find_matching_node(NULL, guts_device_ids); if (!guts_node) { - dev_err(&pdev->dev, "could not find GUTS node %s\n", - pdev->dev.of_node->full_name); + dev_err(dev, "could not find GUTS node %s\n", + dev->of_node->full_name); ret = -ENODEV; goto error; } @@ -1083,7 +1072,7 @@ static int __init fsl_pamu_probe(struct platform_device *pdev) guts_regs = of_iomap(guts_node, 0); of_node_put(guts_node); if (!guts_regs) { - dev_err(&pdev->dev, "ioremap of GUTS node failed\n"); + dev_err(dev, "ioremap of GUTS node failed\n"); ret = -ENODEV; goto error; } @@ -1103,7 +1092,7 @@ static int __init fsl_pamu_probe(struct platform_device *pdev) p = alloc_pages(GFP_KERNEL | __GFP_ZERO, order); if (!p) { - dev_err(&pdev->dev, "unable to allocate PAACT/SPAACT/OMT block\n"); + dev_err(dev, "unable to allocate PAACT/SPAACT/OMT block\n"); ret = -ENOMEM; goto error; } @@ -1113,7 +1102,7 @@ static int __init fsl_pamu_probe(struct platform_device *pdev) /* Make sure the memory is naturally aligned */ if (ppaact_phys & ((PAGE_SIZE << order) - 1)) { - dev_err(&pdev->dev, "PAACT/OMT block is unaligned\n"); + dev_err(dev, "PAACT/OMT block is unaligned\n"); ret = -ENOMEM; goto error; } @@ -1121,7 +1110,7 @@ static int __init fsl_pamu_probe(struct platform_device *pdev) spaact = (void *)ppaact + (PAGE_SIZE << get_order(PAACT_SIZE)); omt = (void *)spaact + (PAGE_SIZE << get_order(SPAACT_SIZE)); - dev_dbg(&pdev->dev, "ppaact virt=%p phys=%pa\n", ppaact, &ppaact_phys); + dev_dbg(dev, "ppaact virt=%p phys=%pa\n", ppaact, &ppaact_phys); /* Check to see if we need to implement the work-around on this SOC */ @@ -1129,21 +1118,19 @@ static int __init fsl_pamu_probe(struct platform_device *pdev) for (i = 0; i < ARRAY_SIZE(port_id_map); i++) { if (port_id_map[i].svr == (mfspr(SPRN_SVR) & ~SVR_SECURITY)) { csd_port_id = port_id_map[i].port_id; - dev_dbg(&pdev->dev, "found matching SVR %08x\n", + dev_dbg(dev, "found matching SVR %08x\n", port_id_map[i].svr); break; } } if (csd_port_id) { - dev_dbg(&pdev->dev, "creating coherency subdomain at address " - "%pa, size %zu, port id 0x%08x", &ppaact_phys, - mem_size, csd_port_id); + dev_dbg(dev, "creating coherency subdomain at address %pa, size %zu, port id 0x%08x", + &ppaact_phys, mem_size, csd_port_id); ret = create_csd(ppaact_phys, mem_size, csd_port_id); if (ret) { - dev_err(&pdev->dev, "could not create coherence " - "subdomain\n"); + dev_err(dev, "could not create coherence subdomain\n"); return ret; } } @@ -1154,7 +1141,7 @@ static int __init fsl_pamu_probe(struct platform_device *pdev) spaace_pool = gen_pool_create(ilog2(sizeof(struct paace)), -1); if (!spaace_pool) { ret = -ENOMEM; - dev_err(&pdev->dev, "PAMU : failed to allocate spaace gen pool\n"); + dev_err(dev, "Failed to allocate spaace gen pool\n"); goto error; } @@ -1167,9 +1154,9 @@ static int __init fsl_pamu_probe(struct platform_device *pdev) for (pamu_reg_off = 0, pamu_counter = 0x80000000; pamu_reg_off < size; pamu_reg_off += PAMU_OFFSET, pamu_counter >>= 1) { - pamu_reg_base = (unsigned long) pamu_regs + pamu_reg_off; + pamu_reg_base = (unsigned long)pamu_regs + pamu_reg_off; setup_one_pamu(pamu_reg_base, pamu_reg_off, ppaact_phys, - spaact_phys, omt_phys); + spaact_phys, omt_phys); /* Disable PAMU bypass for this PAMU */ pamubypenr &= ~pamu_counter; } @@ -1181,7 +1168,7 @@ static int __init fsl_pamu_probe(struct platform_device *pdev) iounmap(guts_regs); - /* Enable DMA for the LIODNs in the device tree*/ + /* Enable DMA for the LIODNs in the device tree */ setup_liodns(); diff --git a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h index 8fc1a125b16ecc..aab723f91f1294 100644 --- a/drivers/iommu/fsl_pamu.h +++ b/drivers/iommu/fsl_pamu.h @@ -19,13 +19,15 @@ #ifndef __FSL_PAMU_H #define __FSL_PAMU_H +#include + #include /* Bit Field macros * v = bit field variable; m = mask, m##_SHIFT = shift, x = value to load */ -#define set_bf(v, m, x) (v = ((v) & ~(m)) | (((x) << (m##_SHIFT)) & (m))) -#define get_bf(v, m) (((v) & (m)) >> (m##_SHIFT)) +#define set_bf(v, m, x) (v = ((v) & ~(m)) | (((x) << m##_SHIFT) & (m))) +#define get_bf(v, m) (((v) & (m)) >> m##_SHIFT) /* PAMU CCSR space */ #define PAMU_PGC 0x00000000 /* Allows all peripheral accesses */ @@ -65,7 +67,7 @@ struct pamu_mmap_regs { #define PAMU_AVS1_GCV 0x2000 #define PAMU_AVS1_PDV 0x4000 #define PAMU_AV_MASK (PAMU_AVS1_AV | PAMU_AVS1_OTV | PAMU_AVS1_APV | PAMU_AVS1_WAV \ - | PAMU_AVS1_LAV | PAMU_AVS1_GCV | PAMU_AVS1_PDV) + | PAMU_AVS1_LAV | PAMU_AVS1_GCV | PAMU_AVS1_PDV) #define PAMU_AVS1_LIODN_SHIFT 16 #define PAMU_LAV_LIODN_NOT_IN_PPAACT 0x400 @@ -198,8 +200,7 @@ struct pamu_mmap_regs { #define PAACE_ATM_NO_XLATE 0x00 #define PAACE_ATM_WINDOW_XLATE 0x01 #define PAACE_ATM_PAGE_XLATE 0x02 -#define PAACE_ATM_WIN_PG_XLATE \ - (PAACE_ATM_WINDOW_XLATE | PAACE_ATM_PAGE_XLATE) +#define PAACE_ATM_WIN_PG_XLATE (PAACE_ATM_WINDOW_XLATE | PAACE_ATM_PAGE_XLATE) #define PAACE_OTM_NO_XLATE 0x00 #define PAACE_OTM_IMMEDIATE 0x01 #define PAACE_OTM_INDEXED 0x02 @@ -219,7 +220,7 @@ struct pamu_mmap_regs { #define PAACE_TCEF_FORMAT0_8B 0x00 #define PAACE_TCEF_FORMAT1_RSVD 0x01 /* - * Hard coded value for the PAACT size to accomodate + * Hard coded value for the PAACT size to accommodate * maximum LIODN value generated by u-boot. */ #define PAACE_NUMBER_ENTRIES 0x500 @@ -332,7 +333,7 @@ struct paace { #define NUM_MOE 128 struct ome { u8 moe[NUM_MOE]; -} __attribute__((packed)); +} __packed; #define PAACT_SIZE (sizeof(struct paace) * PAACE_NUMBER_ENTRIES) #define SPAACT_SIZE (sizeof(struct paace) * SPAACE_NUMBER_ENTRIES) diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index c828f80d48b0c7..ceebd287b6602e 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -19,26 +19,10 @@ #define pr_fmt(fmt) "fsl-pamu-domain: %s: " fmt, __func__ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - #include "fsl_pamu_domain.h" +#include + /* * Global spinlock that needs to be held while * configuring PAMU. @@ -51,23 +35,21 @@ static DEFINE_SPINLOCK(device_domain_lock); static int __init iommu_init_mempool(void) { - fsl_pamu_domain_cache = kmem_cache_create("fsl_pamu_domain", - sizeof(struct fsl_dma_domain), - 0, - SLAB_HWCACHE_ALIGN, - - NULL); + sizeof(struct fsl_dma_domain), + 0, + SLAB_HWCACHE_ALIGN, + NULL); if (!fsl_pamu_domain_cache) { pr_debug("Couldn't create fsl iommu_domain cache\n"); return -ENOMEM; } iommu_devinfo_cache = kmem_cache_create("iommu_devinfo", - sizeof(struct device_domain_info), - 0, - SLAB_HWCACHE_ALIGN, - NULL); + sizeof(struct device_domain_info), + 0, + SLAB_HWCACHE_ALIGN, + NULL); if (!iommu_devinfo_cache) { pr_debug("Couldn't create devinfo cache\n"); kmem_cache_destroy(fsl_pamu_domain_cache); @@ -80,8 +62,7 @@ static int __init iommu_init_mempool(void) static phys_addr_t get_phys_addr(struct fsl_dma_domain *dma_domain, dma_addr_t iova) { u32 win_cnt = dma_domain->win_cnt; - struct dma_window *win_ptr = - &dma_domain->win_arr[0]; + struct dma_window *win_ptr = &dma_domain->win_arr[0]; struct iommu_domain_geometry *geom; geom = &dma_domain->iommu_domain->geometry; @@ -103,22 +84,20 @@ static phys_addr_t get_phys_addr(struct fsl_dma_domain *dma_domain, dma_addr_t i } if (win_ptr->valid) - return (win_ptr->paddr + (iova & (win_ptr->size - 1))); + return win_ptr->paddr + (iova & (win_ptr->size - 1)); return 0; } static int map_subwins(int liodn, struct fsl_dma_domain *dma_domain) { - struct dma_window *sub_win_ptr = - &dma_domain->win_arr[0]; + struct dma_window *sub_win_ptr = &dma_domain->win_arr[0]; int i, ret; unsigned long rpn, flags; for (i = 0; i < dma_domain->win_cnt; i++) { if (sub_win_ptr[i].valid) { - rpn = sub_win_ptr[i].paddr >> - PAMU_PAGE_SHIFT; + rpn = sub_win_ptr[i].paddr >> PAMU_PAGE_SHIFT; spin_lock_irqsave(&iommu_lock, flags); ret = pamu_config_spaace(liodn, dma_domain->win_cnt, i, sub_win_ptr[i].size, @@ -130,7 +109,7 @@ static int map_subwins(int liodn, struct fsl_dma_domain *dma_domain) sub_win_ptr[i].prot); spin_unlock_irqrestore(&iommu_lock, flags); if (ret) { - pr_debug("PAMU SPAACE configuration failed for liodn %d\n", + pr_debug("SPAACE configuration failed for liodn %d\n", liodn); return ret; } @@ -156,8 +135,7 @@ static int map_win(int liodn, struct fsl_dma_domain *dma_domain) 0, wnd->prot); spin_unlock_irqrestore(&iommu_lock, flags); if (ret) - pr_debug("PAMU PAACE configuration failed for liodn %d\n", - liodn); + pr_debug("PAACE configuration failed for liodn %d\n", liodn); return ret; } @@ -169,7 +147,6 @@ static int map_liodn(int liodn, struct fsl_dma_domain *dma_domain) return map_subwins(liodn, dma_domain); else return map_win(liodn, dma_domain); - } /* Update window/subwindow mapping for the LIODN */ @@ -190,7 +167,8 @@ static int update_liodn(int liodn, struct fsl_dma_domain *dma_domain, u32 wnd_nr (wnd_nr > 0) ? 1 : 0, wnd->prot); if (ret) - pr_debug("Subwindow reconfiguration failed for liodn %d\n", liodn); + pr_debug("Subwindow reconfiguration failed for liodn %d\n", + liodn); } else { phys_addr_t wnd_addr; @@ -200,10 +178,11 @@ static int update_liodn(int liodn, struct fsl_dma_domain *dma_domain, u32 wnd_nr wnd->size, ~(u32)0, wnd->paddr >> PAMU_PAGE_SHIFT, - dma_domain->snoop_id, dma_domain->stash_id, - 0, wnd->prot); + dma_domain->snoop_id, dma_domain->stash_id, + 0, wnd->prot); if (ret) - pr_debug("Window reconfiguration failed for liodn %d\n", liodn); + pr_debug("Window reconfiguration failed for liodn %d\n", + liodn); } spin_unlock_irqrestore(&iommu_lock, flags); @@ -212,14 +191,15 @@ static int update_liodn(int liodn, struct fsl_dma_domain *dma_domain, u32 wnd_nr } static int update_liodn_stash(int liodn, struct fsl_dma_domain *dma_domain, - u32 val) + u32 val) { int ret = 0, i; unsigned long flags; spin_lock_irqsave(&iommu_lock, flags); if (!dma_domain->win_arr) { - pr_debug("Windows not configured, stash destination update failed for liodn %d\n", liodn); + pr_debug("Windows not configured, stash destination update failed for liodn %d\n", + liodn); spin_unlock_irqrestore(&iommu_lock, flags); return -EINVAL; } @@ -227,7 +207,8 @@ static int update_liodn_stash(int liodn, struct fsl_dma_domain *dma_domain, for (i = 0; i < dma_domain->win_cnt; i++) { ret = pamu_update_paace_stash(liodn, i, val); if (ret) { - pr_debug("Failed to update SPAACE %d field for liodn %d\n ", i, liodn); + pr_debug("Failed to update SPAACE %d field for liodn %d\n ", + i, liodn); spin_unlock_irqrestore(&iommu_lock, flags); return ret; } @@ -240,9 +221,9 @@ static int update_liodn_stash(int liodn, struct fsl_dma_domain *dma_domain, /* Set the geometry parameters for a LIODN */ static int pamu_set_liodn(int liodn, struct device *dev, - struct fsl_dma_domain *dma_domain, - struct iommu_domain_geometry *geom_attr, - u32 win_cnt) + struct fsl_dma_domain *dma_domain, + struct iommu_domain_geometry *geom_attr, + u32 win_cnt) { phys_addr_t window_addr, window_size; phys_addr_t subwin_size; @@ -268,7 +249,8 @@ static int pamu_set_liodn(int liodn, struct device *dev, dma_domain->stash_id, win_cnt, 0); spin_unlock_irqrestore(&iommu_lock, flags); if (ret) { - pr_debug("PAMU PAACE configuration failed for liodn %d, win_cnt =%d\n", liodn, win_cnt); + pr_debug("PAACE configuration failed for liodn %d, win_cnt =%d\n", + liodn, win_cnt); return ret; } @@ -285,7 +267,8 @@ static int pamu_set_liodn(int liodn, struct device *dev, 0, 0); spin_unlock_irqrestore(&iommu_lock, flags); if (ret) { - pr_debug("PAMU SPAACE configuration failed for liodn %d\n", liodn); + pr_debug("SPAACE configuration failed for liodn %d\n", + liodn); return ret; } } @@ -301,13 +284,13 @@ static int check_size(u64 size, dma_addr_t iova) * to PAMU page size. */ if ((size & (size - 1)) || size < PAMU_PAGE_SIZE) { - pr_debug("%s: size too small or not a power of two\n", __func__); + pr_debug("Size too small or not a power of two\n"); return -EINVAL; } - /* iova must be page size aligned*/ + /* iova must be page size aligned */ if (iova & (size - 1)) { - pr_debug("%s: address is not aligned with window size\n", __func__); + pr_debug("Address is not aligned with window size\n"); return -EINVAL; } @@ -396,16 +379,15 @@ static void attach_device(struct fsl_dma_domain *dma_domain, int liodn, struct d if (!dev->archdata.iommu_domain) dev->archdata.iommu_domain = info; spin_unlock_irqrestore(&device_domain_lock, flags); - } static phys_addr_t fsl_pamu_iova_to_phys(struct iommu_domain *domain, - dma_addr_t iova) + dma_addr_t iova) { struct fsl_dma_domain *dma_domain = domain->priv; - if ((iova < domain->geometry.aperture_start) || - iova > (domain->geometry.aperture_end)) + if (iova < domain->geometry.aperture_start || + iova > domain->geometry.aperture_end) return 0; return get_phys_addr(dma_domain, iova); @@ -460,7 +442,7 @@ static int pamu_set_domain_geometry(struct fsl_dma_domain *dma_domain, list_for_each_entry(info, &dma_domain->devices, link) { ret = pamu_set_liodn(info->liodn, info->dev, dma_domain, - geom_attr, win_cnt); + geom_attr, win_cnt); if (ret) break; } @@ -543,7 +525,6 @@ static void fsl_pamu_window_disable(struct iommu_domain *domain, u32 wnd_nr) } spin_unlock_irqrestore(&dma_domain->domain_lock, flags); - } static int fsl_pamu_window_enable(struct iommu_domain *domain, u32 wnd_nr, @@ -576,7 +557,7 @@ static int fsl_pamu_window_enable(struct iommu_domain *domain, u32 wnd_nr, win_size = dma_domain->geom_size >> ilog2(dma_domain->win_cnt); if (size > win_size) { - pr_debug("Invalid window size \n"); + pr_debug("Invalid window size\n"); spin_unlock_irqrestore(&dma_domain->domain_lock, flags); return -EINVAL; } @@ -622,8 +603,8 @@ static int fsl_pamu_window_enable(struct iommu_domain *domain, u32 wnd_nr, * and window mappings. */ static int handle_attach_device(struct fsl_dma_domain *dma_domain, - struct device *dev, const u32 *liodn, - int num) + struct device *dev, const u32 *liodn, + int num) { unsigned long flags; struct iommu_domain *domain = dma_domain->iommu_domain; @@ -632,11 +613,10 @@ static int handle_attach_device(struct fsl_dma_domain *dma_domain, spin_lock_irqsave(&dma_domain->domain_lock, flags); for (i = 0; i < num; i++) { - /* Ensure that LIODN value is valid */ if (liodn[i] >= PAACE_NUMBER_ENTRIES) { pr_debug("Invalid liodn %d, attach device failed for %s\n", - liodn[i], dev->of_node->full_name); + liodn[i], dev->of_node->full_name); ret = -EINVAL; break; } @@ -649,9 +629,9 @@ static int handle_attach_device(struct fsl_dma_domain *dma_domain, */ if (dma_domain->win_arr) { u32 win_cnt = dma_domain->win_cnt > 1 ? dma_domain->win_cnt : 0; + ret = pamu_set_liodn(liodn[i], dev, dma_domain, - &domain->geometry, - win_cnt); + &domain->geometry, win_cnt); if (ret) break; if (dma_domain->mapped) { @@ -698,19 +678,18 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain, liodn = of_get_property(dev->of_node, "fsl,liodn", &len); if (liodn) { liodn_cnt = len / sizeof(u32); - ret = handle_attach_device(dma_domain, dev, - liodn, liodn_cnt); + ret = handle_attach_device(dma_domain, dev, liodn, liodn_cnt); } else { pr_debug("missing fsl,liodn property at %s\n", - dev->of_node->full_name); - ret = -EINVAL; + dev->of_node->full_name); + ret = -EINVAL; } return ret; } static void fsl_pamu_detach_device(struct iommu_domain *domain, - struct device *dev) + struct device *dev) { struct fsl_dma_domain *dma_domain = domain->priv; const u32 *prop; @@ -738,7 +717,7 @@ static void fsl_pamu_detach_device(struct iommu_domain *domain, detach_device(dev, dma_domain); else pr_debug("missing fsl,liodn property at %s\n", - dev->of_node->full_name); + dev->of_node->full_name); } static int configure_domain_geometry(struct iommu_domain *domain, void *data) @@ -754,10 +733,10 @@ static int configure_domain_geometry(struct iommu_domain *domain, void *data) * DMA outside of the geometry. */ if (check_size(geom_size, geom_attr->aperture_start) || - !geom_attr->force_aperture) { - pr_debug("Invalid PAMU geometry attributes\n"); - return -EINVAL; - } + !geom_attr->force_aperture) { + pr_debug("Invalid PAMU geometry attributes\n"); + return -EINVAL; + } spin_lock_irqsave(&dma_domain->domain_lock, flags); if (dma_domain->enabled) { @@ -786,7 +765,7 @@ static int configure_domain_stash(struct fsl_dma_domain *dma_domain, void *data) spin_lock_irqsave(&dma_domain->domain_lock, flags); memcpy(&dma_domain->dma_stash, stash_attr, - sizeof(struct pamu_stash_attribute)); + sizeof(struct pamu_stash_attribute)); dma_domain->stash_id = get_stash_id(stash_attr->cache, stash_attr->cpu); @@ -803,7 +782,7 @@ static int configure_domain_stash(struct fsl_dma_domain *dma_domain, void *data) return ret; } -/* Configure domain dma state i.e. enable/disable DMA*/ +/* Configure domain dma state i.e. enable/disable DMA */ static int configure_domain_dma_state(struct fsl_dma_domain *dma_domain, bool enable) { struct device_domain_info *info; @@ -819,8 +798,7 @@ static int configure_domain_dma_state(struct fsl_dma_domain *dma_domain, bool en } dma_domain->enabled = enable; - list_for_each_entry(info, &dma_domain->devices, - link) { + list_for_each_entry(info, &dma_domain->devices, link) { ret = (enable) ? pamu_enable_liodn(info->liodn) : pamu_disable_liodn(info->liodn); if (ret) @@ -833,12 +811,11 @@ static int configure_domain_dma_state(struct fsl_dma_domain *dma_domain, bool en } static int fsl_pamu_set_domain_attr(struct iommu_domain *domain, - enum iommu_attr attr_type, void *data) + enum iommu_attr attr_type, void *data) { struct fsl_dma_domain *dma_domain = domain->priv; int ret = 0; - switch (attr_type) { case DOMAIN_ATTR_GEOMETRY: ret = configure_domain_geometry(domain, data); @@ -853,22 +830,21 @@ static int fsl_pamu_set_domain_attr(struct iommu_domain *domain, pr_debug("Unsupported attribute type\n"); ret = -EINVAL; break; - }; + } return ret; } static int fsl_pamu_get_domain_attr(struct iommu_domain *domain, - enum iommu_attr attr_type, void *data) + enum iommu_attr attr_type, void *data) { struct fsl_dma_domain *dma_domain = domain->priv; int ret = 0; - switch (attr_type) { case DOMAIN_ATTR_FSL_PAMU_STASH: - memcpy((struct pamu_stash_attribute *) data, &dma_domain->dma_stash, - sizeof(struct pamu_stash_attribute)); + memcpy(data, &dma_domain->dma_stash, + sizeof(struct pamu_stash_attribute)); break; case DOMAIN_ATTR_FSL_PAMU_ENABLE: *(int *)data = dma_domain->enabled; @@ -880,7 +856,7 @@ static int fsl_pamu_get_domain_attr(struct iommu_domain *domain, pr_debug("Unsupported attribute type\n"); ret = -EINVAL; break; - }; + } return ret; } @@ -903,11 +879,8 @@ static bool check_pci_ctl_endpt_part(struct pci_controller *pci_ctl) /* Check the PCI controller version number by readding BRR1 register */ version = in_be32(pci_ctl->cfg_addr + (PCI_FSL_BRR1 >> 2)); version &= PCI_FSL_BRR1_VER; - /* If PCI controller version is >= 0x204 we can partition endpoints*/ - if (version >= 0x204) - return 1; - - return 0; + /* If PCI controller version is >= 0x204 we can partition endpoints */ + return version >= 0x204; } /* Get iommu group information from peer devices or devices on the parent bus */ @@ -968,8 +941,9 @@ static struct iommu_group *get_pci_device_group(struct pci_dev *pdev) if (pci_ctl->parent->iommu_group) { group = get_device_iommu_group(pci_ctl->parent); iommu_group_remove_device(pci_ctl->parent); - } else + } else { group = get_shared_pci_device_group(pdev); + } } if (!group) @@ -1055,11 +1029,12 @@ static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count) } ret = pamu_set_domain_geometry(dma_domain, &domain->geometry, - ((w_count > 1) ? w_count : 0)); + w_count > 1 ? w_count : 0); if (!ret) { kfree(dma_domain->win_arr); - dma_domain->win_arr = kzalloc(sizeof(struct dma_window) * - w_count, GFP_ATOMIC); + dma_domain->win_arr = kcalloc(w_count, + sizeof(*dma_domain->win_arr), + GFP_ATOMIC); if (!dma_domain->win_arr) { spin_unlock_irqrestore(&dma_domain->domain_lock, flags); return -ENOMEM; @@ -1095,7 +1070,7 @@ static const struct iommu_ops fsl_pamu_ops = { .remove_device = fsl_pamu_remove_device, }; -int pamu_domain_init(void) +int __init pamu_domain_init(void) { int ret = 0; From a4188beee59763b3507939968677776561adbba5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 30 Jan 2015 22:55:55 +0100 Subject: [PATCH 391/601] iommu/arm-smmu: Avoid build warning ARM allmodconfig gained a new warning when dma_addr_t is 32-bit wide: drivers/iommu/arm-smmu.c: In function 'arm_smmu_iova_to_phys_hard': drivers/iommu/arm-smmu.c:1255:3: warning: right shift count >= width of type This changes the calculation so that the effective type is always 64-bit. Signed-off-by: Arnd Bergmann Fixes: 859a732e4f713 ("iommu/arm-smmu: add support for iova_to_phys through ATS1PR") Acked-by: Will Deacon Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 1d6d43bb339529..fc13dd56953e1e 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1252,7 +1252,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, } else { u32 reg = iova & ~0xfff; writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_LO); - reg = (iova & ~0xfff) >> 32; + reg = ((u64)iova & ~0xfff) >> 32; writel_relaxed(reg, cb_base + ARM_SMMU_CB_ATS1PR_HI); } From f54bcf2ecee982da47c2baf8bd87fd9ad9984651 Mon Sep 17 00:00:00 2001 From: Tom Haynes Date: Thu, 11 Dec 2014 15:34:59 -0500 Subject: [PATCH 392/601] pnfs: Prepare for flexfiles by pulling out common code The flexfilelayout driver will share some common code with the filelayout driver. This set of changes refactors that common code out to avoid any module depenencies. Signed-off-by: Tom Haynes --- fs/nfs/Makefile | 2 +- fs/nfs/filelayout/filelayout.c | 291 ++---------------------------- fs/nfs/filelayout/filelayout.h | 11 -- fs/nfs/filelayout/filelayoutdev.c | 2 +- fs/nfs/pnfs.h | 23 +++ fs/nfs/pnfs_nfs.c | 291 ++++++++++++++++++++++++++++++ 6 files changed, 330 insertions(+), 290 deletions(-) create mode 100644 fs/nfs/pnfs_nfs.c diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 04cb830fa09fe5..23abffa8a4cef3 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -27,7 +27,7 @@ nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o dns_resolve.o nfs4trace.o nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o -nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o +nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o pnfs_nfs.o nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/ diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 7afb52f6a25a1b..bc36ed350a685a 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -118,13 +118,6 @@ static void filelayout_reset_read(struct nfs_pgio_header *hdr) } } -static void filelayout_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo) -{ - if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) - return; - pnfs_return_layout(inode); -} - static int filelayout_async_handle_error(struct rpc_task *task, struct nfs4_state *state, struct nfs_client *clp, @@ -339,16 +332,6 @@ static void filelayout_read_count_stats(struct rpc_task *task, void *data) rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); } -static void filelayout_read_release(void *data) -{ - struct nfs_pgio_header *hdr = data; - struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout; - - filelayout_fenceme(lo->plh_inode, lo); - nfs_put_client(hdr->ds_clp); - hdr->mds_ops->rpc_release(data); -} - static int filelayout_write_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { @@ -371,17 +354,6 @@ static int filelayout_write_done_cb(struct rpc_task *task, return 0; } -/* Fake up some data that will cause nfs_commit_release to retry the writes. */ -static void prepare_to_resend_writes(struct nfs_commit_data *data) -{ - struct nfs_page *first = nfs_list_entry(data->pages.next); - - data->task.tk_status = 0; - memcpy(&data->verf.verifier, &first->wb_verf, - sizeof(data->verf.verifier)); - data->verf.verifier.data[0]++; /* ensure verifier mismatch */ -} - static int filelayout_commit_done_cb(struct rpc_task *task, struct nfs_commit_data *data) { @@ -393,7 +365,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task, switch (err) { case -NFS4ERR_RESET_TO_MDS: - prepare_to_resend_writes(data); + pnfs_generic_prepare_to_resend_writes(data); return -EAGAIN; case -EAGAIN: rpc_restart_call_prepare(task); @@ -451,16 +423,6 @@ static void filelayout_write_count_stats(struct rpc_task *task, void *data) rpc_count_iostats(task, NFS_SERVER(hdr->inode)->client->cl_metrics); } -static void filelayout_write_release(void *data) -{ - struct nfs_pgio_header *hdr = data; - struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout; - - filelayout_fenceme(lo->plh_inode, lo); - nfs_put_client(hdr->ds_clp); - hdr->mds_ops->rpc_release(data); -} - static void filelayout_commit_prepare(struct rpc_task *task, void *data) { struct nfs_commit_data *wdata = data; @@ -471,14 +433,6 @@ static void filelayout_commit_prepare(struct rpc_task *task, void *data) task); } -static void filelayout_write_commit_done(struct rpc_task *task, void *data) -{ - struct nfs_commit_data *wdata = data; - - /* Note this may cause RPC to be resent */ - wdata->mds_ops->rpc_call_done(task, data); -} - static void filelayout_commit_count_stats(struct rpc_task *task, void *data) { struct nfs_commit_data *cdata = data; @@ -486,35 +440,25 @@ static void filelayout_commit_count_stats(struct rpc_task *task, void *data) rpc_count_iostats(task, NFS_SERVER(cdata->inode)->client->cl_metrics); } -static void filelayout_commit_release(void *calldata) -{ - struct nfs_commit_data *data = calldata; - - data->completion_ops->completion(data); - pnfs_put_lseg(data->lseg); - nfs_put_client(data->ds_clp); - nfs_commitdata_release(data); -} - static const struct rpc_call_ops filelayout_read_call_ops = { .rpc_call_prepare = filelayout_read_prepare, .rpc_call_done = filelayout_read_call_done, .rpc_count_stats = filelayout_read_count_stats, - .rpc_release = filelayout_read_release, + .rpc_release = pnfs_generic_rw_release, }; static const struct rpc_call_ops filelayout_write_call_ops = { .rpc_call_prepare = filelayout_write_prepare, .rpc_call_done = filelayout_write_call_done, .rpc_count_stats = filelayout_write_count_stats, - .rpc_release = filelayout_write_release, + .rpc_release = pnfs_generic_rw_release, }; static const struct rpc_call_ops filelayout_commit_call_ops = { .rpc_call_prepare = filelayout_commit_prepare, - .rpc_call_done = filelayout_write_commit_done, + .rpc_call_done = pnfs_generic_write_commit_done, .rpc_count_stats = filelayout_commit_count_stats, - .rpc_release = filelayout_commit_release, + .rpc_release = pnfs_generic_commit_release, }; static enum pnfs_try_status @@ -1004,33 +948,6 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) return j; } -/* The generic layer is about to remove the req from the commit list. - * If this will make the bucket empty, it will need to put the lseg reference. - * Note this is must be called holding the inode (/cinfo) lock - */ -static void -filelayout_clear_request_commit(struct nfs_page *req, - struct nfs_commit_info *cinfo) -{ - struct pnfs_layout_segment *freeme = NULL; - - if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) - goto out; - cinfo->ds->nwritten--; - if (list_is_singular(&req->wb_list)) { - struct pnfs_commit_bucket *bucket; - - bucket = list_first_entry(&req->wb_list, - struct pnfs_commit_bucket, - written); - freeme = bucket->wlseg; - bucket->wlseg = NULL; - } -out: - nfs_request_remove_commit_list(req, cinfo); - pnfs_put_lseg_locked(freeme); -} - static void filelayout_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, @@ -1064,7 +981,7 @@ filelayout_mark_request_commit(struct nfs_page *req, * is normally transferred to the COMMIT call and released * there. It could also be released if the last req is pulled * off due to a rewrite, in which case it will be done in - * filelayout_clear_request_commit + * pnfs_generic_clear_request_commit */ buckets[i].wlseg = pnfs_get_lseg(lseg); } @@ -1142,97 +1059,11 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) &filelayout_commit_call_ops, how, RPC_TASK_SOFTCONN); out_err: - prepare_to_resend_writes(data); - filelayout_commit_release(data); + pnfs_generic_prepare_to_resend_writes(data); + pnfs_generic_commit_release(data); return -EAGAIN; } -static int -transfer_commit_list(struct list_head *src, struct list_head *dst, - struct nfs_commit_info *cinfo, int max) -{ - struct nfs_page *req, *tmp; - int ret = 0; - - list_for_each_entry_safe(req, tmp, src, wb_list) { - if (!nfs_lock_request(req)) - continue; - kref_get(&req->wb_kref); - if (cond_resched_lock(cinfo->lock)) - list_safe_reset_next(req, tmp, wb_list); - nfs_request_remove_commit_list(req, cinfo); - clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); - nfs_list_add_request(req, dst); - ret++; - if ((ret == max) && !cinfo->dreq) - break; - } - return ret; -} - -/* Note called with cinfo->lock held. */ -static int -filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, - struct nfs_commit_info *cinfo, - int max) -{ - struct list_head *src = &bucket->written; - struct list_head *dst = &bucket->committing; - int ret; - - ret = transfer_commit_list(src, dst, cinfo, max); - if (ret) { - cinfo->ds->nwritten -= ret; - cinfo->ds->ncommitting += ret; - bucket->clseg = bucket->wlseg; - if (list_empty(src)) - bucket->wlseg = NULL; - else - pnfs_get_lseg(bucket->clseg); - } - return ret; -} - -/* Move reqs from written to committing lists, returning count of number moved. - * Note called with cinfo->lock held. - */ -static int filelayout_scan_commit_lists(struct nfs_commit_info *cinfo, - int max) -{ - int i, rv = 0, cnt; - - for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { - cnt = filelayout_scan_ds_commit_list(&cinfo->ds->buckets[i], - cinfo, max); - max -= cnt; - rv += cnt; - } - return rv; -} - -/* Pull everything off the committing lists and dump into @dst */ -static void filelayout_recover_commit_reqs(struct list_head *dst, - struct nfs_commit_info *cinfo) -{ - struct pnfs_commit_bucket *b; - struct pnfs_layout_segment *freeme; - int i; - -restart: - spin_lock(cinfo->lock); - for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { - if (transfer_commit_list(&b->written, dst, cinfo, 0)) { - freeme = b->wlseg; - b->wlseg = NULL; - spin_unlock(cinfo->lock); - pnfs_put_lseg(freeme); - goto restart; - } - } - cinfo->ds->nwritten = 0; - spin_unlock(cinfo->lock); -} - /* filelayout_search_commit_reqs - Search lists in @cinfo for the head reqest * for @page * @cinfo - commit info for current inode @@ -1263,108 +1094,14 @@ filelayout_search_commit_reqs(struct nfs_commit_info *cinfo, struct page *page) return NULL; } -static void filelayout_retry_commit(struct nfs_commit_info *cinfo, int idx) -{ - struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; - struct pnfs_commit_bucket *bucket; - struct pnfs_layout_segment *freeme; - int i; - - for (i = idx; i < fl_cinfo->nbuckets; i++) { - bucket = &fl_cinfo->buckets[i]; - if (list_empty(&bucket->committing)) - continue; - nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); - spin_lock(cinfo->lock); - freeme = bucket->clseg; - bucket->clseg = NULL; - spin_unlock(cinfo->lock); - pnfs_put_lseg(freeme); - } -} - -static unsigned int -alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list) -{ - struct pnfs_ds_commit_info *fl_cinfo; - struct pnfs_commit_bucket *bucket; - struct nfs_commit_data *data; - int i; - unsigned int nreq = 0; - - fl_cinfo = cinfo->ds; - bucket = fl_cinfo->buckets; - for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { - if (list_empty(&bucket->committing)) - continue; - data = nfs_commitdata_alloc(); - if (!data) - break; - data->ds_commit_index = i; - spin_lock(cinfo->lock); - data->lseg = bucket->clseg; - bucket->clseg = NULL; - spin_unlock(cinfo->lock); - list_add(&data->pages, list); - nreq++; - } - - /* Clean up on error */ - filelayout_retry_commit(cinfo, i); - /* Caller will clean up entries put on list */ - return nreq; -} - -/* This follows nfs_commit_list pretty closely */ static int filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, int how, struct nfs_commit_info *cinfo) { - struct nfs_commit_data *data, *tmp; - LIST_HEAD(list); - unsigned int nreq = 0; - - if (!list_empty(mds_pages)) { - data = nfs_commitdata_alloc(); - if (data != NULL) { - data->lseg = NULL; - list_add(&data->pages, &list); - nreq++; - } else { - nfs_retry_commit(mds_pages, NULL, cinfo); - filelayout_retry_commit(cinfo, 0); - cinfo->completion_ops->error_cleanup(NFS_I(inode)); - return -ENOMEM; - } - } - - nreq += alloc_ds_commits(cinfo, &list); - - if (nreq == 0) { - cinfo->completion_ops->error_cleanup(NFS_I(inode)); - goto out; - } - - atomic_add(nreq, &cinfo->mds->rpcs_out); - - list_for_each_entry_safe(data, tmp, &list, pages) { - list_del_init(&data->pages); - if (!data->lseg) { - nfs_init_commit(data, mds_pages, NULL, cinfo); - nfs_initiate_commit(NFS_CLIENT(inode), data, - data->mds_ops, how, 0); - } else { - struct pnfs_commit_bucket *buckets; - - buckets = cinfo->ds->buckets; - nfs_init_commit(data, &buckets[data->ds_commit_index].committing, data->lseg, cinfo); - filelayout_initiate_commit(data, how); - } - } -out: - cinfo->ds->ncommitting = 0; - return PNFS_ATTEMPTED; + return pnfs_generic_commit_pagelist(inode, mds_pages, how, cinfo, + filelayout_initiate_commit); } + static struct nfs4_deviceid_node * filelayout_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, gfp_t gfp_flags) @@ -1421,9 +1158,9 @@ static struct pnfs_layoutdriver_type filelayout_type = { .pg_write_ops = &filelayout_pg_write_ops, .get_ds_info = &filelayout_get_ds_info, .mark_request_commit = filelayout_mark_request_commit, - .clear_request_commit = filelayout_clear_request_commit, - .scan_commit_lists = filelayout_scan_commit_lists, - .recover_commit_reqs = filelayout_recover_commit_reqs, + .clear_request_commit = pnfs_generic_clear_request_commit, + .scan_commit_lists = pnfs_generic_scan_commit_lists, + .recover_commit_reqs = pnfs_generic_recover_commit_reqs, .search_commit_reqs = filelayout_search_commit_reqs, .commit_pagelist = filelayout_commit_pagelist, .read_pagelist = filelayout_read_pagelist, diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h index 7c9f800c49d78c..a5ce9b4bf2f8e1 100644 --- a/fs/nfs/filelayout/filelayout.h +++ b/fs/nfs/filelayout/filelayout.h @@ -119,17 +119,6 @@ FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg) return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; } -static inline void -filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node) -{ - u32 *p = (u32 *)&node->deviceid; - - printk(KERN_WARNING "NFS: Deviceid [%x%x%x%x] marked out of use.\n", - p[0], p[1], p[2], p[3]); - - set_bit(NFS_DEVICEID_INVALID, &node->flags); -} - static inline bool filelayout_test_devid_invalid(struct nfs4_deviceid_node *node) { diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index bfecac781f1993..d21080aed9b200 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -708,7 +708,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) if (ds == NULL) { printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", __func__, ds_idx); - filelayout_mark_devid_invalid(devid); + pnfs_generic_mark_devid_invalid(devid); goto out; } smp_rmb(); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 9ae5b765b07351..f17663446acc17 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -275,6 +275,23 @@ void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node); bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); void nfs4_deviceid_purge_client(const struct nfs_client *); +/* pnfs_nfs.c */ +void pnfs_generic_clear_request_commit(struct nfs_page *req, + struct nfs_commit_info *cinfo); +void pnfs_generic_commit_release(void *calldata); +void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data); +void pnfs_generic_rw_release(void *data); +void pnfs_generic_recover_commit_reqs(struct list_head *dst, + struct nfs_commit_info *cinfo); +int pnfs_generic_commit_pagelist(struct inode *inode, + struct list_head *mds_pages, + int how, + struct nfs_commit_info *cinfo, + int (*initiate_commit)(struct nfs_commit_data *data, + int how)); +int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max); +void pnfs_generic_write_commit_done(struct rpc_task *task, void *data); + static inline struct nfs4_deviceid_node * nfs4_get_deviceid(struct nfs4_deviceid_node *d) { @@ -317,6 +334,12 @@ pnfs_get_ds_info(struct inode *inode) return ld->get_ds_info(inode); } +static inline void +pnfs_generic_mark_devid_invalid(struct nfs4_deviceid_node *node) +{ + set_bit(NFS_DEVICEID_INVALID, &node->flags); +} + static inline bool pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, struct nfs_commit_info *cinfo) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c new file mode 100644 index 00000000000000..e5f841cb6227f5 --- /dev/null +++ b/fs/nfs/pnfs_nfs.c @@ -0,0 +1,291 @@ +/* + * Common NFS I/O operations for the pnfs file based + * layout drivers. + * + * Copyright (c) 2014, Primary Data, Inc. All rights reserved. + * + * Tom Haynes + */ + +#include +#include + +#include "internal.h" +#include "pnfs.h" + +static void pnfs_generic_fenceme(struct inode *inode, + struct pnfs_layout_hdr *lo) +{ + if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + return; + pnfs_return_layout(inode); +} + +void pnfs_generic_rw_release(void *data) +{ + struct nfs_pgio_header *hdr = data; + struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout; + + pnfs_generic_fenceme(lo->plh_inode, lo); + nfs_put_client(hdr->ds_clp); + hdr->mds_ops->rpc_release(data); +} +EXPORT_SYMBOL_GPL(pnfs_generic_rw_release); + +/* Fake up some data that will cause nfs_commit_release to retry the writes. */ +void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data) +{ + struct nfs_page *first = nfs_list_entry(data->pages.next); + + data->task.tk_status = 0; + memcpy(&data->verf.verifier, &first->wb_verf, + sizeof(data->verf.verifier)); + data->verf.verifier.data[0]++; /* ensure verifier mismatch */ +} +EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes); + +void pnfs_generic_write_commit_done(struct rpc_task *task, void *data) +{ + struct nfs_commit_data *wdata = data; + + /* Note this may cause RPC to be resent */ + wdata->mds_ops->rpc_call_done(task, data); +} +EXPORT_SYMBOL_GPL(pnfs_generic_write_commit_done); + +void pnfs_generic_commit_release(void *calldata) +{ + struct nfs_commit_data *data = calldata; + + data->completion_ops->completion(data); + pnfs_put_lseg(data->lseg); + nfs_put_client(data->ds_clp); + nfs_commitdata_release(data); +} +EXPORT_SYMBOL_GPL(pnfs_generic_commit_release); + +/* The generic layer is about to remove the req from the commit list. + * If this will make the bucket empty, it will need to put the lseg reference. + * Note this is must be called holding the inode (/cinfo) lock + */ +void +pnfs_generic_clear_request_commit(struct nfs_page *req, + struct nfs_commit_info *cinfo) +{ + struct pnfs_layout_segment *freeme = NULL; + + if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags)) + goto out; + cinfo->ds->nwritten--; + if (list_is_singular(&req->wb_list)) { + struct pnfs_commit_bucket *bucket; + + bucket = list_first_entry(&req->wb_list, + struct pnfs_commit_bucket, + written); + freeme = bucket->wlseg; + bucket->wlseg = NULL; + } +out: + nfs_request_remove_commit_list(req, cinfo); + pnfs_put_lseg_locked(freeme); +} +EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit); + +static int +pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst, + struct nfs_commit_info *cinfo, int max) +{ + struct nfs_page *req, *tmp; + int ret = 0; + + list_for_each_entry_safe(req, tmp, src, wb_list) { + if (!nfs_lock_request(req)) + continue; + kref_get(&req->wb_kref); + if (cond_resched_lock(cinfo->lock)) + list_safe_reset_next(req, tmp, wb_list); + nfs_request_remove_commit_list(req, cinfo); + clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); + nfs_list_add_request(req, dst); + ret++; + if ((ret == max) && !cinfo->dreq) + break; + } + return ret; +} + +/* Note called with cinfo->lock held. */ +static int +pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, + struct nfs_commit_info *cinfo, + int max) +{ + struct list_head *src = &bucket->written; + struct list_head *dst = &bucket->committing; + int ret; + + ret = pnfs_generic_transfer_commit_list(src, dst, cinfo, max); + if (ret) { + cinfo->ds->nwritten -= ret; + cinfo->ds->ncommitting += ret; + bucket->clseg = bucket->wlseg; + if (list_empty(src)) + bucket->wlseg = NULL; + else + pnfs_get_lseg(bucket->clseg); + } + return ret; +} + +/* Move reqs from written to committing lists, returning count of number moved. + * Note called with cinfo->lock held. + */ +int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, + int max) +{ + int i, rv = 0, cnt; + + for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { + cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i], + cinfo, max); + max -= cnt; + rv += cnt; + } + return rv; +} +EXPORT_SYMBOL_GPL(pnfs_generic_scan_commit_lists); + +/* Pull everything off the committing lists and dump into @dst */ +void pnfs_generic_recover_commit_reqs(struct list_head *dst, + struct nfs_commit_info *cinfo) +{ + struct pnfs_commit_bucket *b; + struct pnfs_layout_segment *freeme; + int i; + +restart: + spin_lock(cinfo->lock); + for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { + if (pnfs_generic_transfer_commit_list(&b->written, dst, + cinfo, 0)) { + freeme = b->wlseg; + b->wlseg = NULL; + spin_unlock(cinfo->lock); + pnfs_put_lseg(freeme); + goto restart; + } + } + cinfo->ds->nwritten = 0; + spin_unlock(cinfo->lock); +} +EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs); + +static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) +{ + struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; + struct pnfs_commit_bucket *bucket; + struct pnfs_layout_segment *freeme; + int i; + + for (i = idx; i < fl_cinfo->nbuckets; i++) { + bucket = &fl_cinfo->buckets[i]; + if (list_empty(&bucket->committing)) + continue; + nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); + spin_lock(cinfo->lock); + freeme = bucket->clseg; + bucket->clseg = NULL; + spin_unlock(cinfo->lock); + pnfs_put_lseg(freeme); + } +} + +static unsigned int +pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo, + struct list_head *list) +{ + struct pnfs_ds_commit_info *fl_cinfo; + struct pnfs_commit_bucket *bucket; + struct nfs_commit_data *data; + int i; + unsigned int nreq = 0; + + fl_cinfo = cinfo->ds; + bucket = fl_cinfo->buckets; + for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { + if (list_empty(&bucket->committing)) + continue; + data = nfs_commitdata_alloc(); + if (!data) + break; + data->ds_commit_index = i; + spin_lock(cinfo->lock); + data->lseg = bucket->clseg; + bucket->clseg = NULL; + spin_unlock(cinfo->lock); + list_add(&data->pages, list); + nreq++; + } + + /* Clean up on error */ + pnfs_generic_retry_commit(cinfo, i); + return nreq; +} + +/* This follows nfs_commit_list pretty closely */ +int +pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, + int how, struct nfs_commit_info *cinfo, + int (*initiate_commit)(struct nfs_commit_data *data, + int how)) +{ + struct nfs_commit_data *data, *tmp; + LIST_HEAD(list); + unsigned int nreq = 0; + + if (!list_empty(mds_pages)) { + data = nfs_commitdata_alloc(); + if (data != NULL) { + data->lseg = NULL; + list_add(&data->pages, &list); + nreq++; + } else { + nfs_retry_commit(mds_pages, NULL, cinfo); + pnfs_generic_retry_commit(cinfo, 0); + cinfo->completion_ops->error_cleanup(NFS_I(inode)); + return -ENOMEM; + } + } + + nreq += pnfs_generic_alloc_ds_commits(cinfo, &list); + + if (nreq == 0) { + cinfo->completion_ops->error_cleanup(NFS_I(inode)); + goto out; + } + + atomic_add(nreq, &cinfo->mds->rpcs_out); + + list_for_each_entry_safe(data, tmp, &list, pages) { + list_del_init(&data->pages); + if (!data->lseg) { + nfs_init_commit(data, mds_pages, NULL, cinfo); + nfs_initiate_commit(NFS_CLIENT(inode), data, + data->mds_ops, how, 0); + } else { + struct pnfs_commit_bucket *buckets; + + buckets = cinfo->ds->buckets; + nfs_init_commit(data, + &buckets[data->ds_commit_index].committing, + data->lseg, + cinfo); + initiate_commit(data, how); + } + } +out: + cinfo->ds->ncommitting = 0; + return PNFS_ATTEMPTED; +} +EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist); From 085d1e33a6a8495d9afa58ad2b8b7ea74d613515 Mon Sep 17 00:00:00 2001 From: Tom Haynes Date: Thu, 11 Dec 2014 13:04:55 -0500 Subject: [PATCH 393/601] pnfs: Do not grab the commit_info lock twice when rescheduling writes Acked-by: Jeff Layton Signed-off-by: Tom Haynes --- fs/nfs/direct.c | 19 +++++++++++++++---- fs/nfs/pnfs.h | 15 --------------- fs/nfs/pnfs_nfs.c | 15 ++++++++------- 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 10bf07280f4ab2..e84f764b9dcdb6 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -573,6 +573,20 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, return result; } +static void +nfs_direct_write_scan_commit_list(struct inode *inode, + struct list_head *list, + struct nfs_commit_info *cinfo) +{ + spin_lock(cinfo->lock); +#ifdef CONFIG_NFS_V4_1 + if (cinfo->ds != NULL && cinfo->ds->nwritten != 0) + NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); +#endif + nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0); + spin_unlock(cinfo->lock); +} + static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor desc; @@ -582,10 +596,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) LIST_HEAD(failed); nfs_init_cinfo_from_dreq(&cinfo, dreq); - pnfs_recover_commit_reqs(dreq->inode, &reqs, &cinfo); - spin_lock(cinfo.lock); - nfs_scan_commit_list(&cinfo.mds->list, &reqs, &cinfo, 0); - spin_unlock(cinfo.lock); + nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); dreq->count = 0; get_dreq(dreq); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index f17663446acc17..e94f6050e9b176 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -375,15 +375,6 @@ pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo, return NFS_SERVER(inode)->pnfs_curr_ld->scan_commit_lists(cinfo, max); } -static inline void -pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list, - struct nfs_commit_info *cinfo) -{ - if (cinfo->ds == NULL || cinfo->ds->nwritten == 0) - return; - NFS_SERVER(inode)->pnfs_curr_ld->recover_commit_reqs(list, cinfo); -} - static inline struct nfs_page * pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, struct page *page) @@ -554,12 +545,6 @@ pnfs_scan_commit_lists(struct inode *inode, struct nfs_commit_info *cinfo, return 0; } -static inline void -pnfs_recover_commit_reqs(struct inode *inode, struct list_head *list, - struct nfs_commit_info *cinfo) -{ -} - static inline struct nfs_page * pnfs_search_commit_reqs(struct inode *inode, struct nfs_commit_info *cinfo, struct page *page) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index e5f841cb6227f5..fd2a2f0e8cbbb4 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -66,7 +66,7 @@ EXPORT_SYMBOL_GPL(pnfs_generic_commit_release); /* The generic layer is about to remove the req from the commit list. * If this will make the bucket empty, it will need to put the lseg reference. - * Note this is must be called holding the inode (/cinfo) lock + * Note this must be called holding the inode (/cinfo) lock */ void pnfs_generic_clear_request_commit(struct nfs_page *req, @@ -115,7 +115,6 @@ pnfs_generic_transfer_commit_list(struct list_head *src, struct list_head *dst, return ret; } -/* Note called with cinfo->lock held. */ static int pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, struct nfs_commit_info *cinfo, @@ -125,6 +124,7 @@ pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, struct list_head *dst = &bucket->committing; int ret; + lockdep_assert_held(cinfo->lock); ret = pnfs_generic_transfer_commit_list(src, dst, cinfo, max); if (ret) { cinfo->ds->nwritten -= ret; @@ -138,14 +138,15 @@ pnfs_generic_scan_ds_commit_list(struct pnfs_commit_bucket *bucket, return ret; } -/* Move reqs from written to committing lists, returning count of number moved. - * Note called with cinfo->lock held. +/* Move reqs from written to committing lists, returning count + * of number moved. */ int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max) { int i, rv = 0, cnt; + lockdep_assert_held(cinfo->lock); for (i = 0; i < cinfo->ds->nbuckets && max != 0; i++) { cnt = pnfs_generic_scan_ds_commit_list(&cinfo->ds->buckets[i], cinfo, max); @@ -156,7 +157,7 @@ int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, } EXPORT_SYMBOL_GPL(pnfs_generic_scan_commit_lists); -/* Pull everything off the committing lists and dump into @dst */ +/* Pull everything off the committing lists and dump into @dst. */ void pnfs_generic_recover_commit_reqs(struct list_head *dst, struct nfs_commit_info *cinfo) { @@ -164,8 +165,8 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst, struct pnfs_layout_segment *freeme; int i; + lockdep_assert_held(cinfo->lock); restart: - spin_lock(cinfo->lock); for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { if (pnfs_generic_transfer_commit_list(&b->written, dst, cinfo, 0)) { @@ -173,11 +174,11 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst, b->wlseg = NULL; spin_unlock(cinfo->lock); pnfs_put_lseg(freeme); + spin_lock(cinfo->lock); goto restart; } } cinfo->ds->nwritten = 0; - spin_unlock(cinfo->lock); } EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs); From 875ae0694be48f3e3bdddd435b79abf52b680299 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 29 May 2014 21:06:57 +0800 Subject: [PATCH 394/601] nfs41: pull data server cache from file layout to generic pnfs Also pull nfs4_pnfs_ds_addr and nfs4_pnfs_ds to generic pnfs. They can all be reused by flexfile layout as well. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/filelayout/filelayout.h | 19 --- fs/nfs/filelayout/filelayoutdev.c | 235 +---------------------------- fs/nfs/pnfs.h | 21 +++ fs/nfs/pnfs_nfs.c | 242 ++++++++++++++++++++++++++++++ 4 files changed, 265 insertions(+), 252 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h index a5ce9b4bf2f8e1..f97eea627c4f90 100644 --- a/fs/nfs/filelayout/filelayout.h +++ b/fs/nfs/filelayout/filelayout.h @@ -56,24 +56,6 @@ enum stripetype4 { STRIPE_DENSE = 2 }; -/* Individual ip address */ -struct nfs4_pnfs_ds_addr { - struct sockaddr_storage da_addr; - size_t da_addrlen; - struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */ - char *da_remotestr; /* human readable addr+port */ -}; - -struct nfs4_pnfs_ds { - struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ - char *ds_remotestr; /* comma sep list of addrs */ - struct list_head ds_addrs; - struct nfs_client *ds_clp; - atomic_t ds_count; - unsigned long ds_state; -#define NFS4DS_CONNECTING 0 /* ds is establishing connection */ -}; - struct nfs4_file_layout_dsaddr { struct nfs4_deviceid_node id_node; u32 stripe_count; @@ -131,7 +113,6 @@ filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node); extern struct nfs_fh * nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); -extern void print_ds(struct nfs4_pnfs_ds *ds); u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index d21080aed9b200..fbfbb701159d79 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -42,114 +42,6 @@ static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; -/* - * Data server cache - * - * Data servers can be mapped to different device ids. - * nfs4_pnfs_ds reference counting - * - set to 1 on allocation - * - incremented when a device id maps a data server already in the cache. - * - decremented when deviceid is removed from the cache. - */ -static DEFINE_SPINLOCK(nfs4_ds_cache_lock); -static LIST_HEAD(nfs4_data_server_cache); - -/* Debug routines */ -void -print_ds(struct nfs4_pnfs_ds *ds) -{ - if (ds == NULL) { - printk("%s NULL device\n", __func__); - return; - } - printk(" ds %s\n" - " ref count %d\n" - " client %p\n" - " cl_exchange_flags %x\n", - ds->ds_remotestr, - atomic_read(&ds->ds_count), ds->ds_clp, - ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); -} - -static bool -same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) -{ - struct sockaddr_in *a, *b; - struct sockaddr_in6 *a6, *b6; - - if (addr1->sa_family != addr2->sa_family) - return false; - - switch (addr1->sa_family) { - case AF_INET: - a = (struct sockaddr_in *)addr1; - b = (struct sockaddr_in *)addr2; - - if (a->sin_addr.s_addr == b->sin_addr.s_addr && - a->sin_port == b->sin_port) - return true; - break; - - case AF_INET6: - a6 = (struct sockaddr_in6 *)addr1; - b6 = (struct sockaddr_in6 *)addr2; - - /* LINKLOCAL addresses must have matching scope_id */ - if (ipv6_addr_src_scope(&a6->sin6_addr) == - IPV6_ADDR_SCOPE_LINKLOCAL && - a6->sin6_scope_id != b6->sin6_scope_id) - return false; - - if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && - a6->sin6_port == b6->sin6_port) - return true; - break; - - default: - dprintk("%s: unhandled address family: %u\n", - __func__, addr1->sa_family); - return false; - } - - return false; -} - -static bool -_same_data_server_addrs_locked(const struct list_head *dsaddrs1, - const struct list_head *dsaddrs2) -{ - struct nfs4_pnfs_ds_addr *da1, *da2; - - /* step through both lists, comparing as we go */ - for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), - da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); - da1 != NULL && da2 != NULL; - da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), - da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { - if (!same_sockaddr((struct sockaddr *)&da1->da_addr, - (struct sockaddr *)&da2->da_addr)) - return false; - } - if (da1 == NULL && da2 == NULL) - return true; - - return false; -} - -/* - * Lookup DS by addresses. nfs4_ds_cache_lock is held - */ -static struct nfs4_pnfs_ds * -_data_server_lookup_locked(const struct list_head *dsaddrs) -{ - struct nfs4_pnfs_ds *ds; - - list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) - if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) - return ds; - return NULL; -} - /* * Create an rpc connection to the nfs4_pnfs_ds data server * Currently only supports IPv4 and IPv6 addresses @@ -195,30 +87,6 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) goto out; } -static void -destroy_ds(struct nfs4_pnfs_ds *ds) -{ - struct nfs4_pnfs_ds_addr *da; - - dprintk("--> %s\n", __func__); - ifdebug(FACILITY) - print_ds(ds); - - nfs_put_client(ds->ds_clp); - - while (!list_empty(&ds->ds_addrs)) { - da = list_first_entry(&ds->ds_addrs, - struct nfs4_pnfs_ds_addr, - da_node); - list_del_init(&da->da_node); - kfree(da->da_remotestr); - kfree(da); - } - - kfree(ds->ds_remotestr); - kfree(ds); -} - void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) { @@ -229,112 +97,13 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) for (i = 0; i < dsaddr->ds_num; i++) { ds = dsaddr->ds_list[i]; - if (ds != NULL) { - if (atomic_dec_and_lock(&ds->ds_count, - &nfs4_ds_cache_lock)) { - list_del_init(&ds->ds_node); - spin_unlock(&nfs4_ds_cache_lock); - destroy_ds(ds); - } - } + if (ds != NULL) + nfs4_pnfs_ds_put(ds); } kfree(dsaddr->stripe_indices); kfree(dsaddr); } -/* - * Create a string with a human readable address and port to avoid - * complicated setup around many dprinks. - */ -static char * -nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) -{ - struct nfs4_pnfs_ds_addr *da; - char *remotestr; - size_t len; - char *p; - - len = 3; /* '{', '}' and eol */ - list_for_each_entry(da, dsaddrs, da_node) { - len += strlen(da->da_remotestr) + 1; /* string plus comma */ - } - - remotestr = kzalloc(len, gfp_flags); - if (!remotestr) - return NULL; - - p = remotestr; - *(p++) = '{'; - len--; - list_for_each_entry(da, dsaddrs, da_node) { - size_t ll = strlen(da->da_remotestr); - - if (ll > len) - goto out_err; - - memcpy(p, da->da_remotestr, ll); - p += ll; - len -= ll; - - if (len < 1) - goto out_err; - (*p++) = ','; - len--; - } - if (len < 2) - goto out_err; - *(p++) = '}'; - *p = '\0'; - return remotestr; -out_err: - kfree(remotestr); - return NULL; -} - -static struct nfs4_pnfs_ds * -nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) -{ - struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; - char *remotestr; - - if (list_empty(dsaddrs)) { - dprintk("%s: no addresses defined\n", __func__); - goto out; - } - - ds = kzalloc(sizeof(*ds), gfp_flags); - if (!ds) - goto out; - - /* this is only used for debugging, so it's ok if its NULL */ - remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); - - spin_lock(&nfs4_ds_cache_lock); - tmp_ds = _data_server_lookup_locked(dsaddrs); - if (tmp_ds == NULL) { - INIT_LIST_HEAD(&ds->ds_addrs); - list_splice_init(dsaddrs, &ds->ds_addrs); - ds->ds_remotestr = remotestr; - atomic_set(&ds->ds_count, 1); - INIT_LIST_HEAD(&ds->ds_node); - ds->ds_clp = NULL; - list_add(&ds->ds_node, &nfs4_data_server_cache); - dprintk("%s add new data server %s\n", __func__, - ds->ds_remotestr); - } else { - kfree(remotestr); - kfree(ds); - atomic_inc(&tmp_ds->ds_count); - dprintk("%s data server %s found, inc'ed ds_count to %d\n", - __func__, tmp_ds->ds_remotestr, - atomic_read(&tmp_ds->ds_count)); - ds = tmp_ds; - } - spin_unlock(&nfs4_ds_cache_lock); -out: - return ds; -} - /* * Currently only supports ipv4, ipv6 and one multi-path address. */ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index e94f6050e9b176..b0168f1dd0728d 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -40,6 +40,24 @@ enum { NFS_LSEG_LAYOUTCOMMIT, /* layoutcommit bit set for layoutcommit */ }; +/* Individual ip address */ +struct nfs4_pnfs_ds_addr { + struct sockaddr_storage da_addr; + size_t da_addrlen; + struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */ + char *da_remotestr; /* human readable addr+port */ +}; + +struct nfs4_pnfs_ds { + struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ + char *ds_remotestr; /* comma sep list of addrs */ + struct list_head ds_addrs; + struct nfs_client *ds_clp; + atomic_t ds_count; + unsigned long ds_state; +#define NFS4DS_CONNECTING 0 /* ds is establishing connection */ +}; + struct pnfs_layout_segment { struct list_head pls_list; struct list_head pls_lc_list; @@ -291,6 +309,9 @@ int pnfs_generic_commit_pagelist(struct inode *inode, int how)); int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max); void pnfs_generic_write_commit_done(struct rpc_task *task, void *data); +void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds); +struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs, + gfp_t gfp_flags); static inline struct nfs4_deviceid_node * nfs4_get_deviceid(struct nfs4_deviceid_node *d) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index fd2a2f0e8cbbb4..3bb2b74cf60041 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -13,6 +13,8 @@ #include "internal.h" #include "pnfs.h" +#define NFSDBG_FACILITY NFSDBG_PNFS + static void pnfs_generic_fenceme(struct inode *inode, struct pnfs_layout_hdr *lo) { @@ -290,3 +292,243 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, return PNFS_ATTEMPTED; } EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist); + +/* + * Data server cache + * + * Data servers can be mapped to different device ids. + * nfs4_pnfs_ds reference counting + * - set to 1 on allocation + * - incremented when a device id maps a data server already in the cache. + * - decremented when deviceid is removed from the cache. + */ +static DEFINE_SPINLOCK(nfs4_ds_cache_lock); +static LIST_HEAD(nfs4_data_server_cache); + +/* Debug routines */ +static void +print_ds(struct nfs4_pnfs_ds *ds) +{ + if (ds == NULL) { + printk(KERN_WARNING "%s NULL device\n", __func__); + return; + } + printk(KERN_WARNING " ds %s\n" + " ref count %d\n" + " client %p\n" + " cl_exchange_flags %x\n", + ds->ds_remotestr, + atomic_read(&ds->ds_count), ds->ds_clp, + ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); +} + +static bool +same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) +{ + struct sockaddr_in *a, *b; + struct sockaddr_in6 *a6, *b6; + + if (addr1->sa_family != addr2->sa_family) + return false; + + switch (addr1->sa_family) { + case AF_INET: + a = (struct sockaddr_in *)addr1; + b = (struct sockaddr_in *)addr2; + + if (a->sin_addr.s_addr == b->sin_addr.s_addr && + a->sin_port == b->sin_port) + return true; + break; + + case AF_INET6: + a6 = (struct sockaddr_in6 *)addr1; + b6 = (struct sockaddr_in6 *)addr2; + + /* LINKLOCAL addresses must have matching scope_id */ + if (ipv6_addr_src_scope(&a6->sin6_addr) == + IPV6_ADDR_SCOPE_LINKLOCAL && + a6->sin6_scope_id != b6->sin6_scope_id) + return false; + + if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && + a6->sin6_port == b6->sin6_port) + return true; + break; + + default: + dprintk("%s: unhandled address family: %u\n", + __func__, addr1->sa_family); + return false; + } + + return false; +} + +static bool +_same_data_server_addrs_locked(const struct list_head *dsaddrs1, + const struct list_head *dsaddrs2) +{ + struct nfs4_pnfs_ds_addr *da1, *da2; + + /* step through both lists, comparing as we go */ + for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node), + da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node); + da1 != NULL && da2 != NULL; + da1 = list_entry(da1->da_node.next, typeof(*da1), da_node), + da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) { + if (!same_sockaddr((struct sockaddr *)&da1->da_addr, + (struct sockaddr *)&da2->da_addr)) + return false; + } + if (da1 == NULL && da2 == NULL) + return true; + + return false; +} + +/* + * Lookup DS by addresses. nfs4_ds_cache_lock is held + */ +static struct nfs4_pnfs_ds * +_data_server_lookup_locked(const struct list_head *dsaddrs) +{ + struct nfs4_pnfs_ds *ds; + + list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) + if (_same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs)) + return ds; + return NULL; +} + +static void destroy_ds(struct nfs4_pnfs_ds *ds) +{ + struct nfs4_pnfs_ds_addr *da; + + dprintk("--> %s\n", __func__); + ifdebug(FACILITY) + print_ds(ds); + + nfs_put_client(ds->ds_clp); + + while (!list_empty(&ds->ds_addrs)) { + da = list_first_entry(&ds->ds_addrs, + struct nfs4_pnfs_ds_addr, + da_node); + list_del_init(&da->da_node); + kfree(da->da_remotestr); + kfree(da); + } + + kfree(ds->ds_remotestr); + kfree(ds); +} + +void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds) +{ + if (atomic_dec_and_lock(&ds->ds_count, + &nfs4_ds_cache_lock)) { + list_del_init(&ds->ds_node); + spin_unlock(&nfs4_ds_cache_lock); + destroy_ds(ds); + } +} +EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_put); + +/* + * Create a string with a human readable address and port to avoid + * complicated setup around many dprinks. + */ +static char * +nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) +{ + struct nfs4_pnfs_ds_addr *da; + char *remotestr; + size_t len; + char *p; + + len = 3; /* '{', '}' and eol */ + list_for_each_entry(da, dsaddrs, da_node) { + len += strlen(da->da_remotestr) + 1; /* string plus comma */ + } + + remotestr = kzalloc(len, gfp_flags); + if (!remotestr) + return NULL; + + p = remotestr; + *(p++) = '{'; + len--; + list_for_each_entry(da, dsaddrs, da_node) { + size_t ll = strlen(da->da_remotestr); + + if (ll > len) + goto out_err; + + memcpy(p, da->da_remotestr, ll); + p += ll; + len -= ll; + + if (len < 1) + goto out_err; + (*p++) = ','; + len--; + } + if (len < 2) + goto out_err; + *(p++) = '}'; + *p = '\0'; + return remotestr; +out_err: + kfree(remotestr); + return NULL; +} + +/* + * Given a list of multipath struct nfs4_pnfs_ds_addr, add it to ds cache if + * uncached and return cached struct nfs4_pnfs_ds. + */ +struct nfs4_pnfs_ds * +nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) +{ + struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; + char *remotestr; + + if (list_empty(dsaddrs)) { + dprintk("%s: no addresses defined\n", __func__); + goto out; + } + + ds = kzalloc(sizeof(*ds), gfp_flags); + if (!ds) + goto out; + + /* this is only used for debugging, so it's ok if its NULL */ + remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); + + spin_lock(&nfs4_ds_cache_lock); + tmp_ds = _data_server_lookup_locked(dsaddrs); + if (tmp_ds == NULL) { + INIT_LIST_HEAD(&ds->ds_addrs); + list_splice_init(dsaddrs, &ds->ds_addrs); + ds->ds_remotestr = remotestr; + atomic_set(&ds->ds_count, 1); + INIT_LIST_HEAD(&ds->ds_node); + ds->ds_clp = NULL; + list_add(&ds->ds_node, &nfs4_data_server_cache); + dprintk("%s add new data server %s\n", __func__, + ds->ds_remotestr); + } else { + kfree(remotestr); + kfree(ds); + atomic_inc(&tmp_ds->ds_count); + dprintk("%s data server %s found, inc'ed ds_count to %d\n", + __func__, tmp_ds->ds_remotestr, + atomic_read(&tmp_ds->ds_count)); + ds = tmp_ds; + } + spin_unlock(&nfs4_ds_cache_lock); +out: + return ds; +} +EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add); From 6b7f3cf96364eaf597940cb5c68a682894829915 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 29 May 2014 21:06:59 +0800 Subject: [PATCH 395/601] nfs41: pull decode_ds_addr from file layout to generic pnfs It can be reused by flexfile layout. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/filelayout/filelayoutdev.c | 152 +----------------------------- fs/nfs/pnfs.h | 3 + fs/nfs/pnfs_nfs.c | 149 +++++++++++++++++++++++++++++ 3 files changed, 154 insertions(+), 150 deletions(-) diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index fbfbb701159d79..c7f6041a287fd7 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -31,7 +31,6 @@ #include #include #include -#include #include "../internal.h" #include "../nfs4session.h" @@ -104,153 +103,6 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) kfree(dsaddr); } -/* - * Currently only supports ipv4, ipv6 and one multi-path address. - */ -static struct nfs4_pnfs_ds_addr * -decode_ds_addr(struct net *net, struct xdr_stream *streamp, gfp_t gfp_flags) -{ - struct nfs4_pnfs_ds_addr *da = NULL; - char *buf, *portstr; - __be16 port; - int nlen, rlen; - int tmp[2]; - __be32 *p; - char *netid, *match_netid; - size_t len, match_netid_len; - char *startsep = ""; - char *endsep = ""; - - - /* r_netid */ - p = xdr_inline_decode(streamp, 4); - if (unlikely(!p)) - goto out_err; - nlen = be32_to_cpup(p++); - - p = xdr_inline_decode(streamp, nlen); - if (unlikely(!p)) - goto out_err; - - netid = kmalloc(nlen+1, gfp_flags); - if (unlikely(!netid)) - goto out_err; - - netid[nlen] = '\0'; - memcpy(netid, p, nlen); - - /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ - p = xdr_inline_decode(streamp, 4); - if (unlikely(!p)) - goto out_free_netid; - rlen = be32_to_cpup(p); - - p = xdr_inline_decode(streamp, rlen); - if (unlikely(!p)) - goto out_free_netid; - - /* port is ".ABC.DEF", 8 chars max */ - if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { - dprintk("%s: Invalid address, length %d\n", __func__, - rlen); - goto out_free_netid; - } - buf = kmalloc(rlen + 1, gfp_flags); - if (!buf) { - dprintk("%s: Not enough memory\n", __func__); - goto out_free_netid; - } - buf[rlen] = '\0'; - memcpy(buf, p, rlen); - - /* replace port '.' with '-' */ - portstr = strrchr(buf, '.'); - if (!portstr) { - dprintk("%s: Failed finding expected dot in port\n", - __func__); - goto out_free_buf; - } - *portstr = '-'; - - /* find '.' between address and port */ - portstr = strrchr(buf, '.'); - if (!portstr) { - dprintk("%s: Failed finding expected dot between address and " - "port\n", __func__); - goto out_free_buf; - } - *portstr = '\0'; - - da = kzalloc(sizeof(*da), gfp_flags); - if (unlikely(!da)) - goto out_free_buf; - - INIT_LIST_HEAD(&da->da_node); - - if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, - sizeof(da->da_addr))) { - dprintk("%s: error parsing address %s\n", __func__, buf); - goto out_free_da; - } - - portstr++; - sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); - port = htons((tmp[0] << 8) | (tmp[1])); - - switch (da->da_addr.ss_family) { - case AF_INET: - ((struct sockaddr_in *)&da->da_addr)->sin_port = port; - da->da_addrlen = sizeof(struct sockaddr_in); - match_netid = "tcp"; - match_netid_len = 3; - break; - - case AF_INET6: - ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; - da->da_addrlen = sizeof(struct sockaddr_in6); - match_netid = "tcp6"; - match_netid_len = 4; - startsep = "["; - endsep = "]"; - break; - - default: - dprintk("%s: unsupported address family: %u\n", - __func__, da->da_addr.ss_family); - goto out_free_da; - } - - if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { - dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", - __func__, netid, match_netid); - goto out_free_da; - } - - /* save human readable address */ - len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; - da->da_remotestr = kzalloc(len, gfp_flags); - - /* NULL is ok, only used for dprintk */ - if (da->da_remotestr) - snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, - buf, endsep, ntohs(port)); - - dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); - kfree(buf); - kfree(netid); - return da; - -out_free_da: - kfree(da); -out_free_buf: - dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); - kfree(buf); -out_free_netid: - kfree(netid); -out_err: - return NULL; -} - /* Decode opaque device data and return the result */ struct nfs4_file_layout_dsaddr * nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, @@ -353,8 +205,8 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, mp_count = be32_to_cpup(p); /* multipath count */ for (j = 0; j < mp_count; j++) { - da = decode_ds_addr(server->nfs_client->cl_net, - &stream, gfp_flags); + da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, + &stream, gfp_flags); if (da) list_add_tail(&da->da_node, &dsaddrs); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index b0168f1dd0728d..403d7bb67c4167 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -312,6 +312,9 @@ void pnfs_generic_write_commit_done(struct rpc_task *task, void *data); void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds); struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags); +struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net, + struct xdr_stream *xdr, + gfp_t gfp_flags); static inline struct nfs4_deviceid_node * nfs4_get_deviceid(struct nfs4_deviceid_node *d) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 3bb2b74cf60041..81ec449138a8a8 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -9,6 +9,7 @@ #include #include +#include #include "internal.h" #include "pnfs.h" @@ -532,3 +533,151 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) return ds; } EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add); + +/* + * Currently only supports ipv4, ipv6 and one multi-path address. + */ +struct nfs4_pnfs_ds_addr * +nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags) +{ + struct nfs4_pnfs_ds_addr *da = NULL; + char *buf, *portstr; + __be16 port; + int nlen, rlen; + int tmp[2]; + __be32 *p; + char *netid, *match_netid; + size_t len, match_netid_len; + char *startsep = ""; + char *endsep = ""; + + + /* r_netid */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_err; + nlen = be32_to_cpup(p++); + + p = xdr_inline_decode(xdr, nlen); + if (unlikely(!p)) + goto out_err; + + netid = kmalloc(nlen+1, gfp_flags); + if (unlikely(!netid)) + goto out_err; + + netid[nlen] = '\0'; + memcpy(netid, p, nlen); + + /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_free_netid; + rlen = be32_to_cpup(p); + + p = xdr_inline_decode(xdr, rlen); + if (unlikely(!p)) + goto out_free_netid; + + /* port is ".ABC.DEF", 8 chars max */ + if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { + dprintk("%s: Invalid address, length %d\n", __func__, + rlen); + goto out_free_netid; + } + buf = kmalloc(rlen + 1, gfp_flags); + if (!buf) { + dprintk("%s: Not enough memory\n", __func__); + goto out_free_netid; + } + buf[rlen] = '\0'; + memcpy(buf, p, rlen); + + /* replace port '.' with '-' */ + portstr = strrchr(buf, '.'); + if (!portstr) { + dprintk("%s: Failed finding expected dot in port\n", + __func__); + goto out_free_buf; + } + *portstr = '-'; + + /* find '.' between address and port */ + portstr = strrchr(buf, '.'); + if (!portstr) { + dprintk("%s: Failed finding expected dot between address and " + "port\n", __func__); + goto out_free_buf; + } + *portstr = '\0'; + + da = kzalloc(sizeof(*da), gfp_flags); + if (unlikely(!da)) + goto out_free_buf; + + INIT_LIST_HEAD(&da->da_node); + + if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, + sizeof(da->da_addr))) { + dprintk("%s: error parsing address %s\n", __func__, buf); + goto out_free_da; + } + + portstr++; + sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); + port = htons((tmp[0] << 8) | (tmp[1])); + + switch (da->da_addr.ss_family) { + case AF_INET: + ((struct sockaddr_in *)&da->da_addr)->sin_port = port; + da->da_addrlen = sizeof(struct sockaddr_in); + match_netid = "tcp"; + match_netid_len = 3; + break; + + case AF_INET6: + ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; + da->da_addrlen = sizeof(struct sockaddr_in6); + match_netid = "tcp6"; + match_netid_len = 4; + startsep = "["; + endsep = "]"; + break; + + default: + dprintk("%s: unsupported address family: %u\n", + __func__, da->da_addr.ss_family); + goto out_free_da; + } + + if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { + dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", + __func__, netid, match_netid); + goto out_free_da; + } + + /* save human readable address */ + len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; + da->da_remotestr = kzalloc(len, gfp_flags); + + /* NULL is ok, only used for dprintk */ + if (da->da_remotestr) + snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, + buf, endsep, ntohs(port)); + + dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); + kfree(buf); + kfree(netid); + return da; + +out_free_da: + kfree(da); +out_free_buf: + dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); + kfree(buf); +out_free_netid: + kfree(netid); +out_err: + return NULL; +} +EXPORT_SYMBOL_GPL(nfs4_decode_mp_ds_addr); From 7405f9e195aab95e147cc225f203d11fa74b65a8 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 29 May 2014 21:06:58 +0800 Subject: [PATCH 396/601] nfs41: pull nfs4_ds_connect from file layout to generic pnfs It can be reused by flexfiles layout client. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/filelayout/filelayoutdev.c | 78 ++--------------------------- fs/nfs/pnfs.h | 3 ++ fs/nfs/pnfs_nfs.c | 81 +++++++++++++++++++++++++++++++ 3 files changed, 89 insertions(+), 73 deletions(-) diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index c7f6041a287fd7..27bdd8ce177e71 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -41,51 +41,6 @@ static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; -/* - * Create an rpc connection to the nfs4_pnfs_ds data server - * Currently only supports IPv4 and IPv6 addresses - */ -static int -nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) -{ - struct nfs_client *clp = ERR_PTR(-EIO); - struct nfs4_pnfs_ds_addr *da; - int status = 0; - - dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, - mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); - - list_for_each_entry(da, &ds->ds_addrs, da_node) { - dprintk("%s: DS %s: trying address %s\n", - __func__, ds->ds_remotestr, da->da_remotestr); - - clp = nfs4_set_ds_client(mds_srv->nfs_client, - (struct sockaddr *)&da->da_addr, - da->da_addrlen, IPPROTO_TCP, - dataserver_timeo, dataserver_retrans); - if (!IS_ERR(clp)) - break; - } - - if (IS_ERR(clp)) { - status = PTR_ERR(clp); - goto out; - } - - status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time); - if (status) - goto out_put; - - smp_wmb(); - ds->ds_clp = clp; - dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); -out: - return status; -out_put: - nfs_put_client(clp); - goto out; -} - void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) { @@ -302,22 +257,7 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) return flseg->fh_array[i]; } -static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) -{ - might_sleep(); - wait_on_bit_action(&ds->ds_state, NFS4DS_CONNECTING, - nfs_wait_bit_killable, TASK_KILLABLE); -} - -static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) -{ - smp_mb__before_atomic(); - clear_bit(NFS4DS_CONNECTING, &ds->ds_state); - smp_mb__after_atomic(); - wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING); -} - - +/* Upon return, either ds is connected, or ds is NULL */ struct nfs4_pnfs_ds * nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) { @@ -325,6 +265,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); struct nfs4_pnfs_ds *ret = ds; + struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); if (ds == NULL) { printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", @@ -336,18 +277,9 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) if (ds->ds_clp) goto out_test_devid; - if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { - struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); - int err; - - err = nfs4_ds_connect(s, ds); - if (err) - nfs4_mark_deviceid_unavailable(devid); - nfs4_clear_ds_conn_bit(ds); - } else { - /* Either ds is connected, or ds is NULL */ - nfs4_wait_ds_connect(ds); - } + nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, + dataserver_retrans); + out_test_devid: if (filelayout_test_devid_unavailable(devid)) ret = NULL; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 403d7bb67c4167..9a8937c31d973d 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -312,6 +312,9 @@ void pnfs_generic_write_commit_done(struct rpc_task *task, void *data); void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds); struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags); +void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, + struct nfs4_deviceid_node *devid, unsigned int timeo, + unsigned int retrans); struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags); diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 81ec449138a8a8..5a92e76c6c53b1 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -11,6 +11,7 @@ #include #include +#include "nfs4session.h" #include "internal.h" #include "pnfs.h" @@ -534,6 +535,86 @@ nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) } EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add); +static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds) +{ + might_sleep(); + wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, + TASK_KILLABLE); +} + +static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) +{ + smp_mb__before_atomic(); + clear_bit(NFS4DS_CONNECTING, &ds->ds_state); + smp_mb__after_atomic(); + wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING); +} + +static int _nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, + struct nfs4_pnfs_ds *ds, + unsigned int timeo, + unsigned int retrans) +{ + struct nfs_client *clp = ERR_PTR(-EIO); + struct nfs4_pnfs_ds_addr *da; + int status = 0; + + dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, + mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); + + list_for_each_entry(da, &ds->ds_addrs, da_node) { + dprintk("%s: DS %s: trying address %s\n", + __func__, ds->ds_remotestr, da->da_remotestr); + + clp = nfs4_set_ds_client(mds_srv->nfs_client, + (struct sockaddr *)&da->da_addr, + da->da_addrlen, IPPROTO_TCP, + timeo, retrans); + if (!IS_ERR(clp)) + break; + } + + if (IS_ERR(clp)) { + status = PTR_ERR(clp); + goto out; + } + + status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time); + if (status) + goto out_put; + + smp_wmb(); + ds->ds_clp = clp; + dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); +out: + return status; +out_put: + nfs_put_client(clp); + goto out; +} + +/* + * Create an rpc connection to the nfs4_pnfs_ds data server. + * Currently only supports IPv4 and IPv6 addresses. + * If connection fails, make devid unavailable. + */ +void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, + struct nfs4_deviceid_node *devid, unsigned int timeo, + unsigned int retrans) +{ + if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { + int err = 0; + + err = _nfs4_pnfs_ds_connect(mds_srv, ds, timeo, retrans); + if (err) + nfs4_mark_deviceid_unavailable(devid); + nfs4_clear_ds_conn_bit(ds); + } else { + nfs4_wait_ds_connect(ds); + } +} +EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect); + /* * Currently only supports ipv4, ipv6 and one multi-path address. */ From 064172f3459a914277aa309b2afd3bd5d1c3289a Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 29 May 2014 21:07:00 +0800 Subject: [PATCH 397/601] nfs41: allow LD to choose DS connection auth flavor flexfile layout may use different auth flavor as specified by MDS. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/filelayout/filelayoutdev.c | 3 ++- fs/nfs/internal.h | 3 ++- fs/nfs/nfs4client.c | 5 +++-- fs/nfs/pnfs.h | 2 +- fs/nfs/pnfs_nfs.c | 10 ++++++---- 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 27bdd8ce177e71..5e4b0cea84c8f3 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -278,7 +278,8 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) goto out_test_devid; nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, - dataserver_retrans); + dataserver_retrans, + s->nfs_client->cl_rpcclient->cl_auth->au_flavor); out_test_devid: if (filelayout_test_devid_unavailable(devid)) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index efaa31c70fbe1c..7d7c36ff09faac 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -189,7 +189,8 @@ extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, const struct sockaddr *ds_addr, int ds_addrlen, int ds_proto, unsigned int ds_timeo, - unsigned int ds_retrans); + unsigned int ds_retrans, + rpc_authflavor_t au_flavor); extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, struct inode *); #ifdef CONFIG_PROC_FS diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 953daa44a28232..62d93a116790e0 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -849,7 +849,8 @@ static int nfs4_set_client(struct nfs_server *server, */ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, const struct sockaddr *ds_addr, int ds_addrlen, - int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans) + int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, + rpc_authflavor_t au_flavor) { struct nfs_client_initdata cl_init = { .addr = ds_addr, @@ -874,7 +875,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, */ nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, - mds_clp->cl_rpcclient->cl_auth->au_flavor); + au_flavor); dprintk("<-- %s %p\n", __func__, clp); return clp; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 9a8937c31d973d..2ea9e9a7d85e71 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -314,7 +314,7 @@ struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags); void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, struct nfs4_deviceid_node *devid, unsigned int timeo, - unsigned int retrans); + unsigned int retrans, rpc_authflavor_t au_flavor); struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags); diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 5a92e76c6c53b1..106ee08ef52fa1 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -553,7 +553,8 @@ static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) static int _nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, unsigned int timeo, - unsigned int retrans) + unsigned int retrans, + rpc_authflavor_t au_flavor) { struct nfs_client *clp = ERR_PTR(-EIO); struct nfs4_pnfs_ds_addr *da; @@ -569,7 +570,7 @@ static int _nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&da->da_addr, da->da_addrlen, IPPROTO_TCP, - timeo, retrans); + timeo, retrans, au_flavor); if (!IS_ERR(clp)) break; } @@ -600,12 +601,13 @@ static int _nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, */ void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, struct nfs4_deviceid_node *devid, unsigned int timeo, - unsigned int retrans) + unsigned int retrans, rpc_authflavor_t au_flavor) { if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { int err = 0; - err = _nfs4_pnfs_ds_connect(mds_srv, ds, timeo, retrans); + err = _nfs4_pnfs_ds_connect(mds_srv, ds, timeo, + retrans, au_flavor); if (err) nfs4_mark_deviceid_unavailable(devid); nfs4_clear_ds_conn_bit(ds); From 39280a5ae8443dcc1ab3bb5ebc205aab0855b849 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 30 May 2014 18:15:55 +0800 Subject: [PATCH 398/601] nfs41: move file layout macros to generic pnfs They can be reused by flexfile layout as well. Also add a code such that if read fails on one DS and there are other DSes available to use, don't resend through MDS but through pNFS so that client can read from other DSes. Reviewed-by: Jeff Layton Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/filelayout/filelayout.h | 10 ---------- fs/nfs/pnfs.h | 11 +++++++++++ 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h index f97eea627c4f90..2896cb833a1137 100644 --- a/fs/nfs/filelayout/filelayout.h +++ b/fs/nfs/filelayout/filelayout.h @@ -32,13 +32,6 @@ #include "../pnfs.h" -/* - * Default data server connection timeout and retrans vaules. - * Set by module paramters dataserver_timeo and dataserver_retrans. - */ -#define NFS4_DEF_DS_TIMEO 600 /* in tenths of a second */ -#define NFS4_DEF_DS_RETRANS 5 - /* * Field testing shows we need to support up to 4096 stripe indices. * We store each index as a u8 (u32 on the wire) to keep the memory footprint @@ -48,9 +41,6 @@ #define NFS4_PNFS_MAX_STRIPE_CNT 4096 #define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */ -/* error codes for internal use */ -#define NFS4ERR_RESET_TO_MDS 12001 - enum stripetype4 { STRIPE_SPARSE = 1, STRIPE_DENSE = 2 diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 2ea9e9a7d85e71..aef89b347bdc84 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -77,6 +77,17 @@ enum pnfs_try_status { #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" +/* + * Default data server connection timeout and retrans vaules. + * Set by module parameters dataserver_timeo and dataserver_retrans. + */ +#define NFS4_DEF_DS_TIMEO 600 /* in tenths of a second */ +#define NFS4_DEF_DS_RETRANS 5 + +/* error codes for internal use */ +#define NFS4ERR_RESET_TO_MDS 12001 +#define NFS4ERR_RESET_TO_PNFS 12002 + enum { NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ From 1a04c6e1a26a43305fe124a0978a3e4be861af89 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 30 May 2014 18:15:57 +0800 Subject: [PATCH 399/601] nfsv3: introduce nfs3_set_ds_client The flexfiles layout wants to create DS connection over NFSv3. Add nfs3_set_ds_client to allow that to happen. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/internal.h | 4 ++++ fs/nfs/nfs3_fs.h | 2 ++ fs/nfs/nfs3client.c | 34 ++++++++++++++++++++++++++++++++++ fs/nfs/nfs3super.c | 2 +- include/linux/nfs_fs_sb.h | 9 +++++---- 5 files changed, 46 insertions(+), 5 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7d7c36ff09faac..7332ba1f693b24 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -193,6 +193,10 @@ extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, rpc_authflavor_t au_flavor); extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, struct inode *); +extern struct nfs_client *nfs3_set_ds_client(struct nfs_client *mds_clp, + const struct sockaddr *ds_addr, int ds_addrlen, + int ds_proto, unsigned int ds_timeo, + unsigned int ds_retrans, rpc_authflavor_t au_flavor); #ifdef CONFIG_PROC_FS extern int __init nfs_fs_proc_init(void); extern void nfs_fs_proc_exit(void); diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h index 333ae40685066b..e134d6548ab724 100644 --- a/fs/nfs/nfs3_fs.h +++ b/fs/nfs/nfs3_fs.h @@ -30,5 +30,7 @@ struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subver struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); +/* nfs3super.c */ +extern struct nfs_subversion nfs_v3; #endif /* __LINUX_FS_NFS_NFS3_FS_H */ diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 8c1b437c540363..52e2344bf9a1fa 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -64,3 +64,37 @@ struct nfs_server *nfs3_clone_server(struct nfs_server *source, nfs_init_server_aclclient(server); return server; } + +/* + * Set up a pNFS Data Server client over NFSv3. + * + * Return any existing nfs_client that matches server address,port,version + * and minorversion. + * + * For a new nfs_client, use a soft mount (default), a low retrans and a + * low timeout interval so that if a connection is lost, we retry through + * the MDS. + */ +struct nfs_client *nfs3_set_ds_client(struct nfs_client *mds_clp, + const struct sockaddr *ds_addr, int ds_addrlen, + int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, + rpc_authflavor_t au_flavor) +{ + struct nfs_client_initdata cl_init = { + .addr = ds_addr, + .addrlen = ds_addrlen, + .nfs_mod = &nfs_v3, + .proto = ds_proto, + .net = mds_clp->cl_net, + }; + struct rpc_timeout ds_timeout; + struct nfs_client *clp; + + /* Use the MDS nfs_client cl_ipaddr. */ + nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); + clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, + au_flavor); + + return clp; +} +EXPORT_SYMBOL_GPL(nfs3_set_ds_client); diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c index 6af29c2da35292..5c4394e4656b61 100644 --- a/fs/nfs/nfs3super.c +++ b/fs/nfs/nfs3super.c @@ -7,7 +7,7 @@ #include "nfs3_fs.h" #include "nfs.h" -static struct nfs_subversion nfs_v3 = { +struct nfs_subversion nfs_v3 = { .owner = THIS_MODULE, .nfs_fs = &nfs_fs_type, .rpc_vers = &nfs_version3, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index ddea982355f3be..5e1273d4de1406 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -77,10 +77,6 @@ struct nfs_client { /* Client owner identifier */ const char * cl_owner_id; - /* Our own IP address, as a null-terminated string. - * This is used to generate the mv0 callback address. - */ - char cl_ipaddr[48]; u32 cl_cb_ident; /* v4.0 callback identifier */ const struct nfs4_minor_version_ops *cl_mvops; unsigned long cl_mig_gen; @@ -108,6 +104,11 @@ struct nfs_client { #define NFS_SP4_MACH_CRED_COMMIT 6 /* COMMIT */ #endif /* CONFIG_NFS_V4 */ + /* Our own IP address, as a null-terminated string. + * This is used to generate the mv0 callback address. + */ + char cl_ipaddr[48]; + #ifdef CONFIG_NFS_FSCACHE struct fscache_cookie *fscache; /* client index cache cookie */ #endif From 30626f9c32f0ad5e2c4173f10fb4b1358bbba6ec Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 30 May 2014 18:15:58 +0800 Subject: [PATCH 400/601] nfs41: allow LD to choose DS connection version/minor_version flexfile layout may need to set such when making DS connections. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/filelayout/filelayoutdev.c | 3 ++- fs/nfs/internal.h | 1 + fs/nfs/nfs4client.c | 4 ++-- fs/nfs/pnfs.h | 3 ++- fs/nfs/pnfs_nfs.c | 11 +++++++---- 5 files changed, 14 insertions(+), 8 deletions(-) diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c index 5e4b0cea84c8f3..4f372e22460344 100644 --- a/fs/nfs/filelayout/filelayoutdev.c +++ b/fs/nfs/filelayout/filelayoutdev.c @@ -278,7 +278,8 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) goto out_test_devid; nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, - dataserver_retrans, + dataserver_retrans, 4, + s->nfs_client->cl_minorversion, s->nfs_client->cl_rpcclient->cl_auth->au_flavor); out_test_devid: diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7332ba1f693b24..5543850268d2a2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -190,6 +190,7 @@ extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, int ds_addrlen, int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, + u32 minor_version, rpc_authflavor_t au_flavor); extern struct rpc_clnt *nfs4_find_or_create_ds_client(struct nfs_client *, struct inode *); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 62d93a116790e0..102d96777d427a 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -850,14 +850,14 @@ static int nfs4_set_client(struct nfs_server *server, struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, const struct sockaddr *ds_addr, int ds_addrlen, int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans, - rpc_authflavor_t au_flavor) + u32 minor_version, rpc_authflavor_t au_flavor) { struct nfs_client_initdata cl_init = { .addr = ds_addr, .addrlen = ds_addrlen, .nfs_mod = &nfs_v4, .proto = ds_proto, - .minorversion = mds_clp->cl_minorversion, + .minorversion = minor_version, .net = mds_clp->cl_net, }; struct rpc_timeout ds_timeout; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index aef89b347bdc84..70ffec1356964b 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -325,7 +325,8 @@ struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags); void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, struct nfs4_deviceid_node *devid, unsigned int timeo, - unsigned int retrans, rpc_authflavor_t au_flavor); + unsigned int retrans, u32 versoin, u32 minor_version, + rpc_authflavor_t au_flavor); struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags); diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 106ee08ef52fa1..ad211a4e1874fa 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -554,6 +554,7 @@ static int _nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, unsigned int timeo, unsigned int retrans, + u32 minor_version, rpc_authflavor_t au_flavor) { struct nfs_client *clp = ERR_PTR(-EIO); @@ -570,7 +571,8 @@ static int _nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&da->da_addr, da->da_addrlen, IPPROTO_TCP, - timeo, retrans, au_flavor); + timeo, retrans, minor_version, + au_flavor); if (!IS_ERR(clp)) break; } @@ -601,13 +603,14 @@ static int _nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, */ void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, struct nfs4_deviceid_node *devid, unsigned int timeo, - unsigned int retrans, rpc_authflavor_t au_flavor) + unsigned int retrans, u32 version, + u32 minor_version, rpc_authflavor_t au_flavor) { if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { int err = 0; - err = _nfs4_pnfs_ds_connect(mds_srv, ds, timeo, - retrans, au_flavor); + err = _nfs4_pnfs_ds_connect(mds_srv, ds, timeo, retrans, + minor_version, au_flavor); if (err) nfs4_mark_deviceid_unavailable(devid); nfs4_clear_ds_conn_bit(ds); From 5f01d9539496577b9ee62e213f4122a2a209550c Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 30 May 2014 18:15:59 +0800 Subject: [PATCH 401/601] nfs41: create NFSv3 DS connection if specified Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/nfs4super.c | 3 ++ fs/nfs/pnfs.h | 7 +++- fs/nfs/pnfs_nfs.c | 88 +++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 93 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 6f340f02f2baa4..48cea3c30e5de0 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -346,6 +346,9 @@ static int __init init_nfs_v4(void) static void __exit exit_nfs_v4(void) { + /* Not called in the _init(), conditionally loaded */ + nfs4_pnfs_v3_ds_connect_unload(); + unregister_nfs_version(&nfs_v4); nfs4_unregister_sysctl(); nfs_idmap_quit(); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 70ffec1356964b..c3988219165194 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -323,9 +323,10 @@ void pnfs_generic_write_commit_done(struct rpc_task *task, void *data); void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds); struct nfs4_pnfs_ds *nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags); +void nfs4_pnfs_v3_ds_connect_unload(void); void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, struct nfs4_deviceid_node *devid, unsigned int timeo, - unsigned int retrans, u32 versoin, u32 minor_version, + unsigned int retrans, u32 version, u32 minor_version, rpc_authflavor_t au_flavor); struct nfs4_pnfs_ds_addr *nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, @@ -615,6 +616,10 @@ static inline struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) return NULL; } +static inline void nfs4_pnfs_v3_ds_connect_unload(void) +{ +} + #endif /* CONFIG_NFS_V4_1 */ #endif /* FS_NFS_PNFS_H */ diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index ad211a4e1874fa..23c851d4c9a9cb 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "nfs4session.h" #include "internal.h" @@ -550,7 +551,75 @@ static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds) wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING); } -static int _nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, +static struct nfs_client *(*get_v3_ds_connect)( + struct nfs_client *mds_clp, + const struct sockaddr *ds_addr, + int ds_addrlen, + int ds_proto, + unsigned int ds_timeo, + unsigned int ds_retrans, + rpc_authflavor_t au_flavor); + +static bool load_v3_ds_connect(void) +{ + if (!get_v3_ds_connect) { + get_v3_ds_connect = symbol_request(nfs3_set_ds_client); + WARN_ON_ONCE(!get_v3_ds_connect); + } + + return(get_v3_ds_connect != NULL); +} + +void __exit nfs4_pnfs_v3_ds_connect_unload(void) +{ + if (get_v3_ds_connect) { + symbol_put(nfs3_set_ds_client); + get_v3_ds_connect = NULL; + } +} +EXPORT_SYMBOL_GPL(nfs4_pnfs_v3_ds_connect_unload); + +static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, + struct nfs4_pnfs_ds *ds, + unsigned int timeo, + unsigned int retrans, + rpc_authflavor_t au_flavor) +{ + struct nfs_client *clp = ERR_PTR(-EIO); + struct nfs4_pnfs_ds_addr *da; + int status = 0; + + dprintk("--> %s DS %s au_flavor %d\n", __func__, + ds->ds_remotestr, au_flavor); + + if (!load_v3_ds_connect()) + goto out; + + list_for_each_entry(da, &ds->ds_addrs, da_node) { + dprintk("%s: DS %s: trying address %s\n", + __func__, ds->ds_remotestr, da->da_remotestr); + + clp = get_v3_ds_connect(mds_srv->nfs_client, + (struct sockaddr *)&da->da_addr, + da->da_addrlen, IPPROTO_TCP, + timeo, retrans, au_flavor); + if (!IS_ERR(clp)) + break; + } + + if (IS_ERR(clp)) { + status = PTR_ERR(clp); + goto out; + } + + smp_wmb(); + ds->ds_clp = clp; + dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); +out: + return status; +} + +static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, unsigned int timeo, unsigned int retrans, @@ -562,7 +631,7 @@ static int _nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, int status = 0; dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, - mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); + au_flavor); list_for_each_entry(da, &ds->ds_addrs, da_node) { dprintk("%s: DS %s: trying address %s\n", @@ -609,8 +678,19 @@ void nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds, if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { int err = 0; - err = _nfs4_pnfs_ds_connect(mds_srv, ds, timeo, retrans, - minor_version, au_flavor); + if (version == 3) { + err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, + retrans, au_flavor); + } else if (version == 4) { + err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo, + retrans, minor_version, + au_flavor); + } else { + dprintk("%s: unsupported DS version %d\n", __func__, + version); + err = -EPROTONOSUPPORT; + } + if (err) nfs4_mark_deviceid_unavailable(devid); nfs4_clear_ds_conn_bit(ds); From abde71f4d3c027a30f8d725e1e22001313b4481a Mon Sep 17 00:00:00 2001 From: Tom Haynes Date: Mon, 9 Jun 2014 13:12:20 -0700 Subject: [PATCH 402/601] pnfs: Add nfs_rpc_ops in calls to nfs_initiate_pgio Signed-off-by: Tom Haynes --- fs/nfs/filelayout/filelayout.c | 4 ++-- fs/nfs/internal.h | 1 + fs/nfs/pagelist.c | 6 ++++-- fs/nfs/read.c | 3 ++- fs/nfs/write.c | 6 +++--- include/linux/nfs_page.h | 1 + 6 files changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index bc36ed350a685a..25c4896887ca25 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -501,7 +501,7 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr) hdr->mds_offset = offset; /* Perform an asynchronous read to ds */ - nfs_initiate_pgio(ds_clnt, hdr, + nfs_initiate_pgio(ds_clnt, hdr, NFS_PROTO(hdr->inode), &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; } @@ -542,7 +542,7 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) hdr->args.offset = filelayout_get_dserver_offset(lseg, offset); /* Perform an asynchronous write */ - nfs_initiate_pgio(ds_clnt, hdr, + nfs_initiate_pgio(ds_clnt, hdr, NFS_PROTO(hdr->inode), &filelayout_write_call_ops, sync, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5543850268d2a2..1d15ffa949377a 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -251,6 +251,7 @@ void nfs_pgio_header_free(struct nfs_pgio_header *); void nfs_pgio_data_destroy(struct nfs_pgio_header *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_header *, + const struct nfs_rpc_ops *, const struct rpc_call_ops *, int, int); void nfs_free_request(struct nfs_page *req); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 2b5e769beb16b4..35a2626a6922d1 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -597,6 +597,7 @@ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) } int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, + const struct nfs_rpc_ops *rpc_ops, const struct rpc_call_ops *call_ops, int how, int flags) { struct rpc_task *task; @@ -616,7 +617,7 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, }; int ret = 0; - hdr->rw_ops->rw_initiate(hdr, &msg, &task_setup_data, how); + hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how); dprintk("NFS: %5u initiated pgio call " "(req %s/%llu, %u bytes @ offset %llu)\n", @@ -792,7 +793,8 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) ret = nfs_generic_pgio(desc, hdr); if (ret == 0) ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), - hdr, desc->pg_rpc_callops, + hdr, NFS_PROTO(hdr->inode), + desc->pg_rpc_callops, desc->pg_ioflags, 0); return ret; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c91a4799c5627a..092ab499f2b691 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -168,13 +168,14 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr) static void nfs_initiate_read(struct nfs_pgio_header *hdr, struct rpc_message *msg, + const struct nfs_rpc_ops *rpc_ops, struct rpc_task_setup *task_setup_data, int how) { struct inode *inode = hdr->inode; int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; task_setup_data->flags |= swap_flags; - NFS_PROTO(inode)->read_setup(hdr, msg); + rpc_ops->read_setup(hdr, msg); } static void diff --git a/fs/nfs/write.c b/fs/nfs/write.c index af3af685a9e379..54d4857e0e2b1a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1240,15 +1240,15 @@ static int flush_task_priority(int how) static void nfs_initiate_write(struct nfs_pgio_header *hdr, struct rpc_message *msg, + const struct nfs_rpc_ops *rpc_ops, struct rpc_task_setup *task_setup_data, int how) { - struct inode *inode = hdr->inode; int priority = flush_task_priority(how); task_setup_data->priority = priority; - NFS_PROTO(inode)->write_setup(hdr, msg); + rpc_ops->write_setup(hdr, msg); - nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client, + nfs4_state_protect_write(NFS_SERVER(hdr->inode)->nfs_client, &task_setup_data->rpc_client, msg, hdr); } diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 6c3e06ee2fb7af..4c3aa809ab95bd 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -69,6 +69,7 @@ struct nfs_rw_ops { struct inode *); void (*rw_result)(struct rpc_task *, struct nfs_pgio_header *); void (*rw_initiate)(struct nfs_pgio_header *, struct rpc_message *, + const struct nfs_rpc_ops *, struct rpc_task_setup *, int); }; From c36aae9ad95afa2f9a9e9109d989c21af221fabd Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 9 Jun 2014 07:10:14 +0800 Subject: [PATCH 403/601] nfs: allow different protocol in nfs_initiate_commit pnfs flexfile layout client may want to use NFSv3 ops rather than the default MDS v4 ops. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/filelayout/filelayout.c | 2 +- fs/nfs/internal.h | 1 + fs/nfs/pnfs_nfs.c | 1 + fs/nfs/write.c | 7 ++++--- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 25c4896887ca25..e5a3c5b1398f9f 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -1055,7 +1055,7 @@ static int filelayout_initiate_commit(struct nfs_commit_data *data, int how) fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); if (fh) data->args.fh = fh; - return nfs_initiate_commit(ds_clnt, data, + return nfs_initiate_commit(ds_clnt, data, NFS_PROTO(data->inode), &filelayout_commit_call_ops, how, RPC_TASK_SOFTCONN); out_err: diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 1d15ffa949377a..98dee834e9d639 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -436,6 +436,7 @@ extern void nfs_write_prepare(struct rpc_task *task, void *calldata); extern void nfs_commit_prepare(struct rpc_task *task, void *calldata); extern int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, + const struct nfs_rpc_ops *nfs_ops, const struct rpc_call_ops *call_ops, int how, int flags); extern void nfs_init_commit(struct nfs_commit_data *data, diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 23c851d4c9a9cb..c87f664587ee73 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -278,6 +278,7 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, if (!data->lseg) { nfs_init_commit(data, mds_pages, NULL, cinfo); nfs_initiate_commit(NFS_CLIENT(inode), data, + NFS_PROTO(data->inode), data->mds_ops, how, 0); } else { struct pnfs_commit_bucket *buckets; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 54d4857e0e2b1a..8800bd3b235d09 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1465,6 +1465,7 @@ void nfs_commitdata_release(struct nfs_commit_data *data) EXPORT_SYMBOL_GPL(nfs_commitdata_release); int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, + const struct nfs_rpc_ops *nfs_ops, const struct rpc_call_ops *call_ops, int how, int flags) { @@ -1486,7 +1487,7 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, .priority = priority, }; /* Set up the initial task struct. */ - NFS_PROTO(data->inode)->commit_setup(data, &msg); + nfs_ops->commit_setup(data, &msg); dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); @@ -1589,8 +1590,8 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, /* Set up the argument struct */ nfs_init_commit(data, head, NULL, cinfo); atomic_inc(&cinfo->mds->rpcs_out); - return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops, - how, 0); + return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), + data->mds_ops, how, 0); out_bad: nfs_retry_commit(head, NULL, cinfo); cinfo->completion_ops->error_cleanup(NFS_I(inode)); From cb04ad2a2bc8978e69f7f582ca2869909c4e0571 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 11 Jun 2014 05:24:15 +0800 Subject: [PATCH 404/601] nfs4: pass slot table to nfs40_setup_sequence flexclient needs this as there is no nfs_server to DS connection. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/nfs4_fs.h | 4 ++++ fs/nfs/nfs4proc.c | 24 +++++++++++++----------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a08178764cf96e..90c4ffe084d713 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -443,6 +443,10 @@ extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); extern void nfs_release_seqid(struct nfs_seqid *seqid); extern void nfs_free_seqid(struct nfs_seqid *seqid); +extern int nfs40_setup_sequence(struct nfs4_slot_table *tbl, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct rpc_task *task); extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c347705b016104..e98dda7180c132 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -495,12 +495,11 @@ static void nfs4_set_sequence_privileged(struct nfs4_sequence_args *args) args->sa_privileged = 1; } -static int nfs40_setup_sequence(const struct nfs_server *server, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res, - struct rpc_task *task) +int nfs40_setup_sequence(struct nfs4_slot_table *tbl, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct rpc_task *task) { - struct nfs4_slot_table *tbl = server->nfs_client->cl_slot_tbl; struct nfs4_slot *slot; /* slot already allocated? */ @@ -535,6 +534,7 @@ static int nfs40_setup_sequence(const struct nfs_server *server, spin_unlock(&tbl->slot_tbl_lock); return -EAGAIN; } +EXPORT_SYMBOL_GPL(nfs40_setup_sequence); static int nfs40_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) @@ -777,7 +777,8 @@ static int nfs4_setup_sequence(const struct nfs_server *server, int ret = 0; if (!session) - return nfs40_setup_sequence(server, args, res, task); + return nfs40_setup_sequence(server->nfs_client->cl_slot_tbl, + args, res, task); dprintk("--> %s clp %p session %p sr_slot %u\n", __func__, session->clp, session, res->sr_slot ? @@ -818,7 +819,8 @@ static int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_res *res, struct rpc_task *task) { - return nfs40_setup_sequence(server, args, res, task); + return nfs40_setup_sequence(server->nfs_client->cl_slot_tbl, + args, res, task); } static int nfs4_sequence_done(struct rpc_task *task, @@ -1679,8 +1681,8 @@ static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata) { struct nfs4_opendata *data = calldata; - nfs40_setup_sequence(data->o_arg.server, &data->c_arg.seq_args, - &data->c_res.seq_res, task); + nfs40_setup_sequence(data->o_arg.server->nfs_client->cl_slot_tbl, + &data->c_arg.seq_args, &data->c_res.seq_res, task); } static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata) @@ -5974,8 +5976,8 @@ static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata { struct nfs_release_lockowner_data *data = calldata; struct nfs_server *server = data->server; - nfs40_setup_sequence(server, &data->args.seq_args, - &data->res.seq_res, task); + nfs40_setup_sequence(server->nfs_client->cl_slot_tbl, + &data->args.seq_args, &data->res.seq_res, task); data->args.lock_owner.clientid = server->nfs_client->cl_clientid; data->timestamp = jiffies; } From 2c4b131dea469e4df5bd59ccb490063b69e06155 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 11 Jun 2014 05:24:16 +0800 Subject: [PATCH 405/601] nfs4: export nfs4_sequence_done Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 9 +++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 90c4ffe084d713..b3c771e2ac68f4 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -447,6 +447,8 @@ extern int nfs40_setup_sequence(struct nfs4_slot_table *tbl, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, struct rpc_task *task); +extern int nfs4_sequence_done(struct rpc_task *task, + struct nfs4_sequence_res *res); extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e98dda7180c132..f358262a95f98b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -694,8 +694,7 @@ int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) } EXPORT_SYMBOL_GPL(nfs41_sequence_done); -static int nfs4_sequence_done(struct rpc_task *task, - struct nfs4_sequence_res *res) +int nfs4_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) { if (res->sr_slot == NULL) return 1; @@ -703,6 +702,7 @@ static int nfs4_sequence_done(struct rpc_task *task, return nfs40_sequence_done(task, res); return nfs41_sequence_done(task, res); } +EXPORT_SYMBOL_GPL(nfs4_sequence_done); int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, @@ -823,11 +823,12 @@ static int nfs4_setup_sequence(const struct nfs_server *server, args, res, task); } -static int nfs4_sequence_done(struct rpc_task *task, - struct nfs4_sequence_res *res) +int nfs4_sequence_done(struct rpc_task *task, + struct nfs4_sequence_res *res) { return nfs40_sequence_done(task, res); } +EXPORT_SYMBOL_GPL(nfs4_sequence_done); #endif /* !CONFIG_NFS_V4_1 */ From 46a5ab4754cad6aeefd96feae8ba65db8655e1af Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 13 Jun 2014 23:02:25 +0800 Subject: [PATCH 406/601] nfs: allow to specify cred in nfs_initiate_pgio so that flexfile layout client can pass in DS credential instead of using user cred, which will be done in the next patch. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/filelayout/filelayout.c | 11 ++++++----- fs/nfs/internal.h | 6 +++--- fs/nfs/pagelist.c | 8 +++++--- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index e5a3c5b1398f9f..bfa8547eb2d660 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -501,8 +501,9 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr) hdr->mds_offset = offset; /* Perform an asynchronous read to ds */ - nfs_initiate_pgio(ds_clnt, hdr, NFS_PROTO(hdr->inode), - &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN); + nfs_initiate_pgio(ds_clnt, hdr, hdr->cred, + NFS_PROTO(hdr->inode), &filelayout_read_call_ops, + 0, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; } @@ -542,9 +543,9 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) hdr->args.offset = filelayout_get_dserver_offset(lseg, offset); /* Perform an asynchronous write */ - nfs_initiate_pgio(ds_clnt, hdr, NFS_PROTO(hdr->inode), - &filelayout_write_call_ops, sync, - RPC_TASK_SOFTCONN); + nfs_initiate_pgio(ds_clnt, hdr, hdr->cred, + NFS_PROTO(hdr->inode), &filelayout_write_call_ops, + sync, RPC_TASK_SOFTCONN); return PNFS_ATTEMPTED; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 98dee834e9d639..e9305e98b78218 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -250,9 +250,9 @@ struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *); void nfs_pgio_header_free(struct nfs_pgio_header *); void nfs_pgio_data_destroy(struct nfs_pgio_header *); int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *); -int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_header *, - const struct nfs_rpc_ops *, - const struct rpc_call_ops *, int, int); +int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, + struct rpc_cred *cred, const struct nfs_rpc_ops *rpc_ops, + const struct rpc_call_ops *call_ops, int how, int flags); void nfs_free_request(struct nfs_page *req); static inline void nfs_iocounter_init(struct nfs_io_counter *c) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 35a2626a6922d1..c4d175829880eb 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -597,14 +597,14 @@ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata) } int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, - const struct nfs_rpc_ops *rpc_ops, + struct rpc_cred *cred, const struct nfs_rpc_ops *rpc_ops, const struct rpc_call_ops *call_ops, int how, int flags) { struct rpc_task *task; struct rpc_message msg = { .rpc_argp = &hdr->args, .rpc_resp = &hdr->res, - .rpc_cred = hdr->cred, + .rpc_cred = cred, }; struct rpc_task_setup task_setup_data = { .rpc_client = clnt, @@ -793,7 +793,9 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) ret = nfs_generic_pgio(desc, hdr); if (ret == 0) ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode), - hdr, NFS_PROTO(hdr->inode), + hdr, + hdr->cred, + NFS_PROTO(hdr->inode), desc->pg_rpc_callops, desc->pg_ioflags, 0); return ret; From 16cecdf620eb23d2654a265d9b20e089370d7425 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 22 Jun 2014 12:55:11 -0400 Subject: [PATCH 407/601] NFSv4.1/NFSv3: Add pNFS callbacks for nfs3_(read|write|commit)_done() Enable pNFS callbacks to allow flex files to work correctly with a NFSv3-enabled data server. Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 524f9f83740825..78e557c3ab87d0 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -800,6 +800,9 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { struct inode *inode = hdr->inode; + if (hdr->pgio_done_cb != NULL) + return hdr->pgio_done_cb(task, hdr); + if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; @@ -825,6 +828,9 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { struct inode *inode = hdr->inode; + if (hdr->pgio_done_cb != NULL) + return hdr->pgio_done_cb(task, hdr); + if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; if (task->tk_status >= 0) @@ -845,6 +851,9 @@ static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commi static int nfs3_commit_done(struct rpc_task *task, struct nfs_commit_data *data) { + if (data->commit_done_cb != NULL) + return data->commit_done_cb(task, data); + if (nfs3_async_handle_jukebox(task, data->inode)) return -EAGAIN; nfs_refresh_inode(data->inode, data->res.fattr); From 840210fc4872bcbc17ab4f435f28021dce9d0aff Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 24 Jun 2014 10:59:52 -0400 Subject: [PATCH 408/601] sunrpc: add rpc_count_iostats_idx Add a call to tally stats for a task under a different statsidx than what's contained in the task structure. This is needed to properly account for pnfs reads/writes when the DS nfs version != the MDS version. Signed-off-by: Weston Andros Adamson Signed-off-by: Tom Haynes --- include/linux/sunrpc/metrics.h | 2 ++ net/sunrpc/stats.c | 26 +++++++++++++++++++------- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h index eecb5a71e6c0e4..89f2ca17887348 100644 --- a/include/linux/sunrpc/metrics.h +++ b/include/linux/sunrpc/metrics.h @@ -79,6 +79,8 @@ struct rpc_clnt; struct rpc_iostats * rpc_alloc_iostats(struct rpc_clnt *); void rpc_count_iostats(const struct rpc_task *, struct rpc_iostats *); +void rpc_count_iostats_metrics(const struct rpc_task *, + struct rpc_iostats *); void rpc_print_iostats(struct seq_file *, struct rpc_clnt *); void rpc_free_iostats(struct rpc_iostats *); diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 9711a155bc5007..2ecb994314c11b 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -140,22 +140,20 @@ void rpc_free_iostats(struct rpc_iostats *stats) EXPORT_SYMBOL_GPL(rpc_free_iostats); /** - * rpc_count_iostats - tally up per-task stats + * rpc_count_iostats_metrics - tally up per-task stats * @task: completed rpc_task - * @stats: array of stat structures + * @op_metrics: stat structure for OP that will accumulate stats from @task */ -void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) +void rpc_count_iostats_metrics(const struct rpc_task *task, + struct rpc_iostats *op_metrics) { struct rpc_rqst *req = task->tk_rqstp; - struct rpc_iostats *op_metrics; ktime_t delta, now; - if (!stats || !req) + if (!op_metrics || !req) return; now = ktime_get(); - op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx]; - spin_lock(&op_metrics->om_lock); op_metrics->om_ops++; @@ -175,6 +173,20 @@ void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) spin_unlock(&op_metrics->om_lock); } +EXPORT_SYMBOL_GPL(rpc_count_iostats_metrics); + +/** + * rpc_count_iostats - tally up per-task stats + * @task: completed rpc_task + * @stats: array of stat structures + * + * Uses the statidx from @task + */ +void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) +{ + rpc_count_iostats_metrics(task, + &stats[task->tk_msg.rpc_proc->p_statidx]); +} EXPORT_SYMBOL_GPL(rpc_count_iostats); static void _print_name(struct seq_file *seq, unsigned int op, From 36d3e3dcc93f1d3f70916ace76d94267b8948a2a Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Tue, 8 Jul 2014 06:21:10 +0800 Subject: [PATCH 409/601] nfs: set hostname when creating nfsv3 ds connection lockd assumes hostname exists otherwise kernel oops. It can be reproduced by following steps: 1. mount flexfile MDS 2. write some files 3. mount DS via nfsv3 BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] strlen+0x2/0x20 PGD 0 Oops: 0000 [#1] SMP Modules linked in: nfsd(F) nfs_layout_flexfiles(F) rpcsec_gss_krb5(F) auth_rpcgss(F) nfsv4(F) dns_resolver(F) nfsv3(F) nfs_acl(F) nfs(F) lockd(F) sunrpc(F) fscache(F) ebtable_nat(F) nf_conntrack_netbios_ns(F) nf_conntrack_broadcast(F) ipt_MASQUERADE(F) ip6table_nat(F) nf_nat_ipv6(F) ip6table_mangle(F) ip6t_REJECT(F) nf_conntrack_ipv6(F) nf_defrag_ipv6(F) iptable_nat(F) nf_nat_ipv4(F) nf_nat(F) iptable_mangle(F) nf_conntrack_ipv4(F) nf_defrag_ipv4(F) xt_conntrack(F) nf_conntrack(F) ebtable_filter(F) ebtables(F) ip6table_filter(F) ip6_tables(F) bnep(F) snd_ens1371(F) snd_rawmidi(F) snd_ac97_codec(F) btusb(F) ac97_bus(F) snd_seq(F) snd_seq_device(F) snd_pcm(F) ppdev(F) bluetooth(F) 6lowpan_iphc(F) rfkill(F) vmw_balloon(F) snd_timer(F) snd(F) soundcore(F) gameport(F) i2c_piix4(F) e1000(F) vmw_vmci(F) parport_pc(F) parport(F) shpchp(F) uinput(F) xfs(F) libcrc32c(F) vmwgfx(F) ttm(F) drm(F) mptspi(F) scsi_transport_spi(F) mptscsih(F) mptbase(F) i2c_core(F) CPU: 0 PID: 10397 Comm: mount.nfs Tainted: GF 3.14.7-100.pd_client.001.fc16.x86_64 #1 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/31/2013 task: ffff880008942600 ti: ffff880007990000 task.ti: ffff880007990000 RIP: 0010:[] [] strlen+0x2/0x20 RSP: 0018:ffff880007991aa0 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff880038d39c20 RCX: 0000000000000004 RDX: 0000000000000006 RSI: 0000000000000010 RDI: 0000000000000000 RBP: ffff880007991b38 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000014600 R11: 0000000000000400 R12: ffffffff81cc8580 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000004 FS: 00007f90cd2ef880(0000) GS:ffff88003f600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000001710000 CR4: 00000000001407f0 Stack: ffffffffa045f52c ffff880001782230 ffff880004141e28 0006880007991ac8 ffffffff816dc14b ffff880000000000 ffff880038d39c20 0000000000000010 0000000481cc0006 0000000000000000 ffffffffa0410be8 000000000000c014 Call Trace: [] ? nlmclnt_lookup_host+0x4c/0x2c0 [lockd] [] ? _raw_spin_unlock_bh+0x1b/0x20 [] ? svc_destroy+0xb8/0x140 [sunrpc] [] nlmclnt_init+0x53/0xc0 [lockd] [] ? nfs_get_client+0x1cc/0x340 [nfs] [] nfs_start_lockd+0xa7/0xd0 [nfs] [] nfs_create_server+0x181/0x5c0 [nfs] [] nfs3_create_server+0x13/0x30 [nfsv3] [] nfs_try_mount+0x21c/0x300 [nfs] [] ? __kmalloc_track_caller+0x1ad/0x240 [] ? nfs_fs_mount+0xc37/0xd80 [nfs] [] nfs_fs_mount+0x2c5/0xd80 [nfs] [] ? nfs_clone_super+0x140/0x140 [nfs] [] ? nfs_clone_sb_security+0x40/0x40 [nfs] [] mount_fs+0x43/0x1b0 [] ? __alloc_percpu+0x10/0x20 [] vfs_kern_mount+0x76/0x120 [] do_mount+0x237/0xa80 Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs3client.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 52e2344bf9a1fa..9e9fa347a94869 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -1,5 +1,6 @@ #include #include +#include #include "internal.h" #include "nfs3_fs.h" @@ -89,6 +90,12 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_client *mds_clp, }; struct rpc_timeout ds_timeout; struct nfs_client *clp; + char buf[INET6_ADDRSTRLEN + 1]; + + /* fake a hostname because lockd wants it */ + if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0) + return ERR_PTR(-EINVAL); + cl_init.hostname = buf; /* Use the MDS nfs_client cl_ipaddr. */ nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); From 72cff4494ea981202c8db6fd18940c8506f14db4 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 7 Aug 2014 10:12:38 +0800 Subject: [PATCH 410/601] nfs/flexclient: export pnfs_layoutcommit_inode flexfiles needs to start layoutcommit when necessary Signed-off-by: Peng Tao --- fs/nfs/pnfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0a5dda4d85c27b..2d25670bbe44aa 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1966,6 +1966,7 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) pnfs_clear_layoutcommitting(inode); goto out; } +EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void) { From abb9a0079c7f06360b83a5dd27ce74b8dc6d01b6 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 22 Aug 2014 17:37:40 +0800 Subject: [PATCH 411/601] nfs41: close a small race window when adding new layout to global list Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2d25670bbe44aa..fa00b56f176a40 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1288,7 +1288,6 @@ pnfs_update_layout(struct inode *ino, struct nfs_client *clp = server->nfs_client; struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg = NULL; - bool first; if (!pnfs_enabled_sb(NFS_SERVER(ino))) goto out; @@ -1321,16 +1320,15 @@ pnfs_update_layout(struct inode *ino, if (pnfs_layoutgets_blocked(lo, 0)) goto out_unlock; atomic_inc(&lo->plh_outstanding); - - first = list_empty(&lo->plh_layouts) ? true : false; spin_unlock(&ino->i_lock); - if (first) { + if (list_empty(&lo->plh_layouts)) { /* The lo must be on the clp list if there is any * chance of a CB_LAYOUTRECALL(FILE) coming in. */ spin_lock(&clp->cl_lock); - list_add_tail(&lo->plh_layouts, &server->layouts); + if (list_empty(&lo->plh_layouts)) + list_add_tail(&lo->plh_layouts, &server->layouts); spin_unlock(&clp->cl_lock); } From 9bf87482ddc6f8db884177a2a16b1a1dc12f8777 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 22 Aug 2014 17:37:41 +0800 Subject: [PATCH 412/601] nfs41: serialize first layoutget of a file Per RFC 5661 Errata 3208: | A client MAY always forget its layout state and associated | layout stateid at any time (See also section 12.5.5.1). | In such case, the client MUST use a non-layout stateid for the next | LAYOUTGET operation. This will signal the server that the client has | no more layouts on the file and its respective layout state can be | released before issuing a new layout in response to LAYOUTGET. In order to make such a signal unique to server, client needs to serialize all layoutgets using non-layout stateid. We implement this by serializing layoutgets when client has no layout segments at hand. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 35 +++++++++++++++++++++++++++++++---- fs/nfs/pnfs.h | 1 + 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index fa00b56f176a40..7e1bac189d1cb9 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1288,6 +1288,7 @@ pnfs_update_layout(struct inode *ino, struct nfs_client *clp = server->nfs_client; struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg = NULL; + bool first; if (!pnfs_enabled_sb(NFS_SERVER(ino))) goto out; @@ -1295,6 +1296,8 @@ pnfs_update_layout(struct inode *ino, if (pnfs_within_mdsthreshold(ctx, ino, iomode)) goto out; +lookup_again: + first = false; spin_lock(&ino->i_lock); lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); if (lo == NULL) { @@ -1312,10 +1315,27 @@ pnfs_update_layout(struct inode *ino, if (pnfs_layout_io_test_failed(lo, iomode)) goto out_unlock; - /* Check to see if the layout for the given range already exists */ - lseg = pnfs_find_lseg(lo, &arg); - if (lseg) - goto out_unlock; + first = list_empty(&lo->plh_segs); + if (first) { + /* The first layoutget for the file. Need to serialize per + * RFC 5661 Errata 3208. + */ + if (test_and_set_bit(NFS_LAYOUT_FIRST_LAYOUTGET, + &lo->plh_flags)) { + spin_unlock(&ino->i_lock); + wait_on_bit(&lo->plh_flags, NFS_LAYOUT_FIRST_LAYOUTGET, + TASK_UNINTERRUPTIBLE); + pnfs_put_layout_hdr(lo); + goto lookup_again; + } + } else { + /* Check to see if the layout for the given range + * already exists + */ + lseg = pnfs_find_lseg(lo, &arg); + if (lseg) + goto out_unlock; + } if (pnfs_layoutgets_blocked(lo, 0)) goto out_unlock; @@ -1343,6 +1363,13 @@ pnfs_update_layout(struct inode *ino, lseg = send_layoutget(lo, ctx, &arg, gfp_flags); atomic_dec(&lo->plh_outstanding); out_put_layout_hdr: + if (first) { + unsigned long *bitlock = &lo->plh_flags; + + clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock); + smp_mb__after_atomic(); + wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); + } pnfs_put_layout_hdr(lo); out: dprintk("%s: inode %s/%llu pNFS layout segment %s for " diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index c3988219165194..4cf0d54e14c349 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -95,6 +95,7 @@ enum { NFS_LAYOUT_ROC, /* some lseg had roc bit set */ NFS_LAYOUT_RETURN, /* Return this layout ASAP */ NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ + NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ }; enum layoutdriver_policy_flags { From aabff4ddcac0d36dd26546f5b905c27682e7bf89 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Wed, 27 Aug 2014 10:47:14 +0800 Subject: [PATCH 413/601] nfs: save server READ/WRITE/COMMIT status Flexfiles layout would want to use them to report DS IO status. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/nfs2xdr.c | 10 +++++++--- fs/nfs/nfs3xdr.c | 3 +++ fs/nfs/nfs4xdr.c | 3 +++ include/linux/nfs_xdr.h | 2 ++ 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 5f61b83f4a1ce2..b4e03ed8599de5 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -481,7 +481,8 @@ static int decode_path(struct xdr_stream *xdr) * void; * }; */ -static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result) +static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result, + __u32 *op_status) { enum nfs_stat status; int error; @@ -489,6 +490,8 @@ static int decode_attrstat(struct xdr_stream *xdr, struct nfs_fattr *result) error = decode_stat(xdr, &status); if (unlikely(error)) goto out; + if (op_status) + *op_status = status; if (status != NFS_OK) goto out_default; error = decode_fattr(xdr, result); @@ -808,7 +811,7 @@ static int nfs2_xdr_dec_stat(struct rpc_rqst *req, struct xdr_stream *xdr, static int nfs2_xdr_dec_attrstat(struct rpc_rqst *req, struct xdr_stream *xdr, struct nfs_fattr *result) { - return decode_attrstat(xdr, result); + return decode_attrstat(xdr, result, NULL); } static int nfs2_xdr_dec_diropres(struct rpc_rqst *req, struct xdr_stream *xdr, @@ -865,6 +868,7 @@ static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr, error = decode_stat(xdr, &status); if (unlikely(error)) goto out; + result->op_status = status; if (status != NFS_OK) goto out_default; error = decode_fattr(xdr, result->fattr); @@ -882,7 +886,7 @@ static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr, { /* All NFSv2 writes are "file sync" writes */ result->verf->committed = NFS_FILE_SYNC; - return decode_attrstat(xdr, result->fattr); + return decode_attrstat(xdr, result->fattr, &result->op_status); } /** diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 8f4cbe7f4aa81e..2a932fdc57cb37 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1636,6 +1636,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, error = decode_post_op_attr(xdr, result->fattr); if (unlikely(error)) goto out; + result->op_status = status; if (status != NFS3_OK) goto out_status; error = decode_read3resok(xdr, result); @@ -1708,6 +1709,7 @@ static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr, error = decode_wcc_data(xdr, result->fattr); if (unlikely(error)) goto out; + result->op_status = status; if (status != NFS3_OK) goto out_status; error = decode_write3resok(xdr, result); @@ -2323,6 +2325,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, error = decode_wcc_data(xdr, result->fattr); if (unlikely(error)) goto out; + result->op_status = status; if (status != NFS3_OK) goto out_status; error = decode_writeverf3(xdr, &result->verf->verifier); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index cb4376b78ed917..7d8d7a47f771bd 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6567,6 +6567,7 @@ static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr, int status; status = decode_compound_hdr(xdr, &hdr); + res->op_status = hdr.status; if (status) goto out; status = decode_sequence(xdr, &res->seq_res, rqstp); @@ -6592,6 +6593,7 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr, int status; status = decode_compound_hdr(xdr, &hdr); + res->op_status = hdr.status; if (status) goto out; status = decode_sequence(xdr, &res->seq_res, rqstp); @@ -6621,6 +6623,7 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr, int status; status = decode_compound_hdr(xdr, &hdr); + res->op_status = hdr.status; if (status) goto out; status = decode_sequence(xdr, &res->seq_res, rqstp); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 467c84efb5960f..962f461c065d75 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -513,6 +513,7 @@ struct nfs_pgio_res { struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; __u32 count; + __u32 op_status; int eof; /* used by read */ struct nfs_writeverf * verf; /* used by write */ const struct nfs_server *server; /* used by write */ @@ -532,6 +533,7 @@ struct nfs_commitargs { struct nfs_commitres { struct nfs4_sequence_res seq_res; + __u32 op_status; struct nfs_fattr *fattr; struct nfs_writeverf *verf; const struct nfs_server *server; From 4579d6b897ee1b2557517fd536fb17eeb13481ad Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 6 Sep 2014 00:53:21 +0800 Subject: [PATCH 414/601] nfs41: pass iomode through layoutreturn args So that it is possible to return a specific iomode layouts. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/nfs4xdr.c | 2 +- fs/nfs/pnfs.c | 1 + include/linux/nfs_xdr.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7d8d7a47f771bd..3c3ff633dd171e 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2012,7 +2012,7 @@ encode_layoutreturn(struct xdr_stream *xdr, p = reserve_space(xdr, 16); *p++ = cpu_to_be32(0); /* reclaim. always 0 for now */ *p++ = cpu_to_be32(args->layout_type); - *p++ = cpu_to_be32(IOMODE_ANY); + *p++ = cpu_to_be32(args->iomode); *p = cpu_to_be32(RETURN_FILE); p = reserve_space(xdr, 16); p = xdr_encode_hyper(p, 0); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 7e1bac189d1cb9..1b544c1a746c1a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -914,6 +914,7 @@ _pnfs_return_layout(struct inode *ino) lrp->args.stateid = stateid; lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; lrp->args.inode = ino; + lrp->args.iomode = IOMODE_ANY; lrp->args.layout = lo; lrp->clp = NFS_SERVER(ino)->nfs_client; lrp->cred = lo->plh_lc_cred; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 962f461c065d75..4fd7793d45d16f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -293,6 +293,7 @@ struct nfs4_layoutreturn_args { struct nfs4_sequence_args seq_args; struct pnfs_layout_hdr *layout; struct inode *inode; + enum pnfs_iomode iomode; nfs4_stateid stateid; __u32 layout_type; }; From f40eb5d044e2eea3f866eeeeb45ca30753773cda Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 6 Sep 2014 00:53:22 +0800 Subject: [PATCH 415/601] nfs41: make a helper function to send layoutreturn It allows to specify different iomode to return. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 53 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 1b544c1a746c1a..1b9720992608bf 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -845,6 +845,38 @@ static void pnfs_clear_layoutcommit(struct inode *inode, } } +static int +pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, + enum pnfs_iomode iomode) +{ + struct inode *ino = lo->plh_inode; + struct nfs4_layoutreturn *lrp; + int status = 0; + + lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); + if (unlikely(lrp == NULL)) { + status = -ENOMEM; + spin_lock(&ino->i_lock); + lo->plh_block_lgets--; + spin_unlock(&ino->i_lock); + pnfs_put_layout_hdr(lo); + goto out; + } + + lrp->args.stateid = stateid; + lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; + lrp->args.inode = ino; + lrp->args.iomode = iomode; + lrp->args.layout = lo; + lrp->clp = NFS_SERVER(ino)->nfs_client; + lrp->cred = lo->plh_lc_cred; + + status = nfs4_proc_layoutreturn(lrp); +out: + dprintk("<-- %s status: %d\n", __func__, status); + return status; +} + /* * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr * when the layout segment list is empty. @@ -859,7 +891,6 @@ _pnfs_return_layout(struct inode *ino) struct pnfs_layout_hdr *lo = NULL; struct nfs_inode *nfsi = NFS_I(ino); LIST_HEAD(tmp_list); - struct nfs4_layoutreturn *lrp; nfs4_stateid stateid; int status = 0, empty; @@ -901,25 +932,7 @@ _pnfs_return_layout(struct inode *ino) spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); - lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); - if (unlikely(lrp == NULL)) { - status = -ENOMEM; - spin_lock(&ino->i_lock); - lo->plh_block_lgets--; - spin_unlock(&ino->i_lock); - pnfs_put_layout_hdr(lo); - goto out; - } - - lrp->args.stateid = stateid; - lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; - lrp->args.inode = ino; - lrp->args.iomode = IOMODE_ANY; - lrp->args.layout = lo; - lrp->clp = NFS_SERVER(ino)->nfs_client; - lrp->cred = lo->plh_lc_cred; - - status = nfs4_proc_layoutreturn(lrp); + status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY); out: dprintk("<-- %s status: %d\n", __func__, status); return status; From 016256df3a7e9eeb3f4dea5ccd0e21a0b63841eb Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 6 Sep 2014 00:53:23 +0800 Subject: [PATCH 416/601] nfs41: add a helper to mark layout for return It marks all matching layout segments as NFS_LSEG_LAYOUTRETURN, which is an indicator for pnfs_put_lseg() to send layoutreturn, and also prevents pnfs_update_layout() from using the returning segments. Once it is set, it never gets cleared. It also sets proper io failure bit so that pnfs path can be retried after PNFS_LAYOUTGET_RETRY_TIMEOUT second. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/pnfs.h | 4 ++++ 2 files changed, 59 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 1b9720992608bf..0bd149baca71e0 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1479,6 +1479,61 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out; } +static void +pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, + struct list_head *tmp_list, + struct pnfs_layout_range *return_range) +{ + struct pnfs_layout_segment *lseg, *next; + + dprintk("%s:Begin lo %p\n", __func__, lo); + + if (list_empty(&lo->plh_segs)) + return; + + list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) + if (should_free_lseg(&lseg->pls_range, return_range)) { + dprintk("%s: marking lseg %p iomode %d " + "offset %llu length %llu\n", __func__, + lseg, lseg->pls_range.iomode, + lseg->pls_range.offset, + lseg->pls_range.length); + set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags); + mark_lseg_invalid(lseg, tmp_list); + } +} + +void pnfs_error_mark_layout_for_return(struct inode *inode, + struct pnfs_layout_segment *lseg) +{ + struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; + int iomode = pnfs_iomode_to_fail_bit(lseg->pls_range.iomode); + struct pnfs_layout_range range = { + .iomode = lseg->pls_range.iomode, + .offset = 0, + .length = NFS4_MAX_UINT64, + }; + LIST_HEAD(free_me); + + spin_lock(&inode->i_lock); + /* set failure bit so that pnfs path will be retried later */ + pnfs_layout_set_fail_bit(lo, iomode); + set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); + if (lo->plh_return_iomode == 0) + lo->plh_return_iomode = range.iomode; + else if (lo->plh_return_iomode != range.iomode) + lo->plh_return_iomode = IOMODE_ANY; + /* + * mark all matching lsegs so that we are sure to have no live + * segments at hand when sending layoutreturn. See pnfs_put_lseg() + * for how it works. + */ + pnfs_mark_matching_lsegs_return(lo, &free_me, &range); + spin_unlock(&inode->i_lock); + pnfs_free_lseg_list(&free_me); +} +EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); + void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 4cf0d54e14c349..bea2030eec74f2 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -38,6 +38,7 @@ enum { NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ NFS_LSEG_ROC, /* roc bit received from server */ NFS_LSEG_LAYOUTCOMMIT, /* layoutcommit bit set for layoutcommit */ + NFS_LSEG_LAYOUTRETURN, /* layoutreturn bit set for layoutreturn */ }; /* Individual ip address */ @@ -184,6 +185,7 @@ struct pnfs_layout_hdr { u32 plh_barrier; /* ignore lower seqids */ unsigned long plh_retry_timestamp; unsigned long plh_flags; + enum pnfs_iomode plh_return_iomode; loff_t plh_lwb; /* last write byte for layoutcommit */ struct rpc_cred *plh_lc_cred; /* layoutcommit cred */ struct inode *plh_inode; @@ -274,6 +276,8 @@ void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *); int pnfs_write_done_resend_to_mds(struct nfs_pgio_header *); struct nfs4_threshold *pnfs_mdsthreshold_alloc(void); +void pnfs_error_mark_layout_for_return(struct inode *inode, + struct pnfs_layout_segment *lseg); /* nfs4_deviceid_flags */ enum { From ce6ab4f238cb76d356229e97e1fefb7192388e13 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 6 Sep 2014 00:53:24 +0800 Subject: [PATCH 417/601] nfs41: don't use a layout if it is marked for returning And if we are to return the same type of layouts, don't bother sending more layoutgets. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/nfs4proc.c | 1 + fs/nfs/pnfs.c | 23 ++++++++++++++++++----- fs/nfs/pnfs.h | 1 + 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f358262a95f98b..19432842b2dc7f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7540,6 +7540,7 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) return; if (pnfs_choose_layoutget_stateid(&lgp->args.stateid, NFS_I(lgp->args.inode)->layout, + &lgp->args.range, lgp->args.ctx->state)) { rpc_exit(task, NFS4_OK); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0bd149baca71e0..853b544f2efc46 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -740,25 +740,37 @@ pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo, return !pnfs_seqid_is_newer(seqid, lo->plh_barrier); } +static bool +pnfs_layout_returning(const struct pnfs_layout_hdr *lo, + struct pnfs_layout_range *range) +{ + return test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) && + (lo->plh_return_iomode == IOMODE_ANY || + lo->plh_return_iomode == range->iomode); +} + /* lget is set to 1 if called from inside send_layoutget call chain */ static bool -pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, int lget) +pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, + struct pnfs_layout_range *range, int lget) { return lo->plh_block_lgets || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || (list_empty(&lo->plh_segs) && - (atomic_read(&lo->plh_outstanding) > lget)); + (atomic_read(&lo->plh_outstanding) > lget)) || + pnfs_layout_returning(lo, range); } int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, + struct pnfs_layout_range *range, struct nfs4_state *open_state) { int status = 0; dprintk("--> %s\n", __func__); spin_lock(&lo->plh_inode->i_lock); - if (pnfs_layoutgets_blocked(lo, 1)) { + if (pnfs_layoutgets_blocked(lo, range, 1)) { status = -EAGAIN; } else if (!nfs4_valid_open_stateid(open_state)) { status = -EBADF; @@ -1192,6 +1204,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, list_for_each_entry(lseg, &lo->plh_segs, pls_list) { if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && + !test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags) && pnfs_lseg_range_match(&lseg->pls_range, range)) { ret = pnfs_get_lseg(lseg); break; @@ -1351,7 +1364,7 @@ pnfs_update_layout(struct inode *ino, goto out_unlock; } - if (pnfs_layoutgets_blocked(lo, 0)) + if (pnfs_layoutgets_blocked(lo, &arg, 0)) goto out_unlock; atomic_inc(&lo->plh_outstanding); spin_unlock(&ino->i_lock); @@ -1432,7 +1445,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out_forget_reply; } - if (pnfs_layoutgets_blocked(lo, 1)) { + if (pnfs_layoutgets_blocked(lo, &lgp->args.range, 1)) { dprintk("%s forget reply due to state\n", __func__); goto out_forget_reply; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index bea2030eec74f2..9e6edd1ebbc61a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -249,6 +249,7 @@ void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, bool update_barrier); int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, + struct pnfs_layout_range *range, struct nfs4_state *open_state); int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, From aa1e0e3a8e3f16ff50a72a8c623d7e1c467383bc Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 6 Sep 2014 00:53:25 +0800 Subject: [PATCH 418/601] nfs41: send layoutreturn in last put_lseg If current lseg is the last lseg marked with NFS_LSEG_LAYOUTRETURN, send layoutreturn. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 853b544f2efc46..e9acfcfdc9a983 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -50,6 +50,10 @@ static DEFINE_SPINLOCK(pnfs_spinlock); */ static LIST_HEAD(pnfs_modules_tbl); +static int +pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, + enum pnfs_iomode iomode); + /* Return the registered pnfs layout driver module matching given id */ static struct pnfs_layoutdriver_type * find_pnfs_driver_locked(u32 id) @@ -337,6 +341,29 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); } +/* Return true if layoutreturn is needed */ +static bool +pnfs_layout_need_return(struct pnfs_layout_hdr *lo, + struct pnfs_layout_segment *lseg, + nfs4_stateid *stateid, enum pnfs_iomode *iomode) +{ + struct pnfs_layout_segment *s; + + if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) + return false; + + list_for_each_entry(s, &lo->plh_segs, pls_list) + if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) + return false; + + *stateid = lo->plh_stateid; + *iomode = lo->plh_return_iomode; + /* decreased in pnfs_send_layoutreturn() */ + lo->plh_block_lgets++; + lo->plh_return_iomode = 0; + return true; +} + void pnfs_put_lseg(struct pnfs_layout_segment *lseg) { @@ -352,11 +379,20 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) lo = lseg->pls_layout; inode = lo->plh_inode; if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { + bool need_return; + nfs4_stateid stateid; + enum pnfs_iomode iomode; + pnfs_get_layout_hdr(lo); pnfs_layout_remove_lseg(lo, lseg); + need_return = pnfs_layout_need_return(lo, lseg, + &stateid, &iomode); spin_unlock(&inode->i_lock); pnfs_free_lseg(lseg); - pnfs_put_layout_hdr(lo); + if (need_return) + pnfs_send_layoutreturn(lo, stateid, iomode); + else + pnfs_put_layout_hdr(lo); } } EXPORT_SYMBOL_GPL(pnfs_put_lseg); From e736a5b98c7aa98fe572990caf5fed9593c72a67 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 6 Sep 2014 00:53:26 +0800 Subject: [PATCH 419/601] nfs41: clear NFS_LAYOUT_RETURN if layoutreturn is sent or failed to send So that pnfs path is not disabled for ever. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/nfs4proc.c | 1 + fs/nfs/pnfs.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 19432842b2dc7f..e19b5dbe535abe 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7796,6 +7796,7 @@ static void nfs4_layoutreturn_release(void *calldata) spin_lock(&lo->plh_inode->i_lock); if (lrp->res.lrs_present) pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); + clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); lo->plh_block_lgets--; spin_unlock(&lo->plh_inode->i_lock); pnfs_put_layout_hdr(lrp->args.layout); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index e9acfcfdc9a983..63992c826faf0b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -921,6 +921,11 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, status = nfs4_proc_layoutreturn(lrp); out: + if (status) { + spin_lock(&ino->i_lock); + clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); + spin_unlock(&ino->i_lock); + } dprintk("<-- %s status: %d\n", __func__, status); return status; } From c220106fb45909719295474e2497ffe03e47dfb3 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 6 Sep 2014 00:53:29 +0800 Subject: [PATCH 420/601] nfs/filelayout: use pnfs_error_mark_layout_for_return Instead of calling layoutreturn directly, call pnfs_error_mark_layout_for_return to mark layouts for return and let generic code return layout when layout segments are freed. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes Conflicts: fs/nfs/filelayout/filelayout.c --- fs/nfs/filelayout/filelayout.c | 2 +- fs/nfs/pnfs_nfs.c | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index bfa8547eb2d660..5d2eadc65167af 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -200,7 +200,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, dprintk("%s DS connection error %d\n", __func__, task->tk_status); nfs4_mark_deviceid_unavailable(devid); - set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); + pnfs_error_mark_layout_for_return(inode, lseg); rpc_wake_up(&tbl->slot_tbl_waitq); /* fall through */ default: diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index c87f664587ee73..55bff41180e87a 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -18,20 +18,10 @@ #define NFSDBG_FACILITY NFSDBG_PNFS -static void pnfs_generic_fenceme(struct inode *inode, - struct pnfs_layout_hdr *lo) -{ - if (!test_and_clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) - return; - pnfs_return_layout(inode); -} - void pnfs_generic_rw_release(void *data) { struct nfs_pgio_header *hdr = data; - struct pnfs_layout_hdr *lo = hdr->lseg->pls_layout; - pnfs_generic_fenceme(lo->plh_inode, lo); nfs_put_client(hdr->ds_clp); hdr->mds_ops->rpc_release(data); } From 2176bf4269a37a7742230ed6c91668241bfe1b2b Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Wed, 10 Sep 2014 15:44:18 -0400 Subject: [PATCH 421/601] nfs: introduce pg_cleanup op for pgio descriptors Add a new operation to nfs_pageio_ops that is called on nfs_pageio_complete. Signed-off-by: Weston Andros Adamson --- fs/nfs/pagelist.c | 5 ++++- include/linux/nfs_page.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index c4d175829880eb..1c031878c75257 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1050,7 +1050,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc, EXPORT_SYMBOL_GPL(nfs_pageio_resend); /** - * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor + * nfs_pageio_complete - Complete I/O then cleanup an nfs_pageio_descriptor * @desc: pointer to io descriptor */ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) @@ -1062,6 +1062,9 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) if (!nfs_do_recoalesce(desc)) break; } + + if (desc->pg_ops->pg_cleanup) + desc->pg_ops->pg_cleanup(desc); } /** diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 4c3aa809ab95bd..479c566c4ddcb6 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -58,6 +58,7 @@ struct nfs_pageio_ops { size_t (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); int (*pg_doio)(struct nfs_pageio_descriptor *); + void (*pg_cleanup)(struct nfs_pageio_descriptor *); }; struct nfs_rw_ops { From 180bb5ec06ce3a95dccc751fbf6bf11d3003da98 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Wed, 10 Sep 2014 15:48:01 -0400 Subject: [PATCH 422/601] pnfs: release lseg in pnfs_generic_pg_cleanup This is needed to support mirrored writes - the first write can't just trash the lseg, we need to keep it around until all mirrors have written. Signed-off-by: Weston Andros Adamson --- fs/nfs/blocklayout/blocklayout.c | 2 ++ fs/nfs/filelayout/filelayout.c | 2 ++ fs/nfs/objlayout/objio_osd.c | 2 ++ fs/nfs/pnfs.c | 32 ++++++++++++++------------------ fs/nfs/pnfs.h | 1 + 5 files changed, 21 insertions(+), 18 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 77fec6a55f5795..1cac3c175d1870 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -860,12 +860,14 @@ static const struct nfs_pageio_ops bl_pg_read_ops = { .pg_init = bl_pg_init_read, .pg_test = bl_pg_test_read, .pg_doio = pnfs_generic_pg_readpages, + .pg_cleanup = pnfs_generic_pg_cleanup, }; static const struct nfs_pageio_ops bl_pg_write_ops = { .pg_init = bl_pg_init_write, .pg_test = bl_pg_test_write, .pg_doio = pnfs_generic_pg_writepages, + .pg_cleanup = pnfs_generic_pg_cleanup, }; static struct pnfs_layoutdriver_type blocklayout_type = { diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 5d2eadc65167af..2af32fc39d600a 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -933,12 +933,14 @@ static const struct nfs_pageio_ops filelayout_pg_read_ops = { .pg_init = filelayout_pg_init_read, .pg_test = filelayout_pg_test, .pg_doio = pnfs_generic_pg_readpages, + .pg_cleanup = pnfs_generic_pg_cleanup, }; static const struct nfs_pageio_ops filelayout_pg_write_ops = { .pg_init = filelayout_pg_init_write, .pg_test = filelayout_pg_test, .pg_doio = pnfs_generic_pg_writepages, + .pg_cleanup = pnfs_generic_pg_cleanup, }; static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 9e5bc42180e44d..d00778077df13e 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -607,12 +607,14 @@ static const struct nfs_pageio_ops objio_pg_read_ops = { .pg_init = objio_init_read, .pg_test = objio_pg_test, .pg_doio = pnfs_generic_pg_readpages, + .pg_cleanup = pnfs_generic_pg_cleanup, }; static const struct nfs_pageio_ops objio_pg_write_ops = { .pg_init = objio_init_write, .pg_test = objio_pg_test, .pg_doio = pnfs_generic_pg_writepages, + .pg_cleanup = pnfs_generic_pg_cleanup, }; static struct pnfs_layoutdriver_type objlayout_type = { diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 63992c826faf0b..2da2e771fefe91 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1631,6 +1631,16 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, } EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); +void +pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *desc) +{ + if (desc->pg_lseg) { + pnfs_put_lseg(desc->pg_lseg); + desc->pg_lseg = NULL; + } +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup); + /* * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number * of bytes (maximum @req->wb_bytes) that can be coalesced. @@ -1756,11 +1766,9 @@ pnfs_do_write(struct nfs_pageio_descriptor *desc, struct pnfs_layout_segment *lseg = desc->pg_lseg; enum pnfs_try_status trypnfs; - desc->pg_lseg = NULL; trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how); if (trypnfs == PNFS_NOT_ATTEMPTED) pnfs_write_through_mds(desc, hdr); - pnfs_put_lseg(lseg); } static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) @@ -1779,17 +1787,13 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); - pnfs_put_lseg(desc->pg_lseg); - desc->pg_lseg = NULL; return -ENOMEM; } nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); + hdr->lseg = pnfs_get_lseg(desc->pg_lseg); ret = nfs_generic_pgio(desc, hdr); - if (ret != 0) { - pnfs_put_lseg(desc->pg_lseg); - desc->pg_lseg = NULL; - } else + if (!ret) pnfs_do_write(desc, hdr, desc->pg_ioflags); return ret; } @@ -1874,11 +1878,9 @@ pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) struct pnfs_layout_segment *lseg = desc->pg_lseg; enum pnfs_try_status trypnfs; - desc->pg_lseg = NULL; trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); if (trypnfs == PNFS_NOT_ATTEMPTED) pnfs_read_through_mds(desc, hdr); - pnfs_put_lseg(lseg); } static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) @@ -1897,18 +1899,12 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { desc->pg_completion_ops->error_cleanup(&desc->pg_list); - ret = -ENOMEM; - pnfs_put_lseg(desc->pg_lseg); - desc->pg_lseg = NULL; - return ret; + return -ENOMEM; } nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); ret = nfs_generic_pgio(desc, hdr); - if (ret != 0) { - pnfs_put_lseg(desc->pg_lseg); - desc->pg_lseg = NULL; - } else + if (!ret) pnfs_do_read(desc, hdr); return ret; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 9e6edd1ebbc61a..59c831efb5dee4 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -230,6 +230,7 @@ void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page * int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, u64 wb_size); +void pnfs_generic_pg_cleanup(struct nfs_pageio_descriptor *); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); From 309a1d65b11de24d172f7dbbc21583ebd649c912 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 5 Sep 2014 16:34:29 -0400 Subject: [PATCH 423/601] nfs: handle overlapping reqs in lock_and_join This is needed for mirrored DS support, where multuple requests cover the same range. Signed-off-by: Weston Andros Adamson --- fs/nfs/write.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 8800bd3b235d09..e9974574b19ac2 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -473,13 +473,18 @@ nfs_lock_and_join_requests(struct page *page, bool nonblock) do { /* * Subrequests are always contiguous, non overlapping - * and in order. If not, it's a programming error. + * and in order - but may be repeated (mirrored writes). */ - WARN_ON_ONCE(subreq->wb_offset != - (head->wb_offset + total_bytes)); - - /* keep track of how many bytes this group covers */ - total_bytes += subreq->wb_bytes; + if (subreq->wb_offset == (head->wb_offset + total_bytes)) { + /* keep track of how many bytes this group covers */ + total_bytes += subreq->wb_bytes; + } else if (WARN_ON_ONCE(subreq->wb_offset < head->wb_offset || + ((subreq->wb_offset + subreq->wb_bytes) > + (head->wb_offset + total_bytes)))) { + nfs_page_group_unlock(head); + spin_unlock(&inode->i_lock); + return ERR_PTR(-EIO); + } if (!nfs_lock_request(subreq)) { /* releases page group bit lock and From 6cccbb6f52dceec5f4faed8846ac05ae830640e6 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 16 Sep 2014 17:35:51 -0400 Subject: [PATCH 424/601] nfs: rename pgio header ds_idx to ds_commit_idx 'ds_commit_idx' is a better name - it is used to select the right commit bucket for pnfs. Signed-off-by: Weston Andros Adamson --- fs/nfs/direct.c | 14 ++++++-------- fs/nfs/filelayout/filelayout.c | 4 ++-- include/linux/nfs_xdr.h | 2 +- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e84f764b9dcdb6..d7c2d430b04d2d 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -112,22 +112,22 @@ static inline int put_dreq(struct nfs_direct_req *dreq) * nfs_direct_select_verf - select the right verifier * @dreq - direct request possibly spanning multiple servers * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs - * @ds_idx - index of data server in data server list, only valid if ds_clp set + * @commit_idx - commit bucket index for the DS * * returns the correct verifier to use given the role of the server */ static struct nfs_writeverf * nfs_direct_select_verf(struct nfs_direct_req *dreq, struct nfs_client *ds_clp, - int ds_idx) + int commit_idx) { struct nfs_writeverf *verfp = &dreq->verf; #ifdef CONFIG_NFS_V4_1 if (ds_clp) { /* pNFS is in use, use the DS verf */ - if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets) - verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf; + if (commit_idx >= 0 && commit_idx < dreq->ds_cinfo.nbuckets) + verfp = &dreq->ds_cinfo.buckets[commit_idx].direct_verf; else WARN_ON_ONCE(1); } @@ -148,8 +148,7 @@ static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq, { struct nfs_writeverf *verfp; - verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, - hdr->ds_idx); + verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx); WARN_ON_ONCE(verfp->committed >= 0); memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); WARN_ON_ONCE(verfp->committed < 0); @@ -169,8 +168,7 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, { struct nfs_writeverf *verfp; - verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, - hdr->ds_idx); + verfp = nfs_direct_select_verf(dreq, hdr->ds_clp, hdr->ds_commit_idx); if (verfp->committed < 0) { nfs_direct_set_hdr_verf(dreq, hdr); return 0; diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 2af32fc39d600a..520cbc53e035d2 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -492,7 +492,7 @@ filelayout_read_pagelist(struct nfs_pgio_header *hdr) /* No multipath support. Use first DS */ atomic_inc(&ds->ds_clp->cl_count); hdr->ds_clp = ds->ds_clp; - hdr->ds_idx = idx; + hdr->ds_commit_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) hdr->args.fh = fh; @@ -536,7 +536,7 @@ filelayout_write_pagelist(struct nfs_pgio_header *hdr, int sync) hdr->pgio_done_cb = filelayout_write_done_cb; atomic_inc(&ds->ds_clp->cl_count); hdr->ds_clp = ds->ds_clp; - hdr->ds_idx = idx; + hdr->ds_commit_idx = idx; fh = nfs4_fl_select_ds_fh(lseg, j); if (fh) hdr->args.fh = fh; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 4fd7793d45d16f..5bc99f04a55071 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1328,7 +1328,7 @@ struct nfs_pgio_header { __u64 mds_offset; /* Filelayout dense stripe */ struct nfs_page_array page_array; struct nfs_client *ds_clp; /* pNFS data server */ - int ds_idx; /* ds index if ds_clp is set */ + int ds_commit_idx; /* ds index if ds_clp is set */ }; struct nfs_mds_commit_info { From b57ff1303a2d4d1484c7a82bd80a3e014d6cdf5e Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 5 Sep 2014 18:20:21 -0400 Subject: [PATCH 425/601] pnfs: pass ds_commit_idx through the commit path Pass ds_commit_idx through the nfs commit path. It's used to select the commit bucket when using pnfs and is ignored when not using pnfs. Several functions had to be changed: nfs_retry_commit, nfs_mark_request_commit, pnfs_mark_request_commit and the pnfs layout driver .mark_request_commit functions. Signed-off-by: Tom Haynes --- fs/nfs/direct.c | 5 +++-- fs/nfs/filelayout/filelayout.c | 3 ++- fs/nfs/internal.h | 6 ++++-- fs/nfs/pnfs.h | 9 +++++---- fs/nfs/pnfs_nfs.c | 4 ++-- fs/nfs/write.c | 14 ++++++++------ 6 files changed, 24 insertions(+), 17 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index d7c2d430b04d2d..1ee41d74c31c76 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -649,7 +649,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) nfs_list_remove_request(req); if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) { /* Note the rewrite will go through mds */ - nfs_mark_request_commit(req, NULL, &cinfo); + nfs_mark_request_commit(req, NULL, &cinfo, 0); } else nfs_release_request(req); nfs_unlock_and_release_request(req); @@ -748,7 +748,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) nfs_list_remove_request(req); if (request_commit) { kref_get(&req->wb_kref); - nfs_mark_request_commit(req, hdr->lseg, &cinfo); + nfs_mark_request_commit(req, hdr->lseg, &cinfo, + hdr->ds_commit_idx); } nfs_unlock_and_release_request(req); } diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 520cbc53e035d2..3c9769441f3641 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -954,7 +954,8 @@ static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) static void filelayout_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) + struct nfs_commit_info *cinfo, + u32 ds_commit_idx) { struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index e9305e98b78218..05f9a87cdab46b 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -450,13 +450,15 @@ int nfs_scan_commit(struct inode *inode, struct list_head *dst, struct nfs_commit_info *cinfo); void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo); + struct nfs_commit_info *cinfo, + u32 ds_commit_idx); int nfs_write_need_commit(struct nfs_pgio_header *); int nfs_generic_commit_list(struct inode *inode, struct list_head *head, int how, struct nfs_commit_info *cinfo); void nfs_retry_commit(struct list_head *page_list, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo); + struct nfs_commit_info *cinfo, + u32 ds_commit_idx); void nfs_commitdata_release(struct nfs_commit_data *data); void nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst, struct nfs_commit_info *cinfo); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 59c831efb5dee4..a0ab81cc9cf372 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -137,7 +137,8 @@ struct pnfs_layoutdriver_type { struct pnfs_ds_commit_info *(*get_ds_info) (struct inode *inode); void (*mark_request_commit) (struct nfs_page *req, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo); + struct nfs_commit_info *cinfo, + u32 ds_commit_idx); void (*clear_request_commit) (struct nfs_page *req, struct nfs_commit_info *cinfo); int (*scan_commit_lists) (struct nfs_commit_info *cinfo, @@ -389,14 +390,14 @@ pnfs_generic_mark_devid_invalid(struct nfs4_deviceid_node *node) static inline bool pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) + struct nfs_commit_info *cinfo, u32 ds_commit_idx) { struct inode *inode = req->wb_context->dentry->d_inode; struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; if (lseg == NULL || ld->mark_request_commit == NULL) return false; - ld->mark_request_commit(req, lseg, cinfo); + ld->mark_request_commit(req, lseg, cinfo, ds_commit_idx); return true; } @@ -574,7 +575,7 @@ pnfs_get_ds_info(struct inode *inode) static inline bool pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) + struct nfs_commit_info *cinfo, u32 ds_commit_idx) { return false; } diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 55bff41180e87a..fdc4f6562bb7ef 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -188,7 +188,7 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) bucket = &fl_cinfo->buckets[i]; if (list_empty(&bucket->committing)) continue; - nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); + nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo, i); spin_lock(cinfo->lock); freeme = bucket->clseg; bucket->clseg = NULL; @@ -247,7 +247,7 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, list_add(&data->pages, &list); nreq++; } else { - nfs_retry_commit(mds_pages, NULL, cinfo); + nfs_retry_commit(mds_pages, NULL, cinfo, 0); pnfs_generic_retry_commit(cinfo, 0); cinfo->completion_ops->error_cleanup(NFS_I(inode)); return -ENOMEM; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e9974574b19ac2..2bee165fddcff6 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -847,9 +847,9 @@ EXPORT_SYMBOL_GPL(nfs_init_cinfo); */ void nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) + struct nfs_commit_info *cinfo, u32 ds_commit_idx) { - if (pnfs_mark_request_commit(req, lseg, cinfo)) + if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx)) return; nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo); } @@ -905,7 +905,8 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) } if (nfs_write_need_commit(hdr)) { memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); - nfs_mark_request_commit(req, hdr->lseg, &cinfo); + nfs_mark_request_commit(req, hdr->lseg, &cinfo, + 0); goto next; } remove_req: @@ -1560,14 +1561,15 @@ EXPORT_SYMBOL_GPL(nfs_init_commit); void nfs_retry_commit(struct list_head *page_list, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) + struct nfs_commit_info *cinfo, + u32 ds_commit_idx) { struct nfs_page *req; while (!list_empty(page_list)) { req = nfs_list_entry(page_list->next); nfs_list_remove_request(req); - nfs_mark_request_commit(req, lseg, cinfo); + nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx); if (!cinfo->dreq) { dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); dec_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, @@ -1598,7 +1600,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), data->mds_ops, how, 0); out_bad: - nfs_retry_commit(head, NULL, cinfo); + nfs_retry_commit(head, NULL, cinfo, 0); cinfo->completion_ops->error_cleanup(NFS_I(inode)); return -ENOMEM; } From a7d42ddb3099727f58366fa006f850a219cce6c8 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 19 Sep 2014 10:55:07 -0400 Subject: [PATCH 426/601] nfs: add mirroring support to pgio layer This patch adds mirrored write support to the pgio layer. The default is to use one mirror, but pgio callers may define callbacks to change this to any value up to the (arbitrarily selected) limit of 16. The basic idea is to break out members of nfs_pageio_descriptor that cannot be shared between mirrored DSes and put them in a new structure. Signed-off-by: Weston Andros Adamson --- fs/nfs/direct.c | 17 ++- fs/nfs/internal.h | 1 + fs/nfs/objlayout/objio_osd.c | 3 +- fs/nfs/pagelist.c | 270 +++++++++++++++++++++++++++++------ fs/nfs/pnfs.c | 26 ++-- fs/nfs/read.c | 30 +++- fs/nfs/write.c | 10 +- include/linux/nfs_page.h | 20 ++- include/linux/nfs_xdr.h | 1 + 9 files changed, 311 insertions(+), 67 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1ee41d74c31c76..0178d4fe8ab74d 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -360,8 +360,14 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) spin_lock(&dreq->lock); if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) dreq->error = hdr->error; - else - dreq->count += hdr->good_bytes; + else { + /* + * FIXME: right now this only accounts for bytes written + * to the first mirror + */ + if (hdr->pgio_mirror_idx == 0) + dreq->count += hdr->good_bytes; + } spin_unlock(&dreq->lock); while (!list_empty(&hdr->pages)) { @@ -724,7 +730,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) dreq->error = hdr->error; } if (dreq->error == 0) { - dreq->count += hdr->good_bytes; + /* + * FIXME: right now this only accounts for bytes written + * to the first mirror + */ + if (hdr->pgio_mirror_idx == 0) + dreq->count += hdr->good_bytes; if (nfs_write_need_commit(hdr)) { if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) request_commit = true; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 05f9a87cdab46b..ef1c703e487b01 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -469,6 +469,7 @@ void nfs_init_cinfo(struct nfs_commit_info *cinfo, struct nfs_direct_req *dreq); int nfs_key_timeout_notify(struct file *filp, struct inode *inode); bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx); +void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio); #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index d00778077df13e..9a5f2ee6001f92 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -537,11 +537,12 @@ int objio_write_pagelist(struct nfs_pgio_header *hdr, int how) static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { + struct nfs_pgio_mirror *mirror = &pgio->pg_mirrors[pgio->pg_mirror_idx]; unsigned int size; size = pnfs_generic_pg_test(pgio, prev, req); - if (!size || pgio->pg_count + req->wb_bytes > + if (!size || mirror->pg_count + req->wb_bytes > (unsigned long)pgio->pg_layout_private) return 0; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 1c031878c75257..eec12b75c2325f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -46,17 +46,22 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, void (*release)(struct nfs_pgio_header *hdr)) { - hdr->req = nfs_list_entry(desc->pg_list.next); + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + + + hdr->req = nfs_list_entry(mirror->pg_list.next); hdr->inode = desc->pg_inode; hdr->cred = hdr->req->wb_context->cred; hdr->io_start = req_offset(hdr->req); - hdr->good_bytes = desc->pg_count; + hdr->good_bytes = mirror->pg_count; hdr->dreq = desc->pg_dreq; hdr->layout_private = desc->pg_layout_private; hdr->release = release; hdr->completion_ops = desc->pg_completion_ops; if (hdr->completion_ops->init_hdr) hdr->completion_ops->init_hdr(hdr); + + hdr->pgio_mirror_idx = desc->pg_mirror_idx; } EXPORT_SYMBOL_GPL(nfs_pgheader_init); @@ -480,7 +485,10 @@ nfs_wait_on_request(struct nfs_page *req) size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) { - if (desc->pg_count > desc->pg_bsize) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + + + if (mirror->pg_count > mirror->pg_bsize) { /* should never happen */ WARN_ON_ONCE(1); return 0; @@ -490,11 +498,11 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, * Limit the request size so that we can still allocate a page array * for it without upsetting the slab allocator. */ - if (((desc->pg_count + req->wb_bytes) >> PAGE_SHIFT) * + if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) * sizeof(struct page) > PAGE_SIZE) return 0; - return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes); + return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes); } EXPORT_SYMBOL_GPL(nfs_generic_pg_test); @@ -651,10 +659,18 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio); static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { + struct nfs_pgio_mirror *mirror; + u32 midx; + set_bit(NFS_IOHDR_REDO, &hdr->flags); nfs_pgio_data_destroy(hdr); hdr->completion_ops->completion(hdr); - desc->pg_completion_ops->error_cleanup(&desc->pg_list); + /* TODO: Make sure it's right to clean up all mirrors here + * and not just hdr->pgio_mirror_idx */ + for (midx = 0; midx < desc->pg_mirror_count; midx++) { + mirror = &desc->pg_mirrors[midx]; + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); + } return -ENOMEM; } @@ -671,6 +687,17 @@ static void nfs_pgio_release(void *calldata) hdr->completion_ops->completion(hdr); } +static void nfs_pageio_mirror_init(struct nfs_pgio_mirror *mirror, + unsigned int bsize) +{ + INIT_LIST_HEAD(&mirror->pg_list); + mirror->pg_bytes_written = 0; + mirror->pg_count = 0; + mirror->pg_bsize = bsize; + mirror->pg_base = 0; + mirror->pg_recoalesce = 0; +} + /** * nfs_pageio_init - initialise a page io descriptor * @desc: pointer to descriptor @@ -687,13 +714,10 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, size_t bsize, int io_flags) { - INIT_LIST_HEAD(&desc->pg_list); - desc->pg_bytes_written = 0; - desc->pg_count = 0; - desc->pg_bsize = bsize; - desc->pg_base = 0; + struct nfs_pgio_mirror *new; + int i; + desc->pg_moreio = 0; - desc->pg_recoalesce = 0; desc->pg_inode = inode; desc->pg_ops = pg_ops; desc->pg_completion_ops = compl_ops; @@ -703,6 +727,26 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_lseg = NULL; desc->pg_dreq = NULL; desc->pg_layout_private = NULL; + desc->pg_bsize = bsize; + + desc->pg_mirror_count = 1; + desc->pg_mirror_idx = 0; + + if (pg_ops->pg_get_mirror_count) { + /* until we have a request, we don't have an lseg and no + * idea how many mirrors there will be */ + new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX, + sizeof(struct nfs_pgio_mirror), GFP_KERNEL); + desc->pg_mirrors_dynamic = new; + desc->pg_mirrors = new; + + for (i = 0; i < NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX; i++) + nfs_pageio_mirror_init(&desc->pg_mirrors[i], bsize); + } else { + desc->pg_mirrors_dynamic = NULL; + desc->pg_mirrors = desc->pg_mirrors_static; + nfs_pageio_mirror_init(&desc->pg_mirrors[0], bsize); + } } EXPORT_SYMBOL_GPL(nfs_pageio_init); @@ -738,14 +782,16 @@ static void nfs_pgio_result(struct rpc_task *task, void *calldata) int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_page *req; struct page **pages, *last_page; - struct list_head *head = &desc->pg_list; + struct list_head *head = &mirror->pg_list; struct nfs_commit_info cinfo; unsigned int pagecount, pageused; - pagecount = nfs_page_array_len(desc->pg_base, desc->pg_count); + pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); if (!nfs_pgarray_set(&hdr->page_array, pagecount)) return nfs_pgio_error(desc, hdr); @@ -773,7 +819,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, desc->pg_ioflags &= ~FLUSH_COND_STABLE; /* Set up the argument struct */ - nfs_pgio_rpcsetup(hdr, desc->pg_count, 0, desc->pg_ioflags, &cinfo); + nfs_pgio_rpcsetup(hdr, mirror->pg_count, 0, desc->pg_ioflags, &cinfo); desc->pg_rpc_callops = &nfs_pgio_common_ops; return 0; } @@ -781,12 +827,17 @@ EXPORT_SYMBOL_GPL(nfs_generic_pgio); static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) { + struct nfs_pgio_mirror *mirror; struct nfs_pgio_header *hdr; int ret; + mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { - desc->pg_completion_ops->error_cleanup(&desc->pg_list); + /* TODO: make sure this is right with mirroring - or + * should it back out all mirrors? */ + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); return -ENOMEM; } nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); @@ -801,6 +852,49 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) return ret; } +/* + * nfs_pageio_setup_mirroring - determine if mirroring is to be used + * by calling the pg_get_mirror_count op + */ +static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + int mirror_count = 1; + + if (!pgio->pg_ops->pg_get_mirror_count) + return 0; + + mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); + + if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) + return -EINVAL; + + if (WARN_ON_ONCE(!pgio->pg_mirrors_dynamic)) + return -EINVAL; + + pgio->pg_mirror_count = mirror_count; + + return 0; +} + +/* + * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1) + */ +void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio) +{ + pgio->pg_mirror_count = 1; + pgio->pg_mirror_idx = 0; +} + +static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) +{ + pgio->pg_mirror_count = 1; + pgio->pg_mirror_idx = 0; + pgio->pg_mirrors = pgio->pg_mirrors_static; + kfree(pgio->pg_mirrors_dynamic); + pgio->pg_mirrors_dynamic = NULL; +} + static bool nfs_match_open_context(const struct nfs_open_context *ctx1, const struct nfs_open_context *ctx2) { @@ -867,19 +961,22 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_page *prev = NULL; - if (desc->pg_count != 0) { - prev = nfs_list_entry(desc->pg_list.prev); + + if (mirror->pg_count != 0) { + prev = nfs_list_entry(mirror->pg_list.prev); } else { if (desc->pg_ops->pg_init) desc->pg_ops->pg_init(desc, req); - desc->pg_base = req->wb_pgbase; + mirror->pg_base = req->wb_pgbase; } if (!nfs_can_coalesce_requests(prev, req, desc)) return 0; nfs_list_remove_request(req); - nfs_list_add_request(req, &desc->pg_list); - desc->pg_count += req->wb_bytes; + nfs_list_add_request(req, &mirror->pg_list); + mirror->pg_count += req->wb_bytes; return 1; } @@ -888,16 +985,19 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, */ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) { - if (!list_empty(&desc->pg_list)) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + + + if (!list_empty(&mirror->pg_list)) { int error = desc->pg_ops->pg_doio(desc); if (error < 0) desc->pg_error = error; else - desc->pg_bytes_written += desc->pg_count; + mirror->pg_bytes_written += mirror->pg_count; } - if (list_empty(&desc->pg_list)) { - desc->pg_count = 0; - desc->pg_base = 0; + if (list_empty(&mirror->pg_list)) { + mirror->pg_count = 0; + mirror->pg_base = 0; } } @@ -915,10 +1015,14 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_page *subreq; unsigned int bytes_left = 0; unsigned int offset, pgbase; + WARN_ON_ONCE(desc->pg_mirror_idx >= desc->pg_mirror_count); + nfs_page_group_lock(req, false); subreq = req; @@ -938,7 +1042,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, nfs_pageio_doio(desc); if (desc->pg_error < 0) return 0; - if (desc->pg_recoalesce) + if (mirror->pg_recoalesce) return 0; /* retry add_request for this subreq */ nfs_page_group_lock(req, false); @@ -976,14 +1080,16 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; LIST_HEAD(head); do { - list_splice_init(&desc->pg_list, &head); - desc->pg_bytes_written -= desc->pg_count; - desc->pg_count = 0; - desc->pg_base = 0; - desc->pg_recoalesce = 0; + list_splice_init(&mirror->pg_list, &head); + mirror->pg_bytes_written -= mirror->pg_count; + mirror->pg_count = 0; + mirror->pg_base = 0; + mirror->pg_recoalesce = 0; + desc->pg_moreio = 0; while (!list_empty(&head)) { @@ -997,11 +1103,11 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) return 0; break; } - } while (desc->pg_recoalesce); + } while (mirror->pg_recoalesce); return 1; } -int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, +static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { int ret; @@ -1014,9 +1120,78 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, break; ret = nfs_do_recoalesce(desc); } while (ret); + return ret; } +int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, + struct nfs_page *req) +{ + u32 midx; + unsigned int pgbase, offset, bytes; + struct nfs_page *dupreq, *lastreq; + + pgbase = req->wb_pgbase; + offset = req->wb_offset; + bytes = req->wb_bytes; + + nfs_pageio_setup_mirroring(desc, req); + + for (midx = 0; midx < desc->pg_mirror_count; midx++) { + if (midx) { + nfs_page_group_lock(req, false); + + /* find the last request */ + for (lastreq = req->wb_head; + lastreq->wb_this_page != req->wb_head; + lastreq = lastreq->wb_this_page) + ; + + dupreq = nfs_create_request(req->wb_context, + req->wb_page, lastreq, pgbase, bytes); + + if (IS_ERR(dupreq)) { + nfs_page_group_unlock(req); + return 0; + } + + nfs_lock_request(dupreq); + nfs_page_group_unlock(req); + dupreq->wb_offset = offset; + dupreq->wb_index = req->wb_index; + } else + dupreq = req; + + desc->pg_mirror_idx = midx; + if (!nfs_pageio_add_request_mirror(desc, dupreq)) + return 0; + } + + return 1; +} + +/* + * nfs_pageio_complete_mirror - Complete I/O on the current mirror of an + * nfs_pageio_descriptor + * @desc: pointer to io descriptor + */ +static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, + u32 mirror_idx) +{ + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[mirror_idx]; + u32 restore_idx = desc->pg_mirror_idx; + + desc->pg_mirror_idx = mirror_idx; + for (;;) { + nfs_pageio_doio(desc); + if (!mirror->pg_recoalesce) + break; + if (!nfs_do_recoalesce(desc)) + break; + } + desc->pg_mirror_idx = restore_idx; +} + /* * nfs_pageio_resend - Transfer requests to new descriptor and resend * @hdr - the pgio header to move request from @@ -1055,16 +1230,14 @@ EXPORT_SYMBOL_GPL(nfs_pageio_resend); */ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) { - for (;;) { - nfs_pageio_doio(desc); - if (!desc->pg_recoalesce) - break; - if (!nfs_do_recoalesce(desc)) - break; - } + u32 midx; + + for (midx = 0; midx < desc->pg_mirror_count; midx++) + nfs_pageio_complete_mirror(desc, midx); if (desc->pg_ops->pg_cleanup) desc->pg_ops->pg_cleanup(desc); + nfs_pageio_cleanup_mirroring(desc); } /** @@ -1080,10 +1253,17 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) */ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) { - if (!list_empty(&desc->pg_list)) { - struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); - if (index != prev->wb_index + 1) - nfs_pageio_complete(desc); + struct nfs_pgio_mirror *mirror; + struct nfs_page *prev; + u32 midx; + + for (midx = 0; midx < desc->pg_mirror_count; midx++) { + mirror = &desc->pg_mirrors[midx]; + if (!list_empty(&mirror->pg_list)) { + prev = nfs_list_entry(mirror->pg_list.prev); + if (index != prev->wb_index + 1) + nfs_pageio_complete_mirror(desc, midx); + } } } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2da2e771fefe91..5f7c422ebb5dc8 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1646,8 +1646,8 @@ EXPORT_SYMBOL_GPL(pnfs_generic_pg_cleanup); * of bytes (maximum @req->wb_bytes) that can be coalesced. */ size_t -pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, - struct nfs_page *req) +pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, + struct nfs_page *prev, struct nfs_page *req) { unsigned int size; u64 seg_end, req_start, seg_left; @@ -1729,10 +1729,12 @@ static void pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { - list_splice_tail_init(&hdr->pages, &desc->pg_list); + list_splice_tail_init(&hdr->pages, &mirror->pg_list); nfs_pageio_reset_write_mds(desc); - desc->pg_recoalesce = 1; + mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); } @@ -1781,12 +1783,14 @@ EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_header *hdr; int ret; hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { - desc->pg_completion_ops->error_cleanup(&desc->pg_list); + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); return -ENOMEM; } nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); @@ -1795,6 +1799,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) ret = nfs_generic_pgio(desc, hdr); if (!ret) pnfs_do_write(desc, hdr, desc->pg_ioflags); + return ret; } EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); @@ -1839,10 +1844,13 @@ static void pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { - list_splice_tail_init(&hdr->pages, &desc->pg_list); + list_splice_tail_init(&hdr->pages, &mirror->pg_list); nfs_pageio_reset_read_mds(desc); - desc->pg_recoalesce = 1; + mirror->pg_recoalesce = 1; } nfs_pgio_data_destroy(hdr); } @@ -1893,12 +1901,14 @@ EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { + struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_header *hdr; int ret; hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { - desc->pg_completion_ops->error_cleanup(&desc->pg_list); + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); return -ENOMEM; } nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 092ab499f2b691..568ecf0a880f11 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -70,8 +70,15 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_read); void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) { + struct nfs_pgio_mirror *mirror; + pgio->pg_ops = &nfs_pgio_rw_ops; - pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; + + /* read path should never have more than one mirror */ + WARN_ON_ONCE(pgio->pg_mirror_count != 1); + + mirror = &pgio->pg_mirrors[0]; + mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; } EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); @@ -81,6 +88,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, struct nfs_page *new; unsigned int len; struct nfs_pageio_descriptor pgio; + struct nfs_pgio_mirror *pgm; len = nfs_page_length(page); if (len == 0) @@ -97,7 +105,13 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, &nfs_async_read_completion_ops); nfs_pageio_add_request(&pgio, new); nfs_pageio_complete(&pgio); - NFS_I(inode)->read_io += pgio.pg_bytes_written; + + /* It doesn't make sense to do mirrored reads! */ + WARN_ON_ONCE(pgio.pg_mirror_count != 1); + + pgm = &pgio.pg_mirrors[0]; + NFS_I(inode)->read_io += pgm->pg_bytes_written; + return 0; } @@ -352,6 +366,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { struct nfs_pageio_descriptor pgio; + struct nfs_pgio_mirror *pgm; struct nfs_readdesc desc = { .pgio = &pgio, }; @@ -387,10 +402,15 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, &nfs_async_read_completion_ops); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); - nfs_pageio_complete(&pgio); - NFS_I(inode)->read_io += pgio.pg_bytes_written; - npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + /* It doesn't make sense to do mirrored reads! */ + WARN_ON_ONCE(pgio.pg_mirror_count != 1); + + pgm = &pgio.pg_mirrors[0]; + NFS_I(inode)->read_io += pgm->pg_bytes_written; + npages = (pgm->pg_bytes_written + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; nfs_add_stats(inode, NFSIOS_READPAGES, npages); read_complete: put_nfs_open_context(desc.ctx); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2bee165fddcff6..ceacfeeb28c26f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -906,7 +906,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) if (nfs_write_need_commit(hdr)) { memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo, - 0); + hdr->pgio_mirror_idx); goto next; } remove_req: @@ -1304,8 +1304,14 @@ EXPORT_SYMBOL_GPL(nfs_pageio_init_write); void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) { + struct nfs_pgio_mirror *mirror; + pgio->pg_ops = &nfs_pgio_rw_ops; - pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; + + nfs_pageio_stop_mirroring(pgio); + + mirror = &pgio->pg_mirrors[0]; + mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; } EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 479c566c4ddcb6..3eb072dbce833d 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -58,6 +58,8 @@ struct nfs_pageio_ops { size_t (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); int (*pg_doio)(struct nfs_pageio_descriptor *); + unsigned int (*pg_get_mirror_count)(struct nfs_pageio_descriptor *, + struct nfs_page *); void (*pg_cleanup)(struct nfs_pageio_descriptor *); }; @@ -74,15 +76,17 @@ struct nfs_rw_ops { struct rpc_task_setup *, int); }; -struct nfs_pageio_descriptor { +struct nfs_pgio_mirror { struct list_head pg_list; unsigned long pg_bytes_written; size_t pg_count; size_t pg_bsize; unsigned int pg_base; - unsigned char pg_moreio : 1, - pg_recoalesce : 1; + unsigned char pg_recoalesce : 1; +}; +struct nfs_pageio_descriptor { + unsigned char pg_moreio : 1; struct inode *pg_inode; const struct nfs_pageio_ops *pg_ops; const struct nfs_rw_ops *pg_rw_ops; @@ -93,8 +97,18 @@ struct nfs_pageio_descriptor { struct pnfs_layout_segment *pg_lseg; struct nfs_direct_req *pg_dreq; void *pg_layout_private; + unsigned int pg_bsize; /* default bsize for mirrors */ + + u32 pg_mirror_count; + struct nfs_pgio_mirror *pg_mirrors; + struct nfs_pgio_mirror pg_mirrors_static[1]; + struct nfs_pgio_mirror *pg_mirrors_dynamic; + u32 pg_mirror_idx; /* current mirror */ }; +/* arbitrarily selected limit to number of mirrors */ +#define NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX 16 + #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5bc99f04a55071..6400a1e01aa49f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1329,6 +1329,7 @@ struct nfs_pgio_header { struct nfs_page_array page_array; struct nfs_client *ds_clp; /* pNFS data server */ int ds_commit_idx; /* ds index if ds_clp is set */ + int pgio_mirror_idx;/* mirror index in pgio layer */ }; struct nfs_mds_commit_info { From 0a00b77b331a0e4aac461d4e920677661256918a Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 19 Sep 2014 12:48:33 -0400 Subject: [PATCH 427/601] nfs: mirroring support for direct io The current mirroring code only notices short writes to the first mirror. This patch keeps per-mirror byte counts and only considers a byte to be written once all mirrors report so. Signed-off-by: Weston Andros Adamson --- fs/nfs/direct.c | 71 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 14 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 0178d4fe8ab74d..651387bbfd9fac 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -66,6 +66,10 @@ static struct kmem_cache *nfs_direct_cachep; /* * This represents a set of asynchronous requests that we're waiting on */ +struct nfs_direct_mirror { + ssize_t count; +}; + struct nfs_direct_req { struct kref kref; /* release manager */ @@ -78,6 +82,10 @@ struct nfs_direct_req { /* completion state */ atomic_t io_count; /* i/os we're waiting for */ spinlock_t lock; /* protect completion state */ + + struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX]; + int mirror_count; + ssize_t count, /* bytes actually processed */ bytes_left, /* bytes left to be sent */ error; /* any reported error */ @@ -108,6 +116,29 @@ static inline int put_dreq(struct nfs_direct_req *dreq) return atomic_dec_and_test(&dreq->io_count); } +static void +nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) +{ + int i; + ssize_t count; + + WARN_ON_ONCE(hdr->pgio_mirror_idx >= dreq->mirror_count); + + dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes; + + if (hdr->pgio_mirror_idx == 0) + dreq->count += hdr->good_bytes; + + /* update the dreq->count by finding the minimum agreed count from all + * mirrors */ + count = dreq->mirrors[0].count; + + for (i = 1; i < dreq->mirror_count; i++) + count = min(count, dreq->mirrors[i].count); + + dreq->count = count; +} + /* * nfs_direct_select_verf - select the right verifier * @dreq - direct request possibly spanning multiple servers @@ -241,6 +272,18 @@ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, cinfo->completion_ops = &nfs_direct_commit_completion_ops; } +static inline void nfs_direct_setup_mirroring(struct nfs_direct_req *dreq, + struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + int mirror_count = 1; + + if (pgio->pg_ops->pg_get_mirror_count) + mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); + + dreq->mirror_count = mirror_count; +} + static inline struct nfs_direct_req *nfs_direct_req_alloc(void) { struct nfs_direct_req *dreq; @@ -255,6 +298,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void) INIT_LIST_HEAD(&dreq->mds_cinfo.list); dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */ INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); + dreq->mirror_count = 1; spin_lock_init(&dreq->lock); return dreq; @@ -360,14 +404,9 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) spin_lock(&dreq->lock); if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes == 0)) dreq->error = hdr->error; - else { - /* - * FIXME: right now this only accounts for bytes written - * to the first mirror - */ - if (hdr->pgio_mirror_idx == 0) - dreq->count += hdr->good_bytes; - } + else + nfs_direct_good_bytes(dreq, hdr); + spin_unlock(&dreq->lock); while (!list_empty(&hdr->pages)) { @@ -598,17 +637,23 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) LIST_HEAD(reqs); struct nfs_commit_info cinfo; LIST_HEAD(failed); + int i; nfs_init_cinfo_from_dreq(&cinfo, dreq); nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); dreq->count = 0; + for (i = 0; i < dreq->mirror_count; i++) + dreq->mirrors[i].count = 0; get_dreq(dreq); nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; + req = nfs_list_entry(reqs.next); + nfs_direct_setup_mirroring(dreq, &desc, req); + list_for_each_entry_safe(req, tmp, &reqs, wb_list) { if (!nfs_pageio_add_request(&desc, req)) { nfs_list_remove_request(req); @@ -730,12 +775,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) dreq->error = hdr->error; } if (dreq->error == 0) { - /* - * FIXME: right now this only accounts for bytes written - * to the first mirror - */ - if (hdr->pgio_mirror_idx == 0) - dreq->count += hdr->good_bytes; + nfs_direct_good_bytes(dreq, hdr); if (nfs_write_need_commit(hdr)) { if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) request_commit = true; @@ -841,6 +881,9 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, result = PTR_ERR(req); break; } + + nfs_direct_setup_mirroring(dreq, &desc, req); + nfs_lock_request(req); req->wb_index = pos >> PAGE_SHIFT; req->wb_offset = pos & ~PAGE_MASK; From 80c76fe314c2859d5aac94b5d66d2b9895aa73d4 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Wed, 1 Oct 2014 12:58:25 -0400 Subject: [PATCH 428/601] pnfs: fail comparison when bucket verifier not set This skips the WARN_ON_ONCE, but doesnt change behavior (the memcmp would fail). Signed-off-by: Weston Andros Adamson Signed-off-by: Tom Haynes --- fs/nfs/direct.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 651387bbfd9fac..eb814789f7002c 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -222,7 +222,11 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, verfp = nfs_direct_select_verf(dreq, data->ds_clp, data->ds_commit_index); - WARN_ON_ONCE(verfp->committed < 0); + + /* verifier not set so always fail */ + if (verfp->committed < 0) + return 1; + return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); } From 566f8737630390b743d79e26e4ac855fe2758129 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 10 Oct 2014 23:25:46 +0800 Subject: [PATCH 429/601] nfs41: add a debug warning if we destroy an unempty layout So that we can detect the case if some layout segments are still pinned which is surely a bug that we need to fix. Signed-off-by: Peng Tao --- fs/nfs/pnfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 5f7c422ebb5dc8..e123cfce54ee62 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -242,6 +242,8 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) struct inode *inode = lo->plh_inode; if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { + if (!list_empty(&lo->plh_segs)) + WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); pnfs_detach_layout_hdr(lo); spin_unlock(&inode->i_lock); pnfs_free_layout_hdr(lo); From 47af81f29556a45493e5c87289c3c16ce911096c Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 10 Nov 2014 08:35:34 +0800 Subject: [PATCH 430/601] nfs: only reset desc->pg_mirror_idx when mirroring is supported so that we don't reset desc->pg_mirror_idx for read unnecessarily. Remove WARN_ON_ONCE from __nfs_pageio_add_request to allow LD to set pg_mirror_idx for read where pg_mirror_count is always 1. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/internal.h | 7 +++++++ fs/nfs/pagelist.c | 8 ++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ef1c703e487b01..5be06bcafa2f74 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -6,6 +6,7 @@ #include #include #include +#include #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) @@ -261,6 +262,12 @@ static inline void nfs_iocounter_init(struct nfs_io_counter *c) atomic_set(&c->io_count, 0); } +static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc) +{ + WARN_ON_ONCE(desc->pg_mirror_count < 1); + return desc->pg_mirror_count > 1; +} + /* nfs2xdr.c */ extern struct rpc_procinfo nfs_procedures[]; extern int nfs2_decode_dirent(struct xdr_stream *, diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index eec12b75c2325f..f9d8c469114970 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1021,8 +1021,6 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, unsigned int bytes_left = 0; unsigned int offset, pgbase; - WARN_ON_ONCE(desc->pg_mirror_idx >= desc->pg_mirror_count); - nfs_page_group_lock(req, false); subreq = req; @@ -1162,7 +1160,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, } else dupreq = req; - desc->pg_mirror_idx = midx; + if (nfs_pgio_has_mirroring(desc)) + desc->pg_mirror_idx = midx; if (!nfs_pageio_add_request_mirror(desc, dupreq)) return 0; } @@ -1181,7 +1180,8 @@ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[mirror_idx]; u32 restore_idx = desc->pg_mirror_idx; - desc->pg_mirror_idx = mirror_idx; + if (nfs_pgio_has_mirroring(desc)) + desc->pg_mirror_idx = mirror_idx; for (;;) { nfs_pageio_doio(desc); if (!mirror->pg_recoalesce) From 48d635f14a544c2b3ca870d2c7349b41160496d2 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 10 Nov 2014 08:35:35 +0800 Subject: [PATCH 431/601] nfs: add nfs_pgio_current_mirror helper Let it return current nfs_pgio_mirror in use depending on pg_mirror_count. For read, we always use pg_mirrors[0], so this effectively gives us freedom to use pg_mirror_idx to track the actual mirror to read from through out the IO stack. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/internal.h | 2 ++ fs/nfs/objlayout/objio_osd.c | 2 +- fs/nfs/pagelist.c | 25 +++++++++++++++++-------- fs/nfs/pnfs.c | 9 ++++----- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5be06bcafa2f74..ffe4b7ac9e6beb 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -255,6 +255,8 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, struct rpc_cred *cred, const struct nfs_rpc_ops *rpc_ops, const struct rpc_call_ops *call_ops, int how, int flags); void nfs_free_request(struct nfs_page *req); +struct nfs_pgio_mirror * +nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc); static inline void nfs_iocounter_init(struct nfs_io_counter *c) { diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 9a5f2ee6001f92..24e1d7403c0be2 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -537,7 +537,7 @@ int objio_write_pagelist(struct nfs_pgio_header *hdr, int how) static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { - struct nfs_pgio_mirror *mirror = &pgio->pg_mirrors[pgio->pg_mirror_idx]; + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(pgio); unsigned int size; size = pnfs_generic_pg_test(pgio, prev, req); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index f9d8c469114970..960c99f75d3f28 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -42,11 +42,20 @@ static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount) return p->pagevec != NULL; } +struct nfs_pgio_mirror * +nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc) +{ + return nfs_pgio_has_mirroring(desc) ? + &desc->pg_mirrors[desc->pg_mirror_idx] : + &desc->pg_mirrors[0]; +} +EXPORT_SYMBOL_GPL(nfs_pgio_current_mirror); + void nfs_pgheader_init(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr, void (*release)(struct nfs_pgio_header *hdr)) { - struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); hdr->req = nfs_list_entry(mirror->pg_list.next); @@ -485,7 +494,7 @@ nfs_wait_on_request(struct nfs_page *req) size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) { - struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); if (mirror->pg_count > mirror->pg_bsize) { @@ -782,7 +791,7 @@ static void nfs_pgio_result(struct rpc_task *task, void *calldata) int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { - struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); struct nfs_page *req; struct page **pages, @@ -831,7 +840,7 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) struct nfs_pgio_header *hdr; int ret; - mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + mirror = nfs_pgio_current_mirror(desc); hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { @@ -961,7 +970,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { - struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); struct nfs_page *prev = NULL; @@ -985,7 +994,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, */ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) { - struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); if (!list_empty(&mirror->pg_list)) { @@ -1015,7 +1024,7 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { - struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); struct nfs_page *subreq; unsigned int bytes_left = 0; @@ -1078,7 +1087,7 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) { - struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); LIST_HEAD(head); do { diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index e123cfce54ee62..b822b1749643dc 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1731,7 +1731,7 @@ static void pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { - struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { list_splice_tail_init(&hdr->pages, &mirror->pg_list); @@ -1785,7 +1785,7 @@ EXPORT_SYMBOL_GPL(pnfs_writehdr_free); int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); struct nfs_pgio_header *hdr; int ret; @@ -1846,8 +1846,7 @@ static void pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { - struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; - + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { list_splice_tail_init(&hdr->pages, &mirror->pg_list); @@ -1903,7 +1902,7 @@ EXPORT_SYMBOL_GPL(pnfs_readhdr_free); int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[desc->pg_mirror_idx]; + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); struct nfs_pgio_header *hdr; int ret; From ceb11e13df3e78b450730c615037133c57b90c3b Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 10 Nov 2014 08:35:38 +0800 Subject: [PATCH 432/601] pnfs: allow LD to ask to resend read through pnfs If current IO cannot be completed due to some transient errors, LD may want to ask generic layer to resend the request through pnfs again. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 15 ++++++++++++++- fs/nfs/pnfs.h | 2 ++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b822b1749643dc..685af4fb39cabd 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1880,15 +1880,28 @@ pnfs_try_to_read_data(struct nfs_pgio_header *hdr, return trypnfs; } +/* Resend all requests through pnfs. */ +int pnfs_read_resend_pnfs(struct nfs_pgio_header *hdr) +{ + struct nfs_pageio_descriptor pgio; + + nfs_pageio_init_read(&pgio, hdr->inode, false, hdr->completion_ops); + return nfs_pageio_resend(&pgio, hdr); +} +EXPORT_SYMBOL_GPL(pnfs_read_resend_pnfs); + static void pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) { const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; struct pnfs_layout_segment *lseg = desc->pg_lseg; enum pnfs_try_status trypnfs; + int err = 0; trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); - if (trypnfs == PNFS_NOT_ATTEMPTED) + if (trypnfs == PNFS_TRY_AGAIN) + err = pnfs_read_resend_pnfs(hdr); + if (trypnfs == PNFS_NOT_ATTEMPTED || err) pnfs_read_through_mds(desc, hdr); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index a0ab81cc9cf372..84c25cd476f85d 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -72,6 +72,7 @@ struct pnfs_layout_segment { enum pnfs_try_status { PNFS_ATTEMPTED = 0, PNFS_NOT_ATTEMPTED = 1, + PNFS_TRY_AGAIN = 2, }; #ifdef CONFIG_NFS_V4_1 @@ -268,6 +269,7 @@ int _pnfs_return_layout(struct inode *); int pnfs_commit_and_return_layout(struct inode *); void pnfs_ld_write_done(struct nfs_pgio_header *); void pnfs_ld_read_done(struct nfs_pgio_header *); +int pnfs_read_resend_pnfs(struct nfs_pgio_header *); struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, loff_t pos, From 15eb67c15342d212b0c8a540b6d6bd2dfad52a63 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 17 Nov 2014 09:30:36 +0800 Subject: [PATCH 433/601] nfs41: add range to layoutreturn args So that callers can specify which range to return. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/nfs4xdr.c | 6 +++--- fs/nfs/pnfs.c | 4 +++- include/linux/nfs_xdr.h | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 3c3ff633dd171e..56d4c91a48f3de 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2012,11 +2012,11 @@ encode_layoutreturn(struct xdr_stream *xdr, p = reserve_space(xdr, 16); *p++ = cpu_to_be32(0); /* reclaim. always 0 for now */ *p++ = cpu_to_be32(args->layout_type); - *p++ = cpu_to_be32(args->iomode); + *p++ = cpu_to_be32(args->range.iomode); *p = cpu_to_be32(RETURN_FILE); p = reserve_space(xdr, 16); - p = xdr_encode_hyper(p, 0); - p = xdr_encode_hyper(p, NFS4_MAX_UINT64); + p = xdr_encode_hyper(p, args->range.offset); + p = xdr_encode_hyper(p, args->range.length); spin_lock(&args->inode->i_lock); encode_nfs4_stateid(xdr, &args->stateid); spin_unlock(&args->inode->i_lock); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 685af4fb39cabd..9549b89e494bb2 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -916,7 +916,9 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, lrp->args.stateid = stateid; lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; lrp->args.inode = ino; - lrp->args.iomode = iomode; + lrp->args.range.iomode = iomode; + lrp->args.range.offset = 0; + lrp->args.range.length = NFS4_MAX_UINT64; lrp->args.layout = lo; lrp->clp = NFS_SERVER(ino)->nfs_client; lrp->cred = lo->plh_lc_cred; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6400a1e01aa49f..363792356d25a4 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -293,7 +293,7 @@ struct nfs4_layoutreturn_args { struct nfs4_sequence_args seq_args; struct pnfs_layout_hdr *layout; struct inode *inode; - enum pnfs_iomode iomode; + struct pnfs_layout_range range; nfs4_stateid stateid; __u32 layout_type; }; From 6c16605d6ef0dfb2e154119700d58b85c6b4dc71 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 17 Nov 2014 09:30:40 +0800 Subject: [PATCH 434/601] nfs41: allow async version layoutreturn Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/nfs4proc.c | 11 +++++++++-- fs/nfs/pnfs.c | 11 ++++++----- fs/nfs/pnfs.h | 2 +- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e19b5dbe535abe..2397c0f080d30d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7810,7 +7810,7 @@ static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { .rpc_release = nfs4_layoutreturn_release, }; -int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) +int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) { struct rpc_task *task; struct rpc_message msg = { @@ -7824,16 +7824,23 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) .rpc_message = &msg, .callback_ops = &nfs4_layoutreturn_call_ops, .callback_data = lrp, + .flags = RPC_TASK_ASYNC, }; - int status; + int status = 0; dprintk("--> %s\n", __func__); nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); + if (sync == false) + goto out; + status = nfs4_wait_for_completion_rpc_task(task); + if (status != 0) + goto out; status = task->tk_status; trace_nfs4_layoutreturn(lrp->args.inode, status); +out: dprintk("<-- %s status=%d\n", __func__, status); rpc_put_task(task); return status; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 9549b89e494bb2..0a0e209e8262ea 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -52,7 +52,7 @@ static LIST_HEAD(pnfs_modules_tbl); static int pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, - enum pnfs_iomode iomode); + enum pnfs_iomode iomode, bool sync); /* Return the registered pnfs layout driver module matching given id */ static struct pnfs_layoutdriver_type * @@ -392,7 +392,8 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) spin_unlock(&inode->i_lock); pnfs_free_lseg(lseg); if (need_return) - pnfs_send_layoutreturn(lo, stateid, iomode); + pnfs_send_layoutreturn(lo, stateid, iomode, + true); else pnfs_put_layout_hdr(lo); } @@ -897,7 +898,7 @@ static void pnfs_clear_layoutcommit(struct inode *inode, static int pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, - enum pnfs_iomode iomode) + enum pnfs_iomode iomode, bool sync) { struct inode *ino = lo->plh_inode; struct nfs4_layoutreturn *lrp; @@ -923,7 +924,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, lrp->clp = NFS_SERVER(ino)->nfs_client; lrp->cred = lo->plh_lc_cred; - status = nfs4_proc_layoutreturn(lrp); + status = nfs4_proc_layoutreturn(lrp, sync); out: if (status) { spin_lock(&ino->i_lock); @@ -989,7 +990,7 @@ _pnfs_return_layout(struct inode *ino) spin_unlock(&ino->i_lock); pnfs_free_lseg_list(&tmp_list); - status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY); + status = pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); out: dprintk("<-- %s status: %d\n", __func__, status); return status; diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 84c25cd476f85d..b79f494d59ac45 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -219,7 +219,7 @@ extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev, struct rpc_cred *cred); extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); -extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); +extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync); /* pnfs.c */ void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo); From 193e3aa2ccfb5a53acf7a690b80a1e415b74dbd7 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 17 Nov 2014 09:30:41 +0800 Subject: [PATCH 435/601] nfs41: introduce NFS_LAYOUT_RETURN_BEFORE_CLOSE When it is set, generic pnfs would try to send layoutreturn right before last close/delegation_return regard less NFS_LAYOUT_ROC is set or not. LD can then make sure layoutreturn is always sent rather than being omitted. The difference against NFS_LAYOUT_RETURN is that NFS_LAYOUT_RETURN_BEFORE_CLOSE does not block usage of the layout so LD can set it and expect generic layer to try pnfs path at the same time. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/nfs4proc.c | 2 ++ fs/nfs/pnfs.c | 40 +++++++++++++++++++++++++++++++++------- fs/nfs/pnfs.h | 1 + 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2397c0f080d30d..7e1a97a54f99df 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7797,6 +7797,8 @@ static void nfs4_layoutreturn_release(void *calldata) if (lrp->res.lrs_present) pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); + clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); + rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); lo->plh_block_lgets--; spin_unlock(&lo->plh_inode->i_lock); pnfs_put_layout_hdr(lrp->args.layout); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0a0e209e8262ea..d3c2ca71a76d88 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -909,6 +909,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, status = -ENOMEM; spin_lock(&ino->i_lock); lo->plh_block_lgets--; + rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq); spin_unlock(&ino->i_lock); pnfs_put_layout_hdr(lo); goto out; @@ -926,11 +927,6 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, status = nfs4_proc_layoutreturn(lrp, sync); out: - if (status) { - spin_lock(&ino->i_lock); - clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); - spin_unlock(&ino->i_lock); - } dprintk("<-- %s status: %d\n", __func__, status); return status; } @@ -1028,8 +1024,9 @@ bool pnfs_roc(struct inode *ino) { struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg, *tmp; + nfs4_stateid stateid; LIST_HEAD(tmp_list); - bool found = false; + bool found = false, layoutreturn = false; spin_lock(&ino->i_lock); lo = NFS_I(ino)->layout; @@ -1050,7 +1047,20 @@ bool pnfs_roc(struct inode *ino) return true; out_nolayout: + if (lo) { + stateid = lo->plh_stateid; + layoutreturn = + test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, + &lo->plh_flags); + if (layoutreturn) { + lo->plh_block_lgets++; + pnfs_get_layout_hdr(lo); + } + } spin_unlock(&ino->i_lock); + if (layoutreturn) + pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, 0, + NFS4_MAX_UINT64, true); return false; } @@ -1085,8 +1095,9 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) struct nfs_inode *nfsi = NFS_I(ino); struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg; + nfs4_stateid stateid; u32 current_seqid; - bool found = false; + bool found = false, layoutreturn = false; spin_lock(&ino->i_lock); list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) @@ -1103,7 +1114,22 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) */ *barrier = current_seqid + atomic_read(&lo->plh_outstanding); out: + if (!found) { + stateid = lo->plh_stateid; + layoutreturn = + test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, + &lo->plh_flags); + if (layoutreturn) { + lo->plh_block_lgets++; + pnfs_get_layout_hdr(lo); + } + } spin_unlock(&ino->i_lock); + if (layoutreturn) { + rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); + pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, 0, + NFS4_MAX_UINT64, false); + } return found; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index b79f494d59ac45..080bf90498d4ac 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -96,6 +96,7 @@ enum { NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ NFS_LAYOUT_ROC, /* some lseg had roc bit set */ NFS_LAYOUT_RETURN, /* Return this layout ASAP */ + NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */ NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ }; From 27b6f53987d61822a858b4680c3727bfb19e620a Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 20 Oct 2014 14:44:38 +0800 Subject: [PATCH 436/601] nfs/flexfiles: send layoutreturn before freeing lseg Otherwise we'll lose error tracking information when encoding layoutreturn. pnfs_put_lseg may be called from rpc callbacks. So we should not call pnfs_send_layoutreturn directly because it can deadlock in the rpc layer. Signed-off-by: Peng Tao Signed-off-by: Tom Haynes --- fs/nfs/pnfs.c | 81 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 25 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index d3c2ca71a76d88..108a619861e50b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -346,8 +346,7 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo, /* Return true if layoutreturn is needed */ static bool pnfs_layout_need_return(struct pnfs_layout_hdr *lo, - struct pnfs_layout_segment *lseg, - nfs4_stateid *stateid, enum pnfs_iomode *iomode) + struct pnfs_layout_segment *lseg) { struct pnfs_layout_segment *s; @@ -355,17 +354,54 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo, return false; list_for_each_entry(s, &lo->plh_segs, pls_list) - if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) + if (s != lseg && test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags)) return false; - *stateid = lo->plh_stateid; - *iomode = lo->plh_return_iomode; - /* decreased in pnfs_send_layoutreturn() */ - lo->plh_block_lgets++; - lo->plh_return_iomode = 0; return true; } +static void pnfs_layoutreturn_free_lseg(struct work_struct *work) +{ + struct pnfs_layout_segment *lseg; + struct pnfs_layout_hdr *lo; + struct inode *inode; + + lseg = container_of(work, struct pnfs_layout_segment, pls_work); + WARN_ON(atomic_read(&lseg->pls_refcount)); + lo = lseg->pls_layout; + inode = lo->plh_inode; + + spin_lock(&inode->i_lock); + if (pnfs_layout_need_return(lo, lseg)) { + nfs4_stateid stateid; + enum pnfs_iomode iomode; + + stateid = lo->plh_stateid; + iomode = lo->plh_return_iomode; + /* decreased in pnfs_send_layoutreturn() */ + lo->plh_block_lgets++; + lo->plh_return_iomode = 0; + spin_unlock(&inode->i_lock); + + pnfs_send_layoutreturn(lo, stateid, iomode, true); + spin_lock(&inode->i_lock); + } else + /* match pnfs_get_layout_hdr #2 in pnfs_put_lseg */ + pnfs_put_layout_hdr(lo); + pnfs_layout_remove_lseg(lo, lseg); + spin_unlock(&inode->i_lock); + pnfs_free_lseg(lseg); + /* match pnfs_get_layout_hdr #1 in pnfs_put_lseg */ + pnfs_put_layout_hdr(lo); +} + +static void +pnfs_layoutreturn_free_lseg_async(struct pnfs_layout_segment *lseg) +{ + INIT_WORK(&lseg->pls_work, pnfs_layoutreturn_free_lseg); + queue_work(nfsiod_workqueue, &lseg->pls_work); +} + void pnfs_put_lseg(struct pnfs_layout_segment *lseg) { @@ -381,21 +417,18 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) lo = lseg->pls_layout; inode = lo->plh_inode; if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { - bool need_return; - nfs4_stateid stateid; - enum pnfs_iomode iomode; - pnfs_get_layout_hdr(lo); - pnfs_layout_remove_lseg(lo, lseg); - need_return = pnfs_layout_need_return(lo, lseg, - &stateid, &iomode); - spin_unlock(&inode->i_lock); - pnfs_free_lseg(lseg); - if (need_return) - pnfs_send_layoutreturn(lo, stateid, iomode, - true); - else + if (pnfs_layout_need_return(lo, lseg)) { + spin_unlock(&inode->i_lock); + /* hdr reference dropped in nfs4_layoutreturn_release */ + pnfs_get_layout_hdr(lo); + pnfs_layoutreturn_free_lseg_async(lseg); + } else { + pnfs_layout_remove_lseg(lo, lseg); + spin_unlock(&inode->i_lock); + pnfs_free_lseg(lseg); pnfs_put_layout_hdr(lo); + } } } EXPORT_SYMBOL_GPL(pnfs_put_lseg); @@ -1059,8 +1092,7 @@ bool pnfs_roc(struct inode *ino) } spin_unlock(&ino->i_lock); if (layoutreturn) - pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, 0, - NFS4_MAX_UINT64, true); + pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true); return false; } @@ -1127,8 +1159,7 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task) spin_unlock(&ino->i_lock); if (layoutreturn) { rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); - pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, 0, - NFS4_MAX_UINT64, false); + pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, false); } return found; } From c829013dca33110d57c7f625443b716bd7a17671 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 1 Dec 2014 08:22:18 +0800 Subject: [PATCH 437/601] nfs41: add NFS_LAYOUT_RETRY_LAYOUTGET to layout header flags Use it to indicate that LD wants to retry layoutget. LD can set it whenever it wants the common pnfs code to return and retry pnfs path through a new layout. The bit gets cleared when client does a new layoutget, when client closes the file (ROC case), or when kernel needs to evict the inode (non-ROC case). Signed-off-by: Peng Tao --- fs/nfs/pnfs.c | 3 +++ fs/nfs/pnfs.h | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 108a619861e50b..893f6b5afe6a55 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -615,6 +615,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) pnfs_get_layout_hdr(lo); pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); + pnfs_clear_retry_layoutget(lo); spin_unlock(&nfsi->vfs_inode.i_lock); pnfs_free_lseg_list(&tmp_list); pnfs_put_layout_hdr(lo); @@ -1066,6 +1067,7 @@ bool pnfs_roc(struct inode *ino) if (!lo || !test_and_clear_bit(NFS_LAYOUT_ROC, &lo->plh_flags) || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) goto out_nolayout; + pnfs_clear_retry_layoutget(lo); list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { mark_lseg_invalid(lseg, &tmp_list); @@ -1491,6 +1493,7 @@ pnfs_update_layout(struct inode *ino, arg.length = PAGE_CACHE_ALIGN(arg.length); lseg = send_layoutget(lo, ctx, &arg, gfp_flags); + pnfs_clear_retry_layoutget(lo); atomic_dec(&lo->plh_outstanding); out_put_layout_hdr: if (first) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 080bf90498d4ac..fed6ae067acba5 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -99,6 +99,7 @@ enum { NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */ NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ + NFS_LAYOUT_RETRY_LAYOUTGET, /* Retry layoutget */ }; enum layoutdriver_policy_flags { @@ -350,6 +351,23 @@ nfs4_get_deviceid(struct nfs4_deviceid_node *d) return d; } +static inline void pnfs_set_retry_layoutget(struct pnfs_layout_hdr *lo) +{ + if (!test_and_set_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) + atomic_inc(&lo->plh_refcount); +} + +static inline void pnfs_clear_retry_layoutget(struct pnfs_layout_hdr *lo) +{ + if (test_and_clear_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) + atomic_dec(&lo->plh_refcount); +} + +static inline bool pnfs_should_retry_layoutget(struct pnfs_layout_hdr *lo) +{ + return test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags); +} + static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) { From 012fa16dca0da6c487dd066829ff0b0954925fe6 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 1 Dec 2014 08:22:21 +0800 Subject: [PATCH 438/601] nfs: add a helper to set NFS_ODIRECT_RESCHED_WRITES to direct writes To allow pnfs LD to ask direct writes to be resend. Signed-off-by: Peng Tao --- fs/nfs/direct.c | 6 ++++++ fs/nfs/internal.h | 1 + 2 files changed, 7 insertions(+) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index eb814789f7002c..4fad6b727eb441 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -116,6 +116,12 @@ static inline int put_dreq(struct nfs_direct_req *dreq) return atomic_dec_and_test(&dreq->io_count); } +void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq) +{ + dreq->flags = NFS_ODIRECT_RESCHED_WRITES; +} +EXPORT_SYMBOL_GPL(nfs_direct_set_resched_writes); + static void nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) { diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ffe4b7ac9e6beb..44e84960a26f6b 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -502,6 +502,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode) inode_dio_wait(inode); } extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); +extern void nfs_direct_set_resched_writes(struct nfs_direct_req *dreq); /* nfs4proc.c */ extern void __nfs4_read_done_cb(struct nfs_pgio_header *); From aa8a45ee974dfe3ffe290daaf5db457afae56fde Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 1 Dec 2014 08:22:23 +0800 Subject: [PATCH 439/601] nfs41: wait for LAYOUTRETURN before retrying LAYOUTGET Also take care to stop waiting if someone clears retry bit. Signed-off-by: Peng Tao --- fs/nfs/nfs4proc.c | 4 +++- fs/nfs/pnfs.c | 39 ++++++++++++++++++++++++++++++++++++++- fs/nfs/pnfs.h | 5 ++++- 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7e1a97a54f99df..44c600aac90752 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7796,7 +7796,9 @@ static void nfs4_layoutreturn_release(void *calldata) spin_lock(&lo->plh_inode->i_lock); if (lrp->res.lrs_present) pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); - clear_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); + clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); + smp_mb__after_atomic(); + wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); lo->plh_block_lgets--; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 893f6b5afe6a55..c4c9fe606ae66e 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1398,6 +1398,26 @@ static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx, return ret; } +/* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. */ +static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key) +{ + if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags)) + return 1; + return nfs_wait_bit_killable(key); +} + +static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) +{ + /* + * send layoutcommit as it can hold up layoutreturn due to lseg + * reference + */ + pnfs_layoutcommit_inode(lo->plh_inode, false); + return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN, + pnfs_layoutget_retry_bit_wait, + TASK_UNINTERRUPTIBLE); +} + /* * Layout segment is retreived from the server if not cached. * The appropriate layout segment is referenced and returned to the caller. @@ -1444,7 +1464,8 @@ pnfs_update_layout(struct inode *ino, } /* if LAYOUTGET already failed once we don't try again */ - if (pnfs_layout_io_test_failed(lo, iomode)) + if (pnfs_layout_io_test_failed(lo, iomode) && + !pnfs_should_retry_layoutget(lo)) goto out_unlock; first = list_empty(&lo->plh_segs); @@ -1469,6 +1490,22 @@ pnfs_update_layout(struct inode *ino, goto out_unlock; } + /* + * Because we free lsegs before sending LAYOUTRETURN, we need to wait + * for LAYOUTRETURN even if first is true. + */ + if (!lseg && pnfs_should_retry_layoutget(lo) && + test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) { + spin_unlock(&ino->i_lock); + dprintk("%s wait for layoutreturn\n", __func__); + if (pnfs_prepare_to_retry_layoutget(lo)) { + pnfs_put_layout_hdr(lo); + dprintk("%s retrying\n", __func__); + goto lookup_again; + } + goto out_put_layout_hdr; + } + if (pnfs_layoutgets_blocked(lo, &arg, 0)) goto out_unlock; atomic_inc(&lo->plh_outstanding); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index fed6ae067acba5..49a4667084001c 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -359,8 +359,11 @@ static inline void pnfs_set_retry_layoutget(struct pnfs_layout_hdr *lo) static inline void pnfs_clear_retry_layoutget(struct pnfs_layout_hdr *lo) { - if (test_and_clear_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) + if (test_and_clear_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) { atomic_dec(&lo->plh_refcount); + /* wake up waiters for LAYOUTRETURN as that is not needed */ + wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); + } } static inline bool pnfs_should_retry_layoutget(struct pnfs_layout_hdr *lo) From 5fadeb47dcc5c30d4b6cf481b4a78689eab59443 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 19 Jan 2015 12:41:16 +0800 Subject: [PATCH 440/601] nfs: count DIO good bytes correctly with mirroring When resending to MDS, we might resend multiple mirroring requests to MDS. As a result, nfs_direct_good_bytes() ends up counting bytes multiple times, causing application to get wrong return results in read/write syscalls. Fix it by tracking start of a dreq and checking the range of pgio header. Cc: Weston Andros Adamson Signed-off-by: Peng Tao --- fs/nfs/direct.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4fad6b727eb441..3715b4957abc0a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -88,6 +88,7 @@ struct nfs_direct_req { ssize_t count, /* bytes actually processed */ bytes_left, /* bytes left to be sent */ + io_start, /* start of IO */ error; /* any reported error */ struct completion completion; /* wait for i/o completion */ @@ -130,10 +131,11 @@ nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr) WARN_ON_ONCE(hdr->pgio_mirror_idx >= dreq->mirror_count); - dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes; - - if (hdr->pgio_mirror_idx == 0) - dreq->count += hdr->good_bytes; + count = dreq->mirrors[hdr->pgio_mirror_idx].count; + if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) { + count = hdr->io_start + hdr->good_bytes - dreq->io_start; + dreq->mirrors[hdr->pgio_mirror_idx].count = count; + } /* update the dreq->count by finding the minimum agreed count from all * mirrors */ @@ -594,6 +596,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, dreq->inode = inode; dreq->bytes_left = count; + dreq->io_start = pos; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); if (IS_ERR(l_ctx)) { @@ -1002,6 +1005,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, dreq->inode = inode; dreq->bytes_left = count; + dreq->io_start = pos; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); if (IS_ERR(l_ctx)) { From d67ae825a59d639e4d8b82413af84d854617a87e Mon Sep 17 00:00:00 2001 From: Tom Haynes Date: Thu, 11 Dec 2014 17:02:04 -0500 Subject: [PATCH 441/601] pnfs/flexfiles: Add the FlexFile Layout Driver The flexfile layout is a new layout that extends the file layout. It is currently being drafted as a specification at https://datatracker.ietf.org/doc/draft-ietf-nfsv4-layout-types/ Signed-off-by: Weston Andros Adamson Signed-off-by: Tom Haynes Signed-off-by: Tao Peng --- fs/nfs/Kconfig | 5 + fs/nfs/Makefile | 1 + fs/nfs/flexfilelayout/Makefile | 5 + fs/nfs/flexfilelayout/flexfilelayout.c | 1574 +++++++++++++++++++++ fs/nfs/flexfilelayout/flexfilelayout.h | 155 ++ fs/nfs/flexfilelayout/flexfilelayoutdev.c | 552 ++++++++ fs/nfs/idmap.c | 3 +- fs/nfs/nfs4proc.c | 4 +- fs/nfs/pnfs.c | 32 +- fs/nfs/pnfs.h | 1 + include/linux/nfs4.h | 1 + include/linux/nfs_idmap.h | 2 + include/linux/sunrpc/metrics.h | 2 + 13 files changed, 2325 insertions(+), 12 deletions(-) create mode 100644 fs/nfs/flexfilelayout/Makefile create mode 100644 fs/nfs/flexfilelayout/flexfilelayout.c create mode 100644 fs/nfs/flexfilelayout/flexfilelayout.h create mode 100644 fs/nfs/flexfilelayout/flexfilelayoutdev.c diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 3dece03f2fc8a4..c7abc10279af33 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -128,6 +128,11 @@ config PNFS_OBJLAYOUT depends on NFS_V4_1 && SCSI_OSD_ULD default NFS_V4 +config PNFS_FLEXFILE_LAYOUT + tristate + depends on NFS_V4_1 && NFS_V3 + default m + config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN string "NFSv4.1 Implementation ID Domain" depends on NFS_V4_1 diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 23abffa8a4cef3..1e987acf20c941 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -33,3 +33,4 @@ nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/ obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ +obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += flexfilelayout/ diff --git a/fs/nfs/flexfilelayout/Makefile b/fs/nfs/flexfilelayout/Makefile new file mode 100644 index 00000000000000..1d2c9f6bbcd480 --- /dev/null +++ b/fs/nfs/flexfilelayout/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the pNFS Flexfile Layout Driver kernel module +# +obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += nfs_layout_flexfiles.o +nfs_layout_flexfiles-y := flexfilelayout.o flexfilelayoutdev.o diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c new file mode 100644 index 00000000000000..f29fb7d7e8f84a --- /dev/null +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -0,0 +1,1574 @@ +/* + * Module for pnfs flexfile layout driver. + * + * Copyright (c) 2014, Primary Data, Inc. All rights reserved. + * + * Tao Peng + */ + +#include +#include +#include + +#include +#include + +#include "flexfilelayout.h" +#include "../nfs4session.h" +#include "../internal.h" +#include "../delegation.h" +#include "../nfs4trace.h" +#include "../iostat.h" +#include "../nfs.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS_LD + +#define FF_LAYOUT_POLL_RETRY_MAX (15*HZ) + +static struct pnfs_layout_hdr * +ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) +{ + struct nfs4_flexfile_layout *ffl; + + ffl = kzalloc(sizeof(*ffl), gfp_flags); + if (ffl) { + INIT_LIST_HEAD(&ffl->error_list); + return &ffl->generic_hdr; + } else + return NULL; +} + +static void +ff_layout_free_layout_hdr(struct pnfs_layout_hdr *lo) +{ + struct nfs4_ff_layout_ds_err *err, *n; + + list_for_each_entry_safe(err, n, &FF_LAYOUT_FROM_HDR(lo)->error_list, + list) { + list_del(&err->list); + kfree(err); + } + kfree(FF_LAYOUT_FROM_HDR(lo)); +} + +static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, NFS4_STATEID_SIZE); + if (unlikely(p == NULL)) + return -ENOBUFS; + memcpy(stateid, p, NFS4_STATEID_SIZE); + dprintk("%s: stateid id= [%x%x%x%x]\n", __func__, + p[0], p[1], p[2], p[3]); + return 0; +} + +static int decode_deviceid(struct xdr_stream *xdr, struct nfs4_deviceid *devid) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, NFS4_DEVICEID4_SIZE); + if (unlikely(!p)) + return -ENOBUFS; + memcpy(devid, p, NFS4_DEVICEID4_SIZE); + nfs4_print_deviceid(devid); + return 0; +} + +static int decode_nfs_fh(struct xdr_stream *xdr, struct nfs_fh *fh) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -ENOBUFS; + fh->size = be32_to_cpup(p++); + if (fh->size > sizeof(struct nfs_fh)) { + printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n", + fh->size); + return -EOVERFLOW; + } + /* fh.data */ + p = xdr_inline_decode(xdr, fh->size); + if (unlikely(!p)) + return -ENOBUFS; + memcpy(&fh->data, p, fh->size); + dprintk("%s: fh len %d\n", __func__, fh->size); + + return 0; +} + +/* + * Currently only stringified uids and gids are accepted. + * I.e., kerberos is not supported to the DSes, so no pricipals. + * + * That means that one common function will suffice, but when + * principals are added, this should be split to accomodate + * calls to both nfs_map_name_to_uid() and nfs_map_group_to_gid(). + */ +static int +decode_name(struct xdr_stream *xdr, u32 *id) +{ + __be32 *p; + int len; + + /* opaque_length(4)*/ + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + return -ENOBUFS; + len = be32_to_cpup(p++); + if (len < 0) + return -EINVAL; + + dprintk("%s: len %u\n", __func__, len); + + /* opaque body */ + p = xdr_inline_decode(xdr, len); + if (unlikely(!p)) + return -ENOBUFS; + + if (!nfs_map_string_to_numeric((char *)p, len, id)) + return -EINVAL; + + return 0; +} + +static void ff_layout_free_mirror_array(struct nfs4_ff_layout_segment *fls) +{ + int i; + + if (fls->mirror_array) { + for (i = 0; i < fls->mirror_array_cnt; i++) { + /* normally mirror_ds is freed in + * .free_deviceid_node but we still do it here + * for .alloc_lseg error path */ + if (fls->mirror_array[i]) { + kfree(fls->mirror_array[i]->fh_versions); + nfs4_ff_layout_put_deviceid(fls->mirror_array[i]->mirror_ds); + kfree(fls->mirror_array[i]); + } + } + kfree(fls->mirror_array); + fls->mirror_array = NULL; + } +} + +static int ff_layout_check_layout(struct nfs4_layoutget_res *lgr) +{ + int ret = 0; + + dprintk("--> %s\n", __func__); + + /* FIXME: remove this check when layout segment support is added */ + if (lgr->range.offset != 0 || + lgr->range.length != NFS4_MAX_UINT64) { + dprintk("%s Only whole file layouts supported. Use MDS i/o\n", + __func__); + ret = -EINVAL; + } + + dprintk("--> %s returns %d\n", __func__, ret); + return ret; +} + +static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls) +{ + if (fls) { + ff_layout_free_mirror_array(fls); + kfree(fls); + } +} + +static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) +{ + struct nfs4_ff_layout_mirror *tmp; + int i, j; + + for (i = 0; i < fls->mirror_array_cnt - 1; i++) { + for (j = i + 1; j < fls->mirror_array_cnt; j++) + if (fls->mirror_array[i]->efficiency < + fls->mirror_array[j]->efficiency) { + tmp = fls->mirror_array[i]; + fls->mirror_array[i] = fls->mirror_array[j]; + fls->mirror_array[j] = tmp; + } + } +} + +static struct pnfs_layout_segment * +ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, + struct nfs4_layoutget_res *lgr, + gfp_t gfp_flags) +{ + struct pnfs_layout_segment *ret; + struct nfs4_ff_layout_segment *fls = NULL; + struct xdr_stream stream; + struct xdr_buf buf; + struct page *scratch; + u64 stripe_unit; + u32 mirror_array_cnt; + __be32 *p; + int i, rc; + + dprintk("--> %s\n", __func__); + scratch = alloc_page(gfp_flags); + if (!scratch) + return ERR_PTR(-ENOMEM); + + xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, + lgr->layoutp->len); + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); + + /* stripe unit and mirror_array_cnt */ + rc = -EIO; + p = xdr_inline_decode(&stream, 8 + 4); + if (!p) + goto out_err_free; + + p = xdr_decode_hyper(p, &stripe_unit); + mirror_array_cnt = be32_to_cpup(p++); + dprintk("%s: stripe_unit=%llu mirror_array_cnt=%u\n", __func__, + stripe_unit, mirror_array_cnt); + + if (mirror_array_cnt > NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT || + mirror_array_cnt == 0) + goto out_err_free; + + rc = -ENOMEM; + fls = kzalloc(sizeof(*fls), gfp_flags); + if (!fls) + goto out_err_free; + + fls->mirror_array_cnt = mirror_array_cnt; + fls->stripe_unit = stripe_unit; + fls->mirror_array = kcalloc(fls->mirror_array_cnt, + sizeof(fls->mirror_array[0]), gfp_flags); + if (fls->mirror_array == NULL) + goto out_err_free; + + for (i = 0; i < fls->mirror_array_cnt; i++) { + struct nfs4_deviceid devid; + struct nfs4_deviceid_node *idnode; + u32 ds_count; + u32 fh_count; + int j; + + rc = -EIO; + p = xdr_inline_decode(&stream, 4); + if (!p) + goto out_err_free; + ds_count = be32_to_cpup(p); + + /* FIXME: allow for striping? */ + if (ds_count != 1) + goto out_err_free; + + fls->mirror_array[i] = + kzalloc(sizeof(struct nfs4_ff_layout_mirror), + gfp_flags); + if (fls->mirror_array[i] == NULL) { + rc = -ENOMEM; + goto out_err_free; + } + + spin_lock_init(&fls->mirror_array[i]->lock); + fls->mirror_array[i]->ds_count = ds_count; + + /* deviceid */ + rc = decode_deviceid(&stream, &devid); + if (rc) + goto out_err_free; + + idnode = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode), + &devid, lh->plh_lc_cred, + gfp_flags); + /* + * upon success, mirror_ds is allocated by previous + * getdeviceinfo, or newly by .alloc_deviceid_node + * nfs4_find_get_deviceid failure is indeed getdeviceinfo falure + */ + if (idnode) + fls->mirror_array[i]->mirror_ds = + FF_LAYOUT_MIRROR_DS(idnode); + else + goto out_err_free; + + /* efficiency */ + rc = -EIO; + p = xdr_inline_decode(&stream, 4); + if (!p) + goto out_err_free; + fls->mirror_array[i]->efficiency = be32_to_cpup(p); + + /* stateid */ + rc = decode_stateid(&stream, &fls->mirror_array[i]->stateid); + if (rc) + goto out_err_free; + + /* fh */ + p = xdr_inline_decode(&stream, 4); + if (!p) + goto out_err_free; + fh_count = be32_to_cpup(p); + + fls->mirror_array[i]->fh_versions = + kzalloc(fh_count * sizeof(struct nfs_fh), + gfp_flags); + if (fls->mirror_array[i]->fh_versions == NULL) { + rc = -ENOMEM; + goto out_err_free; + } + + for (j = 0; j < fh_count; j++) { + rc = decode_nfs_fh(&stream, + &fls->mirror_array[i]->fh_versions[j]); + if (rc) + goto out_err_free; + } + + fls->mirror_array[i]->fh_versions_cnt = fh_count; + + /* user */ + rc = decode_name(&stream, &fls->mirror_array[i]->uid); + if (rc) + goto out_err_free; + + /* group */ + rc = decode_name(&stream, &fls->mirror_array[i]->gid); + if (rc) + goto out_err_free; + + dprintk("%s: uid %d gid %d\n", __func__, + fls->mirror_array[i]->uid, + fls->mirror_array[i]->gid); + } + + ff_layout_sort_mirrors(fls); + rc = ff_layout_check_layout(lgr); + if (rc) + goto out_err_free; + + ret = &fls->generic_hdr; + dprintk("<-- %s (success)\n", __func__); +out_free_page: + __free_page(scratch); + return ret; +out_err_free: + _ff_layout_free_lseg(fls); + ret = ERR_PTR(rc); + dprintk("<-- %s (%d)\n", __func__, rc); + goto out_free_page; +} + +static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout) +{ + struct pnfs_layout_segment *lseg; + + list_for_each_entry(lseg, &layout->plh_segs, pls_list) + if (lseg->pls_range.iomode == IOMODE_RW) + return true; + + return false; +} + +static void +ff_layout_free_lseg(struct pnfs_layout_segment *lseg) +{ + struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); + int i; + + dprintk("--> %s\n", __func__); + + for (i = 0; i < fls->mirror_array_cnt; i++) { + if (fls->mirror_array[i]) { + nfs4_ff_layout_put_deviceid(fls->mirror_array[i]->mirror_ds); + fls->mirror_array[i]->mirror_ds = NULL; + if (fls->mirror_array[i]->cred) { + put_rpccred(fls->mirror_array[i]->cred); + fls->mirror_array[i]->cred = NULL; + } + } + } + + if (lseg->pls_range.iomode == IOMODE_RW) { + struct nfs4_flexfile_layout *ffl; + struct inode *inode; + + ffl = FF_LAYOUT_FROM_HDR(lseg->pls_layout); + inode = ffl->generic_hdr.plh_inode; + spin_lock(&inode->i_lock); + if (!ff_layout_has_rw_segments(lseg->pls_layout)) { + ffl->commit_info.nbuckets = 0; + kfree(ffl->commit_info.buckets); + ffl->commit_info.buckets = NULL; + } + spin_unlock(&inode->i_lock); + } + _ff_layout_free_lseg(fls); +} + +/* Return 1 until we have multiple lsegs support */ +static int +ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls) +{ + return 1; +} + +static int +ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo, + gfp_t gfp_flags) +{ + struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); + struct pnfs_commit_bucket *buckets; + int size; + + if (cinfo->ds->nbuckets != 0) { + /* This assumes there is only one RW lseg per file. + * To support multiple lseg per file, we need to + * change struct pnfs_commit_bucket to allow dynamic + * increasing nbuckets. + */ + return 0; + } + + size = ff_layout_get_lseg_count(fls) * FF_LAYOUT_MIRROR_COUNT(lseg); + + buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket), + gfp_flags); + if (!buckets) + return -ENOMEM; + else { + int i; + + spin_lock(cinfo->lock); + if (cinfo->ds->nbuckets != 0) + kfree(buckets); + else { + cinfo->ds->buckets = buckets; + cinfo->ds->nbuckets = size; + for (i = 0; i < size; i++) { + INIT_LIST_HEAD(&buckets[i].written); + INIT_LIST_HEAD(&buckets[i].committing); + /* mark direct verifier as unset */ + buckets[i].direct_verf.committed = + NFS_INVALID_STABLE_HOW; + } + } + spin_unlock(cinfo->lock); + return 0; + } +} + +static struct nfs4_pnfs_ds * +ff_layout_choose_best_ds_for_read(struct nfs_pageio_descriptor *pgio, + int *best_idx) +{ + struct nfs4_ff_layout_segment *fls; + struct nfs4_pnfs_ds *ds; + int idx; + + fls = FF_LAYOUT_LSEG(pgio->pg_lseg); + /* mirrors are sorted by efficiency */ + for (idx = 0; idx < fls->mirror_array_cnt; idx++) { + ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, idx, false); + if (ds) { + *best_idx = idx; + return ds; + } + } + + return NULL; +} + +static void +ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + struct nfs_pgio_mirror *pgm; + struct nfs4_ff_layout_mirror *mirror; + struct nfs4_pnfs_ds *ds; + int ds_idx; + + /* Use full layout for now */ + if (!pgio->pg_lseg) + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_READ, + GFP_KERNEL); + /* If no lseg, fall back to read through mds */ + if (pgio->pg_lseg == NULL) + goto out_mds; + + ds = ff_layout_choose_best_ds_for_read(pgio, &ds_idx); + if (!ds) + goto out_mds; + mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); + + pgio->pg_mirror_idx = ds_idx; + + /* read always uses only one mirror - idx 0 for pgio layer */ + pgm = &pgio->pg_mirrors[0]; + pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; + + return; +out_mds: + pnfs_put_lseg(pgio->pg_lseg); + pgio->pg_lseg = NULL; + nfs_pageio_reset_read_mds(pgio); +} + +static void +ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + struct nfs4_ff_layout_mirror *mirror; + struct nfs_pgio_mirror *pgm; + struct nfs_commit_info cinfo; + struct nfs4_pnfs_ds *ds; + int i; + int status; + + if (!pgio->pg_lseg) + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_RW, + GFP_NOFS); + /* If no lseg, fall back to write through mds */ + if (pgio->pg_lseg == NULL) + goto out_mds; + + nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq); + status = ff_layout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS); + if (status < 0) + goto out_mds; + + /* Use a direct mapping of ds_idx to pgio mirror_idx */ + if (WARN_ON_ONCE(pgio->pg_mirror_count != + FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg))) + goto out_mds; + + for (i = 0; i < pgio->pg_mirror_count; i++) { + ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true); + if (!ds) + goto out_mds; + pgm = &pgio->pg_mirrors[i]; + mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); + pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize; + } + + return; + +out_mds: + pnfs_put_lseg(pgio->pg_lseg); + pgio->pg_lseg = NULL; + nfs_pageio_reset_write_mds(pgio); +} + +static unsigned int +ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + if (!pgio->pg_lseg) + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_RW, + GFP_NOFS); + if (pgio->pg_lseg) + return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg); + + /* no lseg means that pnfs is not in use, so no mirroring here */ + pnfs_put_lseg(pgio->pg_lseg); + pgio->pg_lseg = NULL; + nfs_pageio_reset_write_mds(pgio); + return 1; +} + +static const struct nfs_pageio_ops ff_layout_pg_read_ops = { + .pg_init = ff_layout_pg_init_read, + .pg_test = pnfs_generic_pg_test, + .pg_doio = pnfs_generic_pg_readpages, + .pg_cleanup = pnfs_generic_pg_cleanup, +}; + +static const struct nfs_pageio_ops ff_layout_pg_write_ops = { + .pg_init = ff_layout_pg_init_write, + .pg_test = pnfs_generic_pg_test, + .pg_doio = pnfs_generic_pg_writepages, + .pg_get_mirror_count = ff_layout_pg_get_mirror_count_write, + .pg_cleanup = pnfs_generic_pg_cleanup, +}; + +static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) +{ + struct rpc_task *task = &hdr->task; + + pnfs_layoutcommit_inode(hdr->inode, false); + + if (retry_pnfs) { + dprintk("%s Reset task %5u for i/o through pNFS " + "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, + hdr->task.tk_pid, + hdr->inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(hdr->inode), + hdr->args.count, + (unsigned long long)hdr->args.offset); + + if (!hdr->dreq) { + struct nfs_open_context *ctx; + + ctx = nfs_list_entry(hdr->pages.next)->wb_context; + set_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); + hdr->completion_ops->error_cleanup(&hdr->pages); + } else { + nfs_direct_set_resched_writes(hdr->dreq); + /* fake unstable write to let common nfs resend pages */ + hdr->verf.committed = NFS_UNSTABLE; + hdr->good_bytes = 0; + } + return; + } + + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { + dprintk("%s Reset task %5u for i/o through MDS " + "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, + hdr->task.tk_pid, + hdr->inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(hdr->inode), + hdr->args.count, + (unsigned long long)hdr->args.offset); + + task->tk_status = pnfs_write_done_resend_to_mds(hdr); + } +} + +static void ff_layout_reset_read(struct nfs_pgio_header *hdr) +{ + struct rpc_task *task = &hdr->task; + + pnfs_layoutcommit_inode(hdr->inode, false); + + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { + dprintk("%s Reset task %5u for i/o through MDS " + "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, + hdr->task.tk_pid, + hdr->inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(hdr->inode), + hdr->args.count, + (unsigned long long)hdr->args.offset); + + task->tk_status = pnfs_read_done_resend_to_mds(hdr); + } +} + +static int ff_layout_async_handle_error_v4(struct rpc_task *task, + struct nfs4_state *state, + struct nfs_client *clp, + struct pnfs_layout_segment *lseg, + int idx) +{ + struct pnfs_layout_hdr *lo = lseg->pls_layout; + struct inode *inode = lo->plh_inode; + struct nfs_server *mds_server = NFS_SERVER(inode); + + struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); + struct nfs_client *mds_client = mds_server->nfs_client; + struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; + + if (task->tk_status >= 0) + return 0; + + switch (task->tk_status) { + /* MDS state errors */ + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_BAD_STATEID: + if (state == NULL) + break; + nfs_remove_bad_delegation(state->inode); + case -NFS4ERR_OPENMODE: + if (state == NULL) + break; + if (nfs4_schedule_stateid_recovery(mds_server, state) < 0) + goto out_bad_stateid; + goto wait_on_recovery; + case -NFS4ERR_EXPIRED: + if (state != NULL) { + if (nfs4_schedule_stateid_recovery(mds_server, state) < 0) + goto out_bad_stateid; + } + nfs4_schedule_lease_recovery(mds_client); + goto wait_on_recovery; + /* DS session errors */ + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_SEQ_FALSE_RETRY: + case -NFS4ERR_SEQ_MISORDERED: + dprintk("%s ERROR %d, Reset session. Exchangeid " + "flags 0x%x\n", __func__, task->tk_status, + clp->cl_exchange_flags); + nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); + break; + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX); + break; + case -NFS4ERR_RETRY_UNCACHED_REP: + break; + /* Invalidate Layout errors */ + case -NFS4ERR_PNFS_NO_LAYOUT: + case -ESTALE: /* mapped NFS4ERR_STALE */ + case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */ + case -EISDIR: /* mapped NFS4ERR_ISDIR */ + case -NFS4ERR_FHEXPIRED: + case -NFS4ERR_WRONG_TYPE: + dprintk("%s Invalid layout error %d\n", __func__, + task->tk_status); + /* + * Destroy layout so new i/o will get a new layout. + * Layout will not be destroyed until all current lseg + * references are put. Mark layout as invalid to resend failed + * i/o and all i/o waiting on the slot table to the MDS until + * layout is destroyed and a new valid layout is obtained. + */ + pnfs_destroy_layout(NFS_I(inode)); + rpc_wake_up(&tbl->slot_tbl_waitq); + goto reset; + /* RPC connection errors */ + case -ECONNREFUSED: + case -EHOSTDOWN: + case -EHOSTUNREACH: + case -ENETUNREACH: + case -EIO: + case -ETIMEDOUT: + case -EPIPE: + dprintk("%s DS connection error %d\n", __func__, + task->tk_status); + nfs4_mark_deviceid_unavailable(devid); + rpc_wake_up(&tbl->slot_tbl_waitq); + /* fall through */ + default: + if (ff_layout_has_available_ds(lseg)) + return -NFS4ERR_RESET_TO_PNFS; +reset: + dprintk("%s Retry through MDS. Error %d\n", __func__, + task->tk_status); + return -NFS4ERR_RESET_TO_MDS; + } +out: + task->tk_status = 0; + return -EAGAIN; +out_bad_stateid: + task->tk_status = -EIO; + return 0; +wait_on_recovery: + rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL); + if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0) + rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task); + goto out; +} + +/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */ +static int ff_layout_async_handle_error_v3(struct rpc_task *task, + struct pnfs_layout_segment *lseg, + int idx) +{ + struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); + + if (task->tk_status >= 0) + return 0; + + if (task->tk_status != -EJUKEBOX) { + dprintk("%s DS connection error %d\n", __func__, + task->tk_status); + nfs4_mark_deviceid_unavailable(devid); + if (ff_layout_has_available_ds(lseg)) + return -NFS4ERR_RESET_TO_PNFS; + else + return -NFS4ERR_RESET_TO_MDS; + } + + if (task->tk_status == -EJUKEBOX) + nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); + task->tk_status = 0; + rpc_restart_call(task); + rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); + return -EAGAIN; +} + +static int ff_layout_async_handle_error(struct rpc_task *task, + struct nfs4_state *state, + struct nfs_client *clp, + struct pnfs_layout_segment *lseg, + int idx) +{ + int vers = clp->cl_nfs_mod->rpc_vers->number; + + switch (vers) { + case 3: + return ff_layout_async_handle_error_v3(task, lseg, idx); + case 4: + return ff_layout_async_handle_error_v4(task, state, clp, + lseg, idx); + default: + /* should never happen */ + WARN_ON_ONCE(1); + return 0; + } +} + +static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, + int idx, u64 offset, u64 length, + u32 status, int opnum) +{ + struct nfs4_ff_layout_mirror *mirror; + int err; + + mirror = FF_LAYOUT_COMP(lseg, idx); + err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), + mirror, offset, length, status, opnum, + GFP_NOIO); + dprintk("%s: err %d op %d status %u\n", __func__, err, opnum, status); +} + +/* NFS_PROTO call done callback routines */ + +static int ff_layout_read_done_cb(struct rpc_task *task, + struct nfs_pgio_header *hdr) +{ + struct inode *inode; + int err; + + trace_nfs4_pnfs_read(hdr, task->tk_status); + if (task->tk_status == -ETIMEDOUT && !hdr->res.op_status) + hdr->res.op_status = NFS4ERR_NXIO; + if (task->tk_status < 0 && hdr->res.op_status) + ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx, + hdr->args.offset, hdr->args.count, + hdr->res.op_status, OP_READ); + err = ff_layout_async_handle_error(task, hdr->args.context->state, + hdr->ds_clp, hdr->lseg, + hdr->pgio_mirror_idx); + + switch (err) { + case -NFS4ERR_RESET_TO_PNFS: + set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, + &hdr->lseg->pls_layout->plh_flags); + pnfs_read_resend_pnfs(hdr); + return task->tk_status; + case -NFS4ERR_RESET_TO_MDS: + inode = hdr->lseg->pls_layout->plh_inode; + pnfs_error_mark_layout_for_return(inode, hdr->lseg); + ff_layout_reset_read(hdr); + return task->tk_status; + case -EAGAIN: + rpc_restart_call_prepare(task); + return -EAGAIN; + } + + return 0; +} + +/* + * We reference the rpc_cred of the first WRITE that triggers the need for + * a LAYOUTCOMMIT, and use it to send the layoutcommit compound. + * rfc5661 is not clear about which credential should be used. + * + * Flexlayout client should treat DS replied FILE_SYNC as DATA_SYNC, so + * to follow http://www.rfc-editor.org/errata_search.php?rfc=5661&eid=2751 + * we always send layoutcommit after DS writes. + */ +static void +ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr) +{ + pnfs_set_layoutcommit(hdr); + dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, + (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); +} + +static bool +ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx) +{ + /* No mirroring for now */ + struct nfs4_deviceid_node *node = FF_LAYOUT_DEVID_NODE(lseg, idx); + + return ff_layout_test_devid_unavailable(node); +} + +static int ff_layout_read_prepare_common(struct rpc_task *task, + struct nfs_pgio_header *hdr) +{ + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { + rpc_exit(task, -EIO); + return -EIO; + } + if (ff_layout_reset_to_mds(hdr->lseg, hdr->pgio_mirror_idx)) { + dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); + if (ff_layout_has_available_ds(hdr->lseg)) + pnfs_read_resend_pnfs(hdr); + else + ff_layout_reset_read(hdr); + rpc_exit(task, 0); + return -EAGAIN; + } + hdr->pgio_done_cb = ff_layout_read_done_cb; + + return 0; +} + +/* + * Call ops for the async read/write cases + * In the case of dense layouts, the offset needs to be reset to its + * original value. + */ +static void ff_layout_read_prepare_v3(struct rpc_task *task, void *data) +{ + struct nfs_pgio_header *hdr = data; + + if (ff_layout_read_prepare_common(task, hdr)) + return; + + rpc_call_start(task); +} + +static int ff_layout_setup_sequence(struct nfs_client *ds_clp, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + struct rpc_task *task) +{ + if (ds_clp->cl_session) + return nfs41_setup_sequence(ds_clp->cl_session, + args, + res, + task); + return nfs40_setup_sequence(ds_clp->cl_slot_tbl, + args, + res, + task); +} + +static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data) +{ + struct nfs_pgio_header *hdr = data; + + if (ff_layout_read_prepare_common(task, hdr)) + return; + + if (ff_layout_setup_sequence(hdr->ds_clp, + &hdr->args.seq_args, + &hdr->res.seq_res, + task)) + return; + + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, + hdr->args.lock_context, FMODE_READ) == -EIO) + rpc_exit(task, -EIO); /* lost lock, terminate I/O */ +} + +static void ff_layout_read_call_done(struct rpc_task *task, void *data) +{ + struct nfs_pgio_header *hdr = data; + + dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); + + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && + task->tk_status == 0) { + nfs4_sequence_done(task, &hdr->res.seq_res); + return; + } + + /* Note this may cause RPC to be resent */ + hdr->mds_ops->rpc_call_done(task, hdr); +} + +static void ff_layout_read_count_stats(struct rpc_task *task, void *data) +{ + struct nfs_pgio_header *hdr = data; + + rpc_count_iostats_metrics(task, + &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]); +} + +static int ff_layout_write_done_cb(struct rpc_task *task, + struct nfs_pgio_header *hdr) +{ + struct inode *inode; + int err; + + trace_nfs4_pnfs_write(hdr, task->tk_status); + if (task->tk_status == -ETIMEDOUT && !hdr->res.op_status) + hdr->res.op_status = NFS4ERR_NXIO; + if (task->tk_status < 0 && hdr->res.op_status) + ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx, + hdr->args.offset, hdr->args.count, + hdr->res.op_status, OP_WRITE); + err = ff_layout_async_handle_error(task, hdr->args.context->state, + hdr->ds_clp, hdr->lseg, + hdr->pgio_mirror_idx); + + switch (err) { + case -NFS4ERR_RESET_TO_PNFS: + case -NFS4ERR_RESET_TO_MDS: + inode = hdr->lseg->pls_layout->plh_inode; + pnfs_error_mark_layout_for_return(inode, hdr->lseg); + if (err == -NFS4ERR_RESET_TO_PNFS) { + pnfs_set_retry_layoutget(hdr->lseg->pls_layout); + ff_layout_reset_write(hdr, true); + } else { + pnfs_clear_retry_layoutget(hdr->lseg->pls_layout); + ff_layout_reset_write(hdr, false); + } + return task->tk_status; + case -EAGAIN: + rpc_restart_call_prepare(task); + return -EAGAIN; + } + + if (hdr->res.verf->committed == NFS_FILE_SYNC || + hdr->res.verf->committed == NFS_DATA_SYNC) + ff_layout_set_layoutcommit(hdr); + + return 0; +} + +static int ff_layout_commit_done_cb(struct rpc_task *task, + struct nfs_commit_data *data) +{ + struct inode *inode; + int err; + + trace_nfs4_pnfs_commit_ds(data, task->tk_status); + if (task->tk_status == -ETIMEDOUT && !data->res.op_status) + data->res.op_status = NFS4ERR_NXIO; + if (task->tk_status < 0 && data->res.op_status) + ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index, + data->args.offset, data->args.count, + data->res.op_status, OP_COMMIT); + err = ff_layout_async_handle_error(task, NULL, data->ds_clp, + data->lseg, data->ds_commit_index); + + switch (err) { + case -NFS4ERR_RESET_TO_PNFS: + case -NFS4ERR_RESET_TO_MDS: + inode = data->lseg->pls_layout->plh_inode; + pnfs_error_mark_layout_for_return(inode, data->lseg); + if (err == -NFS4ERR_RESET_TO_PNFS) + pnfs_set_retry_layoutget(data->lseg->pls_layout); + else + pnfs_clear_retry_layoutget(data->lseg->pls_layout); + pnfs_generic_prepare_to_resend_writes(data); + return -EAGAIN; + case -EAGAIN: + rpc_restart_call_prepare(task); + return -EAGAIN; + } + + if (data->verf.committed == NFS_UNSTABLE) + pnfs_commit_set_layoutcommit(data); + + return 0; +} + +static int ff_layout_write_prepare_common(struct rpc_task *task, + struct nfs_pgio_header *hdr) +{ + if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { + rpc_exit(task, -EIO); + return -EIO; + } + + if (ff_layout_reset_to_mds(hdr->lseg, hdr->pgio_mirror_idx)) { + bool retry_pnfs; + + retry_pnfs = ff_layout_has_available_ds(hdr->lseg); + dprintk("%s task %u reset io to %s\n", __func__, + task->tk_pid, retry_pnfs ? "pNFS" : "MDS"); + ff_layout_reset_write(hdr, retry_pnfs); + rpc_exit(task, 0); + return -EAGAIN; + } + + return 0; +} + +static void ff_layout_write_prepare_v3(struct rpc_task *task, void *data) +{ + struct nfs_pgio_header *hdr = data; + + if (ff_layout_write_prepare_common(task, hdr)) + return; + + rpc_call_start(task); +} + +static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data) +{ + struct nfs_pgio_header *hdr = data; + + if (ff_layout_write_prepare_common(task, hdr)) + return; + + if (ff_layout_setup_sequence(hdr->ds_clp, + &hdr->args.seq_args, + &hdr->res.seq_res, + task)) + return; + + if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, + hdr->args.lock_context, FMODE_WRITE) == -EIO) + rpc_exit(task, -EIO); /* lost lock, terminate I/O */ +} + +static void ff_layout_write_call_done(struct rpc_task *task, void *data) +{ + struct nfs_pgio_header *hdr = data; + + if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && + task->tk_status == 0) { + nfs4_sequence_done(task, &hdr->res.seq_res); + return; + } + + /* Note this may cause RPC to be resent */ + hdr->mds_ops->rpc_call_done(task, hdr); +} + +static void ff_layout_write_count_stats(struct rpc_task *task, void *data) +{ + struct nfs_pgio_header *hdr = data; + + rpc_count_iostats_metrics(task, + &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]); +} + +static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data) +{ + rpc_call_start(task); +} + +static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data) +{ + struct nfs_commit_data *wdata = data; + + ff_layout_setup_sequence(wdata->ds_clp, + &wdata->args.seq_args, + &wdata->res.seq_res, + task); +} + +static void ff_layout_commit_count_stats(struct rpc_task *task, void *data) +{ + struct nfs_commit_data *cdata = data; + + rpc_count_iostats_metrics(task, + &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]); +} + +static const struct rpc_call_ops ff_layout_read_call_ops_v3 = { + .rpc_call_prepare = ff_layout_read_prepare_v3, + .rpc_call_done = ff_layout_read_call_done, + .rpc_count_stats = ff_layout_read_count_stats, + .rpc_release = pnfs_generic_rw_release, +}; + +static const struct rpc_call_ops ff_layout_read_call_ops_v4 = { + .rpc_call_prepare = ff_layout_read_prepare_v4, + .rpc_call_done = ff_layout_read_call_done, + .rpc_count_stats = ff_layout_read_count_stats, + .rpc_release = pnfs_generic_rw_release, +}; + +static const struct rpc_call_ops ff_layout_write_call_ops_v3 = { + .rpc_call_prepare = ff_layout_write_prepare_v3, + .rpc_call_done = ff_layout_write_call_done, + .rpc_count_stats = ff_layout_write_count_stats, + .rpc_release = pnfs_generic_rw_release, +}; + +static const struct rpc_call_ops ff_layout_write_call_ops_v4 = { + .rpc_call_prepare = ff_layout_write_prepare_v4, + .rpc_call_done = ff_layout_write_call_done, + .rpc_count_stats = ff_layout_write_count_stats, + .rpc_release = pnfs_generic_rw_release, +}; + +static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = { + .rpc_call_prepare = ff_layout_commit_prepare_v3, + .rpc_call_done = pnfs_generic_write_commit_done, + .rpc_count_stats = ff_layout_commit_count_stats, + .rpc_release = pnfs_generic_commit_release, +}; + +static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = { + .rpc_call_prepare = ff_layout_commit_prepare_v4, + .rpc_call_done = pnfs_generic_write_commit_done, + .rpc_count_stats = ff_layout_commit_count_stats, + .rpc_release = pnfs_generic_commit_release, +}; + +static enum pnfs_try_status +ff_layout_read_pagelist(struct nfs_pgio_header *hdr) +{ + struct pnfs_layout_segment *lseg = hdr->lseg; + struct nfs4_pnfs_ds *ds; + struct rpc_clnt *ds_clnt; + struct rpc_cred *ds_cred; + loff_t offset = hdr->args.offset; + u32 idx = hdr->pgio_mirror_idx; + int vers; + struct nfs_fh *fh; + + dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", + __func__, hdr->inode->i_ino, + hdr->args.pgbase, (size_t)hdr->args.count, offset); + + ds = nfs4_ff_layout_prepare_ds(lseg, idx, false); + if (!ds) + goto out_failed; + + ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp, + hdr->inode); + if (IS_ERR(ds_clnt)) + goto out_failed; + + ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred); + if (IS_ERR(ds_cred)) + goto out_failed; + + vers = nfs4_ff_layout_ds_version(lseg, idx); + + dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__, + ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), vers); + + atomic_inc(&ds->ds_clp->cl_count); + hdr->ds_clp = ds->ds_clp; + fh = nfs4_ff_layout_select_ds_fh(lseg, idx); + if (fh) + hdr->args.fh = fh; + + /* + * Note that if we ever decide to split across DSes, + * then we may need to handle dense-like offsets. + */ + hdr->args.offset = offset; + hdr->mds_offset = offset; + + /* Perform an asynchronous read to ds */ + nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops, + vers == 3 ? &ff_layout_read_call_ops_v3 : + &ff_layout_read_call_ops_v4, + 0, RPC_TASK_SOFTCONN); + + return PNFS_ATTEMPTED; + +out_failed: + if (ff_layout_has_available_ds(lseg)) + return PNFS_TRY_AGAIN; + return PNFS_NOT_ATTEMPTED; +} + +/* Perform async writes. */ +static enum pnfs_try_status +ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) +{ + struct pnfs_layout_segment *lseg = hdr->lseg; + struct nfs4_pnfs_ds *ds; + struct rpc_clnt *ds_clnt; + struct rpc_cred *ds_cred; + loff_t offset = hdr->args.offset; + int vers; + struct nfs_fh *fh; + int idx = hdr->pgio_mirror_idx; + + ds = nfs4_ff_layout_prepare_ds(lseg, idx, true); + if (!ds) + return PNFS_NOT_ATTEMPTED; + + ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp, + hdr->inode); + if (IS_ERR(ds_clnt)) + return PNFS_NOT_ATTEMPTED; + + ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred); + if (IS_ERR(ds_cred)) + return PNFS_NOT_ATTEMPTED; + + vers = nfs4_ff_layout_ds_version(lseg, idx); + + dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d vers %d\n", + __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, + offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), + vers); + + hdr->pgio_done_cb = ff_layout_write_done_cb; + atomic_inc(&ds->ds_clp->cl_count); + hdr->ds_clp = ds->ds_clp; + hdr->ds_commit_idx = idx; + fh = nfs4_ff_layout_select_ds_fh(lseg, idx); + if (fh) + hdr->args.fh = fh; + + /* + * Note that if we ever decide to split across DSes, + * then we may need to handle dense-like offsets. + */ + hdr->args.offset = offset; + + /* Perform an asynchronous write */ + nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops, + vers == 3 ? &ff_layout_write_call_ops_v3 : + &ff_layout_write_call_ops_v4, + sync, RPC_TASK_SOFTCONN); + return PNFS_ATTEMPTED; +} + +static void +ff_layout_mark_request_commit(struct nfs_page *req, + struct pnfs_layout_segment *lseg, + struct nfs_commit_info *cinfo, + u32 ds_commit_idx) +{ + struct list_head *list; + struct pnfs_commit_bucket *buckets; + + spin_lock(cinfo->lock); + buckets = cinfo->ds->buckets; + list = &buckets[ds_commit_idx].written; + if (list_empty(list)) { + /* Non-empty buckets hold a reference on the lseg. That ref + * is normally transferred to the COMMIT call and released + * there. It could also be released if the last req is pulled + * off due to a rewrite, in which case it will be done in + * pnfs_common_clear_request_commit + */ + WARN_ON_ONCE(buckets[ds_commit_idx].wlseg != NULL); + buckets[ds_commit_idx].wlseg = pnfs_get_lseg(lseg); + } + set_bit(PG_COMMIT_TO_DS, &req->wb_flags); + cinfo->ds->nwritten++; + + /* nfs_request_add_commit_list(). We need to add req to list without + * dropping cinfo lock. + */ + set_bit(PG_CLEAN, &(req)->wb_flags); + nfs_list_add_request(req, list); + cinfo->mds->ncommit++; + spin_unlock(cinfo->lock); + if (!cinfo->dreq) { + inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); + inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, + BDI_RECLAIMABLE); + __mark_inode_dirty(req->wb_context->dentry->d_inode, + I_DIRTY_DATASYNC); + } +} + +static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) +{ + return i; +} + +static struct nfs_fh * +select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i) +{ + struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg); + + /* FIXME: Assume that there is only one NFS version available + * for the DS. + */ + return &flseg->mirror_array[i]->fh_versions[0]; +} + +static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) +{ + struct pnfs_layout_segment *lseg = data->lseg; + struct nfs4_pnfs_ds *ds; + struct rpc_clnt *ds_clnt; + struct rpc_cred *ds_cred; + u32 idx; + int vers; + struct nfs_fh *fh; + + idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); + ds = nfs4_ff_layout_prepare_ds(lseg, idx, true); + if (!ds) + goto out_err; + + ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp, + data->inode); + if (IS_ERR(ds_clnt)) + goto out_err; + + ds_cred = ff_layout_get_ds_cred(lseg, idx, data->cred); + if (IS_ERR(ds_cred)) + goto out_err; + + vers = nfs4_ff_layout_ds_version(lseg, idx); + + dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__, + data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count), + vers); + data->commit_done_cb = ff_layout_commit_done_cb; + data->cred = ds_cred; + atomic_inc(&ds->ds_clp->cl_count); + data->ds_clp = ds->ds_clp; + fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); + if (fh) + data->args.fh = fh; + return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops, + vers == 3 ? &ff_layout_commit_call_ops_v3 : + &ff_layout_commit_call_ops_v4, + how, RPC_TASK_SOFTCONN); +out_err: + pnfs_generic_prepare_to_resend_writes(data); + pnfs_generic_commit_release(data); + return -EAGAIN; +} + +static int +ff_layout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, + int how, struct nfs_commit_info *cinfo) +{ + return pnfs_generic_commit_pagelist(inode, mds_pages, how, cinfo, + ff_layout_initiate_commit); +} + +static struct pnfs_ds_commit_info * +ff_layout_get_ds_info(struct inode *inode) +{ + struct pnfs_layout_hdr *layout = NFS_I(inode)->layout; + + if (layout == NULL) + return NULL; + + return &FF_LAYOUT_FROM_HDR(layout)->commit_info; +} + +static void +ff_layout_free_deveiceid_node(struct nfs4_deviceid_node *d) +{ + nfs4_ff_layout_free_deviceid(container_of(d, struct nfs4_ff_layout_ds, + id_node)); +} + +static int ff_layout_encode_ioerr(struct nfs4_flexfile_layout *flo, + struct xdr_stream *xdr, + const struct nfs4_layoutreturn_args *args) +{ + struct pnfs_layout_hdr *hdr = &flo->generic_hdr; + __be32 *start; + int count = 0, ret = 0; + + start = xdr_reserve_space(xdr, 4); + if (unlikely(!start)) + return -E2BIG; + + /* This assume we always return _ALL_ layouts */ + spin_lock(&hdr->plh_inode->i_lock); + ret = ff_layout_encode_ds_ioerr(flo, xdr, &count, &args->range); + spin_unlock(&hdr->plh_inode->i_lock); + + *start = cpu_to_be32(count); + + return ret; +} + +/* report nothing for now */ +static void ff_layout_encode_iostats(struct nfs4_flexfile_layout *flo, + struct xdr_stream *xdr, + const struct nfs4_layoutreturn_args *args) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 4); + if (likely(p)) + *p = cpu_to_be32(0); +} + +static struct nfs4_deviceid_node * +ff_layout_alloc_deviceid_node(struct nfs_server *server, + struct pnfs_device *pdev, gfp_t gfp_flags) +{ + struct nfs4_ff_layout_ds *dsaddr; + + dsaddr = nfs4_ff_alloc_deviceid_node(server, pdev, gfp_flags); + if (!dsaddr) + return NULL; + return &dsaddr->id_node; +} + +static void +ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo, + struct xdr_stream *xdr, + const struct nfs4_layoutreturn_args *args) +{ + struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo); + __be32 *start; + + dprintk("%s: Begin\n", __func__); + start = xdr_reserve_space(xdr, 4); + BUG_ON(!start); + + if (ff_layout_encode_ioerr(flo, xdr, args)) + goto out; + + ff_layout_encode_iostats(flo, xdr, args); +out: + *start = cpu_to_be32((xdr->p - start - 1) * 4); + dprintk("%s: Return\n", __func__); +} + +static struct pnfs_layoutdriver_type flexfilelayout_type = { + .id = LAYOUT_FLEX_FILES, + .name = "LAYOUT_FLEX_FILES", + .owner = THIS_MODULE, + .alloc_layout_hdr = ff_layout_alloc_layout_hdr, + .free_layout_hdr = ff_layout_free_layout_hdr, + .alloc_lseg = ff_layout_alloc_lseg, + .free_lseg = ff_layout_free_lseg, + .pg_read_ops = &ff_layout_pg_read_ops, + .pg_write_ops = &ff_layout_pg_write_ops, + .get_ds_info = ff_layout_get_ds_info, + .free_deviceid_node = ff_layout_free_deveiceid_node, + .mark_request_commit = ff_layout_mark_request_commit, + .clear_request_commit = pnfs_generic_clear_request_commit, + .scan_commit_lists = pnfs_generic_scan_commit_lists, + .recover_commit_reqs = pnfs_generic_recover_commit_reqs, + .commit_pagelist = ff_layout_commit_pagelist, + .read_pagelist = ff_layout_read_pagelist, + .write_pagelist = ff_layout_write_pagelist, + .alloc_deviceid_node = ff_layout_alloc_deviceid_node, + .encode_layoutreturn = ff_layout_encode_layoutreturn, +}; + +static int __init nfs4flexfilelayout_init(void) +{ + printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Registering...\n", + __func__); + return pnfs_register_layoutdriver(&flexfilelayout_type); +} + +static void __exit nfs4flexfilelayout_exit(void) +{ + printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Unregistering...\n", + __func__); + pnfs_unregister_layoutdriver(&flexfilelayout_type); +} + +MODULE_ALIAS("nfs-layouttype4-4"); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("The NFSv4 flexfile layout driver"); + +module_init(nfs4flexfilelayout_init); +module_exit(nfs4flexfilelayout_exit); diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h new file mode 100644 index 00000000000000..070f20445b2d33 --- /dev/null +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -0,0 +1,155 @@ +/* + * NFSv4 flexfile layout driver data structures. + * + * Copyright (c) 2014, Primary Data, Inc. All rights reserved. + * + * Tao Peng + */ + +#ifndef FS_NFS_NFS4FLEXFILELAYOUT_H +#define FS_NFS_NFS4FLEXFILELAYOUT_H + +#include "../pnfs.h" + +/* XXX: Let's filter out insanely large mirror count for now to avoid oom + * due to network error etc. */ +#define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096 + +struct nfs4_ff_ds_version { + u32 version; + u32 minor_version; + u32 rsize; + u32 wsize; + bool tightly_coupled; +}; + +/* chained in global deviceid hlist */ +struct nfs4_ff_layout_ds { + struct nfs4_deviceid_node id_node; + u32 ds_versions_cnt; + struct nfs4_ff_ds_version *ds_versions; + struct nfs4_pnfs_ds *ds; +}; + +struct nfs4_ff_layout_ds_err { + struct list_head list; /* linked in mirror error_list */ + u64 offset; + u64 length; + int status; + enum nfs_opnum4 opnum; + nfs4_stateid stateid; + struct nfs4_deviceid deviceid; +}; + +struct nfs4_ff_layout_mirror { + u32 ds_count; + u32 efficiency; + struct nfs4_ff_layout_ds *mirror_ds; + u32 fh_versions_cnt; + struct nfs_fh *fh_versions; + nfs4_stateid stateid; + struct nfs4_string user_name; + struct nfs4_string group_name; + u32 uid; + u32 gid; + struct rpc_cred *cred; + spinlock_t lock; +}; + +struct nfs4_ff_layout_segment { + struct pnfs_layout_segment generic_hdr; + u64 stripe_unit; + u32 mirror_array_cnt; + struct nfs4_ff_layout_mirror **mirror_array; +}; + +struct nfs4_flexfile_layout { + struct pnfs_layout_hdr generic_hdr; + struct pnfs_ds_commit_info commit_info; + struct list_head error_list; /* nfs4_ff_layout_ds_err */ +}; + +static inline struct nfs4_flexfile_layout * +FF_LAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo) +{ + return container_of(lo, struct nfs4_flexfile_layout, generic_hdr); +} + +static inline struct nfs4_ff_layout_segment * +FF_LAYOUT_LSEG(struct pnfs_layout_segment *lseg) +{ + return container_of(lseg, + struct nfs4_ff_layout_segment, + generic_hdr); +} + +static inline struct nfs4_deviceid_node * +FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx) +{ + if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt || + FF_LAYOUT_LSEG(lseg)->mirror_array[idx] == NULL || + FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds == NULL) + return NULL; + return &FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds->id_node; +} + +static inline struct nfs4_ff_layout_ds * +FF_LAYOUT_MIRROR_DS(struct nfs4_deviceid_node *node) +{ + return container_of(node, struct nfs4_ff_layout_ds, id_node); +} + +static inline struct nfs4_ff_layout_mirror * +FF_LAYOUT_COMP(struct pnfs_layout_segment *lseg, u32 idx) +{ + if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt) + return NULL; + return FF_LAYOUT_LSEG(lseg)->mirror_array[idx]; +} + +static inline u32 +FF_LAYOUT_MIRROR_COUNT(struct pnfs_layout_segment *lseg) +{ + return FF_LAYOUT_LSEG(lseg)->mirror_array_cnt; +} + +static inline bool +ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node) +{ + return nfs4_test_deviceid_unavailable(node); +} + +static inline int +nfs4_ff_layout_ds_version(struct pnfs_layout_segment *lseg, u32 ds_idx) +{ + return FF_LAYOUT_COMP(lseg, ds_idx)->mirror_ds->ds_versions[0].version; +} + +struct nfs4_ff_layout_ds * +nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, + gfp_t gfp_flags); +void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds); +void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds); +int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, + struct nfs4_ff_layout_mirror *mirror, u64 offset, + u64 length, int status, enum nfs_opnum4 opnum, + gfp_t gfp_flags); +int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, + struct xdr_stream *xdr, int *count, + const struct pnfs_layout_range *range); +struct nfs_fh * +nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx); + +struct nfs4_pnfs_ds * +nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, + bool fail_return); + +struct rpc_clnt * +nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, + u32 ds_idx, + struct nfs_client *ds_clp, + struct inode *inode); +struct rpc_cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, + u32 ds_idx, struct rpc_cred *mdscred); +bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); +#endif /* FS_NFS_NFS4FLEXFILELAYOUT_H */ diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c new file mode 100644 index 00000000000000..3bbb16b3066f69 --- /dev/null +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -0,0 +1,552 @@ +/* + * Device operations for the pnfs nfs4 file layout driver. + * + * Copyright (c) 2014, Primary Data, Inc. All rights reserved. + * + * Tao Peng + */ + +#include +#include +#include +#include + +#include "../internal.h" +#include "../nfs4session.h" +#include "flexfilelayout.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS_LD + +static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; +static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; + +void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds) +{ + if (mirror_ds) + nfs4_put_deviceid_node(&mirror_ds->id_node); +} + +void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds) +{ + nfs4_print_deviceid(&mirror_ds->id_node.deviceid); + nfs4_pnfs_ds_put(mirror_ds->ds); + kfree(mirror_ds); +} + +/* Decode opaque device data and construct new_ds using it */ +struct nfs4_ff_layout_ds * +nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, + gfp_t gfp_flags) +{ + struct xdr_stream stream; + struct xdr_buf buf; + struct page *scratch; + struct list_head dsaddrs; + struct nfs4_pnfs_ds_addr *da; + struct nfs4_ff_layout_ds *new_ds = NULL; + struct nfs4_ff_ds_version *ds_versions = NULL; + u32 mp_count; + u32 version_count; + __be32 *p; + int i, ret = -ENOMEM; + + /* set up xdr stream */ + scratch = alloc_page(gfp_flags); + if (!scratch) + goto out_err; + + new_ds = kzalloc(sizeof(struct nfs4_ff_layout_ds), gfp_flags); + if (!new_ds) + goto out_scratch; + + nfs4_init_deviceid_node(&new_ds->id_node, + server, + &pdev->dev_id); + INIT_LIST_HEAD(&dsaddrs); + + xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); + + /* multipath count */ + p = xdr_inline_decode(&stream, 4); + if (unlikely(!p)) + goto out_err_drain_dsaddrs; + mp_count = be32_to_cpup(p); + dprintk("%s: multipath ds count %d\n", __func__, mp_count); + + for (i = 0; i < mp_count; i++) { + /* multipath ds */ + da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, + &stream, gfp_flags); + if (da) + list_add_tail(&da->da_node, &dsaddrs); + } + if (list_empty(&dsaddrs)) { + dprintk("%s: no suitable DS addresses found\n", + __func__); + ret = -ENOMEDIUM; + goto out_err_drain_dsaddrs; + } + + /* version count */ + p = xdr_inline_decode(&stream, 4); + if (unlikely(!p)) + goto out_err_drain_dsaddrs; + version_count = be32_to_cpup(p); + dprintk("%s: version count %d\n", __func__, version_count); + + ds_versions = kzalloc(version_count * sizeof(struct nfs4_ff_ds_version), + gfp_flags); + if (!ds_versions) + goto out_scratch; + + for (i = 0; i < version_count; i++) { + /* 20 = version(4) + minor_version(4) + rsize(4) + wsize(4) + + * tightly_coupled(4) */ + p = xdr_inline_decode(&stream, 20); + if (unlikely(!p)) + goto out_err_drain_dsaddrs; + ds_versions[i].version = be32_to_cpup(p++); + ds_versions[i].minor_version = be32_to_cpup(p++); + ds_versions[i].rsize = nfs_block_size(be32_to_cpup(p++), NULL); + ds_versions[i].wsize = nfs_block_size(be32_to_cpup(p++), NULL); + ds_versions[i].tightly_coupled = be32_to_cpup(p); + + if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE) + ds_versions[i].rsize = NFS_MAX_FILE_IO_SIZE; + if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE) + ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE; + + if (ds_versions[i].version != 3 || ds_versions[i].minor_version != 0) { + dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__, + i, ds_versions[i].version, + ds_versions[i].minor_version); + ret = -EPROTONOSUPPORT; + goto out_err_drain_dsaddrs; + } + + dprintk("%s: [%d] vers %u minor_ver %u rsize %u wsize %u coupled %d\n", + __func__, i, ds_versions[i].version, + ds_versions[i].minor_version, + ds_versions[i].rsize, + ds_versions[i].wsize, + ds_versions[i].tightly_coupled); + } + + new_ds->ds_versions = ds_versions; + new_ds->ds_versions_cnt = version_count; + + new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); + if (!new_ds->ds) + goto out_err_drain_dsaddrs; + + /* If DS was already in cache, free ds addrs */ + while (!list_empty(&dsaddrs)) { + da = list_first_entry(&dsaddrs, + struct nfs4_pnfs_ds_addr, + da_node); + list_del_init(&da->da_node); + kfree(da->da_remotestr); + kfree(da); + } + + __free_page(scratch); + return new_ds; + +out_err_drain_dsaddrs: + while (!list_empty(&dsaddrs)) { + da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, + da_node); + list_del_init(&da->da_node); + kfree(da->da_remotestr); + kfree(da); + } + + kfree(ds_versions); +out_scratch: + __free_page(scratch); +out_err: + kfree(new_ds); + + dprintk("%s ERROR: returning %d\n", __func__, ret); + return NULL; +} + +static u64 +end_offset(u64 start, u64 len) +{ + u64 end; + + end = start + len; + return end >= start ? end : NFS4_MAX_UINT64; +} + +static void extend_ds_error(struct nfs4_ff_layout_ds_err *err, + u64 offset, u64 length) +{ + u64 end; + + end = max_t(u64, end_offset(err->offset, err->length), + end_offset(offset, length)); + err->offset = min_t(u64, err->offset, offset); + err->length = end - err->offset; +} + +static bool ds_error_can_merge(struct nfs4_ff_layout_ds_err *err, u64 offset, + u64 length, int status, enum nfs_opnum4 opnum, + nfs4_stateid *stateid, + struct nfs4_deviceid *deviceid) +{ + return err->status == status && err->opnum == opnum && + nfs4_stateid_match(&err->stateid, stateid) && + !memcmp(&err->deviceid, deviceid, sizeof(*deviceid)) && + end_offset(err->offset, err->length) >= offset && + err->offset <= end_offset(offset, length); +} + +static bool merge_ds_error(struct nfs4_ff_layout_ds_err *old, + struct nfs4_ff_layout_ds_err *new) +{ + if (!ds_error_can_merge(old, new->offset, new->length, new->status, + new->opnum, &new->stateid, &new->deviceid)) + return false; + + extend_ds_error(old, new->offset, new->length); + return true; +} + +static bool +ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo, + struct nfs4_ff_layout_ds_err *dserr) +{ + struct nfs4_ff_layout_ds_err *err; + + list_for_each_entry(err, &flo->error_list, list) { + if (merge_ds_error(err, dserr)) { + return true; + } + } + + list_add(&dserr->list, &flo->error_list); + return false; +} + +static bool +ff_layout_update_ds_error(struct nfs4_flexfile_layout *flo, u64 offset, + u64 length, int status, enum nfs_opnum4 opnum, + nfs4_stateid *stateid, struct nfs4_deviceid *deviceid) +{ + bool found = false; + struct nfs4_ff_layout_ds_err *err; + + list_for_each_entry(err, &flo->error_list, list) { + if (ds_error_can_merge(err, offset, length, status, opnum, + stateid, deviceid)) { + found = true; + extend_ds_error(err, offset, length); + break; + } + } + + return found; +} + +int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, + struct nfs4_ff_layout_mirror *mirror, u64 offset, + u64 length, int status, enum nfs_opnum4 opnum, + gfp_t gfp_flags) +{ + struct nfs4_ff_layout_ds_err *dserr; + bool needfree; + + if (status == 0) + return 0; + + if (mirror->mirror_ds == NULL) + return -EINVAL; + + spin_lock(&flo->generic_hdr.plh_inode->i_lock); + if (ff_layout_update_ds_error(flo, offset, length, status, opnum, + &mirror->stateid, + &mirror->mirror_ds->id_node.deviceid)) { + spin_unlock(&flo->generic_hdr.plh_inode->i_lock); + return 0; + } + spin_unlock(&flo->generic_hdr.plh_inode->i_lock); + dserr = kmalloc(sizeof(*dserr), gfp_flags); + if (!dserr) + return -ENOMEM; + + INIT_LIST_HEAD(&dserr->list); + dserr->offset = offset; + dserr->length = length; + dserr->status = status; + dserr->opnum = opnum; + nfs4_stateid_copy(&dserr->stateid, &mirror->stateid); + memcpy(&dserr->deviceid, &mirror->mirror_ds->id_node.deviceid, + NFS4_DEVICEID4_SIZE); + + spin_lock(&flo->generic_hdr.plh_inode->i_lock); + needfree = ff_layout_add_ds_error_locked(flo, dserr); + spin_unlock(&flo->generic_hdr.plh_inode->i_lock); + if (needfree) + kfree(dserr); + + return 0; +} + +/* currently we only support AUTH_NONE and AUTH_SYS */ +static rpc_authflavor_t +nfs4_ff_layout_choose_authflavor(struct nfs4_ff_layout_mirror *mirror) +{ + if (mirror->uid == (u32)-1) + return RPC_AUTH_NULL; + return RPC_AUTH_UNIX; +} + +/* fetch cred for NFSv3 DS */ +static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror, + struct nfs4_pnfs_ds *ds) +{ + if (ds->ds_clp && !mirror->cred && + mirror->mirror_ds->ds_versions[0].version == 3) { + struct rpc_auth *auth = ds->ds_clp->cl_rpcclient->cl_auth; + struct rpc_cred *cred; + struct auth_cred acred = { + .uid = make_kuid(&init_user_ns, mirror->uid), + .gid = make_kgid(&init_user_ns, mirror->gid), + }; + + /* AUTH_NULL ignores acred */ + cred = auth->au_ops->lookup_cred(auth, &acred, 0); + if (IS_ERR(cred)) { + dprintk("%s: lookup_cred failed with %ld\n", + __func__, PTR_ERR(cred)); + return PTR_ERR(cred); + } else { + mirror->cred = cred; + } + } + return 0; +} + +struct nfs_fh * +nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx) +{ + struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx); + struct nfs_fh *fh = NULL; + struct nfs4_deviceid_node *devid; + + if (mirror == NULL || mirror->mirror_ds == NULL || + mirror->mirror_ds->ds == NULL) { + printk(KERN_ERR "NFS: %s: No data server for mirror offset index %d\n", + __func__, mirror_idx); + if (mirror && mirror->mirror_ds) { + devid = &mirror->mirror_ds->id_node; + pnfs_generic_mark_devid_invalid(devid); + } + goto out; + } + + /* FIXME: For now assume there is only 1 version available for the DS */ + fh = &mirror->fh_versions[0]; +out: + return fh; +} + +/* Upon return, either ds is connected, or ds is NULL */ +struct nfs4_pnfs_ds * +nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, + bool fail_return) +{ + struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); + struct nfs4_pnfs_ds *ds = NULL; + struct nfs4_deviceid_node *devid; + struct inode *ino = lseg->pls_layout->plh_inode; + struct nfs_server *s = NFS_SERVER(ino); + unsigned int max_payload; + rpc_authflavor_t flavor; + + if (mirror == NULL || mirror->mirror_ds == NULL || + mirror->mirror_ds->ds == NULL) { + printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", + __func__, ds_idx); + if (mirror && mirror->mirror_ds) { + devid = &mirror->mirror_ds->id_node; + pnfs_generic_mark_devid_invalid(devid); + } + goto out; + } + + devid = &mirror->mirror_ds->id_node; + if (ff_layout_test_devid_unavailable(devid)) + goto out; + + ds = mirror->mirror_ds->ds; + /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ + smp_rmb(); + if (ds->ds_clp) + goto out; + + flavor = nfs4_ff_layout_choose_authflavor(mirror); + + /* FIXME: For now we assume the server sent only one version of NFS + * to use for the DS. + */ + nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, + dataserver_retrans, + mirror->mirror_ds->ds_versions[0].version, + mirror->mirror_ds->ds_versions[0].minor_version, + flavor); + + /* connect success, check rsize/wsize limit */ + if (ds->ds_clp) { + max_payload = + nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), + NULL); + if (mirror->mirror_ds->ds_versions[0].rsize > max_payload) + mirror->mirror_ds->ds_versions[0].rsize = max_payload; + if (mirror->mirror_ds->ds_versions[0].wsize > max_payload) + mirror->mirror_ds->ds_versions[0].wsize = max_payload; + } else { + ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), + mirror, lseg->pls_range.offset, + lseg->pls_range.length, NFS4ERR_NXIO, + OP_ILLEGAL, GFP_NOIO); + if (fail_return) { + pnfs_error_mark_layout_for_return(ino, lseg); + if (ff_layout_has_available_ds(lseg)) + pnfs_set_retry_layoutget(lseg->pls_layout); + else + pnfs_clear_retry_layoutget(lseg->pls_layout); + + } else { + if (ff_layout_has_available_ds(lseg)) + set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, + &lseg->pls_layout->plh_flags); + else { + pnfs_error_mark_layout_for_return(ino, lseg); + pnfs_clear_retry_layoutget(lseg->pls_layout); + } + } + } + + if (ff_layout_update_mirror_cred(mirror, ds)) + ds = NULL; +out: + return ds; +} + +struct rpc_cred * +ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx, + struct rpc_cred *mdscred) +{ + struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); + struct rpc_cred *cred = ERR_PTR(-EINVAL); + + if (!nfs4_ff_layout_prepare_ds(lseg, ds_idx, true)) + goto out; + + if (mirror && mirror->cred) + cred = mirror->cred; + else + cred = mdscred; +out: + return cred; +} + +/** +* Find or create a DS rpc client with th MDS server rpc client auth flavor +* in the nfs_client cl_ds_clients list. +*/ +struct rpc_clnt * +nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, u32 ds_idx, + struct nfs_client *ds_clp, struct inode *inode) +{ + struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); + + switch (mirror->mirror_ds->ds_versions[0].version) { + case 3: + /* For NFSv3 DS, flavor is set when creating DS connections */ + return ds_clp->cl_rpcclient; + case 4: + return nfs4_find_or_create_ds_client(ds_clp, inode); + default: + BUG(); + } +} + +static bool is_range_intersecting(u64 offset1, u64 length1, + u64 offset2, u64 length2) +{ + u64 end1 = end_offset(offset1, length1); + u64 end2 = end_offset(offset2, length2); + + return (end1 == NFS4_MAX_UINT64 || end1 > offset2) && + (end2 == NFS4_MAX_UINT64 || end2 > offset1); +} + +/* called with inode i_lock held */ +int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, + struct xdr_stream *xdr, int *count, + const struct pnfs_layout_range *range) +{ + struct nfs4_ff_layout_ds_err *err, *n; + __be32 *p; + + list_for_each_entry_safe(err, n, &flo->error_list, list) { + if (!is_range_intersecting(err->offset, err->length, + range->offset, range->length)) + continue; + /* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE) + * + deviceid(NFS4_DEVICEID4_SIZE) + status(4) + opnum(4) + */ + p = xdr_reserve_space(xdr, + 24 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE); + if (unlikely(!p)) + return -ENOBUFS; + p = xdr_encode_hyper(p, err->offset); + p = xdr_encode_hyper(p, err->length); + p = xdr_encode_opaque_fixed(p, &err->stateid, + NFS4_STATEID_SIZE); + p = xdr_encode_opaque_fixed(p, &err->deviceid, + NFS4_DEVICEID4_SIZE); + *p++ = cpu_to_be32(err->status); + *p++ = cpu_to_be32(err->opnum); + *count += 1; + list_del(&err->list); + kfree(err); + dprintk("%s: offset %llu length %llu status %d op %d count %d\n", + __func__, err->offset, err->length, err->status, + err->opnum, *count); + } + + return 0; +} + +bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg) +{ + struct nfs4_ff_layout_mirror *mirror; + struct nfs4_deviceid_node *devid; + int idx; + + for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { + mirror = FF_LAYOUT_COMP(lseg, idx); + if (mirror && mirror->mirror_ds) { + devid = &mirror->mirror_ds->id_node; + if (!ff_layout_test_devid_unavailable(devid)) + return true; + } + } + + return false; +} + +module_param(dataserver_retrans, uint, 0644); +MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client " + "retries a request before it attempts further " + " recovery action."); +module_param(dataserver_timeo, uint, 0644); +MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the " + "NFSv4.1 client waits for a response from a " + " data server before it retries an NFS request."); diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 2f5db844c17253..857e2a99acc8d3 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -152,7 +152,7 @@ void nfs_fattr_map_and_free_names(struct nfs_server *server, struct nfs_fattr *f nfs_fattr_free_group_name(fattr); } -static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) +int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) { unsigned long val; char buf[16]; @@ -166,6 +166,7 @@ static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *re *res = val; return 1; } +EXPORT_SYMBOL_GPL(nfs_map_string_to_numeric); static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 44c600aac90752..ca6dda0f68bb5e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7796,9 +7796,7 @@ static void nfs4_layoutreturn_release(void *calldata) spin_lock(&lo->plh_inode->i_lock); if (lrp->res.lrs_present) pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); - clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); - smp_mb__after_atomic(); - wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); + pnfs_clear_layoutreturn_waitbit(lo); clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); lo->plh_block_lgets--; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c4c9fe606ae66e..0fb0f1920a1f5b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -910,7 +910,9 @@ send_layoutget(struct pnfs_layout_hdr *lo, pnfs_layout_io_set_failed(lo, range->iomode); } return NULL; - } + } else + pnfs_layout_clear_fail_bit(lo, + pnfs_iomode_to_fail_bit(range->iomode)); return lseg; } @@ -930,6 +932,13 @@ static void pnfs_clear_layoutcommit(struct inode *inode, } } +void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) +{ + clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); + smp_mb__after_atomic(); + wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); +} + static int pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, enum pnfs_iomode iomode, bool sync) @@ -943,6 +952,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, status = -ENOMEM; spin_lock(&ino->i_lock); lo->plh_block_lgets--; + pnfs_clear_layoutreturn_waitbit(lo); rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq); spin_unlock(&ino->i_lock); pnfs_put_layout_hdr(lo); @@ -1418,6 +1428,15 @@ static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) TASK_UNINTERRUPTIBLE); } +static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo) +{ + unsigned long *bitlock = &lo->plh_flags; + + clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock); + smp_mb__after_atomic(); + wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); +} + /* * Layout segment is retreived from the server if not cached. * The appropriate layout segment is referenced and returned to the caller. @@ -1499,6 +1518,8 @@ pnfs_update_layout(struct inode *ino, spin_unlock(&ino->i_lock); dprintk("%s wait for layoutreturn\n", __func__); if (pnfs_prepare_to_retry_layoutget(lo)) { + if (first) + pnfs_clear_first_layoutget(lo); pnfs_put_layout_hdr(lo); dprintk("%s retrying\n", __func__); goto lookup_again; @@ -1533,13 +1554,8 @@ pnfs_update_layout(struct inode *ino, pnfs_clear_retry_layoutget(lo); atomic_dec(&lo->plh_outstanding); out_put_layout_hdr: - if (first) { - unsigned long *bitlock = &lo->plh_flags; - - clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock); - smp_mb__after_atomic(); - wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); - } + if (first) + pnfs_clear_first_layoutget(lo); pnfs_put_layout_hdr(lo); out: dprintk("%s: inode %s/%llu pNFS layout segment %s for " diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 49a4667084001c..7642021484bfe3 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -278,6 +278,7 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, u64 count, enum pnfs_iomode iomode, gfp_t gfp_flags); +void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo); void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *); diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 022b761dbf0a18..de7c91ca427e6e 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -516,6 +516,7 @@ enum pnfs_layouttype { LAYOUT_NFSV4_1_FILES = 1, LAYOUT_OSD2_OBJECTS = 2, LAYOUT_BLOCK_VOLUME = 3, + LAYOUT_FLEX_FILES = 4, }; /* used for both layout return and recall */ diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 0f4b79da658455..333844e38f66c4 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -73,5 +73,7 @@ int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, kgid_t int nfs_map_uid_to_name(const struct nfs_server *, kuid_t, char *, size_t); int nfs_map_gid_to_group(const struct nfs_server *, kgid_t, char *, size_t); +int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res); + extern unsigned int nfs_idmap_cache_timeout; #endif /* NFS_IDMAP_H */ diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h index 89f2ca17887348..7e61a17030a484 100644 --- a/include/linux/sunrpc/metrics.h +++ b/include/linux/sunrpc/metrics.h @@ -89,6 +89,8 @@ void rpc_free_iostats(struct rpc_iostats *); static inline struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { return NULL; } static inline void rpc_count_iostats(const struct rpc_task *task, struct rpc_iostats *stats) {} +static inline void rpc_count_iostats_metrics(const struct rpc_task *, + struct rpc_iostats *) {} static inline void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) {} static inline void rpc_free_iostats(struct rpc_iostats *stats) {} From 8f9cdcb26b62f1a9b071a82820c7b08ac7439406 Mon Sep 17 00:00:00 2001 From: Tom Haynes Date: Mon, 12 Jan 2015 11:51:45 -0800 Subject: [PATCH 442/601] pnfs: Update documentation on the Layout Drivers Signed-off-by: Tom Haynes --- Documentation/filesystems/nfs/pnfs.txt | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt index adc81a35fe2d98..44a9f2493a8835 100644 --- a/Documentation/filesystems/nfs/pnfs.txt +++ b/Documentation/filesystems/nfs/pnfs.txt @@ -57,15 +57,16 @@ bit is set, preventing any new lsegs from being added. layout drivers -------------- -PNFS utilizes what is called layout drivers. The STD defines 3 basic -layout types: "files" "objects" and "blocks". For each of these types -there is a layout-driver with a common function-vectors table which -are called by the nfs-client pnfs-core to implement the different layout -types. +PNFS utilizes what is called layout drivers. The STD defines 4 basic +layout types: "files", "objects", "blocks", and "flexfiles". For each +of these types there is a layout-driver with a common function-vectors +table which are called by the nfs-client pnfs-core to implement the +different layout types. -Files-layout-driver code is in: fs/nfs/nfs4filelayout.c && nfs4filelayoutdev.c +Files-layout-driver code is in: fs/nfs/filelayout/.. directory Objects-layout-deriver code is in: fs/nfs/objlayout/.. directory Blocks-layout-deriver code is in: fs/nfs/blocklayout/.. directory +Flexfiles-layout-driver code is in: fs/nfs/flexfilelayout/.. directory objects-layout setup -------------------- From cb5d04bc39e914124e811ea55f3034d2379a5f6c Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 24 Jan 2015 22:14:52 +0800 Subject: [PATCH 443/601] nfs41: .init_read and .init_write can be called with valid pg_lseg With pgio refactoring in v3.15, .init_read and .init_write can be called with valid pgio->pg_lseg. file layout was fixed at that time by commit c6194271f (pnfs: filelayout: support non page aligned layouts). But the generic helper still needs to be fixed. Cc: stable@vger.kernel.org # 3.15+ Signed-off-by: Peng Tao --- fs/nfs/pnfs.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0fb0f1920a1f5b..c7be9b997f5e80 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1711,19 +1711,19 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r { u64 rd_size = req->wb_bytes; - WARN_ON_ONCE(pgio->pg_lseg != NULL); - - if (pgio->pg_dreq == NULL) - rd_size = i_size_read(pgio->pg_inode) - req_offset(req); - else - rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); - - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, - req_offset(req), - rd_size, - IOMODE_READ, - GFP_KERNEL); + if (pgio->pg_lseg == NULL) { + if (pgio->pg_dreq == NULL) + rd_size = i_size_read(pgio->pg_inode) - req_offset(req); + else + rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); + + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + req_offset(req), + rd_size, + IOMODE_READ, + GFP_KERNEL); + } /* If no lseg, fall back to read through mds */ if (pgio->pg_lseg == NULL) nfs_pageio_reset_read_mds(pgio); @@ -1735,14 +1735,13 @@ void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, u64 wb_size) { - WARN_ON_ONCE(pgio->pg_lseg != NULL); - - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - req->wb_context, - req_offset(req), - wb_size, - IOMODE_RW, - GFP_NOFS); + if (pgio->pg_lseg == NULL) + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + req_offset(req), + wb_size, + IOMODE_RW, + GFP_NOFS); /* If no lseg, fall back to write through mds */ if (pgio->pg_lseg == NULL) nfs_pageio_reset_write_mds(pgio); From 7c13789e3e6c66dbcaade1760087429240eb3d27 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Fri, 30 Jan 2015 11:01:02 -0500 Subject: [PATCH 444/601] pnfs: lookup new lseg at lseg boundary Before mirroring support was added, the pageio descriptor's pg_lseg was set to null when an RPC was sent. Because of this, pg_init was called at lseg boundaries with pg_lseg = NULL, and it could be set to the new lseg. Signed-off-by: Weston Andros Adamson --- fs/nfs/pnfs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c7be9b997f5e80..9304984bde80af 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1788,10 +1788,16 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, seg_end = end_offset(pgio->pg_lseg->pls_range.offset, pgio->pg_lseg->pls_range.length); req_start = req_offset(req); - WARN_ON_ONCE(req_start > seg_end); + WARN_ON_ONCE(req_start >= seg_end); /* start of request is past the last byte of this segment */ - if (req_start >= seg_end) + if (req_start >= seg_end) { + /* reference the new lseg */ + if (pgio->pg_ops->pg_cleanup) + pgio->pg_ops->pg_cleanup(pgio); + if (pgio->pg_ops->pg_init) + pgio->pg_ops->pg_init(pgio, req); return 0; + } /* adjust 'size' iff there are fewer bytes left in the * segment than what nfs_generic_pg_test returned */ From 7c2c49eceb79eb4738f38a00270830057b5bfb76 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 3 Feb 2015 15:45:13 -0500 Subject: [PATCH 445/601] ktest: Place quotes around item variable Seems that some of the new console logic causes doprint to possibly get evaluated. When printing a commit message that contains parenthesis, it fails with a shell parsing error. This gets fixed when we add quotes around the $item variable, and prevent it from being evaluated by any shell commands. Signed-off-by: Steven Rostedt --- tools/testing/ktest/ktest.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 840803b3cd41d3..d08e214ec6e793 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -3447,7 +3447,7 @@ sub patchcheck { my $sha1 = $item; $sha1 =~ s/^([[:xdigit:]]+).*/$1/; - doprint "\nProcessing commit $item\n\n"; + doprint "\nProcessing commit \"$item\"\n\n"; run_command "git checkout $sha1" or die "Failed to checkout $sha1"; From 03a9a42a1a7e5b3e7919ddfacc1d1cc81882a955 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 30 Jan 2015 18:12:28 -0500 Subject: [PATCH 446/601] SUNRPC: NULL utsname dereference on NFS umount during namespace cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix an Oopsable condition when nsm_mon_unmon is called as part of the namespace cleanup, which now apparently happens after the utsname has been freed. Link: http://lkml.kernel.org/r/20150125220604.090121ae@neptune.home Reported-by: Bruno Prémont Cc: stable@vger.kernel.org # 3.18 Signed-off-by: Trond Myklebust --- fs/lockd/mon.c | 13 +++++++++---- include/linux/sunrpc/clnt.h | 3 ++- net/sunrpc/clnt.c | 12 +++++++----- net/sunrpc/rpcb_clnt.c | 8 ++++++-- 4 files changed, 24 insertions(+), 12 deletions(-) diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 1cc6ec51e6b1f7..47a32b6d9b9001 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -65,7 +65,7 @@ static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm) return (struct sockaddr *)&nsm->sm_addr; } -static struct rpc_clnt *nsm_create(struct net *net) +static struct rpc_clnt *nsm_create(struct net *net, const char *nodename) { struct sockaddr_in sin = { .sin_family = AF_INET, @@ -77,6 +77,7 @@ static struct rpc_clnt *nsm_create(struct net *net) .address = (struct sockaddr *)&sin, .addrsize = sizeof(sin), .servername = "rpc.statd", + .nodename = nodename, .program = &nsm_program, .version = NSM_VERSION, .authflavor = RPC_AUTH_NULL, @@ -102,7 +103,7 @@ static struct rpc_clnt *nsm_client_set(struct lockd_net *ln, return clnt; } -static struct rpc_clnt *nsm_client_get(struct net *net) +static struct rpc_clnt *nsm_client_get(struct net *net, const char *nodename) { struct rpc_clnt *clnt, *new; struct lockd_net *ln = net_generic(net, lockd_net_id); @@ -111,7 +112,7 @@ static struct rpc_clnt *nsm_client_get(struct net *net) if (clnt != NULL) goto out; - clnt = new = nsm_create(net); + clnt = new = nsm_create(net, nodename); if (IS_ERR(clnt)) goto out; @@ -190,19 +191,23 @@ int nsm_monitor(const struct nlm_host *host) struct nsm_res res; int status; struct rpc_clnt *clnt; + const char *nodename = NULL; dprintk("lockd: nsm_monitor(%s)\n", nsm->sm_name); if (nsm->sm_monitored) return 0; + if (host->h_rpcclnt) + nodename = host->h_rpcclnt->cl_nodename; + /* * Choose whether to record the caller_name or IP address of * this peer in the local rpc.statd's database. */ nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; - clnt = nsm_client_get(host->net); + clnt = nsm_client_get(host->net, nodename); if (IS_ERR(clnt)) { status = PTR_ERR(clnt); dprintk("lockd: failed to create NSM upcall transport, " diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index d86acc63b25fa1..598ba80ec30c97 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -57,7 +57,7 @@ struct rpc_clnt { const struct rpc_timeout *cl_timeout; /* Timeout strategy */ int cl_nodelen; /* nodename length */ - char cl_nodename[UNX_MAXNODENAME]; + char cl_nodename[UNX_MAXNODENAME+1]; struct rpc_pipe_dir_head cl_pipedir_objects; struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; @@ -112,6 +112,7 @@ struct rpc_create_args { struct sockaddr *saddress; const struct rpc_timeout *timeout; const char *servername; + const char *nodename; const struct rpc_program *program; u32 prognumber; /* overrides program->number */ u32 version; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 05da12a33945aa..3f5d4d48f0cbd2 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -286,10 +286,8 @@ static struct rpc_xprt *rpc_clnt_set_transport(struct rpc_clnt *clnt, static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename) { - clnt->cl_nodelen = strlen(nodename); - if (clnt->cl_nodelen > UNX_MAXNODENAME) - clnt->cl_nodelen = UNX_MAXNODENAME; - memcpy(clnt->cl_nodename, nodename, clnt->cl_nodelen); + clnt->cl_nodelen = strlcpy(clnt->cl_nodename, + nodename, sizeof(clnt->cl_nodename)); } static int rpc_client_register(struct rpc_clnt *clnt, @@ -365,6 +363,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, const struct rpc_version *version; struct rpc_clnt *clnt = NULL; const struct rpc_timeout *timeout; + const char *nodename = args->nodename; int err; /* sanity check the name before trying to print it */ @@ -420,8 +419,10 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, atomic_set(&clnt->cl_count, 1); + if (nodename == NULL) + nodename = utsname()->nodename; /* save the nodename */ - rpc_clnt_set_nodename(clnt, utsname()->nodename); + rpc_clnt_set_nodename(clnt, nodename); err = rpc_client_register(clnt, args->authflavor, args->client_name); if (err) @@ -576,6 +577,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args, if (xprt == NULL) goto out_err; args->servername = xprt->servername; + args->nodename = clnt->cl_nodename; new = rpc_new_client(args, xprt, clnt); if (IS_ERR(new)) { diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 05202012bcfca1..cf5770d8f49af9 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -355,7 +355,8 @@ int rpcb_create_local(struct net *net) return result; } -static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname, +static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename, + const char *hostname, struct sockaddr *srvaddr, size_t salen, int proto, u32 version) { @@ -365,6 +366,7 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname, .address = srvaddr, .addrsize = salen, .servername = hostname, + .nodename = nodename, .program = &rpcb_program, .version = version, .authflavor = RPC_AUTH_UNIX, @@ -740,7 +742,9 @@ void rpcb_getport_async(struct rpc_task *task) dprintk("RPC: %5u %s: trying rpcbind version %u\n", task->tk_pid, __func__, bind_version); - rpcb_clnt = rpcb_create(xprt->xprt_net, xprt->servername, sap, salen, + rpcb_clnt = rpcb_create(xprt->xprt_net, + clnt->cl_nodename, + xprt->servername, sap, salen, xprt->prot, bind_version); if (IS_ERR(rpcb_clnt)) { status = PTR_ERR(rpcb_clnt); From 7f43e71e8c538356efd5b33a183e6d9ace4739a5 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Fri, 30 Jan 2015 17:13:08 +1030 Subject: [PATCH 447/601] powerpc/powernv: Add OPAL soft-poweroff routine Register a notifier for a OPAL message indicating that the machine should prepare itself for a graceful power off. OPAL will tell us if the power off is a reboot or shutdown, but for now we perform the same orderly_poweroff action. Signed-off-by: Joel Stanley Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/opal.h | 2 +- arch/powerpc/platforms/powernv/Makefile | 2 +- arch/powerpc/platforms/powernv/opal-power.c | 65 +++++++++++++++++++++ 3 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/platforms/powernv/opal-power.c diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 2baf8a5925ca96..9ee0a30a02cefc 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -304,7 +304,7 @@ enum OpalMessageType { */ OPAL_MSG_MEM_ERR, OPAL_MSG_EPOW, - OPAL_MSG_SHUTDOWN, + OPAL_MSG_SHUTDOWN, /* params[0] = 1 reboot, 0 shutdown */ OPAL_MSG_HMI_EVT, OPAL_MSG_TYPE_MAX, }; diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index f241accc053dc9..6f3c5d33c3af9f 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,7 +1,7 @@ obj-y += setup.o opal-wrappers.o opal.o opal-async.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o -obj-y += opal-msglog.o opal-hmi.o +obj-y += opal-msglog.o opal-hmi.o opal-power.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o diff --git a/arch/powerpc/platforms/powernv/opal-power.c b/arch/powerpc/platforms/powernv/opal-power.c new file mode 100644 index 00000000000000..48bf5b080bcf35 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-power.c @@ -0,0 +1,65 @@ +/* + * PowerNV OPAL power control for graceful shutdown handling + * + * Copyright 2015 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include +#include + +#define SOFT_OFF 0x00 +#define SOFT_REBOOT 0x01 + +static int opal_power_control_event(struct notifier_block *nb, + unsigned long msg_type, void *msg) +{ + struct opal_msg *power_msg = msg; + uint64_t type; + + type = be64_to_cpu(power_msg->params[0]); + + switch (type) { + case SOFT_REBOOT: + /* Fall through. The service processor is responsible for + * bringing the machine back up */ + case SOFT_OFF: + pr_info("OPAL: poweroff requested\n"); + orderly_poweroff(true); + break; + default: + pr_err("OPAL: power control type unexpected %016llx\n", type); + } + + return 0; +} + +static struct notifier_block opal_power_control_nb = { + .notifier_call = opal_power_control_event, + .next = NULL, + .priority = 0, +}; + +static int __init opal_power_control_init(void) +{ + int ret; + + ret = opal_message_notifier_register(OPAL_MSG_SHUTDOWN, + &opal_power_control_nb); + if (ret) { + pr_err("%s: Can't register OPAL event notifier (%d)\n", + __func__, ret); + return ret; + } + + return 0; +} +machine_subsys_initcall(powernv, opal_power_control_init); From c2c896bee08e1461fc24f9bf7dd57e2c63f6db70 Mon Sep 17 00:00:00 2001 From: Arseny Solokha Date: Wed, 4 Feb 2015 13:18:02 +1100 Subject: [PATCH 448/601] powerpc/mm: Warn on flushing tlb page in kernel context Function __flush_tlb_page() must only be called for user contexts, so put in extra hardening to warn on calling it for kernel context. Signed-off-by: Arseny Solokha Signed-off-by: Michael Ellerman --- arch/powerpc/mm/tlb_nohash.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index ab0616b0e6c24e..cbd3d069897f61 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -284,7 +284,11 @@ void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, struct cpumask *cpu_mask; unsigned int pid; - if (unlikely(!mm)) + /* + * This function as well as __local_flush_tlb_page() must only be called + * for user contexts. + */ + if (unlikely(WARN_ON(!mm))) return; preempt_disable(); From 851c63e3b381fdbf5aca1a797f37d8606b5588d2 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Mon, 5 Jan 2015 15:43:39 +0000 Subject: [PATCH 449/601] of: Fix brace position for struct of_device_id definition Currently it is not easy to grep for the definition of struct of_device_id. This is trivially fixed by moving the brace to the right place. Signed-off-by: Daniel Thompson Cc: Grant Likely Cc: Rob Herring Signed-off-by: Rob Herring --- include/linux/mod_devicetable.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 745def86258072..bbf85d612be5ac 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -220,8 +220,7 @@ struct serio_device_id { /* * Struct used for matching a device */ -struct of_device_id -{ +struct of_device_id { char name[32]; char type[32]; char compatible[128]; From 2d4c0aef0ff4d4374590d6c7b259a259bb2cb21b Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 27 Jan 2015 12:26:35 +0200 Subject: [PATCH 450/601] of: EXPORT_SYMBOL_GPL of_property_read_u64_array Make of_property_read_u64_array() available for modules as well. This was missing from the patch which originally added the function. Signed-off-by: Sakari Ailus Signed-off-by: Rob Herring --- drivers/of/base.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/of/base.c b/drivers/of/base.c index 36536b6a8834ac..0a8aeb8523fe7d 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1303,6 +1303,7 @@ int of_property_read_u64_array(const struct device_node *np, } return 0; } +EXPORT_SYMBOL_GPL(of_property_read_u64_array); /** * of_property_read_string - Find and read a string from a property From 193c9d23a0f0b8ae0c2aeb517c953ba8aee4ceb9 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Fri, 30 Jan 2015 15:11:04 -0700 Subject: [PATCH 451/601] Documentation: DT bindings: add more Tegra chip compatible strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align compatible strings for several IP blocks present on Tegra chips with the latest doctrine from the DT maintainers: http://marc.info/?l=devicetree&m=142255654213019&w=2 The primary objective here is to avoid checkpatch warnings, per: http://marc.info/?l=linux-tegra&m=142201349727836&w=2 DT binding text files have been updated for the following IP blocks: - PCIe - SOR - SoC timers - AHB "gizmo" - APB_MISC - pinmux control - UART - PWM - I2C - SPI - RTC - PMC - eFuse - AHCI - HDA - XUSB_PADCTRL - SDHCI - SOC_THERM - AHUB - I2S - EHCI - USB PHY N.B. The nvidia,tegra20-timer compatible string is removed from the nvidia,tegra30-timer.txt documentation file because it's already mentioned in the nvidia,tegra20-timer.txt documentation file. This second version takes into account the following requests from Rob Herring : - Per-IP block patches have been combined into a single patch - Explicit documentation about which compatible strings are actually matched by the driver has been removed. In its place is implicit documentation that loosely follows Rob's prescribed format: "Must contain '"nvidia,-pcie", "nvidia,tegra20-pcie"' where is tegra30, tegra132, ..." [...] "You should attempt to document known values of if you use it" Signed-off-by: Paul Walmsley Cc: Alexandre Courbot Cc: Dylan Reid Cc: Greg Kroah-Hartman Cc: Hans de Goede Cc: Ian Campbell Cc: Jingchang Lu Cc: John Crispin Cc: Kumar Gala Cc: Linus Walleij Cc: Mark Rutland Cc: Mikko Perttunen Cc: Murali Karicheri Cc: Paul Walmsley Cc: Pawel Moll Cc: Peter De Schrijver Cc: Peter Hurley Cc: Sean Paul Cc: Stephen Warren Cc: Takashi Iwai Cc: Tejun Heo Cc: "Terje Bergström" Cc: Thierry Reding Cc: Tuomas Tynkkynen Cc: Wolfram Sang Cc: Zhang Rui Cc: dri-devel@lists.freedesktop.org Cc: linux-i2c@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-pci@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: linux-pwm@vger.kernel.org Cc: linux-tegra@vger.kernel.org Acked-by: Eduardo Valentin Signed-off-by: Rob Herring --- .../bindings/arm/tegra/nvidia,tegra20-ahb.txt | 5 ++++- .../bindings/arm/tegra/nvidia,tegra20-pmc.txt | 6 +++++- Documentation/devicetree/bindings/ata/tegra-sata.txt | 4 +++- .../devicetree/bindings/fuse/nvidia,tegra20-fuse.txt | 10 +++++----- .../devicetree/bindings/gpu/nvidia,tegra20-host1x.txt | 8 ++++++-- .../devicetree/bindings/i2c/nvidia,tegra20-i2c.txt | 10 +++++----- .../bindings/misc/nvidia,tegra20-apbmisc.txt | 9 ++++----- .../devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt | 6 +++++- .../devicetree/bindings/pci/nvidia,tegra20-pcie.txt | 8 ++++---- .../bindings/pinctrl/nvidia,tegra124-pinmux.txt | 3 ++- .../bindings/pinctrl/nvidia,tegra124-xusb-padctl.txt | 4 +++- .../devicetree/bindings/pwm/nvidia,tegra20-pwm.txt | 7 ++++--- .../devicetree/bindings/rtc/nvidia,tegra20-rtc.txt | 4 +++- Documentation/devicetree/bindings/serial/of-serial.txt | 5 ++++- .../devicetree/bindings/sound/nvidia,tegra30-ahub.txt | 5 ++++- .../devicetree/bindings/sound/nvidia,tegra30-hda.txt | 4 +++- .../devicetree/bindings/sound/nvidia,tegra30-i2s.txt | 5 ++++- .../devicetree/bindings/spi/nvidia,tegra114-spi.txt | 4 +++- .../devicetree/bindings/thermal/tegra-soctherm.txt | 4 +++- .../devicetree/bindings/timer/nvidia,tegra30-timer.txt | 4 +++- .../devicetree/bindings/usb/nvidia,tegra20-ehci.txt | 5 ++++- .../devicetree/bindings/usb/nvidia,tegra20-usb-phy.txt | 5 ++++- 22 files changed, 85 insertions(+), 40 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt index 234406d41c1277..067c9790062f7e 100644 --- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt @@ -1,7 +1,10 @@ NVIDIA Tegra AHB Required properties: -- compatible : "nvidia,tegra20-ahb" or "nvidia,tegra30-ahb" +- compatible : For Tegra20, must contain "nvidia,tegra20-ahb". For + Tegra30, must contain "nvidia,tegra30-ahb". Otherwise, must contain + '"nvidia,-ahb", "nvidia,tegra30-ahb"' where is tegra124, + tegra132, or tegra210. - reg : Should contain 1 register ranges(address and length) Example: diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt index 68ac65f82a1c59..dd75b972ee376d 100644 --- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt +++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt @@ -6,7 +6,11 @@ modes. It provides power-gating controllers for SoC and CPU power-islands. Required properties: - name : Should be pmc -- compatible : Should contain "nvidia,tegra-pmc". +- compatible : For Tegra20, must contain "nvidia,tegra20-pmc". For Tegra30, + must contain "nvidia,tegra30-pmc". For Tegra114, must contain + "nvidia,tegra114-pmc". For Tegra124, must contain "nvidia,tegra124-pmc". + Otherwise, must contain "nvidia,-pmc", plus at least one of the + above, where is tegra132. - reg : Offset and length of the register set for the device - clocks : Must contain an entry for each entry in clock-names. See ../clocks/clock-bindings.txt for details. diff --git a/Documentation/devicetree/bindings/ata/tegra-sata.txt b/Documentation/devicetree/bindings/ata/tegra-sata.txt index 946f2072570bf4..66c83c3e891511 100644 --- a/Documentation/devicetree/bindings/ata/tegra-sata.txt +++ b/Documentation/devicetree/bindings/ata/tegra-sata.txt @@ -1,7 +1,9 @@ Tegra124 SoC SATA AHCI controller Required properties : -- compatible : "nvidia,tegra124-ahci". +- compatible : For Tegra124, must contain "nvidia,tegra124-ahci". Otherwise, + must contain '"nvidia,-ahci", "nvidia,tegra124-ahci"', where + is tegra132. - reg : Should contain 2 entries: - AHCI register set (SATA BAR5) - SATA register set diff --git a/Documentation/devicetree/bindings/fuse/nvidia,tegra20-fuse.txt b/Documentation/devicetree/bindings/fuse/nvidia,tegra20-fuse.txt index d8c98c7614d04d..23e1d3194174ab 100644 --- a/Documentation/devicetree/bindings/fuse/nvidia,tegra20-fuse.txt +++ b/Documentation/devicetree/bindings/fuse/nvidia,tegra20-fuse.txt @@ -1,11 +1,11 @@ NVIDIA Tegra20/Tegra30/Tegr114/Tegra124 fuse block. Required properties: -- compatible : should be: - "nvidia,tegra20-efuse" - "nvidia,tegra30-efuse" - "nvidia,tegra114-efuse" - "nvidia,tegra124-efuse" +- compatible : For Tegra20, must contain "nvidia,tegra20-efuse". For Tegra30, + must contain "nvidia,tegra30-efuse". For Tegra114, must contain + "nvidia,tegra114-efuse". For Tegra124, must contain "nvidia,tegra124-efuse". + Otherwise, must contain "nvidia,-efuse", plus one of the above, where + is tegra132. Details: nvidia,tegra20-efuse: Tegra20 requires using APB DMA to read the fuse data due to a hardware bug. Tegra20 also lacks certain information which is diff --git a/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt b/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt index 4c32ef0b7db8fd..009f4bfa1590cc 100644 --- a/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt +++ b/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt @@ -197,7 +197,9 @@ of the following host1x client modules: - sor: serial output resource Required properties: - - compatible: "nvidia,tegra124-sor" + - compatible: For Tegra124, must contain "nvidia,tegra124-sor". Otherwise, + must contain '"nvidia,-sor", "nvidia,tegra124-sor"', where + is tegra132. - reg: Physical base address and length of the controller's registers. - interrupts: The interrupt outputs from the controller. - clocks: Must contain an entry for each entry in clock-names. @@ -222,7 +224,9 @@ of the following host1x client modules: - nvidia,dpaux: phandle to a DispayPort AUX interface - dpaux: DisplayPort AUX interface - - compatible: "nvidia,tegra124-dpaux" + - compatible: For Tegra124, must contain "nvidia,tegra124-dpaux". Otherwise, + must contain '"nvidia,-dpaux", "nvidia,tegra124-dpaux"', where + is tegra132. - reg: Physical base address and length of the controller's registers. - interrupts: The interrupt outputs from the controller. - clocks: Must contain an entry for each entry in clock-names. diff --git a/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.txt b/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.txt index 87507e9ce6db50..656716b72cc4d5 100644 --- a/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.txt +++ b/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.txt @@ -1,11 +1,11 @@ NVIDIA Tegra20/Tegra30/Tegra114 I2C controller driver. Required properties: -- compatible : should be: - "nvidia,tegra114-i2c" - "nvidia,tegra30-i2c" - "nvidia,tegra20-i2c" - "nvidia,tegra20-i2c-dvc" +- compatible : For Tegra20, must be one of "nvidia,tegra20-i2c-dvc" or + "nvidia,tegra20-i2c". For Tegra30, must be "nvidia,tegra30-i2c". + For Tegra114, must be "nvidia,tegra114-i2c". Otherwise, must be + "nvidia,-i2c", plus at least one of the above, where is + tegra124, tegra132, or tegra210. Details of compatible are as follows: nvidia,tegra20-i2c-dvc: Tegra20 has specific I2C controller called as DVC I2C controller. This only support master mode of I2C communication. Register diff --git a/Documentation/devicetree/bindings/misc/nvidia,tegra20-apbmisc.txt b/Documentation/devicetree/bindings/misc/nvidia,tegra20-apbmisc.txt index b97b8bef1fe548..47b205cc9cc7ac 100644 --- a/Documentation/devicetree/bindings/misc/nvidia,tegra20-apbmisc.txt +++ b/Documentation/devicetree/bindings/misc/nvidia,tegra20-apbmisc.txt @@ -1,11 +1,10 @@ NVIDIA Tegra20/Tegra30/Tegr114/Tegra124 apbmisc block Required properties: -- compatible : should be: - "nvidia,tegra20-apbmisc" - "nvidia,tegra30-apbmisc" - "nvidia,tegra114-apbmisc" - "nvidia,tegra124-apbmisc" +- compatible : For Tegra20, must be "nvidia,tegra20-apbmisc". For Tegra30, + must be "nvidia,tegra30-apbmisc". Otherwise, must contain + "nvidia,-apbmisc", plus one of the above, where is tegra114, + tegra124, tegra132. - reg: Should contain 2 entries: the first entry gives the physical address and length of the registers which contain revision and debug features. The second entry gives the physical address and length of the diff --git a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt index f357c16ea815c5..15b8368ee1f260 100644 --- a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt +++ b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt @@ -7,7 +7,11 @@ This file documents differences between the core properties described by mmc.txt and the properties used by the sdhci-tegra driver. Required properties: -- compatible : Should be "nvidia,-sdhci" +- compatible : For Tegra20, must contain "nvidia,tegra20-sdhci". + For Tegra30, must contain "nvidia,tegra30-sdhci". For Tegra114, + must contain "nvidia,tegra114-sdhci". For Tegra124, must contain + "nvidia,tegra124-sdhci". Otherwise, must contain "nvidia,-sdhci", + plus one of the above, where is tegra132 or tegra210. - clocks : Must contain one entry, for the module clock. See ../clocks/clock-bindings.txt for details. - resets : Must contain an entry for each entry in reset-names. diff --git a/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt b/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt index d763e047c6aeb1..75321ae23c0805 100644 --- a/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt +++ b/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt @@ -1,10 +1,10 @@ NVIDIA Tegra PCIe controller Required properties: -- compatible: Must be one of: - - "nvidia,tegra20-pcie" - - "nvidia,tegra30-pcie" - - "nvidia,tegra124-pcie" +- compatible: For Tegra20, must contain "nvidia,tegra20-pcie". For Tegra30, + "nvidia,tegra30-pcie". For Tegra124, must contain "nvidia,tegra124-pcie". + Otherwise, must contain "nvidia,-pcie", plus one of the above, where + is tegra132 or tegra210. - device_type: Must be "pci" - reg: A list of physical base address and length for each set of controller registers. Must contain an entry for each entry in the reg-names property. diff --git a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-pinmux.txt b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-pinmux.txt index 189814e7cdc7c1..ecb5c0d25218df 100644 --- a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-pinmux.txt +++ b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-pinmux.txt @@ -6,7 +6,8 @@ nvidia,tegra30-pinmux.txt. In fact, this document assumes that binding as a baseline, and only documents the differences between the two bindings. Required properties: -- compatible: "nvidia,tegra124-pinmux" +- compatible: For Tegra124, must contain "nvidia,tegra124-pinmux". For + Tegra132, must contain '"nvidia,tegra132-pinmux", "nvidia-tegra124-pinmux"'. - reg: Should contain a list of base address and size pairs for: -- first entry - the drive strength and pad control registers. -- second entry - the pinmux registers diff --git a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-xusb-padctl.txt b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-xusb-padctl.txt index 2f9c0bd6645793..30676ded85bb3a 100644 --- a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-xusb-padctl.txt +++ b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-xusb-padctl.txt @@ -13,7 +13,9 @@ how to describe and reference PHYs in device trees. Required properties: -------------------- -- compatible: should be "nvidia,tegra124-xusb-padctl" +- compatible: For Tegra124, must contain "nvidia,tegra124-xusb-padctl". + Otherwise, must contain '"nvidia,-xusb-padctl", + "nvidia-tegra124-xusb-padctl"', where is tegra132 or tegra210. - reg: Physical base address and length of the controller's registers. - resets: Must contain an entry for each entry in reset-names. See ../reset/reset.txt for details. diff --git a/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt b/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt index c7ea9d4a988b8d..c52f03b5032f8f 100644 --- a/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt +++ b/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt @@ -1,9 +1,10 @@ Tegra SoC PWFM controller Required properties: -- compatible: should be one of: - - "nvidia,tegra20-pwm" - - "nvidia,tegra30-pwm" +- compatible: For Tegra20, must contain "nvidia,tegra20-pwm". For Tegra30, + must contain "nvidia,tegra30-pwm". Otherwise, must contain + "nvidia,-pwm", plus one of the above, where is tegra114, + tegra124, tegra132, or tegra210. - reg: physical base address and length of the controller's registers - #pwm-cells: should be 2. See pwm.txt in this directory for a description of the cells format. diff --git a/Documentation/devicetree/bindings/rtc/nvidia,tegra20-rtc.txt b/Documentation/devicetree/bindings/rtc/nvidia,tegra20-rtc.txt index 652d1ff2e8beb6..b7d98ed3e098c5 100644 --- a/Documentation/devicetree/bindings/rtc/nvidia,tegra20-rtc.txt +++ b/Documentation/devicetree/bindings/rtc/nvidia,tegra20-rtc.txt @@ -6,7 +6,9 @@ state. Required properties: -- compatible : should be "nvidia,tegra20-rtc". +- compatible : For Tegra20, must contain "nvidia,tegra20-rtc". Otherwise, + must contain '"nvidia,-rtc", "nvidia,tegra20-rtc"', where + can be tegra30, tegra114, tegra124, or tegra132. - reg : Specifies base physical address and size of the registers. - interrupts : A single interrupt specifier. - clocks : Must contain one entry, for the module clock. diff --git a/Documentation/devicetree/bindings/serial/of-serial.txt b/Documentation/devicetree/bindings/serial/of-serial.txt index b52b98234b9b1d..bea60ef6cdc52c 100644 --- a/Documentation/devicetree/bindings/serial/of-serial.txt +++ b/Documentation/devicetree/bindings/serial/of-serial.txt @@ -8,7 +8,10 @@ Required properties: - "ns16550" - "ns16750" - "ns16850" - - "nvidia,tegra20-uart" + - For Tegra20, must contain "nvidia,tegra20-uart" + - For other Tegra, must contain '"nvidia,-uart", + "nvidia,tegra20-uart"' where is tegra30, tegra114, tegra124, + tegra132, or tegra210. - "nxp,lpc3220-uart" - "ralink,rt2880-uart" - "ibm,qpace-nwp-serial" diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra30-ahub.txt b/Documentation/devicetree/bindings/sound/nvidia,tegra30-ahub.txt index 946e2ac46091c0..0e9a1895d7fb06 100644 --- a/Documentation/devicetree/bindings/sound/nvidia,tegra30-ahub.txt +++ b/Documentation/devicetree/bindings/sound/nvidia,tegra30-ahub.txt @@ -1,7 +1,10 @@ NVIDIA Tegra30 AHUB (Audio Hub) Required properties: -- compatible : "nvidia,tegra30-ahub", "nvidia,tegra114-ahub", etc. +- compatible : For Tegra30, must contain "nvidia,tegra30-ahub". For Tegra114, + must contain "nvidia,tegra114-ahub". For Tegra124, must contain + "nvidia,tegra124-ahub". Otherwise, must contain "nvidia,-ahub", + plus at least one of the above, where is tegra132. - reg : Should contain the register physical address and length for each of the AHUB's register blocks. - Tegra30 requires 2 entries, for the APBIF and AHUB/AUDIO register blocks. diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.txt b/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.txt index b4730c2822bc8f..13e2ef496724bd 100644 --- a/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.txt +++ b/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.txt @@ -1,7 +1,9 @@ NVIDIA Tegra30 HDA controller Required properties: -- compatible : "nvidia,tegra30-hda" +- compatible : For Tegra30, must contain "nvidia,tegra30-hda". Otherwise, + must contain '"nvidia,-hda", "nvidia,tegra30-hda"', where is + tegra114, tegra124, or tegra132. - reg : Should contain the HDA registers location and length. - interrupts : The interrupt from the HDA controller. - clocks : Must contain an entry for each required entry in clock-names. diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra30-i2s.txt b/Documentation/devicetree/bindings/sound/nvidia,tegra30-i2s.txt index 0c113ffe381492..38caa936f6f8ae 100644 --- a/Documentation/devicetree/bindings/sound/nvidia,tegra30-i2s.txt +++ b/Documentation/devicetree/bindings/sound/nvidia,tegra30-i2s.txt @@ -1,7 +1,10 @@ NVIDIA Tegra30 I2S controller Required properties: -- compatible : "nvidia,tegra30-i2s" +- compatible : For Tegra30, must contain "nvidia,tegra30-i2s". For Tegra124, + must contain "nvidia,tegra124-i2s". Otherwise, must contain + "nvidia,-i2s" plus at least one of the above, where is + tegra114 or tegra132. - reg : Should contain I2S registers location and length - clocks : Must contain one entry, for the module clock. See ../clocks/clock-bindings.txt for details. diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt b/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt index 7ea701e07dc260..b785976fe98aff 100644 --- a/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt +++ b/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt @@ -1,7 +1,9 @@ NVIDIA Tegra114 SPI controller. Required properties: -- compatible : should be "nvidia,tegra114-spi". +- compatible : For Tegra114, must contain "nvidia,tegra114-spi". + Otherwise, must contain '"nvidia,-spi", "nvidia,tegra114-spi"' where + is tegra124, tegra132, or tegra210. - reg: Should contain SPI registers location and length. - interrupts: Should contain SPI interrupts. - clock-names : Must include the following entries: diff --git a/Documentation/devicetree/bindings/thermal/tegra-soctherm.txt b/Documentation/devicetree/bindings/thermal/tegra-soctherm.txt index ecf3ed76cd46f8..6b68cd1504056b 100644 --- a/Documentation/devicetree/bindings/thermal/tegra-soctherm.txt +++ b/Documentation/devicetree/bindings/thermal/tegra-soctherm.txt @@ -7,7 +7,9 @@ notifications. It is also used to manage emergency shutdown in an overheating situation. Required properties : -- compatible : "nvidia,tegra124-soctherm". +- compatible : For Tegra124, must contain "nvidia,tegra124-soctherm". + For Tegra132, must contain "nvidia,tegra132-soctherm". + For Tegra210, must contain "nvidia,tegra210-soctherm". - reg : Should contain 1 entry: - SOCTHERM register set - interrupts : Defines the interrupt used by SOCTHERM diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt b/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt index b5082a1cf461a1..1761f53ee36f71 100644 --- a/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt +++ b/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt @@ -6,7 +6,9 @@ trigger a legacy watchdog reset. Required properties: -- compatible : should be "nvidia,tegra30-timer", "nvidia,tegra20-timer". +- compatible : For Tegra30, must contain "nvidia,tegra30-timer". Otherwise, + must contain '"nvidia,-timer", "nvidia,tegra30-timer"' where + is tegra124 or tegra132. - reg : Specifies base physical address and size of the registers. - interrupts : A list of 6 interrupts; one per each of timer channels 1 through 5, and one for the shared interrupt for the remaining channels. diff --git a/Documentation/devicetree/bindings/usb/nvidia,tegra20-ehci.txt b/Documentation/devicetree/bindings/usb/nvidia,tegra20-ehci.txt index 3dc9140e3dfba6..f60785f73d3d56 100644 --- a/Documentation/devicetree/bindings/usb/nvidia,tegra20-ehci.txt +++ b/Documentation/devicetree/bindings/usb/nvidia,tegra20-ehci.txt @@ -6,7 +6,10 @@ Practice : Universal Serial Bus" with the following modifications and additions : Required properties : - - compatible : Should be "nvidia,tegra20-ehci". + - compatible : For Tegra20, must contain "nvidia,tegra20-ehci". + For Tegra30, must contain "nvidia,tegra30-ehci". Otherwise, must contain + "nvidia,-ehci" plus at least one of the above, where is + tegra114, tegra124, tegra132, or tegra210. - nvidia,phy : phandle of the PHY that the controller is connected to. - clocks : Must contain one entry, for the module clock. See ../clocks/clock-bindings.txt for details. diff --git a/Documentation/devicetree/bindings/usb/nvidia,tegra20-usb-phy.txt b/Documentation/devicetree/bindings/usb/nvidia,tegra20-usb-phy.txt index c9205fbf26e22a..a9aa79fb90ed09 100644 --- a/Documentation/devicetree/bindings/usb/nvidia,tegra20-usb-phy.txt +++ b/Documentation/devicetree/bindings/usb/nvidia,tegra20-usb-phy.txt @@ -3,7 +3,10 @@ Tegra SOC USB PHY The device node for Tegra SOC USB PHY: Required properties : - - compatible : Should be "nvidia,tegra-usb-phy". + - compatible : For Tegra20, must contain "nvidia,tegra20-usb-phy". + For Tegra30, must contain "nvidia,tegra30-usb-phy". Otherwise, must contain + "nvidia,-usb-phy" plus at least one of the above, where is + tegra114, tegra124, tegra132, or tegra210. - reg : Defines the following set of registers, in the order listed: - The PHY's own register set. Always present. From f634da375fc9675a978b36298579b5c2d87a6a8b Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Fri, 30 Jan 2015 15:11:04 -0700 Subject: [PATCH 452/601] Documentation: DT bindings: add nvidia, tegra132-denver compatible string Add a compatible string for the NVIDIA Denver CPU to the ARM CPU DT binding documentation file. The primary objective here is to keep checkpatch.pl from warning when the compatible string is used in an SoC DT file, per: http://marc.info/?l=linux-tegra&m=142201349727836&w=2 This second version changes the string from "nvidia,denver" to "nvidia,tegra132-denver" to more precisely describe the revision of the Denver CPU complex that is present in the Tegra132 SoC. Signed-off-by: Paul Walmsley Cc: Rob Herring Cc: Pawel Moll Cc: Mark Rutland Cc: Ian Campbell Cc: Kumar Gala Cc: Heiko Stuebner Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Olof Johansson Cc: Maxime Ripard Cc: Rohit Vaswani Cc: Paul Walmsley Cc: Marc Carino Cc: Lorenzo Pieralisi Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/arm/cpus.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt index b2aacbe16ed9af..8b9e0a95de317e 100644 --- a/Documentation/devicetree/bindings/arm/cpus.txt +++ b/Documentation/devicetree/bindings/arm/cpus.txt @@ -175,6 +175,7 @@ nodes to be present and contain the properties described below. "marvell,pj4a" "marvell,pj4b" "marvell,sheeva-v5" + "nvidia,tegra132-denver" "qcom,krait" "qcom,scorpion" - enable-method From 10638a4ed2b8618f20fabf9ed19df60a68446e90 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Fri, 30 Jan 2015 15:11:04 -0700 Subject: [PATCH 453/601] Documentation: DT: document compatible string existence requirement DT maintainers require all compatible strings used in chip or board DTS file to be previously documented somewhere in Documentation/devicetree/bindings, per: http://marc.info/?l=linux-tegra&m=142201349727836&w=2 Document this requirement in the DT patch submission requirements text file. This second version updates the documentation to align with Rob's comments here: http://marc.info/?l=devicetree&m=142255654213019&w=2 Signed-off-by: Paul Walmsley Cc: Rob Herring Cc: Pawel Moll Cc: Mark Rutland Cc: Ian Campbell Cc: Kumar Gala Cc: Javier Martinez Canillas Cc: Jonathan Corbet Cc: Paul Walmsley Cc: linux-kernel@vger.kernel.org Signed-off-by: Rob Herring --- .../bindings/submitting-patches.txt | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/Documentation/devicetree/bindings/submitting-patches.txt b/Documentation/devicetree/bindings/submitting-patches.txt index b7ba01ad1426cf..56742bc70218bf 100644 --- a/Documentation/devicetree/bindings/submitting-patches.txt +++ b/Documentation/devicetree/bindings/submitting-patches.txt @@ -15,6 +15,29 @@ I. For patch submitters 3) The Documentation/ portion of the patch should come in the series before the code implementing the binding. + 4) Any compatible strings used in a chip or board DTS file must be + previously documented in the corresponding DT binding text file + in Documentation/devicetree/bindings. This rule applies even if + the Linux device driver does not yet match on the compatible + string. [ checkpatch will emit warnings if this step is not + followed as of commit bff5da4335256513497cc8c79f9a9d1665e09864 + ("checkpatch: add DT compatible string documentation checks"). ] + + 5) The wildcard "" may be used in compatible strings, as in + the following example: + + - compatible: Must contain '"nvidia,-pcie", + "nvidia,tegra20-pcie"' where is tegra30, tegra132, ... + + As in the above example, the known values of "" should be + documented if it is used. + + 6) If a documented compatible string is not yet matched by the + driver, the documentation should also include a compatible + string that is matched by the driver (as in the "nvidia,tegra20-pcie" + example above). + + II. For kernel maintainers 1) If you aren't comfortable reviewing a given binding, reply to it and ask From ea96f78813823ab186e7e52122c06fb9c3cf6e20 Mon Sep 17 00:00:00 2001 From: Ingo Tuchscherer Date: Mon, 2 Feb 2015 17:50:17 +0100 Subject: [PATCH 454/601] s390/zcrypt: fixed domain scanning problem (again) Older machines with more then 16 domains need a special check before PQAP instructions can be processed. With commit 5bc334bff9a6e189 this check was reverted by accident. This patch re-establishes the additional code needed for checking the extended domains for older machines. Signed-off-by: Ingo Tuchscherer Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index a60fc2f9f4b25a..3d7f19fb9a4ebd 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1060,9 +1060,13 @@ static inline int ap_test_config_card_id(unsigned int id) */ static inline int ap_test_config_domain(unsigned int domain) { - if (!ap_configuration) - return 1; - return ap_test_config(ap_configuration->aqm, domain); + if (!ap_configuration) /* QCI not supported */ + if (domain < 16) + return 1; /* then domains 0...15 are configured */ + else + return 0; + else + return ap_test_config(ap_configuration->aqm, domain); } /* From 91f65facba5add493fffb643acdb258bd9f54eb2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 3 Feb 2015 13:25:51 +0100 Subject: [PATCH 455/601] iommu/amd: Fix amd_iommu_free_device() put_device_state_wait() doesn't loop on the condition and a spurious wakeup will have it free the device state even though there might still be references out to it. Fix this by using 'normal' wait primitives. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_v2.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 90f70d0e114192..b6398d7285f707 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -151,18 +151,6 @@ static void put_device_state(struct device_state *dev_state) wake_up(&dev_state->wq); } -static void put_device_state_wait(struct device_state *dev_state) -{ - DEFINE_WAIT(wait); - - prepare_to_wait(&dev_state->wq, &wait, TASK_UNINTERRUPTIBLE); - if (!atomic_dec_and_test(&dev_state->count)) - schedule(); - finish_wait(&dev_state->wq, &wait); - - free_device_state(dev_state); -} - /* Must be called under dev_state->lock */ static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state, int pasid, bool alloc) @@ -851,7 +839,13 @@ void amd_iommu_free_device(struct pci_dev *pdev) /* Get rid of any remaining pasid states */ free_pasid_states(dev_state); - put_device_state_wait(dev_state); + put_device_state(dev_state); + /* + * Wait until the last reference is dropped before freeing + * the device state. + */ + wait_event(dev_state->wq, !atomic_read(&dev_state->count)); + free_device_state(dev_state); } EXPORT_SYMBOL(amd_iommu_free_device); From a1bec062c90456983225054d39c8a601db48e638 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 4 Feb 2015 15:50:38 +0100 Subject: [PATCH 456/601] iommu/amd: Use wait_event in put_pasid_state_wait Now that I learned about possible spurious wakeups this place needs fixing too. Replace the self-coded sleep variant with the generic wait_event() helper. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_v2.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index b6398d7285f707..5cc140969b4321 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -266,14 +266,7 @@ static void put_pasid_state(struct pasid_state *pasid_state) static void put_pasid_state_wait(struct pasid_state *pasid_state) { - DEFINE_WAIT(wait); - - prepare_to_wait(&pasid_state->wq, &wait, TASK_UNINTERRUPTIBLE); - - if (!atomic_dec_and_test(&pasid_state->count)) - schedule(); - - finish_wait(&pasid_state->wq, &wait); + wait_event(pasid_state->wq, !atomic_read(&pasid_state->count)); free_pasid_state(pasid_state); } From 63ce3ae889db917cff0cf0c65c837ca7160c8a83 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 4 Feb 2015 16:12:55 +0100 Subject: [PATCH 457/601] iommu: Update my email address The AMD address is dead for a long time already, replace it with a working one. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 2 +- drivers/iommu/amd_iommu_init.c | 2 +- drivers/iommu/amd_iommu_proto.h | 2 +- drivers/iommu/amd_iommu_types.h | 2 +- drivers/iommu/amd_iommu_v2.c | 6 +++--- drivers/iommu/iommu.c | 2 +- drivers/iommu/irq_remapping.h | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 98024856df07fc..6b6dc72e3f660a 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. - * Author: Joerg Roedel + * Author: Joerg Roedel * Leo Duran * * This program is free software; you can redistribute it and/or modify it diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index b0522f15730fbb..e93eb8cd3df3f4 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. - * Author: Joerg Roedel + * Author: Joerg Roedel * Leo Duran * * This program is free software; you can redistribute it and/or modify it diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h index 95ed6deae47fe7..b62ff5493980b0 100644 --- a/drivers/iommu/amd_iommu_proto.h +++ b/drivers/iommu/amd_iommu_proto.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2009-2010 Advanced Micro Devices, Inc. - * Author: Joerg Roedel + * Author: Joerg Roedel * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index cec51a8ba844d7..c4fffb710c5873 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2007-2010 Advanced Micro Devices, Inc. - * Author: Joerg Roedel + * Author: Joerg Roedel * Leo Duran * * This program is free software; you can redistribute it and/or modify it diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 90f70d0e114192..d3a3caf8227128 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2010-2012 Advanced Micro Devices, Inc. - * Author: Joerg Roedel + * Author: Joerg Roedel * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -31,7 +31,7 @@ #include "amd_iommu_proto.h" MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Joerg Roedel "); +MODULE_AUTHOR("Joerg Roedel "); #define MAX_DEVICES 0x10000 #define PRI_QUEUE_SIZE 512 @@ -921,7 +921,7 @@ static int __init amd_iommu_v2_init(void) { int ret; - pr_info("AMD IOMMUv2 driver by Joerg Roedel \n"); + pr_info("AMD IOMMUv2 driver by Joerg Roedel \n"); if (!amd_iommu_v2_supported()) { pr_info("AMD IOMMUv2 functionality not available on this system\n"); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9e0dcdbf4110e8..72e683df073172 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. - * Author: Joerg Roedel + * Author: Joerg Roedel * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index fde250f86e6034..a2b750110bd169 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2012 Advanced Micro Devices, Inc. - * Author: Joerg Roedel + * Author: Joerg Roedel * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published From ae0cbbb1cde1f1bc2454a138a5cab1887d0e103c Mon Sep 17 00:00:00 2001 From: Quentin Lambert Date: Wed, 4 Feb 2015 11:40:07 +0100 Subject: [PATCH 458/601] iommu/amd: Convert non-returned local variable to boolean when relevant This patch was produced using Coccinelle. A simplified version of the semantic patch is: @r exists@ identifier f; local idexpression u8 x; identifier xname; @@ f(...) { ...when any ( x@xname = 1; | x@xname = 0; ) ...when any } @bad exists@ identifier r.f; local idexpression u8 r.x expression e1 != {0, 1}, e2; @@ f(...) { ...when any ( x = e1; | x + e2 ) ...when any } @depends on !bad@ identifier r.f; local idexpression u8 r.x; identifier r.xname; @@ f(...) { ... ++ bool xname; - int xname; <... ( x = - 1 + true | x = - -1 + false ) ...> } Signed-off-by: Quentin Lambert Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 98024856df07fc..5ac2d118f4f40f 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -843,10 +843,10 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, size_t size, u16 domid, int pde) { u64 pages; - int s; + bool s; pages = iommu_num_pages(address, size, PAGE_SIZE); - s = 0; + s = false; if (pages > 1) { /* @@ -854,7 +854,7 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, * TLB entries for this domain */ address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - s = 1; + s = true; } address &= PAGE_MASK; @@ -874,10 +874,10 @@ static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep, u64 address, size_t size) { u64 pages; - int s; + bool s; pages = iommu_num_pages(address, size, PAGE_SIZE); - s = 0; + s = false; if (pages > 1) { /* @@ -885,7 +885,7 @@ static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep, * TLB entries for this domain */ address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - s = 1; + s = true; } address &= PAGE_MASK; From 0e3b137fbf0f4ab901de58fcac7edb12922daa08 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Feb 2015 17:13:58 -0500 Subject: [PATCH 459/601] NFS: Add Anna Schumaker as co-maintainer for the NFS client Anna has essentially been performing the duties of co-maintainer for the past several years. In recognition of those efforts, I'd like to add her to the maintainers file. Cc: Anna Schumaker Signed-off-by: Trond Myklebust --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2fa38532124537..b30d937c3ec8cd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6682,6 +6682,7 @@ F: Documentation/devicetree/bindings/net/nfc/ NFS, SUNRPC, AND LOCKD CLIENTS M: Trond Myklebust +M: Anna Schumaker L: linux-nfs@vger.kernel.org W: http://client.linux-nfs.org T: git git://git.linux-nfs.org/projects/trondmy/linux-nfs.git From 6ae373394c4257bad562817aa60464ff7fe8f9c4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 30 Jan 2015 14:21:14 -0500 Subject: [PATCH 460/601] NFSv4.1: Ask for no delegation on OPEN if using O_DIRECT If we're using NFSv4.1, then we have the ability to let the server know whether or not we believe that returning a delegation as part of our OPEN request would be useful. The feature needs to be used with care, since the client sending the request doesn't necessarily know how other clients are using that file, and how they may be affected by the delegation. For this reason, our initial use of the feature will be to let the server know when the client believes that handing out a delegation would not be useful. The first application for this function is when opening the file using O_DIRECT. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 30 ++++++++++++++++ fs/nfs/nfs4xdr.c | 79 +++++++++++++++++++++++++++-------------- include/linux/nfs_xdr.h | 2 ++ 3 files changed, 85 insertions(+), 26 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6e1c9b2d92c59a..cd4295d84d54b6 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -940,6 +940,31 @@ static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server, return true; } +static u32 +nfs4_map_atomic_open_share(struct nfs_server *server, + fmode_t fmode, int openflags) +{ + u32 res = 0; + + switch (fmode & (FMODE_READ | FMODE_WRITE)) { + case FMODE_READ: + res = NFS4_SHARE_ACCESS_READ; + break; + case FMODE_WRITE: + res = NFS4_SHARE_ACCESS_WRITE; + break; + case FMODE_READ|FMODE_WRITE: + res = NFS4_SHARE_ACCESS_BOTH; + } + if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1)) + goto out; + /* Want no delegation if we're using O_DIRECT */ + if (openflags & O_DIRECT) + res |= NFS4_SHARE_WANT_NO_DELEG; +out: + return res; +} + static enum open_claim_type4 nfs4_map_atomic_open_claim(struct nfs_server *server, enum open_claim_type4 claim) @@ -1002,6 +1027,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, atomic_inc(&sp->so_count); p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); + p->o_arg.share_access = nfs4_map_atomic_open_share(server, + fmode, flags); /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS * will return permission denied for all bits until close */ if (!(flags & O_EXCL)) { @@ -2695,6 +2722,9 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) goto out_wait; } } + calldata->arg.share_access = + nfs4_map_atomic_open_share(NFS_SERVER(inode), + calldata->arg.fmode, 0); nfs_fattr_init(calldata->res.fattr); calldata->timestamp = jiffies; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index a2329d69502ba7..e23a0a664e12d5 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1351,24 +1351,12 @@ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struc encode_string(xdr, name->len, name->name); } -static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode) +static void encode_share_access(struct xdr_stream *xdr, u32 share_access) { __be32 *p; p = reserve_space(xdr, 8); - switch (fmode & (FMODE_READ|FMODE_WRITE)) { - case FMODE_READ: - *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_READ); - break; - case FMODE_WRITE: - *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_WRITE); - break; - case FMODE_READ|FMODE_WRITE: - *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_BOTH); - break; - default: - *p++ = cpu_to_be32(0); - } + *p++ = cpu_to_be32(share_access); *p = cpu_to_be32(0); /* for linux, share_deny = 0 always */ } @@ -1380,7 +1368,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena * owner 4 = 32 */ encode_nfs4_seqid(xdr, arg->seqid); - encode_share_access(xdr, arg->fmode); + encode_share_access(xdr, arg->share_access); p = reserve_space(xdr, 36); p = xdr_encode_hyper(p, arg->clientid); *p++ = cpu_to_be32(24); @@ -1535,7 +1523,7 @@ static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_close encode_op_hdr(xdr, OP_OPEN_DOWNGRADE, decode_open_downgrade_maxsz, hdr); encode_nfs4_stateid(xdr, &arg->stateid); encode_nfs4_seqid(xdr, arg->seqid); - encode_share_access(xdr, arg->fmode); + encode_share_access(xdr, arg->share_access); } static void @@ -4935,20 +4923,13 @@ static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize) return -EIO; } -static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) +static int decode_rw_delegation(struct xdr_stream *xdr, + uint32_t delegation_type, + struct nfs_openres *res) { __be32 *p; - uint32_t delegation_type; int status; - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_overflow; - delegation_type = be32_to_cpup(p); - if (delegation_type == NFS4_OPEN_DELEGATE_NONE) { - res->delegation_type = 0; - return 0; - } status = decode_stateid(xdr, &res->delegation); if (unlikely(status)) return status; @@ -4972,6 +4953,52 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) return -EIO; } +static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res) +{ + __be32 *p; + uint32_t why_no_delegation; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + why_no_delegation = be32_to_cpup(p); + switch (why_no_delegation) { + case WND4_CONTENTION: + case WND4_RESOURCE: + xdr_inline_decode(xdr, 4); + /* Ignore for now */ + } + return 0; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + +static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) +{ + __be32 *p; + uint32_t delegation_type; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + delegation_type = be32_to_cpup(p); + res->delegation_type = 0; + switch (delegation_type) { + case NFS4_OPEN_DELEGATE_NONE: + return 0; + case NFS4_OPEN_DELEGATE_READ: + case NFS4_OPEN_DELEGATE_WRITE: + return decode_rw_delegation(xdr, delegation_type, res); + case NFS4_OPEN_DELEGATE_NONE_EXT: + return decode_no_delegation(xdr, res); + } + return -EIO; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) { __be32 *p; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 81401125ab2dde..2c35e2affa6f0b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -326,6 +326,7 @@ struct nfs_openargs { struct nfs_seqid * seqid; int open_flags; fmode_t fmode; + u32 share_access; u32 access; __u64 clientid; struct stateowner_id id; @@ -393,6 +394,7 @@ struct nfs_closeargs { nfs4_stateid stateid; struct nfs_seqid * seqid; fmode_t fmode; + u32 share_access; const u32 * bitmask; }; From e230f12c98232de0c40a81f28a34d2cff8450e7f Mon Sep 17 00:00:00 2001 From: Rickard Strandqvist Date: Wed, 4 Feb 2015 11:34:30 -0500 Subject: [PATCH 461/601] selinux: Remove unused function avc_sidcmp() Remove the function avc_sidcmp() that is not used anywhere. This was partially found by using a static code analysis program called cppcheck. Signed-off-by: Rickard Strandqvist [PM: rewrite the patch subject line] Signed-off-by: Paul Moore --- security/selinux/avc.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/security/selinux/avc.c b/security/selinux/avc.c index a18f1fa6440bb7..afcc0aed9393a6 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -517,11 +517,6 @@ int __init avc_add_callback(int (*callback)(u32 event), u32 events) return rc; } -static inline int avc_sidcmp(u32 x, u32 y) -{ - return (x == y || x == SECSID_WILD || y == SECSID_WILD); -} - /** * avc_update_node Update an AVC entry * @event : Updating event From 2088d60e3b2f53d0c9590a0202eeff85b288b1eb Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Wed, 4 Feb 2015 11:34:30 -0500 Subject: [PATCH 462/601] selinux: quiet the filesystem labeling behavior message While the filesystem labeling method is only printed at the KERN_DEBUG level, this still appears in dmesg and on modern Linux distributions that create a lot of tmpfs mounts for session handling, the dmesg can easily be filled with a lot of "SELinux: initialized (dev X ..." messages. This patch removes this notification for the normal case but leaves the error message intact (displayed when mounting a filesystem with an unknown labeling behavior). Reported-by: Dave Jones Signed-off-by: Paul Moore --- security/selinux/hooks.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 6da7532893a197..c253caa90bb45e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -456,10 +456,6 @@ static int sb_finish_set_opts(struct super_block *sb) if (sbsec->behavior > ARRAY_SIZE(labeling_behaviors)) printk(KERN_ERR "SELinux: initialized (dev %s, type %s), unknown behavior\n", sb->s_id, sb->s_type->name); - else - printk(KERN_DEBUG "SELinux: initialized (dev %s, type %s), %s\n", - sb->s_id, sb->s_type->name, - labeling_behaviors[sbsec->behavior-1]); sbsec->flags |= SE_SBINITIALIZED; if (selinux_is_sblabel_mnt(sb)) From d5f3a5f6e7e7822df5680d4fe39bf0b6979a1535 Mon Sep 17 00:00:00 2001 From: Mark Salyzyn Date: Wed, 4 Feb 2015 11:34:30 -0500 Subject: [PATCH 463/601] selinux: add security in-core xattr support for pstore and debugfs - add "pstore" and "debugfs" to list of in-core exceptions - change fstype checks to boolean equation - change from strncmp to strcmp for checking Signed-off-by: Mark Salyzyn Acked-by: Stephen Smalley [PM: tweaked the subject line prefix to "selinux"] Signed-off-by: Paul Moore --- security/selinux/hooks.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index c253caa90bb45e..87a915656eab61 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -401,23 +401,14 @@ static int selinux_is_sblabel_mnt(struct super_block *sb) { struct superblock_security_struct *sbsec = sb->s_security; - if (sbsec->behavior == SECURITY_FS_USE_XATTR || - sbsec->behavior == SECURITY_FS_USE_TRANS || - sbsec->behavior == SECURITY_FS_USE_TASK) - return 1; - - /* Special handling for sysfs. Is genfs but also has setxattr handler*/ - if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0) - return 1; - - /* - * Special handling for rootfs. Is genfs but supports - * setting SELinux context on in-core inodes. - */ - if (strncmp(sb->s_type->name, "rootfs", sizeof("rootfs")) == 0) - return 1; - - return 0; + return sbsec->behavior == SECURITY_FS_USE_XATTR || + sbsec->behavior == SECURITY_FS_USE_TRANS || + sbsec->behavior == SECURITY_FS_USE_TASK || + /* Special handling. Genfs but also in-core setxattr handler */ + !strcmp(sb->s_type->name, "sysfs") || + !strcmp(sb->s_type->name, "pstore") || + !strcmp(sb->s_type->name, "debugfs") || + !strcmp(sb->s_type->name, "rootfs"); } static int sb_finish_set_opts(struct super_block *sb) From 6eb4e2b41b264f57ee02d16ee61683952945484d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Feb 2015 11:34:30 -0500 Subject: [PATCH 464/601] SELinux: fix error code in policydb_init() If hashtab_create() returns a NULL pointer then we should return -ENOMEM but instead the current code returns success. Signed-off-by: Dan Carpenter Acked-by: Serge Hallyn Acked-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/ss/policydb.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index bc2a586f095c5f..74aa224267c11f 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -289,12 +289,16 @@ static int policydb_init(struct policydb *p) goto out; p->filename_trans = hashtab_create(filenametr_hash, filenametr_cmp, (1 << 10)); - if (!p->filename_trans) + if (!p->filename_trans) { + rc = -ENOMEM; goto out; + } p->range_tr = hashtab_create(rangetr_hash, rangetr_cmp, 256); - if (!p->range_tr) + if (!p->range_tr) { + rc = -ENOMEM; goto out; + } ebitmap_init(&p->filename_trans_ttypes); ebitmap_init(&p->policycaps); From d5e75500ca401d3128c82c5b0dee2f9b259d5b5c Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Mon, 12 Jan 2015 19:02:49 +0200 Subject: [PATCH 465/601] of: unitest: Add I2C overlay unit tests. Introduce I2C device tree overlay tests. Tests insertion and removal of i2c adapters, i2c devices, and muxes. Signed-off-by: Pantelis Antoniou Signed-off-by: Rob Herring --- .../devicetree/bindings/unittest.txt | 59 +- drivers/of/unittest-data/tests-overlay.dtsi | 94 ++++ drivers/of/unittest.c | 512 ++++++++++++++++-- 3 files changed, 614 insertions(+), 51 deletions(-) diff --git a/Documentation/devicetree/bindings/unittest.txt b/Documentation/devicetree/bindings/unittest.txt index 0f92a22fddfad1..8933211f32f945 100644 --- a/Documentation/devicetree/bindings/unittest.txt +++ b/Documentation/devicetree/bindings/unittest.txt @@ -1,4 +1,4 @@ -* OF selftest platform device +1) OF selftest platform device ** selftest @@ -12,3 +12,60 @@ Example: compatible = "selftest"; status = "okay"; }; + +2) OF selftest i2c adapter platform device + +** platform device unittest adapter + +Required properties: +- compatible: must be selftest-i2c-bus + +Children nodes contain selftest i2c devices. + +Example: + selftest-i2c-bus { + compatible = "selftest-i2c-bus"; + status = "okay"; + }; + +3) OF selftest i2c device + +** I2C selftest device + +Required properties: +- compatible: must be selftest-i2c-dev + +All other properties are optional + +Example: + selftest-i2c-dev { + compatible = "selftest-i2c-dev"; + status = "okay"; + }; + +4) OF selftest i2c mux device + +** I2C selftest mux + +Required properties: +- compatible: must be selftest-i2c-mux + +Children nodes contain selftest i2c bus nodes per channel. + +Example: + selftest-i2c-mux { + compatible = "selftest-i2c-mux"; + status = "okay"; + #address-cells = <1>; + #size-cells = <0>; + channel-0 { + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + i2c-dev { + reg = <8>; + compatible = "selftest-i2c-dev"; + status = "okay"; + }; + }; + }; diff --git a/drivers/of/unittest-data/tests-overlay.dtsi b/drivers/of/unittest-data/tests-overlay.dtsi index a2b687d5f32470..244226cbb5a3b7 100644 --- a/drivers/of/unittest-data/tests-overlay.dtsi +++ b/drivers/of/unittest-data/tests-overlay.dtsi @@ -68,6 +68,48 @@ status = "disabled"; reg = <8>; }; + + i2c-test-bus { + compatible = "selftest-i2c-bus"; + status = "okay"; + reg = <50>; + + #address-cells = <1>; + #size-cells = <0>; + + test-selftest12 { + reg = <8>; + compatible = "selftest-i2c-dev"; + status = "disabled"; + }; + + test-selftest13 { + reg = <9>; + compatible = "selftest-i2c-dev"; + status = "okay"; + }; + + test-selftest14 { + reg = <10>; + compatible = "selftest-i2c-mux"; + status = "okay"; + + #address-cells = <1>; + #size-cells = <0>; + + i2c@0 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + + test-mux-dev { + reg = <32>; + compatible = "selftest-i2c-dev"; + status = "okay"; + }; + }; + }; + }; }; }; @@ -231,5 +273,57 @@ }; }; }; + + /* test enable using absolute target path (i2c) */ + overlay12 { + fragment@0 { + target-path = "/testcase-data/overlay-node/test-bus/i2c-test-bus/test-selftest12"; + __overlay__ { + status = "okay"; + }; + }; + }; + + /* test disable using absolute target path (i2c) */ + overlay13 { + fragment@0 { + target-path = "/testcase-data/overlay-node/test-bus/i2c-test-bus/test-selftest13"; + __overlay__ { + status = "disabled"; + }; + }; + }; + + /* test mux overlay */ + overlay15 { + fragment@0 { + target-path = "/testcase-data/overlay-node/test-bus/i2c-test-bus"; + __overlay__ { + #address-cells = <1>; + #size-cells = <0>; + test-selftest15 { + reg = <11>; + compatible = "selftest-i2c-mux"; + status = "okay"; + + #address-cells = <1>; + #size-cells = <0>; + + i2c@0 { + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + + test-mux-dev { + reg = <32>; + compatible = "selftest-i2c-dev"; + status = "okay"; + }; + }; + }; + }; + }; + }; + }; }; diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 12cdbc1e30420e..e86213b0e7e55a 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -20,6 +20,9 @@ #include #include +#include +#include + #include "of_private.h" static struct selftest_results { @@ -991,17 +994,94 @@ static int of_path_platform_device_exists(const char *path) return pdev != NULL; } -static const char *selftest_path(int nr) +#if IS_ENABLED(CONFIG_I2C) + +/* get the i2c client device instantiated at the path */ +static struct i2c_client *of_path_to_i2c_client(const char *path) +{ + struct device_node *np; + struct i2c_client *client; + + np = of_find_node_by_path(path); + if (np == NULL) + return NULL; + + client = of_find_i2c_device_by_node(np); + of_node_put(np); + + return client; +} + +/* find out if a i2c client device exists at that path */ +static int of_path_i2c_client_exists(const char *path) +{ + struct i2c_client *client; + + client = of_path_to_i2c_client(path); + if (client) + put_device(&client->dev); + return client != NULL; +} +#else +static int of_path_i2c_client_exists(const char *path) +{ + return 0; +} +#endif + +enum overlay_type { + PDEV_OVERLAY, + I2C_OVERLAY +}; + +static int of_path_device_type_exists(const char *path, + enum overlay_type ovtype) { + switch (ovtype) { + case PDEV_OVERLAY: + return of_path_platform_device_exists(path); + case I2C_OVERLAY: + return of_path_i2c_client_exists(path); + } + return 0; +} + +static const char *selftest_path(int nr, enum overlay_type ovtype) +{ + const char *base; static char buf[256]; - snprintf(buf, sizeof(buf) - 1, - "/testcase-data/overlay-node/test-bus/test-selftest%d", nr); + switch (ovtype) { + case PDEV_OVERLAY: + base = "/testcase-data/overlay-node/test-bus"; + break; + case I2C_OVERLAY: + base = "/testcase-data/overlay-node/test-bus/i2c-test-bus"; + break; + default: + buf[0] = '\0'; + return buf; + } + snprintf(buf, sizeof(buf) - 1, "%s/test-selftest%d", base, nr); buf[sizeof(buf) - 1] = '\0'; - return buf; } +static int of_selftest_device_exists(int selftest_nr, enum overlay_type ovtype) +{ + const char *path; + + path = selftest_path(selftest_nr, ovtype); + + switch (ovtype) { + case PDEV_OVERLAY: + return of_path_platform_device_exists(path); + case I2C_OVERLAY: + return of_path_i2c_client_exists(path); + } + return 0; +} + static const char *overlay_path(int nr) { static char buf[256]; @@ -1050,16 +1130,15 @@ static int of_selftest_apply_overlay(int selftest_nr, int overlay_nr, /* apply an overlay while checking before and after states */ static int of_selftest_apply_overlay_check(int overlay_nr, int selftest_nr, - int before, int after) + int before, int after, enum overlay_type ovtype) { int ret; /* selftest device must not be in before state */ - if (of_path_platform_device_exists(selftest_path(selftest_nr)) - != before) { + if (of_selftest_device_exists(selftest_nr, ovtype) != before) { selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n", overlay_path(overlay_nr), - selftest_path(selftest_nr), + selftest_path(selftest_nr, ovtype), !before ? "enabled" : "disabled"); return -EINVAL; } @@ -1071,11 +1150,10 @@ static int of_selftest_apply_overlay_check(int overlay_nr, int selftest_nr, } /* selftest device must be to set to after state */ - if (of_path_platform_device_exists(selftest_path(selftest_nr)) - != after) { + if (of_selftest_device_exists(selftest_nr, ovtype) != after) { selftest(0, "overlay @\"%s\" failed to create @\"%s\" %s\n", overlay_path(overlay_nr), - selftest_path(selftest_nr), + selftest_path(selftest_nr, ovtype), !after ? "enabled" : "disabled"); return -EINVAL; } @@ -1085,16 +1163,16 @@ static int of_selftest_apply_overlay_check(int overlay_nr, int selftest_nr, /* apply an overlay and then revert it while checking before, after states */ static int of_selftest_apply_revert_overlay_check(int overlay_nr, - int selftest_nr, int before, int after) + int selftest_nr, int before, int after, + enum overlay_type ovtype) { int ret, ov_id; /* selftest device must be in before state */ - if (of_path_platform_device_exists(selftest_path(selftest_nr)) - != before) { + if (of_selftest_device_exists(selftest_nr, ovtype) != before) { selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n", overlay_path(overlay_nr), - selftest_path(selftest_nr), + selftest_path(selftest_nr, ovtype), !before ? "enabled" : "disabled"); return -EINVAL; } @@ -1107,11 +1185,10 @@ static int of_selftest_apply_revert_overlay_check(int overlay_nr, } /* selftest device must be in after state */ - if (of_path_platform_device_exists(selftest_path(selftest_nr)) - != after) { + if (of_selftest_device_exists(selftest_nr, ovtype) != after) { selftest(0, "overlay @\"%s\" failed to create @\"%s\" %s\n", overlay_path(overlay_nr), - selftest_path(selftest_nr), + selftest_path(selftest_nr, ovtype), !after ? "enabled" : "disabled"); return -EINVAL; } @@ -1120,16 +1197,15 @@ static int of_selftest_apply_revert_overlay_check(int overlay_nr, if (ret != 0) { selftest(0, "overlay @\"%s\" failed to be destroyed @\"%s\"\n", overlay_path(overlay_nr), - selftest_path(selftest_nr)); + selftest_path(selftest_nr, ovtype)); return ret; } /* selftest device must be again in before state */ - if (of_path_platform_device_exists(selftest_path(selftest_nr)) - != before) { + if (of_selftest_device_exists(selftest_nr, PDEV_OVERLAY) != before) { selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n", overlay_path(overlay_nr), - selftest_path(selftest_nr), + selftest_path(selftest_nr, ovtype), !before ? "enabled" : "disabled"); return -EINVAL; } @@ -1143,7 +1219,7 @@ static void of_selftest_overlay_0(void) int ret; /* device should enable */ - ret = of_selftest_apply_overlay_check(0, 0, 0, 1); + ret = of_selftest_apply_overlay_check(0, 0, 0, 1, PDEV_OVERLAY); if (ret != 0) return; @@ -1156,7 +1232,7 @@ static void of_selftest_overlay_1(void) int ret; /* device should disable */ - ret = of_selftest_apply_overlay_check(1, 1, 1, 0); + ret = of_selftest_apply_overlay_check(1, 1, 1, 0, PDEV_OVERLAY); if (ret != 0) return; @@ -1169,7 +1245,7 @@ static void of_selftest_overlay_2(void) int ret; /* device should enable */ - ret = of_selftest_apply_overlay_check(2, 2, 0, 1); + ret = of_selftest_apply_overlay_check(2, 2, 0, 1, PDEV_OVERLAY); if (ret != 0) return; @@ -1182,7 +1258,7 @@ static void of_selftest_overlay_3(void) int ret; /* device should disable */ - ret = of_selftest_apply_overlay_check(3, 3, 1, 0); + ret = of_selftest_apply_overlay_check(3, 3, 1, 0, PDEV_OVERLAY); if (ret != 0) return; @@ -1195,7 +1271,7 @@ static void of_selftest_overlay_4(void) int ret; /* device should disable */ - ret = of_selftest_apply_overlay_check(4, 4, 0, 1); + ret = of_selftest_apply_overlay_check(4, 4, 0, 1, PDEV_OVERLAY); if (ret != 0) return; @@ -1208,7 +1284,7 @@ static void of_selftest_overlay_5(void) int ret; /* device should disable */ - ret = of_selftest_apply_revert_overlay_check(5, 5, 0, 1); + ret = of_selftest_apply_revert_overlay_check(5, 5, 0, 1, PDEV_OVERLAY); if (ret != 0) return; @@ -1225,12 +1301,12 @@ static void of_selftest_overlay_6(void) /* selftest device must be in before state */ for (i = 0; i < 2; i++) { - if (of_path_platform_device_exists( - selftest_path(selftest_nr + i)) + if (of_selftest_device_exists(selftest_nr + i, PDEV_OVERLAY) != before) { selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n", overlay_path(overlay_nr + i), - selftest_path(selftest_nr + i), + selftest_path(selftest_nr + i, + PDEV_OVERLAY), !before ? "enabled" : "disabled"); return; } @@ -1257,12 +1333,12 @@ static void of_selftest_overlay_6(void) for (i = 0; i < 2; i++) { /* selftest device must be in after state */ - if (of_path_platform_device_exists( - selftest_path(selftest_nr + i)) + if (of_selftest_device_exists(selftest_nr + i, PDEV_OVERLAY) != after) { selftest(0, "overlay @\"%s\" failed @\"%s\" %s\n", overlay_path(overlay_nr + i), - selftest_path(selftest_nr + i), + selftest_path(selftest_nr + i, + PDEV_OVERLAY), !after ? "enabled" : "disabled"); return; } @@ -1273,19 +1349,20 @@ static void of_selftest_overlay_6(void) if (ret != 0) { selftest(0, "overlay @\"%s\" failed destroy @\"%s\"\n", overlay_path(overlay_nr + i), - selftest_path(selftest_nr + i)); + selftest_path(selftest_nr + i, + PDEV_OVERLAY)); return; } } for (i = 0; i < 2; i++) { /* selftest device must be again in before state */ - if (of_path_platform_device_exists( - selftest_path(selftest_nr + i)) + if (of_selftest_device_exists(selftest_nr + i, PDEV_OVERLAY) != before) { selftest(0, "overlay @\"%s\" with device @\"%s\" %s\n", overlay_path(overlay_nr + i), - selftest_path(selftest_nr + i), + selftest_path(selftest_nr + i, + PDEV_OVERLAY), !before ? "enabled" : "disabled"); return; } @@ -1327,7 +1404,8 @@ static void of_selftest_overlay_8(void) if (ret == 0) { selftest(0, "overlay @\"%s\" was destroyed @\"%s\"\n", overlay_path(overlay_nr + 0), - selftest_path(selftest_nr)); + selftest_path(selftest_nr, + PDEV_OVERLAY)); return; } @@ -1337,7 +1415,8 @@ static void of_selftest_overlay_8(void) if (ret != 0) { selftest(0, "overlay @\"%s\" not destroyed @\"%s\"\n", overlay_path(overlay_nr + i), - selftest_path(selftest_nr)); + selftest_path(selftest_nr, + PDEV_OVERLAY)); return; } } @@ -1352,16 +1431,17 @@ static void of_selftest_overlay_10(void) char *child_path; /* device should disable */ - ret = of_selftest_apply_overlay_check(10, 10, 0, 1); - if (selftest(ret == 0, "overlay test %d failed; overlay application\n", 10)) + ret = of_selftest_apply_overlay_check(10, 10, 0, 1, PDEV_OVERLAY); + if (selftest(ret == 0, + "overlay test %d failed; overlay application\n", 10)) return; child_path = kasprintf(GFP_KERNEL, "%s/test-selftest101", - selftest_path(10)); + selftest_path(10, PDEV_OVERLAY)); if (selftest(child_path, "overlay test %d failed; kasprintf\n", 10)) return; - ret = of_path_platform_device_exists(child_path); + ret = of_path_device_type_exists(child_path, PDEV_OVERLAY); kfree(child_path); if (selftest(ret, "overlay test %d failed; no child device\n", 10)) return; @@ -1373,11 +1453,331 @@ static void of_selftest_overlay_11(void) int ret; /* device should disable */ - ret = of_selftest_apply_revert_overlay_check(11, 11, 0, 1); - if (selftest(ret == 0, "overlay test %d failed; overlay application\n", 11)) + ret = of_selftest_apply_revert_overlay_check(11, 11, 0, 1, + PDEV_OVERLAY); + if (selftest(ret == 0, + "overlay test %d failed; overlay application\n", 11)) + return; +} + +#if IS_ENABLED(CONFIG_I2C) && IS_ENABLED(CONFIG_OF_OVERLAY) + +struct selftest_i2c_bus_data { + struct platform_device *pdev; + struct i2c_adapter adap; +}; + +static int selftest_i2c_master_xfer(struct i2c_adapter *adap, + struct i2c_msg *msgs, int num) +{ + struct selftest_i2c_bus_data *std = i2c_get_adapdata(adap); + + (void)std; + + return num; +} + +static u32 selftest_i2c_functionality(struct i2c_adapter *adap) +{ + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; +} + +static const struct i2c_algorithm selftest_i2c_algo = { + .master_xfer = selftest_i2c_master_xfer, + .functionality = selftest_i2c_functionality, +}; + +static int selftest_i2c_bus_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct selftest_i2c_bus_data *std; + struct i2c_adapter *adap; + int ret; + + if (np == NULL) { + dev_err(dev, "No OF data for device\n"); + return -EINVAL; + + } + + dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name); + + std = devm_kzalloc(dev, sizeof(*std), GFP_KERNEL); + if (!std) { + dev_err(dev, "Failed to allocate selftest i2c data\n"); + return -ENOMEM; + } + + /* link them together */ + std->pdev = pdev; + platform_set_drvdata(pdev, std); + + adap = &std->adap; + i2c_set_adapdata(adap, std); + adap->nr = -1; + strlcpy(adap->name, pdev->name, sizeof(adap->name)); + adap->class = I2C_CLASS_DEPRECATED; + adap->algo = &selftest_i2c_algo; + adap->dev.parent = dev; + adap->dev.of_node = dev->of_node; + adap->timeout = 5 * HZ; + adap->retries = 3; + + ret = i2c_add_numbered_adapter(adap); + if (ret != 0) { + dev_err(dev, "Failed to add I2C adapter\n"); + return ret; + } + + return 0; +} + +static int selftest_i2c_bus_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + struct selftest_i2c_bus_data *std = platform_get_drvdata(pdev); + + dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name); + i2c_del_adapter(&std->adap); + + return 0; +} + +static struct of_device_id selftest_i2c_bus_match[] = { + { .compatible = "selftest-i2c-bus", }, + {}, +}; + +static struct platform_driver selftest_i2c_bus_driver = { + .probe = selftest_i2c_bus_probe, + .remove = selftest_i2c_bus_remove, + .driver = { + .name = "selftest-i2c-bus", + .of_match_table = of_match_ptr(selftest_i2c_bus_match), + }, +}; + +static int selftest_i2c_dev_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct device *dev = &client->dev; + struct device_node *np = client->dev.of_node; + + if (!np) { + dev_err(dev, "No OF node\n"); + return -EINVAL; + } + + dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name); + + return 0; +}; + +static int selftest_i2c_dev_remove(struct i2c_client *client) +{ + struct device *dev = &client->dev; + struct device_node *np = client->dev.of_node; + + dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name); + return 0; +} + +static const struct i2c_device_id selftest_i2c_dev_id[] = { + { .name = "selftest-i2c-dev" }, + { } +}; + +static struct i2c_driver selftest_i2c_dev_driver = { + .driver = { + .name = "selftest-i2c-dev", + .owner = THIS_MODULE, + }, + .probe = selftest_i2c_dev_probe, + .remove = selftest_i2c_dev_remove, + .id_table = selftest_i2c_dev_id, +}; + +#if IS_ENABLED(CONFIG_I2C_MUX) + +struct selftest_i2c_mux_data { + int nchans; + struct i2c_adapter *adap[]; +}; + +static int selftest_i2c_mux_select_chan(struct i2c_adapter *adap, + void *client, u32 chan) +{ + return 0; +} + +static int selftest_i2c_mux_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + int ret, i, nchans, size; + struct device *dev = &client->dev; + struct i2c_adapter *adap = to_i2c_adapter(dev->parent); + struct device_node *np = client->dev.of_node, *child; + struct selftest_i2c_mux_data *stm; + u32 reg, max_reg; + + dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name); + + if (!np) { + dev_err(dev, "No OF node\n"); + return -EINVAL; + } + + max_reg = (u32)-1; + for_each_child_of_node(np, child) { + ret = of_property_read_u32(child, "reg", ®); + if (ret) + continue; + if (max_reg == (u32)-1 || reg > max_reg) + max_reg = reg; + } + nchans = max_reg == (u32)-1 ? 0 : max_reg + 1; + if (nchans == 0) { + dev_err(dev, "No channels\n"); + return -EINVAL; + } + + size = offsetof(struct selftest_i2c_mux_data, adap[nchans]); + stm = devm_kzalloc(dev, size, GFP_KERNEL); + if (!stm) { + dev_err(dev, "Out of memory\n"); + return -ENOMEM; + } + stm->nchans = nchans; + for (i = 0; i < nchans; i++) { + stm->adap[i] = i2c_add_mux_adapter(adap, dev, client, + 0, i, 0, selftest_i2c_mux_select_chan, NULL); + if (!stm->adap[i]) { + dev_err(dev, "Failed to register mux #%d\n", i); + for (i--; i >= 0; i--) + i2c_del_mux_adapter(stm->adap[i]); + return -ENODEV; + } + } + + i2c_set_clientdata(client, stm); + + return 0; +}; + +static int selftest_i2c_mux_remove(struct i2c_client *client) +{ + struct device *dev = &client->dev; + struct device_node *np = client->dev.of_node; + struct selftest_i2c_mux_data *stm = i2c_get_clientdata(client); + int i; + + dev_dbg(dev, "%s for node @%s\n", __func__, np->full_name); + for (i = stm->nchans - 1; i >= 0; i--) + i2c_del_mux_adapter(stm->adap[i]); + return 0; +} + +static const struct i2c_device_id selftest_i2c_mux_id[] = { + { .name = "selftest-i2c-mux" }, + { } +}; + +static struct i2c_driver selftest_i2c_mux_driver = { + .driver = { + .name = "selftest-i2c-mux", + .owner = THIS_MODULE, + }, + .probe = selftest_i2c_mux_probe, + .remove = selftest_i2c_mux_remove, + .id_table = selftest_i2c_mux_id, +}; + +#endif + +static int of_selftest_overlay_i2c_init(void) +{ + int ret; + + ret = i2c_add_driver(&selftest_i2c_dev_driver); + if (selftest(ret == 0, + "could not register selftest i2c device driver\n")) + return ret; + + ret = platform_driver_register(&selftest_i2c_bus_driver); + if (selftest(ret == 0, + "could not register selftest i2c bus driver\n")) + return ret; + +#if IS_ENABLED(CONFIG_I2C_MUX) + ret = i2c_add_driver(&selftest_i2c_mux_driver); + if (selftest(ret == 0, + "could not register selftest i2c mux driver\n")) + return ret; +#endif + + return 0; +} + +static void of_selftest_overlay_i2c_cleanup(void) +{ +#if IS_ENABLED(CONFIG_I2C_MUX) + i2c_del_driver(&selftest_i2c_mux_driver); +#endif + platform_driver_unregister(&selftest_i2c_bus_driver); + i2c_del_driver(&selftest_i2c_dev_driver); +} + +static void of_selftest_overlay_i2c_12(void) +{ + int ret; + + /* device should enable */ + ret = of_selftest_apply_overlay_check(12, 12, 0, 1, I2C_OVERLAY); + if (ret != 0) + return; + + selftest(1, "overlay test %d passed\n", 12); +} + +/* test deactivation of device */ +static void of_selftest_overlay_i2c_13(void) +{ + int ret; + + /* device should disable */ + ret = of_selftest_apply_overlay_check(13, 13, 1, 0, I2C_OVERLAY); + if (ret != 0) return; + + selftest(1, "overlay test %d passed\n", 13); +} + +/* just check for i2c mux existence */ +static void of_selftest_overlay_i2c_14(void) +{ } +static void of_selftest_overlay_i2c_15(void) +{ + int ret; + + /* device should enable */ + ret = of_selftest_apply_overlay_check(16, 15, 0, 1, I2C_OVERLAY); + if (ret != 0) + return; + + selftest(1, "overlay test %d passed\n", 15); +} + +#else + +static inline void of_selftest_overlay_i2c_14(void) { } +static inline void of_selftest_overlay_i2c_15(void) { } + +#endif + static void __init of_selftest_overlay(void) { struct device_node *bus_np = NULL; @@ -1402,15 +1802,15 @@ static void __init of_selftest_overlay(void) goto out; } - if (!of_path_platform_device_exists(selftest_path(100))) { + if (!of_selftest_device_exists(100, PDEV_OVERLAY)) { selftest(0, "could not find selftest0 @ \"%s\"\n", - selftest_path(100)); + selftest_path(100, PDEV_OVERLAY)); goto out; } - if (of_path_platform_device_exists(selftest_path(101))) { + if (of_selftest_device_exists(101, PDEV_OVERLAY)) { selftest(0, "selftest1 @ \"%s\" should not exist\n", - selftest_path(101)); + selftest_path(101, PDEV_OVERLAY)); goto out; } @@ -1429,6 +1829,18 @@ static void __init of_selftest_overlay(void) of_selftest_overlay_10(); of_selftest_overlay_11(); +#if IS_ENABLED(CONFIG_I2C) + if (selftest(of_selftest_overlay_i2c_init() == 0, "i2c init failed\n")) + goto out; + + of_selftest_overlay_i2c_12(); + of_selftest_overlay_i2c_13(); + of_selftest_overlay_i2c_14(); + of_selftest_overlay_i2c_15(); + + of_selftest_overlay_i2c_cleanup(); +#endif + out: of_node_put(bus_np); } From 523bf17f1c7c3171e03dbd31402dfa263e63d178 Mon Sep 17 00:00:00 2001 From: "Lad, Prabhakar" Date: Wed, 4 Feb 2015 12:04:06 +0000 Subject: [PATCH 466/601] of/fdt: fix sparse warning this patch fixes following sparse warning: fdt.c:765:12: warning: symbol 'early_init_dt_scan_chosen_serial' was not declared. Should it be static? Signed-off-by: Lad, Prabhakar Signed-off-by: Rob Herring --- drivers/of/fdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 510074226d571d..3a896c9aeb7464 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -762,7 +762,7 @@ static inline void early_init_dt_check_for_initrd(unsigned long node) #ifdef CONFIG_SERIAL_EARLYCON extern struct of_device_id __earlycon_of_table[]; -int __init early_init_dt_scan_chosen_serial(void) +static int __init early_init_dt_scan_chosen_serial(void) { int offset; const char *p; From 462003aa8e38789d2d76946e53dab814419f9976 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Wed, 31 Dec 2014 16:23:20 +0800 Subject: [PATCH 467/601] of: Add vendor prefix for Himax Technologies Inc. Signed-off-by: Liu Ying Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 860b18df0548e0..1cbd3fc970e965 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -71,6 +71,7 @@ gumstix Gumstix, Inc. gw Gateworks Corporation hannstar HannStar Display Corporation haoyu Haoyu Microelectronic Co. Ltd. +himax Himax Technologies, Inc. hisilicon Hisilicon Limited. hit Hitachi Ltd. honeywell Honeywell From 984c7a786c50d1f7e77e7697b7bb844aed2e748f Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Wed, 31 Dec 2014 16:23:21 +0800 Subject: [PATCH 468/601] of: Add vendor prefix for Truly Semiconductors Limited Signed-off-by: Liu Ying Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 1cbd3fc970e965..52f38a00a26f1c 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -168,6 +168,7 @@ tlm Trusted Logic Mobility toradex Toradex AG toshiba Toshiba Corporation toumaz Toumaz +truly Truly Semiconductors Limited usi Universal Scientific Industrial Co., Ltd. v3 V3 Semiconductor variscite Variscite Ltd. From 3cfd59055988c33ddc5aad0f9c8250db87878787 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 2 Dec 2014 01:09:04 -0200 Subject: [PATCH 469/601] of: Use ovti for Omnivision The correct symbol for OmniVision Technologies is 'ovti', so let's convert it. Signed-off-by: Fabio Estevam Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/media/atmel-isi.txt | 2 +- Documentation/devicetree/bindings/media/video-interfaces.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/media/atmel-isi.txt b/Documentation/devicetree/bindings/media/atmel-isi.txt index 17e71b7b44c62a..251f008f220cff 100644 --- a/Documentation/devicetree/bindings/media/atmel-isi.txt +++ b/Documentation/devicetree/bindings/media/atmel-isi.txt @@ -38,7 +38,7 @@ Example: i2c1: i2c@f0018000 { ov2640: camera@0x30 { - compatible = "omnivision,ov2640"; + compatible = "ovti,ov2640"; reg = <0x30>; port { diff --git a/Documentation/devicetree/bindings/media/video-interfaces.txt b/Documentation/devicetree/bindings/media/video-interfaces.txt index ce719f89dd1ce4..d87a902d27dabb 100644 --- a/Documentation/devicetree/bindings/media/video-interfaces.txt +++ b/Documentation/devicetree/bindings/media/video-interfaces.txt @@ -159,7 +159,7 @@ pipelines can be active: ov772x -> ceu0 or imx074 -> csi2 -> ceu0. i2c0: i2c@0xfff20000 { ... ov772x_1: camera@0x21 { - compatible = "omnivision,ov772x"; + compatible = "ovti,ov772x"; reg = <0x21>; vddio-supply = <®ulator1>; vddcore-supply = <®ulator2>; From ffe24b28ebff32da00e53f9d18b289b8bf233373 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 2 Dec 2014 01:09:03 -0200 Subject: [PATCH 470/601] of: Add vendor prefix for OmniVision Technologies OmniVision Technologies is a manufacturer of CMOS Image Sensors. Signed-off-by: Fabio Estevam Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 52f38a00a26f1c..c2a48ff38f7411 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -120,6 +120,7 @@ nvidia NVIDIA nxp NXP Semiconductors onnn ON Semiconductor Corp. opencores OpenCores.org +ovti OmniVision Technologies panasonic Panasonic Corporation pericom Pericom Technology Inc. phytec PHYTEC Messtechnik GmbH From b625a61698619c7af652de2701a2fb17c5c5d66e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 4 Feb 2015 16:59:32 -0500 Subject: [PATCH 471/601] xprtrdma: Address sparse complaint in rpcr_to_rdmar() With "make ARCH=x86_64 allmodconfig make C=1 CF=-D__CHECK_ENDIAN__": linux-2.6/net/sunrpc/xprtrdma/xprt_rdma.h:273:30: warning: incorrect type in initializer (different base types) linux-2.6/net/sunrpc/xprtrdma/xprt_rdma.h:273:30: expected restricted __be32 [usertype] *buffer linux-2.6/net/sunrpc/xprtrdma/xprt_rdma.h:273:30: got unsigned int [usertype] *rq_buffer As far as I can tell this is a false positive. Reported-by: kbuild-all@01.org Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/xprt_rdma.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c9d2a02f631b22..d1b70397c60f0d 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -270,9 +270,10 @@ struct rpcrdma_req { static inline struct rpcrdma_req * rpcr_to_rdmar(struct rpc_rqst *rqst) { - struct rpcrdma_regbuf *rb = container_of(rqst->rq_buffer, - struct rpcrdma_regbuf, - rg_base[0]); + void *buffer = rqst->rq_buffer; + struct rpcrdma_regbuf *rb; + + rb = container_of(buffer, struct rpcrdma_regbuf, rg_base); return rb->rg_owner; } From b087e6190ddcd9ae4e8ff2c788d2b32f193e946b Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 4 Feb 2015 19:09:01 +1100 Subject: [PATCH 472/601] cxl: Export optional AFU configuration record in sysfs An AFU may optionally contain one or more PCIe like configuration records, which can be used to identify the AFU. This patch adds support for exposing the raw config space and the vendor, device and class code under sysfs. These will appear in a subdirectory of the AFU device corresponding with the configuration record number, e.g. cat /sys/class/cxl/afu0.0/cr0/vendor 0x1014 cat /sys/class/cxl/afu0.0/cr0/device 0x4350 cat /sys/class/cxl/afu0.0/cr0/class 0x120000 hexdump -C /sys/class/cxl/afu0.0/cr0/config 00000000 14 10 50 43 00 00 00 00 06 00 00 12 00 00 00 00 |..PC............| 00000010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| * 00000100 These files behave in much the same way as the equivalent files for PCI devices, with one exception being that the config file is currently read-only and restricted to the root user. It is not necessarily required to be this strict, but we currently do not have a compelling use-case to make it writable and/or world-readable, so I erred on the side of being restrictive. Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- Documentation/ABI/testing/sysfs-class-cxl | 37 +++++ drivers/misc/cxl/cxl.h | 13 ++ drivers/misc/cxl/pci.c | 23 +++ drivers/misc/cxl/sysfs.c | 179 ++++++++++++++++++++-- 4 files changed, 242 insertions(+), 10 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl index 9ea01068a16c15..3680364b404885 100644 --- a/Documentation/ABI/testing/sysfs-class-cxl +++ b/Documentation/ABI/testing/sysfs-class-cxl @@ -81,6 +81,43 @@ Description: read only this this kernel supports. +AFU configuration records (eg. /sys/class/cxl/afu0.0/cr0): + +An AFU may optionally export one or more PCIe like configuration records, known +as AFU configuration records, which will show up here (if present). + +What: /sys/class/cxl//cr/vendor +Date: February 2015 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Hexadecimal value of the vendor ID found in this AFU + configuration record. + +What: /sys/class/cxl//cr/device +Date: February 2015 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Hexadecimal value of the device ID found in this AFU + configuration record. + +What: /sys/class/cxl//cr/vendor +Date: February 2015 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Hexadecimal value of the class code found in this AFU + configuration record. + +What: /sys/class/cxl//cr/config +Date: February 2015 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + This binary file provides raw access to the AFU configuration + record. The format is expected to match the either the standard + or extended configuration space defined by the PCIe + specification. + + + Master contexts (eg. /sys/class/cxl/afu0.0m) What: /sys/class/cxl/m/mmio_size diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 6a6a487464c58e..a1cee4767ec6c5 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -382,6 +382,10 @@ struct cxl_afu { int slice; int modes_supported; int current_mode; + int crs_num; + u64 crs_len; + u64 crs_offset; + struct list_head crs; enum prefault_modes prefault_mode; bool psa; bool pp_psa; @@ -551,6 +555,15 @@ static inline void __iomem *_cxl_p2n_addr(struct cxl_afu *afu, cxl_p2n_reg_t reg #define cxl_p2n_read(afu, reg) \ in_be64(_cxl_p2n_addr(afu, reg)) + +#define cxl_afu_cr_read64(afu, cr, off) \ + in_le64((afu)->afu_desc_mmio + (afu)->crs_offset + ((cr) * (afu)->crs_len) + (off)) +#define cxl_afu_cr_read32(afu, cr, off) \ + in_le32((afu)->afu_desc_mmio + (afu)->crs_offset + ((cr) * (afu)->crs_len) + (off)) +u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off); +u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off); + + struct cxl_calls { void (*cxl_slbia)(struct mm_struct *mm); struct module *owner; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index cb250673b5c634..2b2e1b80d75985 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -114,6 +114,24 @@ #define AFUD_EB_LEN(val) EXTRACT_PPC_BITS(val, 8, 63) #define AFUD_READ_EB_OFF(afu) AFUD_READ(afu, 0x48) +u16 cxl_afu_cr_read16(struct cxl_afu *afu, int cr, u64 off) +{ + u64 aligned_off = off & ~0x3L; + u32 val; + + val = cxl_afu_cr_read32(afu, cr, aligned_off); + return (val >> ((off & 0x2) * 8)) & 0xffff; +} + +u8 cxl_afu_cr_read8(struct cxl_afu *afu, int cr, u64 off) +{ + u64 aligned_off = off & ~0x3L; + u32 val; + + val = cxl_afu_cr_read32(afu, cr, aligned_off); + return (val >> ((off & 0x3) * 8)) & 0xff; +} + static DEFINE_PCI_DEVICE_TABLE(cxl_pci_tbl) = { { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0477), }, { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), }, @@ -556,6 +574,7 @@ static int cxl_read_afu_descriptor(struct cxl_afu *afu) val = AFUD_READ_INFO(afu); afu->pp_irqs = AFUD_NUM_INTS_PER_PROC(val); afu->max_procs_virtualised = AFUD_NUM_PROCS(val); + afu->crs_num = AFUD_NUM_CRS(val); if (AFUD_AFU_DIRECTED(val)) afu->modes_supported |= CXL_MODE_DIRECTED; @@ -570,6 +589,10 @@ static int cxl_read_afu_descriptor(struct cxl_afu *afu) if ((afu->pp_psa = AFUD_PPPSA_PP(val))) afu->pp_offset = AFUD_READ_PPPSA_OFF(afu); + val = AFUD_READ_CR(afu); + afu->crs_len = AFUD_CR_LEN(val) * 256; + afu->crs_offset = AFUD_READ_CR_OFF(afu); + return 0; } diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c index adf1f6d849131e..d0c38c7bc0c4bf 100644 --- a/drivers/misc/cxl/sysfs.c +++ b/drivers/misc/cxl/sysfs.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "cxl.h" @@ -367,8 +368,6 @@ static struct device_attribute afu_attrs[] = { __ATTR(reset, S_IWUSR, NULL, reset_store_afu), }; - - int cxl_sysfs_adapter_add(struct cxl *adapter) { int i, rc; @@ -391,31 +390,191 @@ void cxl_sysfs_adapter_remove(struct cxl *adapter) device_remove_file(&adapter->dev, &adapter_attrs[i]); } +struct afu_config_record { + struct kobject kobj; + struct bin_attribute config_attr; + struct list_head list; + int cr; + u16 device; + u16 vendor; + u32 class; +}; + +#define to_cr(obj) container_of(obj, struct afu_config_record, kobj) + +static ssize_t vendor_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct afu_config_record *cr = to_cr(kobj); + + return scnprintf(buf, PAGE_SIZE, "0x%.4x\n", cr->vendor); +} + +static ssize_t device_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct afu_config_record *cr = to_cr(kobj); + + return scnprintf(buf, PAGE_SIZE, "0x%.4x\n", cr->device); +} + +static ssize_t class_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + struct afu_config_record *cr = to_cr(kobj); + + return scnprintf(buf, PAGE_SIZE, "0x%.6x\n", cr->class); +} + +static ssize_t afu_read_config(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct afu_config_record *cr = to_cr(kobj); + struct cxl_afu *afu = to_cxl_afu(container_of(kobj->parent, struct device, kobj)); + + u64 i, j, val, size = afu->crs_len; + + if (off > size) + return 0; + if (off + count > size) + count = size - off; + + for (i = 0; i < count;) { + val = cxl_afu_cr_read64(afu, cr->cr, off & ~0x7); + for (j = off & 0x7; j < 8 && i < count; i++, j++, off++) + buf[i] = (val >> (j * 8)) & 0xff; + } + + return count; +} + +static struct kobj_attribute vendor_attribute = + __ATTR_RO(vendor); +static struct kobj_attribute device_attribute = + __ATTR_RO(device); +static struct kobj_attribute class_attribute = + __ATTR_RO(class); + +static struct attribute *afu_cr_attrs[] = { + &vendor_attribute.attr, + &device_attribute.attr, + &class_attribute.attr, + NULL, +}; + +static void release_afu_config_record(struct kobject *kobj) +{ + struct afu_config_record *cr = to_cr(kobj); + + kfree(cr); +} + +static struct kobj_type afu_config_record_type = { + .sysfs_ops = &kobj_sysfs_ops, + .release = release_afu_config_record, + .default_attrs = afu_cr_attrs, +}; + +static struct afu_config_record *cxl_sysfs_afu_new_cr(struct cxl_afu *afu, int cr_idx) +{ + struct afu_config_record *cr; + int rc; + + cr = kzalloc(sizeof(struct afu_config_record), GFP_KERNEL); + if (!cr) + return ERR_PTR(-ENOMEM); + + cr->cr = cr_idx; + cr->device = cxl_afu_cr_read16(afu, cr_idx, PCI_DEVICE_ID); + cr->vendor = cxl_afu_cr_read16(afu, cr_idx, PCI_VENDOR_ID); + cr->class = cxl_afu_cr_read32(afu, cr_idx, PCI_CLASS_REVISION) >> 8; + + /* + * Export raw AFU PCIe like config record. For now this is read only by + * root - we can expand that later to be readable by non-root and maybe + * even writable provided we have a good use-case. Once we suport + * exposing AFUs through a virtual PHB they will get that for free from + * Linux' PCI infrastructure, but until then it's not clear that we + * need it for anything since the main use case is just identifying + * AFUs, which can be done via the vendor, device and class attributes. + */ + sysfs_bin_attr_init(&cr->config_attr); + cr->config_attr.attr.name = "config"; + cr->config_attr.attr.mode = S_IRUSR; + cr->config_attr.size = afu->crs_len; + cr->config_attr.read = afu_read_config; + + rc = kobject_init_and_add(&cr->kobj, &afu_config_record_type, + &afu->dev.kobj, "cr%i", cr->cr); + if (rc) + goto err; + + rc = sysfs_create_bin_file(&cr->kobj, &cr->config_attr); + if (rc) + goto err1; + + rc = kobject_uevent(&cr->kobj, KOBJ_ADD); + if (rc) + goto err2; + + return cr; +err2: + sysfs_remove_bin_file(&cr->kobj, &cr->config_attr); +err1: + kobject_put(&cr->kobj); + return ERR_PTR(rc); +err: + kfree(cr); + return ERR_PTR(rc); +} + +void cxl_sysfs_afu_remove(struct cxl_afu *afu) +{ + struct afu_config_record *cr, *tmp; + int i; + + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) + device_remove_file(&afu->dev, &afu_attrs[i]); + + list_for_each_entry_safe(cr, tmp, &afu->crs, list) { + sysfs_remove_bin_file(&cr->kobj, &cr->config_attr); + kobject_put(&cr->kobj); + } +} + int cxl_sysfs_afu_add(struct cxl_afu *afu) { + struct afu_config_record *cr; int i, rc; + INIT_LIST_HEAD(&afu->crs); + for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) { if ((rc = device_create_file(&afu->dev, &afu_attrs[i]))) goto err; } + for (i = 0; i < afu->crs_num; i++) { + cr = cxl_sysfs_afu_new_cr(afu, i); + if (IS_ERR(cr)) { + rc = PTR_ERR(cr); + goto err1; + } + list_add(&cr->list, &afu->crs); + } + return 0; +err1: + cxl_sysfs_afu_remove(afu); + return rc; err: for (i--; i >= 0; i--) device_remove_file(&afu->dev, &afu_attrs[i]); return rc; } -void cxl_sysfs_afu_remove(struct cxl_afu *afu) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(afu_attrs); i++) - device_remove_file(&afu->dev, &afu_attrs[i]); -} - int cxl_sysfs_afu_m_add(struct cxl_afu *afu) { int i, rc; From 3d5be0392f52489b7572903d83f1f267f26e6b69 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 4 Feb 2015 19:09:02 +1100 Subject: [PATCH 473/601] cxl: Fail AFU initialisation if an invalid configuration record is found If an AFU claims to have a configuration record but doesn't actually contain a vendor and device ID, fail the AFU initialisation. Right now this is just a way of politely letting AFU developers know that they need to fix their config space, but later on we may expose the AFUs as actual PCI devices in their own right and don't want to inadvertendly expose an AFU with a bad config space. Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/pci.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 2b2e1b80d75985..1ef01647265f99 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -598,6 +598,8 @@ static int cxl_read_afu_descriptor(struct cxl_afu *afu) static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu) { + int i; + if (afu->psa && afu->adapter->ps_size < (afu->pp_offset + afu->pp_size*afu->max_procs_virtualised)) { dev_err(&afu->dev, "per-process PSA can't fit inside the PSA!\n"); @@ -607,6 +609,13 @@ static int cxl_afu_descriptor_looks_ok(struct cxl_afu *afu) if (afu->pp_psa && (afu->pp_size < PAGE_SIZE)) dev_warn(&afu->dev, "AFU uses < PAGE_SIZE per-process PSA!"); + for (i = 0; i < afu->crs_num; i++) { + if ((cxl_afu_cr_read32(afu, i, 0) == 0)) { + dev_err(&afu->dev, "ABORTING: AFU configuration record %i is invalid\n", i); + return -EINVAL; + } + } + return 0; } From a6130ed253a931d2169c26ab0958d81b0dce4d6e Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 4 Feb 2015 19:10:38 +1100 Subject: [PATCH 474/601] cxl: Add missing return statement after handling AFU errror We were missing a return statement in the PSL interrupt handler in the case of an AFU error, which would trigger an "Unhandled CXL PSL IRQ" warning. We do actually handle these type of errors (by notifying userspace), so add the missing return IRQ_HANDLED so we don't throw unecessary warnings. Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index f0836472403b8f..c8929c52669170 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -170,6 +170,7 @@ static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info) } cxl_ack_irq(ctx, CXL_PSL_TFC_An_A, 0); + return IRQ_HANDLED; } if (dsisr & CXL_PSL_DSISR_An_OC) pr_devel("CXL interrupt: OS Context Warning\n"); From ea7c38fef0b774a5dc16fb0ca5935f0ae8568176 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 5 Feb 2015 15:13:24 -0500 Subject: [PATCH 475/601] NFSv4: Ensure we reference the inode for return-on-close in delegreturn If we have to do a return-on-close in the delegreturn code, then we must ensure that the inode and super block remain referenced. Cc: Peng Tao Cc: stable@vger.kernel.org # 3.17.x Signed-off-by: Trond Myklebust Reviewed-by: Peng Tao --- fs/nfs/internal.h | 22 +++++++++++++++++++++- fs/nfs/nfs4proc.c | 14 +++++++++----- fs/nfs/super.c | 9 ++++++--- 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index a98cf20061795a..21469e6e383427 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -391,7 +391,7 @@ extern struct rpc_stat nfs_rpcstat; extern int __init register_nfs_fs(void); extern void __exit unregister_nfs_fs(void); -extern void nfs_sb_active(struct super_block *sb); +extern bool nfs_sb_active(struct super_block *sb); extern void nfs_sb_deactive(struct super_block *sb); /* namespace.c */ @@ -514,6 +514,26 @@ extern int nfs41_walk_client_list(struct nfs_client *clp, struct nfs_client **result, struct rpc_cred *cred); +static inline struct inode *nfs_igrab_and_active(struct inode *inode) +{ + inode = igrab(inode); + if (inode != NULL && !nfs_sb_active(inode->i_sb)) { + iput(inode); + inode = NULL; + } + return inode; +} + +static inline void nfs_iput_and_deactive(struct inode *inode) +{ + if (inode != NULL) { + struct super_block *sb = inode->i_sb; + + iput(inode); + nfs_sb_deactive(sb); + } +} + /* * Determine the device name as a string */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cd4295d84d54b6..dd892a4e7eb307 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5175,9 +5175,13 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) static void nfs4_delegreturn_release(void *calldata) { struct nfs4_delegreturndata *data = calldata; + struct inode *inode = data->inode; - if (data->roc) - pnfs_roc_release(data->inode); + if (inode) { + if (data->roc) + pnfs_roc_release(inode); + nfs_iput_and_deactive(inode); + } kfree(calldata); } @@ -5234,9 +5238,9 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co nfs_fattr_init(data->res.fattr); data->timestamp = jiffies; data->rpc_status = 0; - data->inode = inode; - data->roc = list_empty(&NFS_I(inode)->open_files) ? - pnfs_roc(inode) : false; + data->inode = nfs_igrab_and_active(inode); + if (data->inode) + data->roc = nfs4_roc(inode); task_setup_data.callback_data = data; msg.rpc_argp = &data->args; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 31a11b0e885deb..368d9395d2e7f0 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -405,12 +405,15 @@ void __exit unregister_nfs_fs(void) unregister_filesystem(&nfs_fs_type); } -void nfs_sb_active(struct super_block *sb) +bool nfs_sb_active(struct super_block *sb) { struct nfs_server *server = NFS_SB(sb); - if (atomic_inc_return(&server->active) == 1) - atomic_inc(&sb->s_active); + if (!atomic_inc_not_zero(&sb->s_active)) + return false; + if (atomic_inc_return(&server->active) != 1) + atomic_dec(&sb->s_active); + return true; } EXPORT_SYMBOL_GPL(nfs_sb_active); From 472e259449819d939b5a5188b6f4c7d59aa4304c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 5 Feb 2015 16:50:30 -0500 Subject: [PATCH 476/601] NFSv4.1: Pin the inode and super block in asynchronous layoutcommit If we're sending an asynchronous layoutcommit, then we need to ensure that the inode and the super block remain pinned. Signed-off-by: Trond Myklebust Reviewed-by: Peng Tao --- fs/nfs/nfs4proc.c | 19 +++++++++++-------- include/linux/nfs_xdr.h | 1 + 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index dd892a4e7eb307..e092b8540e2e16 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7989,6 +7989,7 @@ static void nfs4_layoutcommit_release(void *calldata) nfs_post_op_update_inode_force_wcc(data->args.inode, data->res.fattr); put_rpccred(data->cred); + nfs_iput_and_deactive(data->inode); kfree(data); } @@ -8013,7 +8014,6 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) .rpc_message = &msg, .callback_ops = &nfs4_layoutcommit_ops, .callback_data = data, - .flags = RPC_TASK_ASYNC, }; struct rpc_task *task; int status = 0; @@ -8024,18 +8024,21 @@ nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) data->args.lastbytewritten, data->args.inode->i_ino); + if (!sync) { + data->inode = nfs_igrab_and_active(data->args.inode); + if (data->inode == NULL) { + nfs4_layoutcommit_release(data); + return -EAGAIN; + } + task_setup_data.flags = RPC_TASK_ASYNC; + } nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); - if (sync == false) - goto out; - status = nfs4_wait_for_completion_rpc_task(task); - if (status != 0) - goto out; - status = task->tk_status; + if (sync) + status = task->tk_status; trace_nfs4_layoutcommit(data->args.inode, status); -out: dprintk("%s: status %d\n", __func__, status); rpc_put_task(task); return status; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 2c35e2affa6f0b..bb0d56f737e01b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -285,6 +285,7 @@ struct nfs4_layoutcommit_data { struct nfs_fattr fattr; struct list_head lseg_list; struct rpc_cred *cred; + struct inode *inode; struct nfs4_layoutcommit_args args; struct nfs4_layoutcommit_res res; }; From 5a0ec8acb945e302ce819b4a9787796ccf284548 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 5 Feb 2015 16:35:16 -0500 Subject: [PATCH 477/601] NFSv4.1: Pin the inode and super block in asynchronous layoutreturns If we're sending an asynchronous layoutreturn, then we need to ensure that the inode and the super block remain pinned. Cc: Peng Tao Signed-off-by: Trond Myklebust Reviewed-by: Peng Tao --- fs/nfs/nfs4proc.c | 19 +++++++++++-------- include/linux/nfs_xdr.h | 1 + 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e092b8540e2e16..2e7c9f7a6f7cc8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7856,6 +7856,7 @@ static void nfs4_layoutreturn_release(void *calldata) lo->plh_block_lgets--; spin_unlock(&lo->plh_inode->i_lock); pnfs_put_layout_hdr(lrp->args.layout); + nfs_iput_and_deactive(lrp->inode); kfree(calldata); dprintk("<-- %s\n", __func__); } @@ -7880,23 +7881,25 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync) .rpc_message = &msg, .callback_ops = &nfs4_layoutreturn_call_ops, .callback_data = lrp, - .flags = RPC_TASK_ASYNC, }; int status = 0; dprintk("--> %s\n", __func__); + if (!sync) { + lrp->inode = nfs_igrab_and_active(lrp->args.inode); + if (!lrp->inode) { + nfs4_layoutreturn_release(lrp); + return -EAGAIN; + } + task_setup_data.flags |= RPC_TASK_ASYNC; + } nfs4_init_sequence(&lrp->args.seq_args, &lrp->res.seq_res, 1); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) return PTR_ERR(task); - if (sync == false) - goto out; - status = nfs4_wait_for_completion_rpc_task(task); - if (status != 0) - goto out; - status = task->tk_status; + if (sync) + status = task->tk_status; trace_nfs4_layoutreturn(lrp->args.inode, status); -out: dprintk("<-- %s status=%d\n", __func__, status); rpc_put_task(task); return status; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index bb0d56f737e01b..38d96ba935c2d7 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -310,6 +310,7 @@ struct nfs4_layoutreturn { struct nfs4_layoutreturn_res res; struct rpc_cred *cred; struct nfs_client *clp; + struct inode *inode; int rpc_status; }; From e4af440aaf390ac1d39b26ef6cf4a28bcb6a5979 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 5 Feb 2015 17:05:08 -0500 Subject: [PATCH 478/601] NFSv4.1: pnfs_send_layoutreturn should use GFP_NOFS In we want to be able to call pnfs_send_layoutreturn() from within the writeback path, we really want it to use GFP_NOFS in order to prevent recursion. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 703501d3ed196f..a1d8620e8cb7c8 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -948,7 +948,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, struct nfs4_layoutreturn *lrp; int status = 0; - lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); + lrp = kzalloc(sizeof(*lrp), GFP_NOFS); if (unlikely(lrp == NULL)) { status = -ENOMEM; spin_lock(&ino->i_lock); From 4ef2e4f84c523ebbc930ce05fa27b9b1350f4a4b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 5 Feb 2015 17:27:39 -0500 Subject: [PATCH 479/601] NFSv4.1: Fix pnfs_put_lseg races pnfs_layoutreturn_free_lseg_async() can also race with inode put in the general case. We can now fix this, and also simplify the code. Cc: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 53 ++++++++++++++++++--------------------------------- 1 file changed, 19 insertions(+), 34 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a1d8620e8cb7c8..107b321be7d4e2 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -361,14 +361,9 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo, return true; } -static void pnfs_layoutreturn_free_lseg(struct work_struct *work) +static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg, + struct pnfs_layout_hdr *lo, struct inode *inode) { - struct pnfs_layout_segment *lseg; - struct pnfs_layout_hdr *lo; - struct inode *inode; - - lseg = container_of(work, struct pnfs_layout_segment, pls_work); - WARN_ON(atomic_read(&lseg->pls_refcount)); lo = lseg->pls_layout; inode = lo->plh_inode; @@ -383,24 +378,12 @@ static void pnfs_layoutreturn_free_lseg(struct work_struct *work) lo->plh_block_lgets++; lo->plh_return_iomode = 0; spin_unlock(&inode->i_lock); + pnfs_get_layout_hdr(lo); - pnfs_send_layoutreturn(lo, stateid, iomode, true); - spin_lock(&inode->i_lock); + /* Send an async layoutreturn so we dont deadlock */ + pnfs_send_layoutreturn(lo, stateid, iomode, false); } else - /* match pnfs_get_layout_hdr #2 in pnfs_put_lseg */ - pnfs_put_layout_hdr(lo); - pnfs_layout_remove_lseg(lo, lseg); - spin_unlock(&inode->i_lock); - pnfs_free_lseg(lseg); - /* match pnfs_get_layout_hdr #1 in pnfs_put_lseg */ - pnfs_put_layout_hdr(lo); -} - -static void -pnfs_layoutreturn_free_lseg_async(struct pnfs_layout_segment *lseg) -{ - INIT_WORK(&lseg->pls_work, pnfs_layoutreturn_free_lseg); - queue_work(nfsiod_workqueue, &lseg->pls_work); + spin_unlock(&inode->i_lock); } void @@ -415,21 +398,23 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg) dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, atomic_read(&lseg->pls_refcount), test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); + + /* Handle the case where refcount != 1 */ + if (atomic_add_unless(&lseg->pls_refcount, -1, 1)) + return; + lo = lseg->pls_layout; inode = lo->plh_inode; + /* Do we need a layoutreturn? */ + if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags)) + pnfs_layoutreturn_before_put_lseg(lseg, lo, inode); + if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { pnfs_get_layout_hdr(lo); - if (pnfs_layout_need_return(lo, lseg)) { - spin_unlock(&inode->i_lock); - /* hdr reference dropped in nfs4_layoutreturn_release */ - pnfs_get_layout_hdr(lo); - pnfs_layoutreturn_free_lseg_async(lseg); - } else { - pnfs_layout_remove_lseg(lo, lseg); - spin_unlock(&inode->i_lock); - pnfs_free_lseg(lseg); - pnfs_put_layout_hdr(lo); - } + pnfs_layout_remove_lseg(lo, lseg); + spin_unlock(&inode->i_lock); + pnfs_free_lseg(lseg); + pnfs_put_layout_hdr(lo); } } EXPORT_SYMBOL_GPL(pnfs_put_lseg); From 20e783e39e55c2615fb61d1b3d139ee9edcf6772 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 28 Jan 2015 17:54:38 +0100 Subject: [PATCH 480/601] ARM: 8296/1: cache-l2x0: clean up aurora cache handling The aurora cache controller is the only remaining user of a couple of functions in this file and are completely unused when that is disabled, leading to build warnings: arch/arm/mm/cache-l2x0.c:167:13: warning: 'l2x0_cache_sync' defined but not used [-Wunused-function] arch/arm/mm/cache-l2x0.c:184:13: warning: 'l2x0_flush_all' defined but not used [-Wunused-function] arch/arm/mm/cache-l2x0.c:194:13: warning: 'l2x0_disable' defined but not used [-Wunused-function] With the knowledge that the code is now aurora-specific, we can simplify it noticeably: - The pl310 errata workarounds are not needed on aurora and can be removed - As confirmed by Thomas Petazzoni from the data sheet, the cache_wait() macro is never needed. - No need to hold the lock across atomic cache sync - We can load the l2x0_base into a local variable across operations There should be no functional change in this patch, but readability and the generated object code improves, along with avoiding the warnings. (on Armada 370 RD and Armada XP GP, boot tested, plus a little bit of DMA traffic by reading data from a SD card) Acked-by: Thomas Petazzoni Tested-by: Thomas Petazzoni Signed-off-by: Arnd Bergmann Signed-off-by: Russell King --- arch/arm/mm/cache-l2x0.c | 111 ++++++++++++++------------------------- 1 file changed, 38 insertions(+), 73 deletions(-) diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 01de138094548b..404c598da27d9b 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -156,73 +156,6 @@ static void l2c_disable(void) dsb(st); } -#ifdef CONFIG_CACHE_PL310 -static inline void cache_wait(void __iomem *reg, unsigned long mask) -{ - /* cache operations by line are atomic on PL310 */ -} -#else -#define cache_wait l2c_wait_mask -#endif - -static inline void cache_sync(void) -{ - void __iomem *base = l2x0_base; - - writel_relaxed(0, base + sync_reg_offset); - cache_wait(base + L2X0_CACHE_SYNC, 1); -} - -#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915) -static inline void debug_writel(unsigned long val) -{ - l2c_set_debug(l2x0_base, val); -} -#else -/* Optimised out for non-errata case */ -static inline void debug_writel(unsigned long val) -{ -} -#endif - -static void l2x0_cache_sync(void) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&l2x0_lock, flags); - cache_sync(); - raw_spin_unlock_irqrestore(&l2x0_lock, flags); -} - -static void __l2x0_flush_all(void) -{ - debug_writel(0x03); - __l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY); - cache_sync(); - debug_writel(0x00); -} - -static void l2x0_flush_all(void) -{ - unsigned long flags; - - /* clean all ways */ - raw_spin_lock_irqsave(&l2x0_lock, flags); - __l2x0_flush_all(); - raw_spin_unlock_irqrestore(&l2x0_lock, flags); -} - -static void l2x0_disable(void) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&l2x0_lock, flags); - __l2x0_flush_all(); - l2c_write_sec(0, l2x0_base, L2X0_CTRL); - dsb(st); - raw_spin_unlock_irqrestore(&l2x0_lock, flags); -} - static void l2c_save(void __iomem *base) { l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); @@ -1349,14 +1282,15 @@ static unsigned long calc_range_end(unsigned long start, unsigned long end) static void aurora_pa_range(unsigned long start, unsigned long end, unsigned long offset) { + void __iomem *base = l2x0_base; unsigned long flags; raw_spin_lock_irqsave(&l2x0_lock, flags); - writel_relaxed(start, l2x0_base + AURORA_RANGE_BASE_ADDR_REG); - writel_relaxed(end, l2x0_base + offset); + writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG); + writel_relaxed(end, base + offset); raw_spin_unlock_irqrestore(&l2x0_lock, flags); - cache_sync(); + writel_relaxed(0, base + AURORA_SYNC_REG); } static void aurora_inv_range(unsigned long start, unsigned long end) @@ -1416,6 +1350,37 @@ static void aurora_flush_range(unsigned long start, unsigned long end) } } +static void aurora_flush_all(void) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + + /* clean all ways */ + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2c_op_way(base + L2X0_CLEAN_INV_WAY); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + + writel_relaxed(0, base + AURORA_SYNC_REG); +} + +static void aurora_cache_sync(void) +{ + writel_relaxed(0, l2x0_base + AURORA_SYNC_REG); +} + +static void aurora_disable(void) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2c_op_way(base + L2X0_CLEAN_INV_WAY); + writel_relaxed(0, base + AURORA_SYNC_REG); + l2c_write_sec(0, base, L2X0_CTRL); + dsb(st); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + static void aurora_save(void __iomem *base) { l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL); @@ -1480,9 +1445,9 @@ static const struct l2c_init_data of_aurora_with_outer_data __initconst = { .inv_range = aurora_inv_range, .clean_range = aurora_clean_range, .flush_range = aurora_flush_range, - .flush_all = l2x0_flush_all, - .disable = l2x0_disable, - .sync = l2x0_cache_sync, + .flush_all = aurora_flush_all, + .disable = aurora_disable, + .sync = aurora_cache_sync, .resume = l2c_resume, }, }; From 1d88967900b87f94435581dad4ae319686c6ce10 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 28 Jan 2015 17:55:31 +0100 Subject: [PATCH 481/601] ARM: 8297/1: cache-l2x0: optimize aurora range operations The aurora_inv_range(), aurora_clean_range() and aurora_flush_range() functions are highly redundant, both in source and in object code, and they are harder to understand than necessary. By moving the range loop into the aurora_pa_range() function, they become trivial wrappers, and the object code start looking like what one would expect for an optimal implementation. Further optimization may be possible by using the per-CPU "virtual" registers to avoid the spinlocks in most cases. (on Armada 370 RD and Armada XP GP, boot tested, plus a little bit of DMA traffic by reading data from a SD card) Reviewed-by: Thomas Petazzoni Tested-by: Thomas Petazzoni Signed-off-by: Arnd Bergmann Signed-off-by: Russell King --- arch/arm/mm/cache-l2x0.c | 68 +++++++++++++--------------------------- 1 file changed, 22 insertions(+), 46 deletions(-) diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 404c598da27d9b..5ea2d6d417f715 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -1256,7 +1256,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = { * noninclusive, while the hardware cache range operations use * inclusive start and end addresses. */ -static unsigned long calc_range_end(unsigned long start, unsigned long end) +static unsigned long aurora_range_end(unsigned long start, unsigned long end) { /* * Limit the number of cache lines processed at once, @@ -1275,26 +1275,13 @@ static unsigned long calc_range_end(unsigned long start, unsigned long end) return end; } -/* - * Make sure 'start' and 'end' reference the same page, as L2 is PIPT - * and range operations only do a TLB lookup on the start address. - */ static void aurora_pa_range(unsigned long start, unsigned long end, - unsigned long offset) + unsigned long offset) { void __iomem *base = l2x0_base; + unsigned long range_end; unsigned long flags; - raw_spin_lock_irqsave(&l2x0_lock, flags); - writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG); - writel_relaxed(end, base + offset); - raw_spin_unlock_irqrestore(&l2x0_lock, flags); - - writel_relaxed(0, base + AURORA_SYNC_REG); -} - -static void aurora_inv_range(unsigned long start, unsigned long end) -{ /* * round start and end adresses up to cache line size */ @@ -1302,15 +1289,24 @@ static void aurora_inv_range(unsigned long start, unsigned long end) end = ALIGN(end, CACHE_LINE_SIZE); /* - * Invalidate all full cache lines between 'start' and 'end'. + * perform operation on all full cache lines between 'start' and 'end' */ while (start < end) { - unsigned long range_end = calc_range_end(start, end); - aurora_pa_range(start, range_end - CACHE_LINE_SIZE, - AURORA_INVAL_RANGE_REG); + range_end = aurora_range_end(start, end); + + raw_spin_lock_irqsave(&l2x0_lock, flags); + writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG); + writel_relaxed(range_end - CACHE_LINE_SIZE, base + offset); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + + writel_relaxed(0, base + AURORA_SYNC_REG); start = range_end; } } +static void aurora_inv_range(unsigned long start, unsigned long end) +{ + aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG); +} static void aurora_clean_range(unsigned long start, unsigned long end) { @@ -1318,36 +1314,16 @@ static void aurora_clean_range(unsigned long start, unsigned long end) * If L2 is forced to WT, the L2 will always be clean and we * don't need to do anything here. */ - if (!l2_wt_override) { - start &= ~(CACHE_LINE_SIZE - 1); - end = ALIGN(end, CACHE_LINE_SIZE); - while (start != end) { - unsigned long range_end = calc_range_end(start, end); - aurora_pa_range(start, range_end - CACHE_LINE_SIZE, - AURORA_CLEAN_RANGE_REG); - start = range_end; - } - } + if (!l2_wt_override) + aurora_pa_range(start, end, AURORA_CLEAN_RANGE_REG); } static void aurora_flush_range(unsigned long start, unsigned long end) { - start &= ~(CACHE_LINE_SIZE - 1); - end = ALIGN(end, CACHE_LINE_SIZE); - while (start != end) { - unsigned long range_end = calc_range_end(start, end); - /* - * If L2 is forced to WT, the L2 will always be clean and we - * just need to invalidate. - */ - if (l2_wt_override) - aurora_pa_range(start, range_end - CACHE_LINE_SIZE, - AURORA_INVAL_RANGE_REG); - else - aurora_pa_range(start, range_end - CACHE_LINE_SIZE, - AURORA_FLUSH_RANGE_REG); - start = range_end; - } + if (l2_wt_override) + aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG); + else + aurora_pa_range(start, end, AURORA_FLUSH_RANGE_REG); } static void aurora_flush_all(void) From 4dda9c8a5e34773b290c6b5938ccb36e7fcdf35c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Feb 2015 15:00:06 -0500 Subject: [PATCH 482/601] SUNRPC: Set SO_REUSEPORT socket option for TCP connections When using TCP, we need the ability to reuse port numbers after a disconnection, so that the NFSv3 server knows that we're the same client. Currently we use a hack to work around the TCP socket's TIME_WAIT: we send an RST instead of closing, which doesn't always work... The SO_REUSEPORT option added in Linux 3.9 allows us to bind multiple TCP connections to the same source address+port combination, and thus to use ordinary TCP close() instead of the current hack. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 53 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 87ce7e8bb8dc79..484c5040436a46 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1666,6 +1666,39 @@ static unsigned short xs_get_random_port(void) return rand + xprt_min_resvport; } +/** + * xs_set_reuseaddr_port - set the socket's port and address reuse options + * @sock: socket + * + * Note that this function has to be called on all sockets that share the + * same port, and it must be called before binding. + */ +static void xs_sock_set_reuseport(struct socket *sock) +{ + char opt = 1; + + kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); +} + +static unsigned short xs_sock_getport(struct socket *sock) +{ + struct sockaddr_storage buf; + int buflen; + unsigned short port = 0; + + if (kernel_getsockname(sock, (struct sockaddr *)&buf, &buflen) < 0) + goto out; + switch (buf.ss_family) { + case AF_INET6: + port = ntohs(((struct sockaddr_in6 *)&buf)->sin6_port); + break; + case AF_INET: + port = ntohs(((struct sockaddr_in *)&buf)->sin_port); + } +out: + return port; +} + /** * xs_set_port - reset the port number in the remote endpoint address * @xprt: generic transport @@ -1680,6 +1713,12 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) xs_update_peer_port(xprt); } +static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock) +{ + if (transport->srcport == 0) + transport->srcport = xs_sock_getport(sock); +} + static unsigned short xs_get_srcport(struct sock_xprt *transport) { unsigned short port = transport->srcport; @@ -1833,7 +1872,8 @@ static void xs_dummy_setup_socket(struct work_struct *work) } static struct socket *xs_create_sock(struct rpc_xprt *xprt, - struct sock_xprt *transport, int family, int type, int protocol) + struct sock_xprt *transport, int family, int type, + int protocol, bool reuseport) { struct socket *sock; int err; @@ -1846,6 +1886,9 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, } xs_reclassify_socket(family, sock); + if (reuseport) + xs_sock_set_reuseport(sock); + err = xs_bind(transport, sock); if (err) { sock_release(sock); @@ -2047,7 +2090,8 @@ static void xs_udp_setup_socket(struct work_struct *work) /* Start by resetting any existing state */ xs_reset_transport(transport); sock = xs_create_sock(xprt, transport, - xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP); + xs_addr(xprt)->sa_family, SOCK_DGRAM, + IPPROTO_UDP, false); if (IS_ERR(sock)) goto out; @@ -2149,7 +2193,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_allocation = GFP_ATOMIC; /* socket options */ - sk->sk_userlocks |= SOCK_BINDPORT_LOCK; sock_reset_flag(sk, SOCK_LINGER); tcp_sk(sk)->linger2 = 0; tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; @@ -2174,6 +2217,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); switch (ret) { case 0: + xs_set_srcport(transport, sock); case -EINPROGRESS: /* SYN_SENT! */ if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) @@ -2202,7 +2246,8 @@ static void xs_tcp_setup_socket(struct work_struct *work) if (!sock) { clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); sock = xs_create_sock(xprt, transport, - xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP); + xs_addr(xprt)->sa_family, SOCK_STREAM, + IPPROTO_TCP, true); if (IS_ERR(sock)) { status = PTR_ERR(sock); goto out; From 3913c78c3ab61500ddf7c2c9617cc4f8e2c583e0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Feb 2015 21:44:04 -0500 Subject: [PATCH 483/601] SUNRPC: Handle EADDRINUSE on connect Now that we're setting SO_REUSEPORT, we still need to handle the case where a connect() is attempted, but the old socket is still lingering. Essentially, all we want to do here is handle the error by waiting a few seconds and then retrying. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 3 +++ net/sunrpc/xprtsock.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 3f5d4d48f0cbd2..612aa73bbc60c9 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1826,6 +1826,7 @@ call_connect_status(struct rpc_task *task) case -ECONNABORTED: case -ENETUNREACH: case -EHOSTUNREACH: + case -EADDRINUSE: case -ENOBUFS: case -EPIPE: if (RPC_IS_SOFTCONN(task)) @@ -1934,6 +1935,7 @@ call_transmit_status(struct rpc_task *task) } case -ECONNRESET: case -ECONNABORTED: + case -EADDRINUSE: case -ENOTCONN: case -ENOBUFS: case -EPIPE: @@ -2053,6 +2055,7 @@ call_status(struct rpc_task *task) case -ECONNRESET: case -ECONNABORTED: rpc_force_rebind(clnt); + case -EADDRINUSE: case -ENOBUFS: rpc_delay(task, 3*HZ); case -EPIPE: diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 484c5040436a46..20f25a837e06b0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -721,6 +721,7 @@ static int xs_tcp_send_request(struct rpc_task *task) xs_tcp_shutdown(xprt); case -ECONNREFUSED: case -ENOTCONN: + case -EADDRINUSE: case -EPIPE: clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); } @@ -2299,6 +2300,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: + case -EADDRINUSE: case -ENOBUFS: /* retry with existing socket, after a delay */ goto out; From 76698b2358de466d23f44eaa1b0c9ebe8206099a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Feb 2015 16:28:58 -0500 Subject: [PATCH 484/601] SUNRPC: Do not clear the source port in xs_reset_transport Now that we can reuse bound ports after a close, we never really want to clear the transport's source port after it has been set. Doing so really messes up the NFSv3 DRC on the server. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 20f25a837e06b0..ea1882f97912bd 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -811,8 +811,6 @@ static void xs_reset_transport(struct sock_xprt *transport) if (sk == NULL) return; - transport->srcport = 0; - write_lock_bh(&sk->sk_callback_lock); transport->inet = NULL; transport->sock = NULL; From 6cc7e908362a9dfec3c821f77ec98b6758592060 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Feb 2015 18:35:25 -0500 Subject: [PATCH 485/601] SUNRPC: Ensure xs_reset_transport() resets the close connection flags Otherwise, we may end up looping. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ea1882f97912bd..0fa7ed93dc2053 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -803,10 +803,21 @@ static void xs_error_report(struct sock *sk) read_unlock_bh(&sk->sk_callback_lock); } +static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) +{ + smp_mb__before_atomic(); + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); + clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + clear_bit(XPRT_CLOSING, &xprt->state); + smp_mb__after_atomic(); +} + static void xs_reset_transport(struct sock_xprt *transport) { struct socket *sock = transport->sock; struct sock *sk = transport->inet; + struct rpc_xprt *xprt = &transport->xprt; if (sk == NULL) return; @@ -819,8 +830,9 @@ static void xs_reset_transport(struct sock_xprt *transport) xs_restore_old_callbacks(transport, sk); write_unlock_bh(&sk->sk_callback_lock); + xs_sock_reset_connection_flags(xprt); - trace_rpc_socket_close(&transport->xprt, sock); + trace_rpc_socket_close(xprt, sock); sock_release(sock); } @@ -845,11 +857,6 @@ static void xs_close(struct rpc_xprt *xprt) xs_reset_transport(transport); xprt->reestablish_timeout = 0; - smp_mb__before_atomic(); - clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); - clear_bit(XPRT_CLOSE_WAIT, &xprt->state); - clear_bit(XPRT_CLOSING, &xprt->state); - smp_mb__after_atomic(); xprt_disconnect_done(xprt); } @@ -1455,16 +1462,6 @@ static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt) xprt_clear_connecting(xprt); } -static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) -{ - smp_mb__before_atomic(); - clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); - clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state); - clear_bit(XPRT_CLOSE_WAIT, &xprt->state); - clear_bit(XPRT_CLOSING, &xprt->state); - smp_mb__after_atomic(); -} - static void xs_sock_mark_closed(struct rpc_xprt *xprt) { xs_sock_reset_connection_flags(xprt); From 718ba5b87343df303017585200ee182e937eabfc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Feb 2015 18:19:25 -0500 Subject: [PATCH 486/601] SUNRPC: Add helpers to prevent socket create from racing The socket lock is currently held by the task that is requesting the connection be established. While that is efficient in the case where the connection happens quickly, it is racy in the case where it doesn't. What we really want is for the connect helper to be able to block access to the socket while it is being set up. This patch does so by arranging to transfer the socket lock from the task that is requesting the connect attempt, and then releasing that lock once everything is done. This scheme also gives us automatic protection against collisions with the RPC close code, so we can kill the cancel_delayed_work_sync() call in xs_close(). Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 +++ net/sunrpc/xprt.c | 37 +++++++++++++++++++++++++++++++++---- net/sunrpc/xprtsock.c | 7 +++++-- 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 9d27ac45b90955..2926e618dbc603 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -347,6 +347,9 @@ void xprt_force_disconnect(struct rpc_xprt *xprt); void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); int xs_swapper(struct rpc_xprt *xprt, int enable); +bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); +void xprt_unlock_connect(struct rpc_xprt *, void *); + /* * Reserved bit positions in xprt->state */ diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ebbefad21a370b..ff3574df834491 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -690,6 +690,37 @@ xprt_init_autodisconnect(unsigned long data) spin_unlock(&xprt->transport_lock); } +bool xprt_lock_connect(struct rpc_xprt *xprt, + struct rpc_task *task, + void *cookie) +{ + bool ret = false; + + spin_lock_bh(&xprt->transport_lock); + if (!test_bit(XPRT_LOCKED, &xprt->state)) + goto out; + if (xprt->snd_task != task) + goto out; + xprt->snd_task = cookie; + ret = true; +out: + spin_unlock_bh(&xprt->transport_lock); + return ret; +} + +void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie) +{ + spin_lock_bh(&xprt->transport_lock); + if (xprt->snd_task != cookie) + goto out; + if (!test_bit(XPRT_LOCKED, &xprt->state)) + goto out; + xprt->snd_task =NULL; + xprt->ops->release_xprt(xprt, NULL); +out: + spin_unlock_bh(&xprt->transport_lock); +} + /** * xprt_connect - schedule a transport connect operation * @task: RPC task that is requesting the connect @@ -712,9 +743,7 @@ void xprt_connect(struct rpc_task *task) if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) xprt->ops->close(xprt); - if (xprt_connected(xprt)) - xprt_release_write(xprt, task); - else { + if (!xprt_connected(xprt)) { task->tk_rqstp->rq_bytes_sent = 0; task->tk_timeout = task->tk_rqstp->rq_timeout; rpc_sleep_on(&xprt->pending, task, xprt_connect_status); @@ -726,6 +755,7 @@ void xprt_connect(struct rpc_task *task) xprt->stat.connect_start = jiffies; xprt->ops->connect(xprt, task); } + xprt_release_write(xprt, task); } static void xprt_connect_status(struct rpc_task *task) @@ -758,7 +788,6 @@ static void xprt_connect_status(struct rpc_task *task) dprintk("RPC: %5u xprt_connect_status: error %d connecting to " "server %s\n", task->tk_pid, -task->tk_status, xprt->servername); - xprt_release_write(xprt, task); task->tk_status = -EIO; } } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0fa7ed93dc2053..e57d8ed2c4d879 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -852,8 +852,6 @@ static void xs_close(struct rpc_xprt *xprt) dprintk("RPC: xs_close xprt %p\n", xprt); - cancel_delayed_work_sync(&transport->connect_worker); - xs_reset_transport(transport); xprt->reestablish_timeout = 0; @@ -2101,6 +2099,7 @@ static void xs_udp_setup_socket(struct work_struct *work) trace_rpc_socket_connect(xprt, sock, 0); status = 0; out: + xprt_unlock_connect(xprt, transport); xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); } @@ -2286,6 +2285,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) case 0: case -EINPROGRESS: case -EALREADY: + xprt_unlock_connect(xprt, transport); xprt_clear_connecting(xprt); return; case -EINVAL: @@ -2303,6 +2303,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) out_eagain: status = -EAGAIN; out: + xprt_unlock_connect(xprt, transport); xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); } @@ -2325,6 +2326,8 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport)); + if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) { dprintk("RPC: xs_connect delayed xprt %p for %lu " "seconds\n", From de84d89030fa4efa44c02c96c8b4a8176042c4ff Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Feb 2015 16:49:48 -0500 Subject: [PATCH 487/601] SUNRPC: TCP/UDP always close the old socket before reconnecting It is not safe to call xs_reset_transport() from inside xs_udp_setup_socket() or xs_tcp_setup_socket(), since they do not own the correct locks. Instead, do it in xs_connect(). Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e57d8ed2c4d879..e53a5ca03daf46 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2081,8 +2081,6 @@ static void xs_udp_setup_socket(struct work_struct *work) struct socket *sock = transport->sock; int status = -EIO; - /* Start by resetting any existing state */ - xs_reset_transport(transport); sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP, false); @@ -2328,6 +2326,9 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport)); + /* Start by resetting any existing state */ + xs_reset_transport(transport); + if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) { dprintk("RPC: xs_connect delayed xprt %p for %lu " "seconds\n", From 4efdd92c921135175a85452cd41273d9e2788db3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Feb 2015 15:34:28 -0500 Subject: [PATCH 488/601] SUNRPC: Remove TCP client connection reset hack Instead we rely on SO_REUSEPORT to provide the reconnection semantics that we need for NFSv2/v3. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 - net/sunrpc/xprtsock.c | 67 +------------------------------------ 2 files changed, 1 insertion(+), 67 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 2926e618dbc603..86af854338b5e8 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -363,7 +363,6 @@ void xprt_unlock_connect(struct rpc_xprt *, void *); #define XPRT_CONNECTION_ABORT (7) #define XPRT_CONNECTION_CLOSE (8) #define XPRT_CONGESTED (9) -#define XPRT_CONNECTION_REUSE (10) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e53a5ca03daf46..dbf279cd44945b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -796,8 +796,6 @@ static void xs_error_report(struct sock *sk) dprintk("RPC: xs_error_report client %p, error=%d...\n", xprt, -err); trace_rpc_socket_error(xprt, sk->sk_socket, err); - if (test_bit(XPRT_CONNECTION_REUSE, &xprt->state)) - goto out; xprt_wake_pending_tasks(xprt, err); out: read_unlock_bh(&sk->sk_callback_lock); @@ -2102,57 +2100,6 @@ static void xs_udp_setup_socket(struct work_struct *work) xprt_wake_pending_tasks(xprt, status); } -/* - * We need to preserve the port number so the reply cache on the server can - * find our cached RPC replies when we get around to reconnecting. - */ -static void xs_abort_connection(struct sock_xprt *transport) -{ - int result; - struct sockaddr any; - - dprintk("RPC: disconnecting xprt %p to reuse port\n", transport); - - /* - * Disconnect the transport socket by doing a connect operation - * with AF_UNSPEC. This should return immediately... - */ - memset(&any, 0, sizeof(any)); - any.sa_family = AF_UNSPEC; - result = kernel_connect(transport->sock, &any, sizeof(any), 0); - trace_rpc_socket_reset_connection(&transport->xprt, - transport->sock, result); - if (!result) - xs_sock_reset_connection_flags(&transport->xprt); - dprintk("RPC: AF_UNSPEC connect return code %d\n", result); -} - -static void xs_tcp_reuse_connection(struct sock_xprt *transport) -{ - unsigned int state = transport->inet->sk_state; - - if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) { - /* we don't need to abort the connection if the socket - * hasn't undergone a shutdown - */ - if (transport->inet->sk_shutdown == 0) - return; - dprintk("RPC: %s: TCP_CLOSEd and sk_shutdown set to %d\n", - __func__, transport->inet->sk_shutdown); - } - if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) { - /* we don't need to abort the connection if the socket - * hasn't undergone a shutdown - */ - if (transport->inet->sk_shutdown == 0) - return; - dprintk("RPC: %s: ESTABLISHED/SYN_SENT " - "sk_shutdown set to %d\n", - __func__, transport->inet->sk_shutdown); - } - xs_abort_connection(transport); -} - static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -2245,18 +2192,6 @@ static void xs_tcp_setup_socket(struct work_struct *work) status = PTR_ERR(sock); goto out; } - } else { - int abort_and_exit; - - abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, - &xprt->state); - /* "close" the socket, preserving the local port */ - set_bit(XPRT_CONNECTION_REUSE, &xprt->state); - xs_tcp_reuse_connection(transport); - clear_bit(XPRT_CONNECTION_REUSE, &xprt->state); - - if (abort_and_exit) - goto out_eagain; } dprintk("RPC: worker connecting xprt %p via %s to " @@ -2296,9 +2231,9 @@ static void xs_tcp_setup_socket(struct work_struct *work) case -EADDRINUSE: case -ENOBUFS: /* retry with existing socket, after a delay */ + xs_tcp_force_close(xprt); goto out; } -out_eagain: status = -EAGAIN; out: xprt_unlock_connect(xprt, transport); From 9cbc94fb06f98de0e8d393eaff09c790f4c3ba46 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Feb 2015 15:50:27 -0500 Subject: [PATCH 489/601] SUNRPC: Remove TCP socket linger code Now that we no longer use the partial shutdown code when closing the socket, we no longer need to worry about the TCP linger2 state. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index dbf279cd44945b..c65f7401928869 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1427,37 +1427,6 @@ static void xs_tcp_data_ready(struct sock *sk) read_unlock_bh(&sk->sk_callback_lock); } -/* - * Do the equivalent of linger/linger2 handling for dealing with - * broken servers that don't close the socket in a timely - * fashion - */ -static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt, - unsigned long timeout) -{ - struct sock_xprt *transport; - - if (xprt_test_and_set_connecting(xprt)) - return; - set_bit(XPRT_CONNECTION_ABORT, &xprt->state); - transport = container_of(xprt, struct sock_xprt, xprt); - queue_delayed_work(rpciod_workqueue, &transport->connect_worker, - timeout); -} - -static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt) -{ - struct sock_xprt *transport; - - transport = container_of(xprt, struct sock_xprt, xprt); - - if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) || - !cancel_delayed_work(&transport->connect_worker)) - return; - clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); - xprt_clear_connecting(xprt); -} - static void xs_sock_mark_closed(struct rpc_xprt *xprt) { xs_sock_reset_connection_flags(xprt); @@ -1513,7 +1482,6 @@ static void xs_tcp_state_change(struct sock *sk) clear_bit(XPRT_CONNECTED, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); smp_mb__after_atomic(); - xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout); break; case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ @@ -1530,13 +1498,11 @@ static void xs_tcp_state_change(struct sock *sk) break; case TCP_LAST_ACK: set_bit(XPRT_CLOSING, &xprt->state); - xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout); smp_mb__before_atomic(); clear_bit(XPRT_CONNECTED, &xprt->state); smp_mb__after_atomic(); break; case TCP_CLOSE: - xs_tcp_cancel_linger_timeout(xprt); xs_sock_mark_closed(xprt); } out: @@ -2134,7 +2100,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) /* socket options */ sock_reset_flag(sk, SOCK_LINGER); - tcp_sk(sk)->linger2 = 0; tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; xprt_clear_connected(xprt); From 505936f59f1e4cd0ff92ae5abc7aae64fb74dbdb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Feb 2015 16:00:01 -0500 Subject: [PATCH 490/601] SUNRPC: Cleanup to remove remaining uses of XPRT_CONNECTION_ABORT Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 - net/sunrpc/xprtsock.c | 3 --- 2 files changed, 4 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 86af854338b5e8..ae39d478a272d7 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -360,7 +360,6 @@ void xprt_unlock_connect(struct rpc_xprt *, void *); #define XPRT_BOUND (4) #define XPRT_BINDING (5) #define XPRT_CLOSING (6) -#define XPRT_CONNECTION_ABORT (7) #define XPRT_CONNECTION_CLOSE (8) #define XPRT_CONGESTED (9) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c65f7401928869..2f8db3499a172f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -804,7 +804,6 @@ static void xs_error_report(struct sock *sk) static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) { smp_mb__before_atomic(); - clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); clear_bit(XPRT_CLOSING, &xprt->state); @@ -1904,7 +1903,6 @@ static int xs_local_setup_socket(struct sock_xprt *transport) struct socket *sock; int status = -EIO; - clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); status = __sock_create(xprt->xprt_net, AF_LOCAL, SOCK_STREAM, 0, &sock, 1); if (status < 0) { @@ -2149,7 +2147,6 @@ static void xs_tcp_setup_socket(struct work_struct *work) int status = -EIO; if (!sock) { - clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family, SOCK_STREAM, IPPROTO_TCP, true); From f3240c1f6d18591b2c1ef33ed18d5ca91e62c104 Mon Sep 17 00:00:00 2001 From: Hans-Christian Egtvedt Date: Thu, 25 Dec 2014 19:58:50 +0100 Subject: [PATCH 491/601] avr32: wire up missing syscalls This patch adds a bunch of missing syscalls to AVR32: __NR_pread64 __NR_pwrite64 __NR_timerfd_create __NR_fallocate __NR_timerfd_settime __NR_timerfd_gettime __NR_signalfd4 __NR_eventfd2 __NR_epoll_create1 __NR_dup3 __NR_pipe2 __NR_inotify_init1 __NR_preadv __NR_pwritev __NR_rt_tgsigqueueinfo __NR_perf_event_open __NR_recvmmsg __NR_fanotify_init __NR_fanotify_mark __NR_prlimit64 __NR_name_to_handle_at __NR_open_by_handle_at __NR_clock_adjtime __NR_syncfs __NR_sendmmsg __NR_process_vm_readv __NR_process_vm_writev __NR_kcmp __NR_finit_module __NR_sched_setattr __NR_sched_getattr __NR_renameat2 __NR_seccomp __NR_getrandom __NR_memfd_create __NR_bpf __NR_execveat On AVR32, all parameters beyond the 5th are passed on the stack. System calls don't use the stack -- they borrow a callee-saved register instead. This means that syscalls that take 6 parameters must be called through a stub that pushes the last parameter on the stack. This relates to syscall fallocate, fanotify_mark, process_vm_readv, and process_vm_writev. Signed-off-by: Hans-Christian Egtvedt --- arch/avr32/include/asm/unistd.h | 2 +- arch/avr32/include/uapi/asm/unistd.h | 41 +++++++++++++++++++++++++--- arch/avr32/kernel/syscall-stubs.S | 36 ++++++++++++++++++++++++ arch/avr32/kernel/syscall_table.S | 37 +++++++++++++++++++++++++ 4 files changed, 111 insertions(+), 5 deletions(-) diff --git a/arch/avr32/include/asm/unistd.h b/arch/avr32/include/asm/unistd.h index c1eb080e45fee5..2011bee3f25258 100644 --- a/arch/avr32/include/asm/unistd.h +++ b/arch/avr32/include/asm/unistd.h @@ -10,7 +10,7 @@ #include -#define NR_syscalls 284 +#define NR_syscalls 321 /* Old stuff */ #define __IGNORE_uselib diff --git a/arch/avr32/include/uapi/asm/unistd.h b/arch/avr32/include/uapi/asm/unistd.h index 8822bf46ddc683..bbe2fba565cdec 100644 --- a/arch/avr32/include/uapi/asm/unistd.h +++ b/arch/avr32/include/uapi/asm/unistd.h @@ -222,7 +222,6 @@ #define __NR_epoll_wait 207 #define __NR_remap_file_pages 208 #define __NR_set_tid_address 209 - #define __NR_timer_create 210 #define __NR_timer_settime 211 #define __NR_timer_gettime 212 @@ -238,7 +237,6 @@ /* 222 reserved for tux */ #define __NR_utimes 223 #define __NR_fadvise64_64 224 - #define __NR_cacheflush 225 #define __NR_vserver 226 @@ -281,7 +279,6 @@ #define __NR_tee 263 #define __NR_vmsplice 264 #define __NR_epoll_pwait 265 - #define __NR_msgget 266 #define __NR_msgsnd 267 #define __NR_msgrcv 268 @@ -294,11 +291,47 @@ #define __NR_shmget 275 #define __NR_shmdt 276 #define __NR_shmctl 277 - #define __NR_utimensat 278 #define __NR_signalfd 279 /* 280 was __NR_timerfd */ #define __NR_eventfd 281 #define __NR_setns 283 +#define __NR_pread64 284 +#define __NR_pwrite64 285 +#define __NR_timerfd_create 286 +#define __NR_fallocate 287 +#define __NR_timerfd_settime 288 +#define __NR_timerfd_gettime 289 +#define __NR_signalfd4 290 +#define __NR_eventfd2 291 +#define __NR_epoll_create1 292 +#define __NR_dup3 293 +#define __NR_pipe2 294 +#define __NR_inotify_init1 295 +#define __NR_preadv 296 +#define __NR_pwritev 297 +#define __NR_rt_tgsigqueueinfo 298 +#define __NR_perf_event_open 299 +#define __NR_recvmmsg 300 +#define __NR_fanotify_init 301 +#define __NR_fanotify_mark 302 +#define __NR_prlimit64 303 +#define __NR_name_to_handle_at 304 +#define __NR_open_by_handle_at 305 +#define __NR_clock_adjtime 306 +#define __NR_syncfs 307 +#define __NR_sendmmsg 308 +#define __NR_process_vm_readv 309 +#define __NR_process_vm_writev 310 +#define __NR_kcmp 311 +#define __NR_finit_module 312 +#define __NR_sched_setattr 313 +#define __NR_sched_getattr 314 +#define __NR_renameat2 315 +#define __NR_seccomp 316 +#define __NR_getrandom 317 +#define __NR_memfd_create 318 +#define __NR_bpf 319 +#define __NR_execveat 320 #endif /* _UAPI__ASM_AVR32_UNISTD_H */ diff --git a/arch/avr32/kernel/syscall-stubs.S b/arch/avr32/kernel/syscall-stubs.S index b5fc927cd39889..f9c68fab0e2fc1 100644 --- a/arch/avr32/kernel/syscall-stubs.S +++ b/arch/avr32/kernel/syscall-stubs.S @@ -88,3 +88,39 @@ __sys_sync_file_range: call sys_sync_file_range sub sp, -4 popm pc + + .global __sys_fallocate + .type __sys_fallocate,@function +__sys_fallocate: + pushm lr + st.w --sp, ARG6 + call sys_fallocate + sub sp, -4 + popm pc + + .global __sys_fanotify_mark + .type __sys_fanotify_mark,@function +__sys_fanotify_mark: + pushm lr + st.w --sp, ARG6 + call sys_fanotify_mark + sub sp, -4 + popm pc + + .global __sys_process_vm_readv + .type __sys_process_vm_readv,@function +__sys_process_vm_readv: + pushm lr + st.w --sp, ARG6 + call sys_process_vm_readv + sub sp, -4 + popm pc + + .global __sys_process_vm_writev + .type __sys_process_vm_writev,@function +__sys_process_vm_writev: + pushm lr + st.w --sp, ARG6 + call sys_process_vm_writev + sub sp, -4 + popm pc diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S index 017a904180c82b..c3b593bfc3b3d8 100644 --- a/arch/avr32/kernel/syscall_table.S +++ b/arch/avr32/kernel/syscall_table.S @@ -297,4 +297,41 @@ sys_call_table: .long sys_eventfd .long sys_recvmmsg .long sys_setns + .long sys_pread64 + .long sys_pwrite64 /* 285 */ + .long sys_timerfd_create + .long __sys_fallocate + .long sys_timerfd_settime + .long sys_timerfd_gettime + .long sys_signalfd4 /* 290 */ + .long sys_eventfd2 + .long sys_epoll_create1 + .long sys_dup3 + .long sys_pipe2 + .long sys_inotify_init1 /* 295 */ + .long sys_preadv + .long sys_pwritev + .long sys_rt_tgsigqueueinfo + .long sys_perf_event_open + .long sys_recvmmsg /* 300 */ + .long sys_fanotify_init + .long __sys_fanotify_mark + .long sys_prlimit64 + .long sys_name_to_handle_at + .long sys_open_by_handle_at /* 305 */ + .long sys_clock_adjtime + .long sys_syncfs + .long sys_sendmmsg + .long __sys_process_vm_readv + .long __sys_process_vm_writev /* 310 */ + .long sys_kcmp + .long sys_finit_module + .long sys_sched_setattr + .long sys_sched_getattr + .long sys_renameat2 /* 315 */ + .long sys_seccomp + .long sys_getrandom + .long sys_memfd_create + .long sys_bpf + .long sys_execveat /* 320 */ .long sys_ni_syscall /* r8 is saturated at nr_syscalls */ From a31c353de1c25f631a684ebc794aba699bddfe26 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Sun, 11 Jan 2015 03:00:16 +0100 Subject: [PATCH 492/601] avr32: remove fake at91 cpu identification cpu_is_at91* are not used in any driver outside mach-at91. Remove those useless definitions. Signed-off-by: Alexandre Belloni Acked-by: Hans-Christian Egtvedt Acked-by: Nicolas Ferre --- arch/avr32/mach-at32ap/include/mach/cpu.h | 26 +---------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/arch/avr32/mach-at32ap/include/mach/cpu.h b/arch/avr32/mach-at32ap/include/mach/cpu.h index 16a24b14146c7e..4181086f4ddc56 100644 --- a/arch/avr32/mach-at32ap/include/mach/cpu.h +++ b/arch/avr32/mach-at32ap/include/mach/cpu.h @@ -1,5 +1,5 @@ /* - * AVR32 and (fake) AT91 CPU identification + * AVR32 CPU identification * * Copyright (C) 2007 Atmel Corporation * @@ -20,28 +20,4 @@ # define cpu_is_at32ap7000() (0) #endif -/* - * Since this is AVR32, we will never run on any AT91 CPU. But these - * definitions may reduce clutter in common drivers. - */ -#define cpu_is_at91rm9200() (0) -#define cpu_is_at91sam9xe() (0) -#define cpu_is_at91sam9260() (0) -#define cpu_is_at91sam9261() (0) -#define cpu_is_at91sam9263() (0) -#define cpu_is_at91sam9rl() (0) -#define cpu_is_at91sam9g10() (0) -#define cpu_is_at91sam9g20() (0) -#define cpu_is_at91sam9g45() (0) -#define cpu_is_at91sam9g45es() (0) -#define cpu_is_at91sam9m10() (0) -#define cpu_is_at91sam9g46() (0) -#define cpu_is_at91sam9m11() (0) -#define cpu_is_at91sam9x5() (0) -#define cpu_is_at91sam9g15() (0) -#define cpu_is_at91sam9g35() (0) -#define cpu_is_at91sam9x35() (0) -#define cpu_is_at91sam9g25() (0) -#define cpu_is_at91sam9x25() (0) - #endif /* __ASM_ARCH_CPU_H */ From 89422ad6f894c03dee85a3cdb6b33cec3e0b805b Mon Sep 17 00:00:00 2001 From: Hans-Christian Egtvedt Date: Sat, 31 Jan 2015 08:24:29 +0100 Subject: [PATCH 493/601] avr32: update all default configurations This patch runs all the default configurations through make silentoldconfig and make savedefconfig to rinse out outdated kconfig entries. for config in arch/avr32/configs/*defconfig; do make ARCH=avr32 `basename $config` make ARCH=avr32 silentoldconfig make ARCH=avr32 savedefconfig cp defconfig $config done Signed-off-by: Hans-Christian Egtvedt --- arch/avr32/configs/atngw100_defconfig | 30 +++++-------------- .../configs/atngw100_evklcd100_defconfig | 30 +++++-------------- .../configs/atngw100_evklcd101_defconfig | 30 +++++-------------- arch/avr32/configs/atngw100_mrmt_defconfig | 23 +++----------- arch/avr32/configs/atngw100mkii_defconfig | 30 +++++-------------- .../configs/atngw100mkii_evklcd100_defconfig | 30 +++++-------------- .../configs/atngw100mkii_evklcd101_defconfig | 30 +++++-------------- arch/avr32/configs/atstk1002_defconfig | 30 +++++-------------- arch/avr32/configs/atstk1003_defconfig | 26 +++++----------- arch/avr32/configs/atstk1004_defconfig | 27 +++++------------ arch/avr32/configs/atstk1006_defconfig | 30 +++++-------------- arch/avr32/configs/favr-32_defconfig | 26 ++++------------ arch/avr32/configs/hammerhead_defconfig | 22 +++----------- arch/avr32/configs/merisc_defconfig | 23 ++------------ arch/avr32/configs/mimc200_defconfig | 18 +++-------- 15 files changed, 97 insertions(+), 308 deletions(-) diff --git a/arch/avr32/configs/atngw100_defconfig b/arch/avr32/configs/atngw100_defconfig index 4733e38e7ae62c..ce0030020c2588 100644 --- a/arch/avr32/configs/atngw100_defconfig +++ b/arch/avr32/configs/atngw100_defconfig @@ -1,33 +1,28 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_BASE_FULL is not set # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m -# CONFIG_KPROBES is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BOARD_ATNGW100_MKI=y # CONFIG_OWNERSHIP_TRACE is not set CONFIG_NMI_DEBUGGING=y -CONFIG_PM=y CONFIG_CPU_FREQ=y # CONFIG_CPU_FREQ_STAT is not set CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_AVR32_AT32AP_CPUFREQ=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -52,7 +47,6 @@ CONFIG_INET6_IPCOMP=y CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set CONFIG_NETFILTER_XTABLES=y -# CONFIG_IP_NF_TARGET_ULOG is not set CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -60,7 +54,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -70,28 +63,24 @@ CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=m -CONFIG_MISC_DEVICES=y CONFIG_ATMEL_TCLIB=y CONFIG_NETDEVICES=y CONFIG_TUN=m -CONFIG_NET_ETHERNET=y CONFIG_MACB=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=m -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_FILTER=y CONFIG_PPP_MPPE=m CONFIG_PPPOE=m +CONFIG_PPP_ASYNC=m # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set CONFIG_SERIAL_ATMEL=y CONFIG_SERIAL_ATMEL_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_I2C=m CONFIG_I2C_CHARDEV=m @@ -128,7 +117,6 @@ CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set CONFIG_EXT4_FS=y -# CONFIG_EXT4_FS_XATTR is not set # CONFIG_DNOTIFY is not set CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=m @@ -140,7 +128,6 @@ CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=y CONFIG_UBIFS_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y @@ -149,8 +136,7 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DETECT_HUNG_TASK=y CONFIG_FRAME_POINTER=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/avr32/configs/atngw100_evklcd100_defconfig b/arch/avr32/configs/atngw100_evklcd100_defconfig index 1be0ee31bd91c2..01ff632249c074 100644 --- a/arch/avr32/configs/atngw100_evklcd100_defconfig +++ b/arch/avr32/configs/atngw100_evklcd100_defconfig @@ -1,35 +1,30 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_BASE_FULL is not set # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m -# CONFIG_KPROBES is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BOARD_ATNGW100_MKI=y CONFIG_BOARD_ATNGW100_EVKLCD10X=y CONFIG_BOARD_ATNGW100_EVKLCD10X_QVGA=y # CONFIG_OWNERSHIP_TRACE is not set CONFIG_NMI_DEBUGGING=y -CONFIG_PM=y CONFIG_CPU_FREQ=y # CONFIG_CPU_FREQ_STAT is not set CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_AVR32_AT32AP_CPUFREQ=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -54,7 +49,6 @@ CONFIG_INET6_IPCOMP=y CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set CONFIG_NETFILTER_XTABLES=y -# CONFIG_IP_NF_TARGET_ULOG is not set CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -62,7 +56,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -72,21 +65,17 @@ CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=m -CONFIG_MISC_DEVICES=y CONFIG_ATMEL_TCLIB=y CONFIG_NETDEVICES=y CONFIG_TUN=m -CONFIG_NET_ETHERNET=y CONFIG_MACB=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=m -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_FILTER=y CONFIG_PPP_MPPE=m CONFIG_PPPOE=m +CONFIG_PPP_ASYNC=m # CONFIG_INPUT_MOUSEDEV is not set CONFIG_INPUT_EVDEV=m # CONFIG_INPUT_KEYBOARD is not set @@ -94,9 +83,9 @@ CONFIG_INPUT_EVDEV=m CONFIG_INPUT_TOUCHSCREEN=y CONFIG_TOUCHSCREEN_WM97XX=m # CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_ATMEL=y CONFIG_SERIAL_ATMEL_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_I2C=m CONFIG_I2C_CHARDEV=m @@ -144,7 +133,6 @@ CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set CONFIG_EXT4_FS=y -# CONFIG_EXT4_FS_XATTR is not set # CONFIG_DNOTIFY is not set CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=m @@ -156,7 +144,6 @@ CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=y CONFIG_UBIFS_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y @@ -165,8 +152,7 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DETECT_HUNG_TASK=y CONFIG_FRAME_POINTER=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/avr32/configs/atngw100_evklcd101_defconfig b/arch/avr32/configs/atngw100_evklcd101_defconfig index 796e536f7bc435..c4021dfd534722 100644 --- a/arch/avr32/configs/atngw100_evklcd101_defconfig +++ b/arch/avr32/configs/atngw100_evklcd101_defconfig @@ -1,34 +1,29 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_BASE_FULL is not set # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m -# CONFIG_KPROBES is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BOARD_ATNGW100_MKI=y CONFIG_BOARD_ATNGW100_EVKLCD10X=y # CONFIG_OWNERSHIP_TRACE is not set CONFIG_NMI_DEBUGGING=y -CONFIG_PM=y CONFIG_CPU_FREQ=y # CONFIG_CPU_FREQ_STAT is not set CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_AVR32_AT32AP_CPUFREQ=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -53,7 +48,6 @@ CONFIG_INET6_IPCOMP=y CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set CONFIG_NETFILTER_XTABLES=y -# CONFIG_IP_NF_TARGET_ULOG is not set CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -61,7 +55,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -71,21 +64,17 @@ CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=m -CONFIG_MISC_DEVICES=y CONFIG_ATMEL_TCLIB=y CONFIG_NETDEVICES=y CONFIG_TUN=m -CONFIG_NET_ETHERNET=y CONFIG_MACB=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=m -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_FILTER=y CONFIG_PPP_MPPE=m CONFIG_PPPOE=m +CONFIG_PPP_ASYNC=m # CONFIG_INPUT_MOUSEDEV is not set CONFIG_INPUT_EVDEV=m # CONFIG_INPUT_KEYBOARD is not set @@ -93,9 +82,9 @@ CONFIG_INPUT_EVDEV=m CONFIG_INPUT_TOUCHSCREEN=y CONFIG_TOUCHSCREEN_WM97XX=m # CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_ATMEL=y CONFIG_SERIAL_ATMEL_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_I2C=m CONFIG_I2C_CHARDEV=m @@ -143,7 +132,6 @@ CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set CONFIG_EXT4_FS=y -# CONFIG_EXT4_FS_XATTR is not set # CONFIG_DNOTIFY is not set CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=m @@ -155,7 +143,6 @@ CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=y CONFIG_UBIFS_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y @@ -164,8 +151,7 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DETECT_HUNG_TASK=y CONFIG_FRAME_POINTER=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/avr32/configs/atngw100_mrmt_defconfig b/arch/avr32/configs/atngw100_mrmt_defconfig index 6838781e966fea..ffcc28df9dbc6a 100644 --- a/arch/avr32/configs/atngw100_mrmt_defconfig +++ b/arch/avr32/configs/atngw100_mrmt_defconfig @@ -1,14 +1,11 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_BASE_FULL is not set # CONFIG_SLUB_DEBUG is not set CONFIG_MODULES=y @@ -17,8 +14,8 @@ CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set # CONFIG_OWNERSHIP_TRACE is not set -CONFIG_PM=y # CONFIG_SUSPEND is not set +CONFIG_PM=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y CONFIG_CPU_FREQ_GOV_USERSPACE=y @@ -37,7 +34,6 @@ CONFIG_SYN_COOKIES=y # CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_BT=m -CONFIG_BT_L2CAP=m CONFIG_BT_RFCOMM=m CONFIG_BT_RFCOMM_TTY=y CONFIG_BT_HIDP=m @@ -49,7 +45,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -57,10 +52,7 @@ CONFIG_MTD_PHYSMAP=y CONFIG_MTD_DATAFLASH=y CONFIG_BLK_DEV_LOOP=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y CONFIG_MACB=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT_MOUSEDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_KEYBOARD_ATKBD is not set @@ -70,9 +62,9 @@ CONFIG_INPUT_TOUCHSCREEN=y CONFIG_TOUCHSCREEN_ADS7846=m # CONFIG_SERIO is not set CONFIG_VT_HW_CONSOLE_BINDING=y +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_ATMEL=y CONFIG_SERIAL_ATMEL_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_I2C=y CONFIG_I2C_CHARDEV=y @@ -84,9 +76,7 @@ CONFIG_WATCHDOG=y CONFIG_AT32AP700X_WDT=y CONFIG_FB=y CONFIG_FB_ATMEL=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_LCD_CLASS_DEVICE=y -CONFIG_BACKLIGHT_CLASS_DEVICE=y CONFIG_SOUND=m CONFIG_SND=m CONFIG_SND_MIXER_OSS=m @@ -129,10 +119,7 @@ CONFIG_TMPFS=y CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_SMB_FS=m -CONFIG_SMB_NLS_DEFAULT=y CONFIG_CIFS=m CONFIG_CIFS_STATS=y CONFIG_CIFS_WEAK_PW_HASH=y @@ -142,10 +129,8 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_850=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DETECT_HUNG_TASK=y CONFIG_FRAME_POINTER=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set +CONFIG_MAGIC_SYSRQ=y +CONFIG_DETECT_HUNG_TASK=y CONFIG_CRC_CCITT=y diff --git a/arch/avr32/configs/atngw100mkii_defconfig b/arch/avr32/configs/atngw100mkii_defconfig index 97fe1b399b069d..04962641c9364b 100644 --- a/arch/avr32/configs/atngw100mkii_defconfig +++ b/arch/avr32/configs/atngw100mkii_defconfig @@ -1,33 +1,28 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_BASE_FULL is not set # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m -# CONFIG_KPROBES is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BOARD_ATNGW100_MKII=y # CONFIG_OWNERSHIP_TRACE is not set CONFIG_NMI_DEBUGGING=y -CONFIG_PM=y CONFIG_CPU_FREQ=y # CONFIG_CPU_FREQ_STAT is not set CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_AVR32_AT32AP_CPUFREQ=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -52,7 +47,6 @@ CONFIG_INET6_IPCOMP=y CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set CONFIG_NETFILTER_XTABLES=y -# CONFIG_IP_NF_TARGET_ULOG is not set CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -60,7 +54,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y @@ -72,28 +65,24 @@ CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=m -CONFIG_MISC_DEVICES=y CONFIG_ATMEL_TCLIB=y CONFIG_NETDEVICES=y CONFIG_TUN=m -CONFIG_NET_ETHERNET=y CONFIG_MACB=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=m -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_FILTER=y CONFIG_PPP_MPPE=m CONFIG_PPPOE=m +CONFIG_PPP_ASYNC=m # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set CONFIG_SERIAL_ATMEL=y CONFIG_SERIAL_ATMEL_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_I2C=m CONFIG_I2C_CHARDEV=m @@ -130,7 +119,6 @@ CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set CONFIG_EXT4_FS=y -# CONFIG_EXT4_FS_XATTR is not set # CONFIG_DNOTIFY is not set CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=m @@ -142,7 +130,6 @@ CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=y CONFIG_UBIFS_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y @@ -151,8 +138,7 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DETECT_HUNG_TASK=y CONFIG_FRAME_POINTER=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/avr32/configs/atngw100mkii_evklcd100_defconfig b/arch/avr32/configs/atngw100mkii_evklcd100_defconfig index a176d24467e9d2..89c2cda573da54 100644 --- a/arch/avr32/configs/atngw100mkii_evklcd100_defconfig +++ b/arch/avr32/configs/atngw100mkii_evklcd100_defconfig @@ -1,36 +1,31 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_BASE_FULL is not set # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m -# CONFIG_KPROBES is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BOARD_ATNGW100_MKII=y CONFIG_BOARD_ATNGW100_MKII_LCD=y CONFIG_BOARD_ATNGW100_EVKLCD10X=y CONFIG_BOARD_ATNGW100_EVKLCD10X_QVGA=y # CONFIG_OWNERSHIP_TRACE is not set CONFIG_NMI_DEBUGGING=y -CONFIG_PM=y CONFIG_CPU_FREQ=y # CONFIG_CPU_FREQ_STAT is not set CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_AVR32_AT32AP_CPUFREQ=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -55,7 +50,6 @@ CONFIG_INET6_IPCOMP=y CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set CONFIG_NETFILTER_XTABLES=y -# CONFIG_IP_NF_TARGET_ULOG is not set CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -63,7 +57,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y @@ -75,21 +68,17 @@ CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=m -CONFIG_MISC_DEVICES=y CONFIG_ATMEL_TCLIB=y CONFIG_NETDEVICES=y CONFIG_TUN=m -CONFIG_NET_ETHERNET=y CONFIG_MACB=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=m -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_FILTER=y CONFIG_PPP_MPPE=m CONFIG_PPPOE=m +CONFIG_PPP_ASYNC=m # CONFIG_INPUT_MOUSEDEV is not set CONFIG_INPUT_EVDEV=m # CONFIG_INPUT_KEYBOARD is not set @@ -97,9 +86,9 @@ CONFIG_INPUT_EVDEV=m CONFIG_INPUT_TOUCHSCREEN=y CONFIG_TOUCHSCREEN_WM97XX=m # CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_ATMEL=y CONFIG_SERIAL_ATMEL_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_I2C=m CONFIG_I2C_CHARDEV=m @@ -147,7 +136,6 @@ CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set CONFIG_EXT4_FS=y -# CONFIG_EXT4_FS_XATTR is not set # CONFIG_DNOTIFY is not set CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=m @@ -159,7 +147,6 @@ CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=y CONFIG_UBIFS_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y @@ -168,8 +155,7 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DETECT_HUNG_TASK=y CONFIG_FRAME_POINTER=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/avr32/configs/atngw100mkii_evklcd101_defconfig b/arch/avr32/configs/atngw100mkii_evklcd101_defconfig index d1bf6dcfc47d4c..1b4d4a87a35696 100644 --- a/arch/avr32/configs/atngw100mkii_evklcd101_defconfig +++ b/arch/avr32/configs/atngw100mkii_evklcd101_defconfig @@ -1,35 +1,30 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_BASE_FULL is not set # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m -# CONFIG_KPROBES is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BOARD_ATNGW100_MKII=y CONFIG_BOARD_ATNGW100_MKII_LCD=y CONFIG_BOARD_ATNGW100_EVKLCD10X=y # CONFIG_OWNERSHIP_TRACE is not set CONFIG_NMI_DEBUGGING=y -CONFIG_PM=y CONFIG_CPU_FREQ=y # CONFIG_CPU_FREQ_STAT is not set CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_AVR32_AT32AP_CPUFREQ=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -54,7 +49,6 @@ CONFIG_INET6_IPCOMP=y CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set CONFIG_NETFILTER_XTABLES=y -# CONFIG_IP_NF_TARGET_ULOG is not set CONFIG_BRIDGE=m CONFIG_VLAN_8021Q=m CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" @@ -62,7 +56,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_INTELEXT=y @@ -74,21 +67,17 @@ CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=m -CONFIG_MISC_DEVICES=y CONFIG_ATMEL_TCLIB=y CONFIG_NETDEVICES=y CONFIG_TUN=m -CONFIG_NET_ETHERNET=y CONFIG_MACB=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=m -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_FILTER=y CONFIG_PPP_MPPE=m CONFIG_PPPOE=m +CONFIG_PPP_ASYNC=m # CONFIG_INPUT_MOUSEDEV is not set CONFIG_INPUT_EVDEV=m # CONFIG_INPUT_KEYBOARD is not set @@ -96,9 +85,9 @@ CONFIG_INPUT_EVDEV=m CONFIG_INPUT_TOUCHSCREEN=y CONFIG_TOUCHSCREEN_WM97XX=m # CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_ATMEL=y CONFIG_SERIAL_ATMEL_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_I2C=m CONFIG_I2C_CHARDEV=m @@ -146,7 +135,6 @@ CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set CONFIG_EXT4_FS=y -# CONFIG_EXT4_FS_XATTR is not set # CONFIG_DNOTIFY is not set CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=m @@ -158,7 +146,6 @@ CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=y CONFIG_UBIFS_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m CONFIG_NFSD_V3=y @@ -167,8 +154,7 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DETECT_HUNG_TASK=y CONFIG_FRAME_POINTER=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/avr32/configs/atstk1002_defconfig b/arch/avr32/configs/atstk1002_defconfig index b056820eef33f6..9b8b52e54b793c 100644 --- a/arch/avr32/configs/atstk1002_defconfig +++ b/arch/avr32/configs/atstk1002_defconfig @@ -1,32 +1,27 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_BASE_FULL is not set # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m -# CONFIG_KPROBES is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y # CONFIG_OWNERSHIP_TRACE is not set CONFIG_NMI_DEBUGGING=y -CONFIG_PM=y CONFIG_CPU_FREQ=y # CONFIG_CPU_FREQ_STAT is not set CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_AVR32_AT32AP_CPUFREQ=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -54,7 +49,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -63,7 +57,6 @@ CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=m -CONFIG_MISC_DEVICES=y CONFIG_ATMEL_TCLIB=y CONFIG_ATMEL_SSC=m # CONFIG_SCSI_PROC_FS is not set @@ -75,14 +68,11 @@ CONFIG_ATA=m CONFIG_PATA_AT32=m CONFIG_NETDEVICES=y CONFIG_TUN=m -CONFIG_NET_ETHERNET=y CONFIG_MACB=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_ASYNC=m CONFIG_INPUT=m CONFIG_INPUT_EVDEV=m # CONFIG_KEYBOARD_ATKBD is not set @@ -91,10 +81,10 @@ CONFIG_KEYBOARD_GPIO=m CONFIG_MOUSE_GPIO=m # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set CONFIG_SERIAL_ATMEL=y CONFIG_SERIAL_ATMEL_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_I2C=m CONFIG_I2C_CHARDEV=m @@ -108,10 +98,8 @@ CONFIG_WATCHDOG=y CONFIG_AT32AP700X_WDT=y CONFIG_FB=y CONFIG_FB_ATMEL=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_LTV350QV=y -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_SOUND=m CONFIG_SND=m CONFIG_SND_MIXER_OSS=m @@ -119,7 +107,6 @@ CONFIG_SND_PCM_OSS=m # CONFIG_SND_SUPPORT_OLD_API is not set # CONFIG_SND_VERBOSE_PROCFS is not set CONFIG_SND_AT73C213=m -# CONFIG_HID_SUPPORT is not set CONFIG_USB_GADGET=y CONFIG_USB_ZERO=m CONFIG_USB_ETH=m @@ -147,7 +134,6 @@ CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set CONFIG_EXT4_FS=y -# CONFIG_EXT4_FS_XATTR is not set # CONFIG_DNOTIFY is not set CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=m @@ -159,15 +145,13 @@ CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=y CONFIG_UBIFS_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_CIFS=m CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DETECT_HUNG_TASK=y CONFIG_FRAME_POINTER=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/avr32/configs/atstk1003_defconfig b/arch/avr32/configs/atstk1003_defconfig index 0cd23a303da11a..ccce1a0b791792 100644 --- a/arch/avr32/configs/atstk1003_defconfig +++ b/arch/avr32/configs/atstk1003_defconfig @@ -1,33 +1,28 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_BASE_FULL is not set # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m -# CONFIG_KPROBES is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BOARD_ATSTK1003=y # CONFIG_OWNERSHIP_TRACE is not set CONFIG_NMI_DEBUGGING=y -CONFIG_PM=y CONFIG_CPU_FREQ=y # CONFIG_CPU_FREQ_STAT is not set CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_AVR32_AT32AP_CPUFREQ=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -43,7 +38,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -52,7 +46,6 @@ CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=m -CONFIG_MISC_DEVICES=y CONFIG_ATMEL_TCLIB=y CONFIG_ATMEL_SSC=m # CONFIG_SCSI_PROC_FS is not set @@ -63,12 +56,10 @@ CONFIG_ATA=m # CONFIG_SATA_PMP is not set CONFIG_PATA_AT32=m CONFIG_NETDEVICES=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_ASYNC=m CONFIG_INPUT=m CONFIG_INPUT_EVDEV=m # CONFIG_KEYBOARD_ATKBD is not set @@ -77,10 +68,10 @@ CONFIG_KEYBOARD_GPIO=m CONFIG_MOUSE_GPIO=m # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set CONFIG_SERIAL_ATMEL=y CONFIG_SERIAL_ATMEL_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_I2C=m CONFIG_I2C_CHARDEV=m @@ -98,7 +89,6 @@ CONFIG_SND_MIXER_OSS=m CONFIG_SND_PCM_OSS=m # CONFIG_SND_DRIVERS is not set CONFIG_SND_AT73C213=m -# CONFIG_HID_SUPPORT is not set CONFIG_USB_GADGET=y CONFIG_USB_ZERO=m CONFIG_USB_ETH=m @@ -126,7 +116,6 @@ CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set CONFIG_EXT4_FS=y -# CONFIG_EXT4_FS_XATTR is not set # CONFIG_DNOTIFY is not set CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=m @@ -142,8 +131,7 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DETECT_HUNG_TASK=y CONFIG_FRAME_POINTER=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/avr32/configs/atstk1004_defconfig b/arch/avr32/configs/atstk1004_defconfig index ac1041f5f85a02..e64288fc794d1f 100644 --- a/arch/avr32/configs/atstk1004_defconfig +++ b/arch/avr32/configs/atstk1004_defconfig @@ -1,33 +1,28 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_BASE_FULL is not set # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m -# CONFIG_KPROBES is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BOARD_ATSTK1004=y # CONFIG_OWNERSHIP_TRACE is not set CONFIG_NMI_DEBUGGING=y -CONFIG_PM=y CONFIG_CPU_FREQ=y # CONFIG_CPU_FREQ_STAT is not set CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_AVR32_AT32AP_CPUFREQ=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -43,7 +38,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -52,7 +46,6 @@ CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=m -CONFIG_MISC_DEVICES=y CONFIG_ATMEL_TCLIB=y CONFIG_ATMEL_SSC=m # CONFIG_SCSI_PROC_FS is not set @@ -63,12 +56,10 @@ CONFIG_ATA=m # CONFIG_SATA_PMP is not set CONFIG_PATA_AT32=m CONFIG_NETDEVICES=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_ASYNC=m CONFIG_INPUT=m CONFIG_INPUT_EVDEV=m # CONFIG_KEYBOARD_ATKBD is not set @@ -77,10 +68,10 @@ CONFIG_KEYBOARD_GPIO=m CONFIG_MOUSE_GPIO=m # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set CONFIG_SERIAL_ATMEL=y CONFIG_SERIAL_ATMEL_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_I2C=m CONFIG_I2C_CHARDEV=m @@ -94,10 +85,8 @@ CONFIG_WATCHDOG=y CONFIG_AT32AP700X_WDT=y CONFIG_FB=y CONFIG_FB_ATMEL=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_LTV350QV=y -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_USB_GADGET=y CONFIG_USB_ZERO=m CONFIG_USB_ETH=m @@ -125,7 +114,6 @@ CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set CONFIG_EXT4_FS=y -# CONFIG_EXT4_FS_XATTR is not set # CONFIG_DNOTIFY is not set CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=m @@ -141,8 +129,7 @@ CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DETECT_HUNG_TASK=y CONFIG_FRAME_POINTER=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/avr32/configs/atstk1006_defconfig b/arch/avr32/configs/atstk1006_defconfig index ea4f670cb9954c..7d669f79ff74ef 100644 --- a/arch/avr32/configs/atstk1006_defconfig +++ b/arch/avr32/configs/atstk1006_defconfig @@ -1,33 +1,28 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_BASE_FULL is not set # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m -# CONFIG_KPROBES is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BOARD_ATSTK1006=y # CONFIG_OWNERSHIP_TRACE is not set CONFIG_NMI_DEBUGGING=y -CONFIG_PM=y CONFIG_CPU_FREQ=y # CONFIG_CPU_FREQ_STAT is not set CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_AVR32_AT32AP_CPUFREQ=y -CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -55,7 +50,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -66,7 +60,6 @@ CONFIG_MTD_UBI=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_NBD=m CONFIG_BLK_DEV_RAM=m -CONFIG_MISC_DEVICES=y CONFIG_ATMEL_TCLIB=y CONFIG_ATMEL_SSC=m # CONFIG_SCSI_PROC_FS is not set @@ -78,14 +71,11 @@ CONFIG_ATA=m CONFIG_PATA_AT32=m CONFIG_NETDEVICES=y CONFIG_TUN=m -CONFIG_NET_ETHERNET=y CONFIG_MACB=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_ASYNC=m CONFIG_INPUT=m CONFIG_INPUT_EVDEV=m # CONFIG_KEYBOARD_ATKBD is not set @@ -94,10 +84,10 @@ CONFIG_KEYBOARD_GPIO=m CONFIG_MOUSE_GPIO=m # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set CONFIG_SERIAL_ATMEL=y CONFIG_SERIAL_ATMEL_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_I2C=m CONFIG_I2C_CHARDEV=m @@ -111,10 +101,8 @@ CONFIG_WATCHDOG=y CONFIG_AT32AP700X_WDT=y CONFIG_FB=y CONFIG_FB_ATMEL=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_LTV350QV=y -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set CONFIG_SOUND=m CONFIG_SND=m CONFIG_SND_MIXER_OSS=m @@ -122,7 +110,6 @@ CONFIG_SND_PCM_OSS=m # CONFIG_SND_SUPPORT_OLD_API is not set # CONFIG_SND_VERBOSE_PROCFS is not set CONFIG_SND_AT73C213=m -# CONFIG_HID_SUPPORT is not set CONFIG_USB_GADGET=y CONFIG_USB_ZERO=m CONFIG_USB_ETH=m @@ -150,7 +137,6 @@ CONFIG_EXT3_FS=y # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set # CONFIG_EXT3_FS_XATTR is not set CONFIG_EXT4_FS=y -# CONFIG_EXT4_FS_XATTR is not set # CONFIG_DNOTIFY is not set CONFIG_FUSE_FS=m CONFIG_MSDOS_FS=m @@ -162,15 +148,13 @@ CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=y CONFIG_UBIFS_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_CIFS=m CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DETECT_HUNG_TASK=y CONFIG_FRAME_POINTER=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/avr32/configs/favr-32_defconfig b/arch/avr32/configs/favr-32_defconfig index b3eb67dc05ac71..560c52f87d451c 100644 --- a/arch/avr32/configs/favr-32_defconfig +++ b/arch/avr32/configs/favr-32_defconfig @@ -1,28 +1,23 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_BASE_FULL is not set # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m -# CONFIG_KPROBES is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BOARD_FAVR_32=y # CONFIG_OWNERSHIP_TRACE is not set CONFIG_NMI_DEBUGGING=y -CONFIG_PM=y CONFIG_CPU_FREQ=y # CONFIG_CPU_FREQ_STAT is not set CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y @@ -37,7 +32,6 @@ CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m CONFIG_INET_AH=m CONFIG_INET_ESP=m CONFIG_INET_XFRM_MODE_TRANSPORT=m @@ -59,7 +53,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -70,14 +63,11 @@ CONFIG_BLK_DEV_RAM=m CONFIG_ATMEL_TCLIB=y CONFIG_ATMEL_SSC=m CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y CONFIG_MACB=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_PPP=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_DEFLATE=m CONFIG_PPP_BSDCOMP=m +CONFIG_PPP_DEFLATE=m +CONFIG_PPP_ASYNC=m CONFIG_INPUT_MOUSEDEV=m CONFIG_INPUT_EVDEV=m # CONFIG_KEYBOARD_ATKBD is not set @@ -88,10 +78,10 @@ CONFIG_INPUT_TOUCHSCREEN=y CONFIG_TOUCHSCREEN_ADS7846=m # CONFIG_SERIO is not set # CONFIG_CONSOLE_TRANSLATIONS is not set +# CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set CONFIG_SERIAL_ATMEL=y CONFIG_SERIAL_ATMEL_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_I2C=m CONFIG_I2C_CHARDEV=m @@ -105,12 +95,10 @@ CONFIG_WATCHDOG=y CONFIG_AT32AP700X_WDT=y CONFIG_FB=y CONFIG_FB_ATMEL=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_LCD_CLASS_DEVICE is not set CONFIG_BACKLIGHT_PWM=m CONFIG_SOUND=m CONFIG_SOUND_PRIME=m -# CONFIG_HID_SUPPORT is not set CONFIG_USB_GADGET=y CONFIG_USB_ZERO=m CONFIG_USB_ETH=m @@ -145,13 +133,11 @@ CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=y # CONFIG_JFFS2_FS_WRITEBUFFER is not set CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y CONFIG_FRAME_POINTER=y +CONFIG_MAGIC_SYSRQ=y # CONFIG_CRYPTO_HW is not set diff --git a/arch/avr32/configs/hammerhead_defconfig b/arch/avr32/configs/hammerhead_defconfig index 4912f0aadaa12c..d57fadb9e6b60c 100644 --- a/arch/avr32/configs/hammerhead_defconfig +++ b/arch/avr32/configs/hammerhead_defconfig @@ -1,26 +1,22 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_BASE_FULL is not set # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=m -# CONFIG_KPROBES is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BOARD_HAMMERHEAD=y CONFIG_BOARD_HAMMERHEAD_USB=y CONFIG_BOARD_HAMMERHEAD_LCD=y @@ -53,13 +49,11 @@ CONFIG_INET_IPCOMP=y CONFIG_NETFILTER=y # CONFIG_NETFILTER_ADVANCED is not set CONFIG_NETFILTER_XTABLES=y -# CONFIG_IP_NF_TARGET_ULOG is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -70,16 +64,13 @@ CONFIG_ATMEL_TCLIB=y CONFIG_SCSI=m CONFIG_BLK_DEV_SD=m CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y CONFIG_MACB=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set CONFIG_INPUT_FF_MEMLESS=m CONFIG_INPUT_EVDEV=m CONFIG_INPUT_TOUCHSCREEN=y +# CONFIG_LEGACY_PTYS is not set CONFIG_SERIAL_ATMEL=y CONFIG_SERIAL_ATMEL_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_I2C=m CONFIG_I2C_CHARDEV=m @@ -114,10 +105,8 @@ CONFIG_HID_MONTEREY=m CONFIG_HID_PANTHERLORD=m CONFIG_HID_PETALYNX=m CONFIG_HID_SAMSUNG=m -CONFIG_HID_SONY=m CONFIG_HID_SUNPLUS=m CONFIG_USB=m -# CONFIG_USB_DEVICE_CLASS is not set CONFIG_USB_MON=m CONFIG_USB_ISP116X_HCD=m CONFIG_USB_STORAGE=m @@ -140,16 +129,13 @@ CONFIG_TMPFS=y CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m CONFIG_NLS_ISO8859_1=m CONFIG_NLS_UTF8=m -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y CONFIG_FRAME_POINTER=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set +CONFIG_MAGIC_SYSRQ=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_ARC4=m diff --git a/arch/avr32/configs/merisc_defconfig b/arch/avr32/configs/merisc_defconfig index b9ef4cc85d08eb..e6a9cb7d574e77 100644 --- a/arch/avr32/configs/merisc_defconfig +++ b/arch/avr32/configs/merisc_defconfig @@ -1,22 +1,19 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_BASE_FULL is not set CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BOARD_MERISC=y CONFIG_AP700X_32_BIT_SMC=y # CONFIG_OWNERSHIP_TRACE is not set @@ -39,14 +36,10 @@ CONFIG_INET_IPCOMP=y # CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set CONFIG_CAN=y -CONFIG_CAN_RAW=y -CONFIG_CAN_BCM=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_PREVENT_FIRMWARE_BUILD is not set # CONFIG_FW_LOADER is not set CONFIG_MTD=y -CONFIG_MTD_CONCAT=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_JEDECPROBE=y @@ -60,10 +53,7 @@ CONFIG_SCSI=y CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y CONFIG_MACB=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT_MOUSEDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_KEYBOARD_ATKBD is not set @@ -74,11 +64,10 @@ CONFIG_INPUT_MISC=y CONFIG_INPUT_UINPUT=y # CONFIG_SERIO is not set # CONFIG_CONSOLE_TRANSLATIONS is not set -CONFIG_VT_HW_CONSOLE_BINDING=y +# CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set CONFIG_SERIAL_ATMEL=y CONFIG_SERIAL_ATMEL_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_I2C=y CONFIG_I2C_CHARDEV=y @@ -89,13 +78,9 @@ CONFIG_SPI_SPIDEV=y CONFIG_GPIO_SYSFS=y # CONFIG_HWMON is not set CONFIG_WATCHDOG=y -CONFIG_VIDEO_OUTPUT_CONTROL=y CONFIG_FB=y CONFIG_FB_ATMEL=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y # CONFIG_LCD_CLASS_DEVICE is not set -# CONFIG_BACKLIGHT_CLASS_DEVICE is not set -CONFIG_DISPLAY_SUPPORT=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y CONFIG_MMC=y @@ -121,12 +106,10 @@ CONFIG_JFFS2_FS=y CONFIG_JFFS2_FS_WBUF_VERIFY=y CONFIG_CRAMFS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_850=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -# CONFIG_RCU_CPU_STALL_DETECTOR is not set # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set diff --git a/arch/avr32/configs/mimc200_defconfig b/arch/avr32/configs/mimc200_defconfig index d630e089dd322c..49c7e890af7b3d 100644 --- a/arch/avr32/configs/mimc200_defconfig +++ b/arch/avr32/configs/mimc200_defconfig @@ -1,25 +1,21 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSFS_DEPRECATED_V2=y CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -# CONFIG_SYSCTL_SYSCALL is not set # CONFIG_BASE_FULL is not set # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_IOSCHED_DEADLINE is not set -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_BOARD_MIMC200=y # CONFIG_OWNERSHIP_TRACE is not set CONFIG_NMI_DEBUGGING=y -CONFIG_PM=y CONFIG_CPU_FREQ=y # CONFIG_CPU_FREQ_STAT is not set CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y @@ -50,7 +46,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" # CONFIG_FW_LOADER is not set CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y CONFIG_MTD_CFI=y CONFIG_MTD_CFI_AMDSTD=y @@ -60,17 +55,14 @@ CONFIG_ATMEL_TCLIB=y CONFIG_EEPROM_AT24=y CONFIG_EEPROM_AT25=y CONFIG_NETDEVICES=y -CONFIG_NET_ETHERNET=y CONFIG_MACB=y -# CONFIG_NETDEV_1000 is not set -# CONFIG_NETDEV_10000 is not set # CONFIG_INPUT is not set # CONFIG_SERIO is not set # CONFIG_VT is not set +# CONFIG_LEGACY_PTYS is not set # CONFIG_DEVKMEM is not set CONFIG_SERIAL_ATMEL=y CONFIG_SERIAL_ATMEL_CONSOLE=y -# CONFIG_LEGACY_PTYS is not set # CONFIG_HW_RANDOM is not set CONFIG_I2C=y CONFIG_I2C_CHARDEV=y @@ -109,15 +101,13 @@ CONFIG_TMPFS=y CONFIG_CONFIGFS_FS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_850=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y CONFIG_FRAME_POINTER=y +CONFIG_MAGIC_SYSRQ=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y CONFIG_CRYPTO_ARC4=y From 0efeac261c3f79c44fe61ee869722b77805c7ddf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Feb 2015 09:26:39 -0500 Subject: [PATCH 494/601] SUNRPC: Ensure xs_tcp_shutdown() requests a full close of the connection The previous behaviour left the connection half-open in order to try to scrape the last replies from the socket. Now that we have more reliable reconnection, change the behaviour to close down the socket faster. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 2f8db3499a172f..3d83cbd32ef250 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -627,7 +627,7 @@ static int xs_udp_send_request(struct rpc_task *task) * @xprt: transport * * Initiates a graceful shutdown of the TCP socket by calling the - * equivalent of shutdown(SHUT_WR); + * equivalent of shutdown(SHUT_RDWR); */ static void xs_tcp_shutdown(struct rpc_xprt *xprt) { @@ -635,7 +635,7 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt) struct socket *sock = transport->sock; if (sock != NULL) { - kernel_sock_shutdown(sock, SHUT_WR); + kernel_sock_shutdown(sock, SHUT_RDWR); trace_rpc_socket_shutdown(xprt, sock); } } From caf4ccd4e88cf2795c927834bc488c8321437586 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Feb 2015 09:23:34 -0500 Subject: [PATCH 495/601] SUNRPC: Make xs_tcp_close() do a socket shutdown rather than a sock_release Use of socket shutdown() means that we monitor the shutdown process through the xs_tcp_state_change() callback, so it is preferable to a full close in all cases unless we're destroying the transport. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3d83cbd32ef250..0279e8ffb14a10 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -857,10 +857,7 @@ static void xs_close(struct rpc_xprt *xprt) static void xs_tcp_close(struct rpc_xprt *xprt) { - if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state)) - xs_close(xprt); - else - xs_tcp_shutdown(xprt); + xs_tcp_shutdown(xprt); } static void xs_xprt_free(struct rpc_xprt *xprt) @@ -1033,7 +1030,6 @@ static void xs_udp_data_ready(struct sock *sk) */ static void xs_tcp_force_close(struct rpc_xprt *xprt) { - set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); xprt_force_disconnect(xprt); } From 9e2b9f37760e129cee053cc7b6e7288acc2a7134 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Feb 2015 19:21:27 -0500 Subject: [PATCH 496/601] SUNRPC: Remove the redundant XPRT_CONNECTION_CLOSE flag Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 - net/sunrpc/xprt.c | 1 - net/sunrpc/xprtsock.c | 1 - 3 files changed, 3 deletions(-) diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index ae39d478a272d7..8b93ef53df3c95 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -360,7 +360,6 @@ void xprt_unlock_connect(struct rpc_xprt *, void *); #define XPRT_BOUND (4) #define XPRT_BINDING (5) #define XPRT_CLOSING (6) -#define XPRT_CONNECTION_CLOSE (8) #define XPRT_CONGESTED (9) static inline void xprt_set_connected(struct rpc_xprt *xprt) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ff3574df834491..e3015aede0d944 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -683,7 +683,6 @@ xprt_init_autodisconnect(unsigned long data) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) goto out_abort; spin_unlock(&xprt->transport_lock); - set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); queue_work(rpciod_workqueue, &xprt->task_cleanup); return; out_abort: diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0279e8ffb14a10..c72b13e2bdf52f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -804,7 +804,6 @@ static void xs_error_report(struct sock *sk) static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) { smp_mb__before_atomic(); - clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); clear_bit(XPRT_CLOSING, &xprt->state); smp_mb__after_atomic(); From b70ae915e4282854fb7864519e5ec559ab2de7c3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Feb 2015 09:41:32 -0500 Subject: [PATCH 497/601] SUNRPC: Handle connection reset more efficiently. If the connection reset is due to an active call on our side, then the state change is sometimes not reported. Catch those instances using xs_error_report() instead. Also remove the xs_tcp_shutdown() call in xs_tcp_send_request() as the change in behaviour makes it redundant. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c72b13e2bdf52f..540d542d85e559 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -718,7 +718,6 @@ static int xs_tcp_send_request(struct rpc_task *task) dprintk("RPC: sendmsg returned unrecognized error %d\n", -status); case -ECONNRESET: - xs_tcp_shutdown(xprt); case -ECONNREFUSED: case -ENOTCONN: case -EADDRINUSE: @@ -774,6 +773,21 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s sk->sk_error_report = transport->old_error_report; } +static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) +{ + smp_mb__before_atomic(); + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + clear_bit(XPRT_CLOSING, &xprt->state); + smp_mb__after_atomic(); +} + +static void xs_sock_mark_closed(struct rpc_xprt *xprt) +{ + xs_sock_reset_connection_flags(xprt); + /* Mark transport as closed and wake up all pending tasks */ + xprt_disconnect_done(xprt); +} + /** * xs_error_report - callback to handle TCP socket state errors * @sk: socket @@ -793,6 +807,9 @@ static void xs_error_report(struct sock *sk) err = -sk->sk_err; if (err == 0) goto out; + /* Is this a reset event? */ + if (sk->sk_state == TCP_CLOSE) + xs_sock_mark_closed(xprt); dprintk("RPC: xs_error_report client %p, error=%d...\n", xprt, -err); trace_rpc_socket_error(xprt, sk->sk_socket, err); @@ -801,14 +818,6 @@ static void xs_error_report(struct sock *sk) read_unlock_bh(&sk->sk_callback_lock); } -static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt) -{ - smp_mb__before_atomic(); - clear_bit(XPRT_CLOSE_WAIT, &xprt->state); - clear_bit(XPRT_CLOSING, &xprt->state); - smp_mb__after_atomic(); -} - static void xs_reset_transport(struct sock_xprt *transport) { struct socket *sock = transport->sock; @@ -1421,13 +1430,6 @@ static void xs_tcp_data_ready(struct sock *sk) read_unlock_bh(&sk->sk_callback_lock); } -static void xs_sock_mark_closed(struct rpc_xprt *xprt) -{ - xs_sock_reset_connection_flags(xprt); - /* Mark transport as closed and wake up all pending tasks */ - xprt_disconnect_done(xprt); -} - /** * xs_tcp_state_change - callback to handle TCP socket state changes * @sk: socket whose state has changed From 54c09874929dcaac37ed62ad2eca45d960ba1a00 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Feb 2015 11:01:02 -0500 Subject: [PATCH 498/601] SUNRPC: Define xs_tcp_fin_timeout only if CONFIG_SUNRPC_DEBUG Now that the linger code is gone, the xs_tcp_fin_timeout variable has no real function. Keep it for now, since it is part of the /proc interface, but only define it if that /proc interface is enabled. Suggested-by: Anna Schumaker Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 540d542d85e559..8ab02262c76116 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -63,6 +63,8 @@ static unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE; static unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; static unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) + #define XS_TCP_LINGER_TO (15U * HZ) static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; @@ -75,8 +77,6 @@ static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; * someone else's file names! */ -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) - static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; static unsigned int max_tcp_slot_table_limit = RPC_MAX_SLOT_TABLE_LIMIT; From 4e20e3a60b57efa1e5c26324ce0260d58be6c81b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 9 Feb 2015 15:27:04 -0500 Subject: [PATCH 499/601] tracing: Update the TRACE_EVENT fields available in the sample code The sample code in samples/trace_events/ is extremely out of date and does not show all the new fields that have been added since the sample code was written. As most people are unaware of these new fields, adding sample code and explanations of those fields should help out. Signed-off-by: Steven Rostedt --- samples/trace_events/trace-events-sample.c | 19 ++- samples/trace_events/trace-events-sample.h | 145 +++++++++++++++++++-- 2 files changed, 150 insertions(+), 14 deletions(-) diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index aabc4e97091126..16c15c08ed3893 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -10,12 +10,29 @@ #define CREATE_TRACE_POINTS #include "trace-events-sample.h" +static const char *random_strings[] = { + "Mother Goose", + "Snoopy", + "Gandalf", + "Frodo", + "One ring to rule them all" +}; static void simple_thread_func(int cnt) { + int array[6]; + int len = cnt % 5; + int i; + set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(HZ); - trace_foo_bar("hello", cnt); + + for (i = 0; i < len; i++) + array[i] = i + 1; + array[i] = 0; + + trace_foo_bar("hello", cnt, array, random_strings[len], + tsk_cpus_allowed(current)); } static int simple_thread(void *arg) diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index 476429281389e0..dd65f7b8c0d9df 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -1,6 +1,6 @@ /* * If TRACE_SYSTEM is defined, that will be the directory created - * in the ftrace directory under /sys/kernel/debug/tracing/events/ + * in the ftrace directory under /sys/kernel/tracing/events/ * * The define_trace.h below will also look for a file name of * TRACE_SYSTEM.h where TRACE_SYSTEM is what is defined here. @@ -54,44 +54,163 @@ * Here it is simply "foo, bar". * * struct: This defines the way the data will be stored in the ring buffer. - * There are currently two types of elements. __field and __array. - * a __field is broken up into (type, name). Where type can be any - * primitive type (integer, long or pointer). __field_struct() can - * be any static complex data value (struct, union, but not an array). - * For an array. there are three fields. (type, name, size). The - * type of elements in the array, the name of the field and the size - * of the array. + * The items declared here become part of a special structure + * called "__entry", which can be used in the fast_assign part of the + * TRACE_EVENT macro. + * + * Here are the currently defined types you can use: + * + * __field : Is broken up into type and name. Where type can be any + * primitive type (integer, long or pointer). + * + * __field(int, foo) + * + * __entry->foo = 5; + * + * __field_struct : This can be any static complex data type (struct, union + * but not an array). Be careful using complex types, as each + * event is limited in size, and copying large amounts of data + * into the ring buffer can slow things down. + * + * __field_struct(struct bar, foo) + * + * __entry->bar.x = y; + + * __array: There are three fields (type, name, size). The type is the + * type of elements in teh array, the name is the name of the array. + * size is the number of items in the array (not the total size). + * + * __array( char, foo, 10) is the same as saying: char foo[10]; + * + * Assigning arrays can be done like any array: + * + * __entry->foo[0] = 'a'; + * + * memcpy(__entry->foo, bar, 10); + * + * __dynamic_array: This is similar to array, but can vary is size from + * instance to instance of the tracepoint being called. + * Like __array, this too has three elements (type, name, size); + * type is the type of the element, name is the name of the array. + * The size is different than __array. It is not a static number, + * but the algorithm to figure out the length of the array for the + * specific instance of tracepoint. Again, size is the numebr of + * items in the array, not the total length in bytes. + * + * __dynamic_array( int, foo, bar) is similar to: int foo[bar]; + * + * Note, unlike arrays, you must use the __get_dynamic_array() macro + * to access the array. + * + * memcpy(__get_dynamic_array(foo), bar, 10); + * + * Notice, that "__entry" is not needed here. + * + * __string: This is a special kind of __dynamic_array. It expects to + * have a nul terminated character array passed to it (it allows + * for NULL too, which would be converted into "(null)"). __string + * takes two paramenter (name, src), where name is the name of + * the string saved, and src is the string to copy into the + * ring buffer. + * + * __string(foo, bar) is similar to: strcpy(foo, bar) + * + * To assign a string, use the helper macro __assign_str(). + * + * __assign_str(foo, bar); + * + * In most cases, the __assign_str() macro will take the same + * parameters as the __string() macro had to declare the string. + * + * __bitmask: This is another kind of __dynamic_array, but it expects + * an array of longs, and the number of bits to parse. It takes + * two parameters (name, nr_bits), where name is the name of the + * bitmask to save, and the nr_bits is the number of bits to record. + * + * __bitmask(target_cpu, nr_cpumask_bits) + * + * To assign a bitmask, use the __assign_bitmask() helper macro. + * + * __assign_bitmask(target_cpus, cpumask_bits(bar), nr_cpumask_bits); * - * __array( char, foo, 10) is the same as saying char foo[10]. * * fast_assign: This is a C like function that is used to store the items - * into the ring buffer. + * into the ring buffer. A special variable called "__entry" will be the + * structure that points into the ring buffer and has the same fields as + * described by the struct part of TRACE_EVENT above. * * printk: This is a way to print out the data in pretty print. This is * useful if the system crashes and you are logging via a serial line, * the data can be printed to the console using this "printk" method. + * This is also used to print out the data from the trace files. + * Again, the __entry macro is used to access the data from the ring buffer. + * + * Note, __dynamic_array, __string, and __bitmask require special helpers + * to access the data. + * + * For __dynamic_array(int, foo, bar) use __get_dynamic_array(foo) + * Use __get_dynamic_array_len(foo) to get the length of the array + * saved. + * + * For __string(foo, bar) use __get_str(foo) + * + * For __bitmask(target_cpus, nr_cpumask_bits) use __get_bitmask(target_cpus) + * * * Note, that for both the assign and the printk, __entry is the handler * to the data structure in the ring buffer, and is defined by the * TP_STRUCT__entry. */ + +/* + * It is OK to have helper functions in the file, but they need to be protected + * from being defined more than once. Remember, this file gets included more + * than once. + */ +#ifndef __TRACE_EVENT_SAMPLE_HELPER_FUNCTIONS +#define __TRACE_EVENT_SAMPLE_HELPER_FUNCTIONS +static inline int __length_of(const int *list) +{ + int i; + + if (!list) + return 0; + + for (i = 0; list[i]; i++) + ; + return i; +} +#endif + TRACE_EVENT(foo_bar, - TP_PROTO(char *foo, int bar), + TP_PROTO(const char *foo, int bar, const int *lst, + const char *string, const struct cpumask *mask), - TP_ARGS(foo, bar), + TP_ARGS(foo, bar, lst, string, mask), TP_STRUCT__entry( __array( char, foo, 10 ) __field( int, bar ) + __dynamic_array(int, list, __length_of(lst)) + __string( str, string ) + __bitmask( cpus, num_possible_cpus() ) ), TP_fast_assign( strlcpy(__entry->foo, foo, 10); __entry->bar = bar; + memcpy(__get_dynamic_array(list), lst, + __length_of(lst) * sizeof(int)); + __assign_str(str, string); + __assign_bitmask(cpus, cpumask_bits(mask), num_possible_cpus()); ), - TP_printk("foo %s %d", __entry->foo, __entry->bar) + TP_printk("foo %s %d %s %s (%s)", __entry->foo, __entry->bar, + __print_array(__get_dynamic_array(list), + __get_dynamic_array_len(list), + sizeof(int)), + __get_str(str), __get_bitmask(cpus)) ); #endif From c4c7eb29382c456b9be9858c357a490ae0ccd0f6 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 9 Feb 2015 16:05:55 -0500 Subject: [PATCH 500/601] tracing: Add TRACE_EVENT_CONDITION sample The sample code lacks an example of TRACE_EVENT_CONDITION, and it has been expressed to me that this feature for TRACE_EVENT is not well known and not used when it could be. Signed-off-by: Steven Rostedt --- samples/trace_events/trace-events-sample.c | 3 ++ samples/trace_events/trace-events-sample.h | 58 ++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index 16c15c08ed3893..c396a49b5d7831 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -31,8 +31,11 @@ static void simple_thread_func(int cnt) array[i] = i + 1; array[i] = 0; + /* Silly tracepoints */ trace_foo_bar("hello", cnt, array, random_strings[len], tsk_cpus_allowed(current)); + + trace_foo_bar_with_cond("Some times print", cnt); } static int simple_thread(void *arg) diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index dd65f7b8c0d9df..c3232340914da3 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -212,6 +212,64 @@ TRACE_EVENT(foo_bar, sizeof(int)), __get_str(str), __get_bitmask(cpus)) ); + +/* + * There may be a case where a tracepoint should only be called if + * some condition is set. Otherwise the tracepoint should not be called. + * But to do something like: + * + * if (cond) + * trace_foo(); + * + * Would cause a little overhead when tracing is not enabled, and that + * overhead, even if small, is not something we want. As tracepoints + * use static branch (aka jump_labels), where no branch is taken to + * skip the tracepoint when not enabled, and a jmp is placed to jump + * to the tracepoint code when it is enabled, having a if statement + * nullifies that optimization. It would be nice to place that + * condition within the static branch. This is where TRACE_EVENT_CONDITION + * comes in. + * + * TRACE_EVENT_CONDITION() is just like TRACE_EVENT, except it adds another + * parameter just after args. Where TRACE_EVENT has: + * + * TRACE_EVENT(name, proto, args, struct, assign, printk) + * + * the CONDITION version has: + * + * TRACE_EVENT_CONDITION(name, proto, args, cond, struct, assign, printk) + * + * Everything is the same as TRACE_EVENT except for the new cond. Think + * of the cond variable as: + * + * if (cond) + * trace_foo_bar_with_cond(); + * + * Except that the logic for the if branch is placed after the static branch. + * That is, the if statement that processes the condition will not be + * executed unless that traecpoint is enabled. Otherwise it still remains + * a nop. + */ +TRACE_EVENT_CONDITION(foo_bar_with_cond, + + TP_PROTO(const char *foo, int bar), + + TP_ARGS(foo, bar), + + TP_CONDITION(!(bar % 10)), + + TP_STRUCT__entry( + __string( foo, foo ) + __field( int, bar ) + ), + + TP_fast_assign( + __assign_str(foo, foo); + __entry->bar = bar; + ), + + TP_printk("foo %s %d", __get_str(foo), __entry->bar) +); #endif /***** NOTICE! The #if protection ends here. *****/ From 402e23b4ed9ed81852b6c15b793fcf84ea91e491 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Feb 2015 17:20:14 -0500 Subject: [PATCH 501/601] SUNRPC: Fix stupid typo in xs_sock_set_reuseport Yes, kernel_setsockopt() hates you for using a char argument. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8ab02262c76116..19f7526f896589 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1629,9 +1629,10 @@ static unsigned short xs_get_random_port(void) */ static void xs_sock_set_reuseport(struct socket *sock) { - char opt = 1; + int opt = 1; - kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); + kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, + (char *)&opt, sizeof(opt)); } static unsigned short xs_sock_getport(struct socket *sock) From 6adc13f8c096736957444ffa2aa11421b5671aef Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 9 Feb 2015 16:32:19 -0500 Subject: [PATCH 502/601] tracing: Add TRACE_EVENT_FN example If a function should be called before a tracepoint is enabled and/or after it is disabled, the TRACE_EVENT_FN() serves this purpose. But it is not well documented. Having it as a sample would help developers to know how to use it. Signed-off-by: Steven Rostedt --- samples/trace_events/trace-events-sample.c | 51 ++++++++++++++++++++++ samples/trace_events/trace-events-sample.h | 44 +++++++++++++++++++ 2 files changed, 95 insertions(+) diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index c396a49b5d7831..39d4484aef53b3 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -49,6 +49,52 @@ static int simple_thread(void *arg) } static struct task_struct *simple_tsk; +static struct task_struct *simple_tsk_fn; + +static void simple_thread_func_fn(int cnt) +{ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ); + + /* More silly tracepoints */ + trace_foo_bar_with_fn("Look at me", cnt); +} + +static int simple_thread_fn(void *arg) +{ + int cnt = 0; + + while (!kthread_should_stop()) + simple_thread_func_fn(cnt++); + + return 0; +} + +static DEFINE_MUTEX(thread_mutex); + +void foo_bar_reg(void) +{ + pr_info("Starting thread for foo_bar_fn\n"); + /* + * We shouldn't be able to start a trace when the module is + * unloading (there's other locks to prevent that). But + * for consistency sake, we still take the thread_mutex. + */ + mutex_lock(&thread_mutex); + simple_tsk_fn = kthread_run(simple_thread_fn, NULL, "event-sample-fn"); + mutex_unlock(&thread_mutex); +} + +void foo_bar_unreg(void) +{ + pr_info("Killing thread for foo_bar_fn\n"); + /* protect against module unloading */ + mutex_lock(&thread_mutex); + if (simple_tsk_fn) + kthread_stop(simple_tsk_fn); + simple_tsk_fn = NULL; + mutex_unlock(&thread_mutex); +} static int __init trace_event_init(void) { @@ -62,6 +108,11 @@ static int __init trace_event_init(void) static void __exit trace_event_exit(void) { kthread_stop(simple_tsk); + mutex_lock(&thread_mutex); + if (simple_tsk_fn) + kthread_stop(simple_tsk_fn); + simple_tsk_fn = NULL; + mutex_unlock(&thread_mutex); } module_init(trace_event_init); diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index c3232340914da3..d0be8411b527b1 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -270,6 +270,50 @@ TRACE_EVENT_CONDITION(foo_bar_with_cond, TP_printk("foo %s %d", __get_str(foo), __entry->bar) ); + +void foo_bar_reg(void); +void foo_bar_unreg(void); + +/* + * Now in the case that some function needs to be called when the + * tracepoint is enabled and/or when it is disabled, the + * TRACE_EVENT_FN() serves this purpose. This is just like TRACE_EVENT() + * but adds two more parameters at the end: + * + * TRACE_EVENT_FN( name, proto, args, struct, assign, printk, reg, unreg) + * + * reg and unreg are functions with the prototype of: + * + * void reg(void) + * + * The reg function gets called before the tracepoint is enabled, and + * the unreg function gets called after the tracepoint is disabled. + * + * Note, reg and unreg are allowed to be NULL. If you only need to + * call a function before enabling, or after disabling, just set one + * function and pass in NULL for the other parameter. + */ +TRACE_EVENT_FN(foo_bar_with_fn, + + TP_PROTO(const char *foo, int bar), + + TP_ARGS(foo, bar), + + TP_STRUCT__entry( + __string( foo, foo ) + __field( int, bar ) + ), + + TP_fast_assign( + __assign_str(foo, foo); + __entry->bar = bar; + ), + + TP_printk("foo %s %d", __get_str(foo), __entry->bar), + + foo_bar_reg, foo_bar_unreg +); + #endif /***** NOTICE! The #if protection ends here. *****/ From 7496946a88ab48830f3101c08f8e770cc0902bbb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Mon, 9 Feb 2015 17:14:04 -0500 Subject: [PATCH 503/601] tracing: Add samples of DECLARE_EVENT_CLASS() and DEFINE_EVENT() Add to samples/trace_events/ the macros DECLARE_EVENT_CLASS() and DEFINE_EVENT() and recommend using them over multiple TRACE_EVENT() macros if the multiple events have the same format. Signed-off-by: Steven Rostedt --- samples/trace_events/trace-events-sample.c | 7 ++ samples/trace_events/trace-events-sample.h | 81 ++++++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index 39d4484aef53b3..880a7d1d27d240 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -35,7 +35,13 @@ static void simple_thread_func(int cnt) trace_foo_bar("hello", cnt, array, random_strings[len], tsk_cpus_allowed(current)); + trace_foo_with_template_simple("HELLO", cnt); + trace_foo_bar_with_cond("Some times print", cnt); + + trace_foo_with_template_cond("prints other times", cnt); + + trace_foo_with_template_print("I have to be different", cnt); } static int simple_thread(void *arg) @@ -58,6 +64,7 @@ static void simple_thread_func_fn(int cnt) /* More silly tracepoints */ trace_foo_bar_with_fn("Look at me", cnt); + trace_foo_with_template_fn("Look at me too", cnt); } static int simple_thread_fn(void *arg) diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index d0be8411b527b1..a2c8b02b635916 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -314,6 +314,87 @@ TRACE_EVENT_FN(foo_bar_with_fn, foo_bar_reg, foo_bar_unreg ); +/* + * Each TRACE_EVENT macro creates several helper functions to produce + * the code to add the tracepoint, create the files in the trace + * directory, hook it to perf, assign the values and to print out + * the raw data from the ring buffer. To prevent too much bloat, + * if there are more than one tracepoint that uses the same format + * for the proto, args, struct, assign and printk, and only the name + * is different, it is highly recommended to use the DECLARE_EVENT_CLASS + * + * DECLARE_EVENT_CLASS() macro creates most of the functions for the + * tracepoint. Then DEFINE_EVENT() is use to hook a tracepoint to those + * functions. This DEFINE_EVENT() is an instance of the class and can + * be enabled and disabled separately from other events (either TRACE_EVENT + * or other DEFINE_EVENT()s). + * + * Note, TRACE_EVENT() itself is simply defined as: + * + * #define TRACE_EVENT(name, proto, args, tstruct, assign, printk) \ + * DEFINE_EVENT_CLASS(name, proto, args, tstruct, assign, printk); \ + * DEFINE_EVENT(name, name, proto, args) + * + * The DEFINE_EVENT() also can be declared with conditions and reg functions: + * + * DEFINE_EVENT_CONDITION(template, name, proto, args, cond); + * DEFINE_EVENT_FN(template, name, proto, args, reg, unreg); + */ +DECLARE_EVENT_CLASS(foo_template, + + TP_PROTO(const char *foo, int bar), + + TP_ARGS(foo, bar), + + TP_STRUCT__entry( + __string( foo, foo ) + __field( int, bar ) + ), + + TP_fast_assign( + __assign_str(foo, foo); + __entry->bar = bar; + ), + + TP_printk("foo %s %d", __get_str(foo), __entry->bar) +); + +/* + * Here's a better way for the previous samples (except, the first + * exmaple had more fields and could not be used here). + */ +DEFINE_EVENT(foo_template, foo_with_template_simple, + TP_PROTO(const char *foo, int bar), + TP_ARGS(foo, bar)); + +DEFINE_EVENT_CONDITION(foo_template, foo_with_template_cond, + TP_PROTO(const char *foo, int bar), + TP_ARGS(foo, bar), + TP_CONDITION(!(bar % 8))); + + +DEFINE_EVENT_FN(foo_template, foo_with_template_fn, + TP_PROTO(const char *foo, int bar), + TP_ARGS(foo, bar), + foo_bar_reg, foo_bar_unreg); + +/* + * Anytime two events share basically the same values and have + * the same output, use the DECLARE_EVENT_CLASS() and DEFINE_EVENT() + * when ever possible. + */ + +/* + * If the event is similar to the DECLARE_EVENT_CLASS, but you need + * to have a different output, then use DEFINE_EVENT_PRINT() which + * lets you override the TP_printk() of the class. + */ + +DEFINE_EVENT_PRINT(foo_template, foo_with_template_print, + TP_PROTO(const char *foo, int bar), + TP_ARGS(foo, bar), + TP_printk("bar %s %d", __get_str(foo), __entry->bar)); + #endif /***** NOTICE! The #if protection ends here. *****/ From 7215853e985a4bef1a6c14e00e89dfec84f1e457 Mon Sep 17 00:00:00 2001 From: Vikram Mulukutla Date: Wed, 17 Dec 2014 18:50:56 -0800 Subject: [PATCH 504/601] tracing: Fix unmapping loop in tracing_mark_write Commit 6edb2a8a385f0cdef51dae37ff23e74d76d8a6ce introduced an array map_pages that contains the addresses returned by kmap_atomic. However, when unmapping those pages, map_pages[0] is unmapped before map_pages[1], breaking the nesting requirement as specified in the documentation for kmap_atomic/kunmap_atomic. This was caught by the highmem debug code present in kunmap_atomic. Fix the loop to do the unmapping properly. Link: http://lkml.kernel.org/r/1418871056-6614-1-git-send-email-markivx@codeaurora.org Cc: stable@vger.kernel.org # 3.5+ Reviewed-by: Stephen Boyd Reported-by: Lime Yang Signed-off-by: Vikram Mulukutla Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5afce60e1b68e4..2078b86750e050 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4928,7 +4928,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, *fpos += written; out_unlock: - for (i = 0; i < nr_pages; i++){ + for (i = nr_pages - 1; i >= 0; i--) { kunmap_atomic(map_page[i]); put_page(pages[i]); } From 480486b4733d5bc7d9fe765b34bc6c2b72d5c12e Mon Sep 17 00:00:00 2001 From: Tom Haynes Date: Mon, 9 Feb 2015 17:48:32 -0800 Subject: [PATCH 505/601] pnfs/flexfiles: Do not dprintk after the free Found by 0-DAY kernel test infrastructure: fs/nfs/flexfilelayout/flexfilelayoutdev.c:520:13-16: ERROR: reference preceded by free on line 518 fs/nfs/flexfilelayout/flexfilelayoutdev.c:520:26-29: ERROR: reference preceded by free on line 518 fs/nfs/flexfilelayout/flexfilelayoutdev.c:520:39-42: ERROR: reference preceded by free on line 518 fs/nfs/flexfilelayout/flexfilelayoutdev.c:521:3-6: ERROR: reference preceded by free on line 518 Reported-by: Julia Lawall Signed-off-by: Tom Haynes Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 3bbb16b3066f69..e2c01f204a956b 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -515,10 +515,10 @@ int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, *p++ = cpu_to_be32(err->opnum); *count += 1; list_del(&err->list); - kfree(err); dprintk("%s: offset %llu length %llu status %d op %d count %d\n", __func__, err->offset, err->length, err->status, err->opnum, *count); + kfree(err); } return 0; From 4e1c0664de11e4b5861957ab4ddff2aeeffd042f Mon Sep 17 00:00:00 2001 From: Jon Medhurst Date: Tue, 10 Feb 2015 15:03:22 +0800 Subject: [PATCH 506/601] ARM: kprobes: Fix compilation error caused by superfluous '*' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a superfluous '*' in the definition of kprobe_decode_insn_t which on older versions of GCC (4.2.4) causes the compilation error: In file included from arch/arm/probes/kprobes/core.c:37: arch/arm/probes/kprobes/core.h:43: error: '[*]' not allowed in other than a declaration Fix this by removing the unneeded character. Reported-by: Janusz Użycki Signed-off-by: Jon Medhurst --- arch/arm/probes/kprobes/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/probes/kprobes/core.h b/arch/arm/probes/kprobes/core.h index b3036c587a764d..ec5d1f20a085fe 100644 --- a/arch/arm/probes/kprobes/core.h +++ b/arch/arm/probes/kprobes/core.h @@ -40,7 +40,7 @@ typedef enum probes_insn (kprobe_decode_insn_t)(probes_opcode_t, struct arch_probes_insn *, bool, const union decode_action *, - const struct decode_checker *[*]); + const struct decode_checker *[]); #ifdef CONFIG_THUMB2_KERNEL From 45cce4ccafe3cddc924ef5221d22b9853fc9a13c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 9 Feb 2015 12:54:16 +0100 Subject: [PATCH 507/601] s390/cacheinfo: don't use smp_processor_id() in preemptible context show_cacheinfo() needs to access the cacheinfo structure of any online cpu. This was done with using smp_processor_id() as in index while in preemtible context. This means the cpu could be offline and the data be gone when it would be accessed. Better use any online cpu address and protect the data by get_online_cpus() and put_online_cpus(). Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/cache.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index fe21f074cf9f7f..632fa06ea162c5 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -66,10 +66,12 @@ static const enum cache_type cache_type_map[] = { void show_cacheinfo(struct seq_file *m) { - int cpu = smp_processor_id(), idx; - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cpu_cacheinfo *this_cpu_ci; struct cacheinfo *cache; + int idx; + get_online_cpus(); + this_cpu_ci = get_cpu_cacheinfo(cpumask_any(cpu_online_mask)); for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) { cache = this_cpu_ci->info_list + idx; seq_printf(m, "cache%-11d: ", idx); @@ -82,6 +84,7 @@ void show_cacheinfo(struct seq_file *m) seq_printf(m, "associativity=%d", cache->ways_of_associativity); seq_puts(m, "\n"); } + put_online_cpus(); } static inline enum cache_type get_cache_type(struct cache_info *ci, int level) From ada63d40747128d922d69f73a37b65e1e5403cdf Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 29 Jan 2015 18:09:13 +0100 Subject: [PATCH 508/601] ARM: 8300/1: teach __asmeq that r11 == fp and r12 == ip The __asmeq macro is used inside inline asm statements to ensure that register asm variables that explicitly specify a register are mapped correctly onto those registers when used in inline asm input and output constraints. However, the string based matching fails to take into account that 'fp' is often referred to as 'r11' and 'ip' is often referred to as 'r12', (e.g., by clang), causing false negatives. Fix this by making __asmeq consider the ("fp","r11"), ("r11","fp"), ("ip","r12") and ("r12","ip") cases specifically. Reviewed-by: Alex Elder Acked-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/include/asm/compiler.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/compiler.h b/arch/arm/include/asm/compiler.h index 8155db2f7fa16e..29fe85e5943907 100644 --- a/arch/arm/include/asm/compiler.h +++ b/arch/arm/include/asm/compiler.h @@ -8,8 +8,21 @@ * This string is meant to be concatenated with the inline asm string and * will cause compilation to stop on mismatch. * (for details, see gcc PR 15089) + * For compatibility with clang, we have to specifically take the equivalence + * of 'r11' <-> 'fp' and 'r12' <-> 'ip' into account as well. */ -#define __asmeq(x, y) ".ifnc " x "," y " ; .err ; .endif\n\t" +#define __asmeq(x, y) \ + ".ifnc " x "," y "; " \ + ".ifnc " x y ",fpr11; " \ + ".ifnc " x y ",r11fp; " \ + ".ifnc " x y ",ipr12; " \ + ".ifnc " x y ",r12ip; " \ + ".err; " \ + ".endif; " \ + ".endif; " \ + ".endif; " \ + ".endif; " \ + ".endif\n\t" #endif /* __ASM_ARM_COMPILER_H */ From bafe5865834c0cc0d0f937deecc275ccdd588ce8 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sat, 31 Jan 2015 00:25:30 +0100 Subject: [PATCH 509/601] ARM: 8302/1: Add a secondary_startup that assumes ARM mode Some platforms always enter the kernel in ARM mode even if the kernel is compiled for THUMB2. Add a small wrapper on top of secondary_startup() that switches into THUMB2 mode. Signed-off-by: Stephen Boyd Acked-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/kernel/head.S | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index ecb7be8b085498..01963273c07a7b 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -346,6 +346,12 @@ __turn_mmu_on_loc: #if defined(CONFIG_SMP) .text +ENTRY(secondary_startup_arm) + .arm + THUMB( adr r9, BSYM(1f) ) @ Kernel is entered in ARM. + THUMB( bx r9 ) @ If this is a Thumb-2 kernel, + THUMB( .thumb ) @ switch to Thumb now. + THUMB(1: ) ENTRY(secondary_startup) /* * Common entry point for secondary CPUs. @@ -385,6 +391,7 @@ ENTRY(secondary_startup) THUMB( add r12, r10, #PROCINFO_INITFUNC ) THUMB( ret r12 ) ENDPROC(secondary_startup) +ENDPROC(secondary_startup_arm) /* * r6 = &secondary_data From 8684014d71e259f62f7dc24a20324e232806b2ef Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sat, 31 Jan 2015 00:25:31 +0100 Subject: [PATCH 510/601] ARM: 8301/1: qcom: Use secondary_startup_arm() On qcom platforms we always enter the kernel in ARM mode, regardless of the kernel being compiled for THUMB mode. Use secondary_startup_arm() to properly switch the mode to what the kernel expects if required. Signed-off-by: Stephen Boyd Acked-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/mach-qcom/platsmp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-qcom/platsmp.c b/arch/arm/mach-qcom/platsmp.c index d6908569ecaf14..09cffed4c0a445 100644 --- a/arch/arm/mach-qcom/platsmp.c +++ b/arch/arm/mach-qcom/platsmp.c @@ -44,7 +44,7 @@ #define APCS_SAW2_VCTL 0x14 #define APCS_SAW2_2_VCTL 0x1c -extern void secondary_startup(void); +extern void secondary_startup_arm(void); static DEFINE_SPINLOCK(boot_lock); @@ -337,7 +337,7 @@ static void __init qcom_smp_prepare_cpus(unsigned int max_cpus) flags |= cold_boot_flags[map]; } - if (scm_set_boot_addr(virt_to_phys(secondary_startup), flags)) { + if (scm_set_boot_addr(virt_to_phys(secondary_startup_arm), flags)) { for_each_present_cpu(cpu) { if (cpu == smp_processor_id()) continue; From 3cf385713460eb2bb4cb7ceb8ed89833b00b594b Mon Sep 17 00:00:00 2001 From: Antonios Motakis Date: Tue, 6 Jan 2015 11:15:11 +0100 Subject: [PATCH 511/601] ARM: 8256/1: driver coamba: add device binding path 'driver_override' As already demonstrated with PCI [1] and the platform bus [2], a driver_override property in sysfs can be used to bypass the id matching of a device to a AMBA driver. This can be used by VFIO to bind to any AMBA device requested by the user. [1] http://lists-archives.com/linux-kernel/28030441-pci-introduce-new-device-binding-path-using-pci_dev-driver_override.html [2] https://www.redhat.com/archives/libvir-list/2014-April/msg00382.html Signed-off-by: Antonios Motakis Reviewed-by: Kim Phillips Signed-off-by: Russell King --- Documentation/ABI/testing/sysfs-bus-amba | 20 ++++++++++ drivers/amba/bus.c | 47 ++++++++++++++++++++++++ include/linux/amba/bus.h | 1 + 3 files changed, 68 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-amba diff --git a/Documentation/ABI/testing/sysfs-bus-amba b/Documentation/ABI/testing/sysfs-bus-amba new file mode 100644 index 00000000000000..e7b54677cfbeb2 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-amba @@ -0,0 +1,20 @@ +What: /sys/bus/amba/devices/.../driver_override +Date: September 2014 +Contact: Antonios Motakis +Description: + This file allows the driver for a device to be specified which + will override standard OF, ACPI, ID table, and name matching. + When specified, only a driver with a name matching the value + written to driver_override will have an opportunity to bind to + the device. The override is specified by writing a string to the + driver_override file (echo vfio-amba > driver_override) and may + be cleared with an empty string (echo > driver_override). + This returns the device to standard matching rules binding. + Writing to driver_override does not automatically unbind the + device from its current driver or make any attempt to + automatically load the specified driver. If no driver with a + matching name is currently loaded in the kernel, the device will + not bind to any driver. This also allows devices to opt-out of + driver binding using a driver_override name such as "none". + Only a single driver may be specified in the override, there is + no support for parsing delimiters. diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index 52ddd9fbb55e9c..f0099360039e24 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -43,6 +44,10 @@ static int amba_match(struct device *dev, struct device_driver *drv) struct amba_device *pcdev = to_amba_device(dev); struct amba_driver *pcdrv = to_amba_driver(drv); + /* When driver_override is set, only bind to the matching driver */ + if (pcdev->driver_override) + return !strcmp(pcdev->driver_override, drv->name); + return amba_lookup(pcdrv->id_table, pcdev) != NULL; } @@ -59,6 +64,47 @@ static int amba_uevent(struct device *dev, struct kobj_uevent_env *env) return retval; } +static ssize_t driver_override_show(struct device *_dev, + struct device_attribute *attr, char *buf) +{ + struct amba_device *dev = to_amba_device(_dev); + + if (!dev->driver_override) + return 0; + + return sprintf(buf, "%s\n", dev->driver_override); +} + +static ssize_t driver_override_store(struct device *_dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct amba_device *dev = to_amba_device(_dev); + char *driver_override, *old = dev->driver_override, *cp; + + if (count > PATH_MAX) + return -EINVAL; + + driver_override = kstrndup(buf, count, GFP_KERNEL); + if (!driver_override) + return -ENOMEM; + + cp = strchr(driver_override, '\n'); + if (cp) + *cp = '\0'; + + if (strlen(driver_override)) { + dev->driver_override = driver_override; + } else { + kfree(driver_override); + dev->driver_override = NULL; + } + + kfree(old); + + return count; +} + #define amba_attr_func(name,fmt,arg...) \ static ssize_t name##_show(struct device *_dev, \ struct device_attribute *attr, char *buf) \ @@ -81,6 +127,7 @@ amba_attr_func(resource, "\t%016llx\t%016llx\t%016lx\n", static struct device_attribute amba_dev_attrs[] = { __ATTR_RO(id), __ATTR_RO(resource), + __ATTR_RW(driver_override), __ATTR_NULL, }; diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 0ab5f8e0dea264..50fc6686840212 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -33,6 +33,7 @@ struct amba_device { struct clk *pclk; unsigned int periphid; unsigned int irq[AMBA_NR_IRQS]; + char *driver_override; }; struct amba_driver { From 4c21462acc530bb81c6ae30e5bbd0b06f8c50626 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 10 Feb 2015 11:03:22 +0300 Subject: [PATCH 512/601] pnfs: delete an unintended goto There was an extra goto here where it shouldn't be, because of a merge error. Fixes: e2c63e091e29 ('Merge branch 'flexfiles'') Signed-off-by: Dan Carpenter Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 107b321be7d4e2..4f802b02fbb9b0 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1078,7 +1078,6 @@ bool pnfs_roc(struct inode *ino) goto out_noroc; } - goto out_noroc; pnfs_clear_retry_layoutget(lo); list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { From 34c0dad752294f373a0720840f59e186788ba227 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 9 Feb 2015 14:49:10 +0100 Subject: [PATCH 513/601] s390/hypfs: Add diagnose 0c support With this feature, you can read the CPU performance metrics provided by the z/VM diagnose 0C. This then allows to get the management time for each online CPU of the guest where the diagnose is executed. The new debugfs file /sys/kernel/debug/s390_hypfs/diag_0c exports the diag0C binary data to user space via an open/read/close interface. The binary data consists out of a header structure followed by an array that contains the diagnose 0c data for each online CPU. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/Makefile | 1 + arch/s390/hypfs/hypfs.h | 4 + arch/s390/hypfs/hypfs_diag0c.c | 139 +++++++++++++++++++++++++++++ arch/s390/hypfs/inode.c | 9 +- arch/s390/include/uapi/asm/hypfs.h | 35 +++++++- 5 files changed, 184 insertions(+), 4 deletions(-) create mode 100644 arch/s390/hypfs/hypfs_diag0c.c diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile index 06f8d95a16cdc9..2ee25ba252d68a 100644 --- a/arch/s390/hypfs/Makefile +++ b/arch/s390/hypfs/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_S390_HYPFS_FS) += s390_hypfs.o s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o hypfs_sprp.o +s390_hypfs-objs += hypfs_diag0c.o diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h index b34b5ab90a313b..36d093fa9fb30b 100644 --- a/arch/s390/hypfs/hypfs.h +++ b/arch/s390/hypfs/hypfs.h @@ -37,6 +37,10 @@ extern int hypfs_vm_init(void); extern void hypfs_vm_exit(void); extern int hypfs_vm_create_files(struct dentry *root); +/* VM diagnose 0c */ +int hypfs_diag0c_init(void); +void hypfs_diag0c_exit(void); + /* Set Partition-Resource Parameter */ int hypfs_sprp_init(void); void hypfs_sprp_exit(void); diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c new file mode 100644 index 00000000000000..d4c0d371754303 --- /dev/null +++ b/arch/s390/hypfs/hypfs_diag0c.c @@ -0,0 +1,139 @@ +/* + * Hypervisor filesystem for Linux on s390 + * + * Diag 0C implementation + * + * Copyright IBM Corp. 2014 + */ + +#include +#include +#include +#include "hypfs.h" + +#define DBFS_D0C_HDR_VERSION 0 + +/* + * Execute diagnose 0c in 31 bit mode + */ +static void diag0c(struct hypfs_diag0c_entry *entry) +{ + asm volatile ( +#ifdef CONFIG_64BIT + " sam31\n" + " diag %0,%0,0x0c\n" + " sam64\n" +#else + " diag %0,%0,0x0c\n" +#endif + : /* no output register */ + : "a" (entry) + : "memory"); +} + +/* + * Get hypfs_diag0c_entry from CPU vector and store diag0c data + */ +static void diag0c_fn(void *data) +{ + diag0c(((void **) data)[smp_processor_id()]); +} + +/* + * Allocate buffer and store diag 0c data + */ +static void *diag0c_store(unsigned int *count) +{ + struct hypfs_diag0c_data *diag0c_data; + unsigned int cpu_count, cpu, i; + void **cpu_vec; + + get_online_cpus(); + cpu_count = num_online_cpus(); + cpu_vec = kmalloc(sizeof(*cpu_vec) * num_possible_cpus(), GFP_KERNEL); + if (!cpu_vec) + goto fail_put_online_cpus; + /* Note: Diag 0c needs 8 byte alignment and real storage */ + diag0c_data = kzalloc(sizeof(struct hypfs_diag0c_hdr) + + cpu_count * sizeof(struct hypfs_diag0c_entry), + GFP_KERNEL | GFP_DMA); + if (!diag0c_data) + goto fail_kfree_cpu_vec; + i = 0; + /* Fill CPU vector for each online CPU */ + for_each_online_cpu(cpu) { + diag0c_data->entry[i].cpu = cpu; + cpu_vec[cpu] = &diag0c_data->entry[i++]; + } + /* Collect data all CPUs */ + on_each_cpu(diag0c_fn, cpu_vec, 1); + *count = cpu_count; + kfree(cpu_vec); + put_online_cpus(); + return diag0c_data; + +fail_kfree_cpu_vec: + kfree(cpu_vec); +fail_put_online_cpus: + put_online_cpus(); + return ERR_PTR(-ENOMEM); +} + +/* + * Hypfs DBFS callback: Free diag 0c data + */ +static void dbfs_diag0c_free(const void *data) +{ + kfree(data); +} + +/* + * Hypfs DBFS callback: Create diag 0c data + */ +static int dbfs_diag0c_create(void **data, void **data_free_ptr, size_t *size) +{ + struct hypfs_diag0c_data *diag0c_data; + unsigned int count; + + diag0c_data = diag0c_store(&count); + if (IS_ERR(diag0c_data)) + return PTR_ERR(diag0c_data); + memset(&diag0c_data->hdr, 0, sizeof(diag0c_data->hdr)); + get_tod_clock_ext(diag0c_data->hdr.tod_ext); + diag0c_data->hdr.len = count * sizeof(struct hypfs_diag0c_entry); + diag0c_data->hdr.version = DBFS_D0C_HDR_VERSION; + diag0c_data->hdr.count = count; + *data = diag0c_data; + *data_free_ptr = diag0c_data; + *size = diag0c_data->hdr.len + sizeof(struct hypfs_diag0c_hdr); + return 0; +} + +/* + * Hypfs DBFS file structure + */ +static struct hypfs_dbfs_file dbfs_file_0c = { + .name = "diag_0c", + .data_create = dbfs_diag0c_create, + .data_free = dbfs_diag0c_free, +}; + +/* + * Initialize diag 0c interface for z/VM + */ +int __init hypfs_diag0c_init(void) +{ + if (!MACHINE_IS_VM) + return 0; + return hypfs_dbfs_create_file(&dbfs_file_0c); +} + +/* + * Shutdown diag 0c interface for z/VM + */ +void hypfs_diag0c_exit(void) +{ + if (!MACHINE_IS_VM) + return; + hypfs_dbfs_remove_file(&dbfs_file_0c); +} diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index c952b981e4f288..4c8008dd938e8d 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -482,10 +482,14 @@ static int __init hypfs_init(void) rc = -ENODATA; goto fail_hypfs_vm_exit; } + if (hypfs_diag0c_init()) { + rc = -ENODATA; + goto fail_hypfs_sprp_exit; + } s390_kobj = kobject_create_and_add("s390", hypervisor_kobj); if (!s390_kobj) { rc = -ENOMEM; - goto fail_hypfs_sprp_exit; + goto fail_hypfs_diag0c_exit; } rc = register_filesystem(&hypfs_type); if (rc) @@ -494,6 +498,8 @@ static int __init hypfs_init(void) fail_filesystem: kobject_put(s390_kobj); +fail_hypfs_diag0c_exit: + hypfs_diag0c_exit(); fail_hypfs_sprp_exit: hypfs_sprp_exit(); fail_hypfs_vm_exit: @@ -510,6 +516,7 @@ static void __exit hypfs_exit(void) { unregister_filesystem(&hypfs_type); kobject_put(s390_kobj); + hypfs_diag0c_exit(); hypfs_sprp_exit(); hypfs_vm_exit(); hypfs_diag_exit(); diff --git a/arch/s390/include/uapi/asm/hypfs.h b/arch/s390/include/uapi/asm/hypfs.h index 37998b449531ed..b3fe12d8dd87f7 100644 --- a/arch/s390/include/uapi/asm/hypfs.h +++ b/arch/s390/include/uapi/asm/hypfs.h @@ -1,16 +1,19 @@ /* - * IOCTL interface for hypfs + * Structures for hypfs interface * * Copyright IBM Corp. 2013 * * Author: Martin Schwidefsky */ -#ifndef _ASM_HYPFS_CTL_H -#define _ASM_HYPFS_CTL_H +#ifndef _ASM_HYPFS_H +#define _ASM_HYPFS_H #include +/* + * IOCTL for binary interface /sys/kernel/debug/diag_304 + */ struct hypfs_diag304 { __u32 args[2]; __u64 data; @@ -22,4 +25,30 @@ struct hypfs_diag304 { #define HYPFS_DIAG304 \ _IOWR(HYPFS_IOCTL_MAGIC, 0x20, struct hypfs_diag304) +/* + * Structures for binary interface /sys/kernel/debug/diag_0c + */ +struct hypfs_diag0c_hdr { + __u64 len; /* Length of diag0c buffer without header */ + __u16 version; /* Version of header */ + char reserved1[6]; /* Reserved */ + char tod_ext[16]; /* TOD clock for diag0c */ + __u64 count; /* Number of entries (CPUs) in diag0c array */ + char reserved2[24]; /* Reserved */ +}; + +struct hypfs_diag0c_entry { + char date[8]; /* MM/DD/YY in EBCDIC */ + char time[8]; /* HH:MM:SS in EBCDIC */ + __u64 virtcpu; /* Virtual time consumed by the virt CPU (us) */ + __u64 totalproc; /* Total of virtual and simulation time (us) */ + __u32 cpu; /* Linux logical CPU number */ + __u32 reserved; /* Align to 8 byte */ +}; + +struct hypfs_diag0c_data { + struct hypfs_diag0c_hdr hdr; /* 64 byte header */ + struct hypfs_diag0c_entry entry[]; /* diag0c entry array */ +}; + #endif From a178220df3a55c3ec46c2c2f0788a377acdb79c3 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Mon, 9 Feb 2015 14:49:32 +0100 Subject: [PATCH 514/601] s390/hypfs: Eliminate hypfs interval Currently the binary hypfs interfaces provides new data only once within an interval time of one second. This patch removes this restriction and now new data is returned immediately on every read on a hypfs binary file. This is done in order to allow more consistent snapshots for programs that read multiple hypfs binary files. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/hypfs.h | 3 --- arch/s390/hypfs/hypfs_dbfs.c | 49 +++++++++--------------------------- 2 files changed, 12 insertions(+), 40 deletions(-) diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h index 36d093fa9fb30b..eecde500ed49e2 100644 --- a/arch/s390/hypfs/hypfs.h +++ b/arch/s390/hypfs/hypfs.h @@ -53,7 +53,6 @@ struct hypfs_dbfs_data { void *buf_free_ptr; size_t size; struct hypfs_dbfs_file *dbfs_file; - struct kref kref; }; struct hypfs_dbfs_file { @@ -65,8 +64,6 @@ struct hypfs_dbfs_file { unsigned long); /* Private data for hypfs_dbfs.c */ - struct hypfs_dbfs_data *data; - struct delayed_work data_free_work; struct mutex lock; struct dentry *dentry; }; diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index 47fe1055c714a3..752f6df3e6974a 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -17,33 +17,16 @@ static struct hypfs_dbfs_data *hypfs_dbfs_data_alloc(struct hypfs_dbfs_file *f) data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) return NULL; - kref_init(&data->kref); data->dbfs_file = f; return data; } -static void hypfs_dbfs_data_free(struct kref *kref) +static void hypfs_dbfs_data_free(struct hypfs_dbfs_data *data) { - struct hypfs_dbfs_data *data; - - data = container_of(kref, struct hypfs_dbfs_data, kref); data->dbfs_file->data_free(data->buf_free_ptr); kfree(data); } -static void data_free_delayed(struct work_struct *work) -{ - struct hypfs_dbfs_data *data; - struct hypfs_dbfs_file *df; - - df = container_of(work, struct hypfs_dbfs_file, data_free_work.work); - mutex_lock(&df->lock); - data = df->data; - df->data = NULL; - mutex_unlock(&df->lock); - kref_put(&data->kref, hypfs_dbfs_data_free); -} - static ssize_t dbfs_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { @@ -56,28 +39,21 @@ static ssize_t dbfs_read(struct file *file, char __user *buf, df = file_inode(file)->i_private; mutex_lock(&df->lock); - if (!df->data) { - data = hypfs_dbfs_data_alloc(df); - if (!data) { - mutex_unlock(&df->lock); - return -ENOMEM; - } - rc = df->data_create(&data->buf, &data->buf_free_ptr, - &data->size); - if (rc) { - mutex_unlock(&df->lock); - kfree(data); - return rc; - } - df->data = data; - schedule_delayed_work(&df->data_free_work, HZ); + data = hypfs_dbfs_data_alloc(df); + if (!data) { + mutex_unlock(&df->lock); + return -ENOMEM; + } + rc = df->data_create(&data->buf, &data->buf_free_ptr, &data->size); + if (rc) { + mutex_unlock(&df->lock); + kfree(data); + return rc; } - data = df->data; - kref_get(&data->kref); mutex_unlock(&df->lock); rc = simple_read_from_buffer(buf, size, ppos, data->buf, data->size); - kref_put(&data->kref, hypfs_dbfs_data_free); + hypfs_dbfs_data_free(data); return rc; } @@ -108,7 +84,6 @@ int hypfs_dbfs_create_file(struct hypfs_dbfs_file *df) if (IS_ERR(df->dentry)) return PTR_ERR(df->dentry); mutex_init(&df->lock); - INIT_DELAYED_WORK(&df->data_free_work, data_free_delayed); return 0; } From 6a039eab53c01a58bfff95c78fc800ca7de27c77 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 9 Feb 2015 16:47:00 +0100 Subject: [PATCH 515/601] s390/process: free vx save area when releasing tasks If a task uses vector registers, a save area is allocated to save/restore register states. Free the save area when releasing the task. Found the Memory leak with kmemleak: unreferenced object 0x72885e00 (size 512): comm "vx-test", pid 26123, jiffies 4294945635 (age 256.810s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 01 db 71 06 41 .............q.A 00 00 00 00 00 00 00 00 24 f7 a9 a7 51 94 79 bb ........$...Q.y. backtrace: [<00000000002d1c8a>] kmem_cache_alloc_trace+0x272/0x3d0 [<00000000001014ac>] alloc_vector_registers+0x54/0x138 [<00000000001017c8>] data_exception+0x158/0x1b0 [<00000000008b551e>] pgm_check_handler+0x13e/0x180 [<00000000800008b6>] 0x800008b6 Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/process.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 2c1eb4f3aaf5ec..13fc0978ca7e77 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -79,6 +79,14 @@ void release_thread(struct task_struct *dead_task) { } +#ifdef CONFIG_64BIT +void arch_release_task_struct(struct task_struct *tsk) +{ + if (tsk->thread.vxrs) + kfree(tsk->thread.vxrs); +} +#endif + int copy_thread(unsigned long clone_flags, unsigned long new_stackp, unsigned long arg, struct task_struct *p) { From c627d31ba0696cbd829437af2be2f2dee3546b1e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Feb 2015 11:06:04 -0500 Subject: [PATCH 516/601] SUNRPC: Cleanup to remove xs_tcp_close() xs_tcp_close() is now just a call to xs_tcp_shutdown(), so remove it, and replace the entry in xs_tcp_ops. Suggested-by: Anna Schumaker Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 19f7526f896589..66891e32c5e311 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -863,11 +863,6 @@ static void xs_close(struct rpc_xprt *xprt) xprt_disconnect_done(xprt); } -static void xs_tcp_close(struct rpc_xprt *xprt) -{ - xs_tcp_shutdown(xprt); -} - static void xs_xprt_free(struct rpc_xprt *xprt) { xs_free_peer_addresses(xprt); @@ -2500,7 +2495,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, - .close = xs_tcp_close, + .close = xs_tcp_shutdown, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, }; From 1e0d6714aceb770b04161fbedd7765d0e1fc27bd Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 10 Feb 2015 22:14:53 -0500 Subject: [PATCH 517/601] ring-buffer: Do not wake up a splice waiter when page is not full When an application connects to the ring buffer via splice, it can only read full pages. Splice does not work with partial pages. If there is not enough data to fill a page, the splice command will either block or return -EAGAIN (if set to nonblock). Code was added where if the page is not full, to just sleep again. The problem is, it will get woken up again on the next event. That is, when something is written into the ring buffer, if there is a waiter it will wake it up. The waiter would then check the buffer, see that it still does not have enough data to fill a page and go back to sleep. To make matters worse, when the waiter goes back to sleep, it could cause another event, which would wake it back up again to see it doesn't have enough data and sleep again. This produces a tremendous overhead and fills the ring buffer with noise. For example, recording sched_switch on an idle system for 10 seconds produces 25,350,475 events!!! Create another wait queue for those waiters wanting full pages. When an event is written, it only wakes up waiters if there's a full page of data. It does not wake up the waiter if the page is not yet full. After this change, recording sched_switch on an idle system for 10 seconds produces only 800 events. Getting rid of 25,349,675 useless events (99.9969% of events!!), is something to take seriously. Cc: stable@vger.kernel.org # 3.16+ Cc: Rabin Vincent Fixes: e30f53aad220 "tracing: Do not busy wait in buffer splice" Signed-off-by: Steven Rostedt --- kernel/trace/ring_buffer.c | 40 +++++++++++++++++++++++++++++++++----- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 96079180de3d66..5040d44fe5a3cf 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -445,7 +445,10 @@ int ring_buffer_print_page_header(struct trace_seq *s) struct rb_irq_work { struct irq_work work; wait_queue_head_t waiters; + wait_queue_head_t full_waiters; bool waiters_pending; + bool full_waiters_pending; + bool wakeup_full; }; /* @@ -527,6 +530,10 @@ static void rb_wake_up_waiters(struct irq_work *work) struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work); wake_up_all(&rbwork->waiters); + if (rbwork->wakeup_full) { + rbwork->wakeup_full = false; + wake_up_all(&rbwork->full_waiters); + } } /** @@ -551,9 +558,11 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full) * data in any cpu buffer, or a specific buffer, put the * caller on the appropriate wait queue. */ - if (cpu == RING_BUFFER_ALL_CPUS) + if (cpu == RING_BUFFER_ALL_CPUS) { work = &buffer->irq_work; - else { + /* Full only makes sense on per cpu reads */ + full = false; + } else { if (!cpumask_test_cpu(cpu, buffer->cpumask)) return -ENODEV; cpu_buffer = buffer->buffers[cpu]; @@ -562,7 +571,10 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full) while (true) { - prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); + if (full) + prepare_to_wait(&work->full_waiters, &wait, TASK_INTERRUPTIBLE); + else + prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); /* * The events can happen in critical sections where @@ -584,7 +596,10 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full) * that is necessary is that the wake up happens after * a task has been queued. It's OK for spurious wake ups. */ - work->waiters_pending = true; + if (full) + work->full_waiters_pending = true; + else + work->waiters_pending = true; if (signal_pending(current)) { ret = -EINTR; @@ -613,7 +628,10 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full) schedule(); } - finish_wait(&work->waiters, &wait); + if (full) + finish_wait(&work->full_waiters, &wait); + else + finish_wait(&work->waiters, &wait); return ret; } @@ -1228,6 +1246,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu) init_completion(&cpu_buffer->update_done); init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters); init_waitqueue_head(&cpu_buffer->irq_work.waiters); + init_waitqueue_head(&cpu_buffer->irq_work.full_waiters); bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), GFP_KERNEL, cpu_to_node(cpu)); @@ -2799,6 +2818,8 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, static __always_inline void rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) { + bool pagebusy; + if (buffer->irq_work.waiters_pending) { buffer->irq_work.waiters_pending = false; /* irq_work_queue() supplies it's own memory barriers */ @@ -2810,6 +2831,15 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer) /* irq_work_queue() supplies it's own memory barriers */ irq_work_queue(&cpu_buffer->irq_work.work); } + + pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; + + if (!pagebusy && cpu_buffer->irq_work.full_waiters_pending) { + cpu_buffer->irq_work.wakeup_full = true; + cpu_buffer->irq_work.full_waiters_pending = false; + /* irq_work_queue() supplies it's own memory barriers */ + irq_work_queue(&cpu_buffer->irq_work.work); + } } /** From 04f81f0154e4bf002be6f4d85668ce1257efa4d9 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Wed, 11 Feb 2015 14:46:37 -0500 Subject: [PATCH 518/601] cipso: don't use IPCB() to locate the CIPSO IP option Using the IPCB() macro to get the IPv4 options is convenient, but unfortunately NetLabel often needs to examine the CIPSO option outside of the scope of the IP layer in the stack. While historically IPCB() worked above the IP layer, due to the inclusion of the inet_skb_param struct at the head of the {tcp,udp}_skb_cb structs, recent commit 971f10ec ("tcp: better TCP_SKB_CB layout to reduce cache line misses") reordered the tcp_skb_cb struct and invalidated this IPCB() trick. This patch fixes the problem by creating a new function, cipso_v4_optptr(), which locates the CIPSO option inside the IP header without calling IPCB(). Unfortunately, this isn't as fast as a simple lookup so some additional tweaks were made to limit the use of this new function. Cc: # 3.18 Reported-by: Casey Schaufler Signed-off-by: Paul Moore Tested-by: Casey Schaufler --- include/net/cipso_ipv4.h | 25 +++++++++++------- net/ipv4/cipso_ipv4.c | 51 +++++++++++++++++++++--------------- net/netlabel/netlabel_kapi.c | 15 +++++++---- 3 files changed, 56 insertions(+), 35 deletions(-) diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index a6fd939f202d39..3ebb168b9afc68 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -120,13 +120,6 @@ extern int cipso_v4_rbm_optfmt; extern int cipso_v4_rbm_strictvalid; #endif -/* - * Helper Functions - */ - -#define CIPSO_V4_OPTEXIST(x) (IPCB(x)->opt.cipso != 0) -#define CIPSO_V4_OPTPTR(x) (skb_network_header(x) + IPCB(x)->opt.cipso) - /* * DOI List Functions */ @@ -190,7 +183,7 @@ static inline int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, #ifdef CONFIG_NETLABEL void cipso_v4_cache_invalidate(void); -int cipso_v4_cache_add(const struct sk_buff *skb, +int cipso_v4_cache_add(const unsigned char *cipso_ptr, const struct netlbl_lsm_secattr *secattr); #else static inline void cipso_v4_cache_invalidate(void) @@ -198,7 +191,7 @@ static inline void cipso_v4_cache_invalidate(void) return; } -static inline int cipso_v4_cache_add(const struct sk_buff *skb, +static inline int cipso_v4_cache_add(const unsigned char *cipso_ptr, const struct netlbl_lsm_secattr *secattr) { return 0; @@ -211,6 +204,8 @@ static inline int cipso_v4_cache_add(const struct sk_buff *skb, #ifdef CONFIG_NETLABEL void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway); +int cipso_v4_getattr(const unsigned char *cipso, + struct netlbl_lsm_secattr *secattr); int cipso_v4_sock_setattr(struct sock *sk, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr); @@ -226,6 +221,7 @@ int cipso_v4_skbuff_setattr(struct sk_buff *skb, int cipso_v4_skbuff_delattr(struct sk_buff *skb); int cipso_v4_skbuff_getattr(const struct sk_buff *skb, struct netlbl_lsm_secattr *secattr); +unsigned char *cipso_v4_optptr(const struct sk_buff *skb); int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option); #else static inline void cipso_v4_error(struct sk_buff *skb, @@ -235,6 +231,12 @@ static inline void cipso_v4_error(struct sk_buff *skb, return; } +static inline int cipso_v4_getattr(const unsigned char *cipso, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + static inline int cipso_v4_sock_setattr(struct sock *sk, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr) @@ -282,6 +284,11 @@ static inline int cipso_v4_skbuff_getattr(const struct sk_buff *skb, return -ENOSYS; } +static inline unsigned char *cipso_v4_optptr(const struct sk_buff *skb) +{ + return NULL; +} + static inline int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) { diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 5160c710f2eb4d..e361ea6f3fc8ce 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -378,20 +378,18 @@ static int cipso_v4_cache_check(const unsigned char *key, * negative values on failure. * */ -int cipso_v4_cache_add(const struct sk_buff *skb, +int cipso_v4_cache_add(const unsigned char *cipso_ptr, const struct netlbl_lsm_secattr *secattr) { int ret_val = -EPERM; u32 bkt; struct cipso_v4_map_cache_entry *entry = NULL; struct cipso_v4_map_cache_entry *old_entry = NULL; - unsigned char *cipso_ptr; u32 cipso_ptr_len; if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0) return 0; - cipso_ptr = CIPSO_V4_OPTPTR(skb); cipso_ptr_len = cipso_ptr[1]; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); @@ -1578,6 +1576,33 @@ static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def, return 0; } +/** + * cipso_v4_optptr - Find the CIPSO option in the packet + * @skb: the packet + * + * Description: + * Parse the packet's IP header looking for a CIPSO option. Returns a pointer + * to the start of the CIPSO option on success, NULL if one if not found. + * + */ +unsigned char *cipso_v4_optptr(const struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + unsigned char *optptr = (unsigned char *)&(ip_hdr(skb)[1]); + int optlen; + int taglen; + + for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 0; ) { + if (optptr[0] == IPOPT_CIPSO) + return optptr; + taglen = optptr[1]; + optlen -= taglen; + optptr += taglen; + } + + return NULL; +} + /** * cipso_v4_validate - Validate a CIPSO option * @option: the start of the option, on error it is set to point to the error @@ -2119,8 +2144,8 @@ void cipso_v4_req_delattr(struct request_sock *req) * on success and negative values on failure. * */ -static int cipso_v4_getattr(const unsigned char *cipso, - struct netlbl_lsm_secattr *secattr) +int cipso_v4_getattr(const unsigned char *cipso, + struct netlbl_lsm_secattr *secattr) { int ret_val = -ENOMSG; u32 doi; @@ -2305,22 +2330,6 @@ int cipso_v4_skbuff_delattr(struct sk_buff *skb) return 0; } -/** - * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option - * @skb: the packet - * @secattr: the security attributes - * - * Description: - * Parse the given packet's CIPSO option and return the security attributes. - * Returns zero on success and negative values on failure. - * - */ -int cipso_v4_skbuff_getattr(const struct sk_buff *skb, - struct netlbl_lsm_secattr *secattr) -{ - return cipso_v4_getattr(CIPSO_V4_OPTPTR(skb), secattr); -} - /* * Setup Functions */ diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index a845cd4cf21e5f..28cddc85b7005a 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -1065,10 +1065,12 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr) { + unsigned char *ptr; + switch (family) { case AF_INET: - if (CIPSO_V4_OPTEXIST(skb) && - cipso_v4_skbuff_getattr(skb, secattr) == 0) + ptr = cipso_v4_optptr(skb); + if (ptr && cipso_v4_getattr(ptr, secattr) == 0) return 0; break; #if IS_ENABLED(CONFIG_IPV6) @@ -1094,7 +1096,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, */ void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway) { - if (CIPSO_V4_OPTEXIST(skb)) + if (cipso_v4_optptr(skb)) cipso_v4_error(skb, error, gateway); } @@ -1126,11 +1128,14 @@ void netlbl_cache_invalidate(void) int netlbl_cache_add(const struct sk_buff *skb, const struct netlbl_lsm_secattr *secattr) { + unsigned char *ptr; + if ((secattr->flags & NETLBL_SECATTR_CACHE) == 0) return -ENOMSG; - if (CIPSO_V4_OPTEXIST(skb)) - return cipso_v4_cache_add(skb, secattr); + ptr = cipso_v4_optptr(skb); + if (ptr) + return cipso_v4_cache_add(ptr, secattr); return -ENOMSG; } From e4b294c2d8f73af4cd41ff30638ad0e4769dc56a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 11 Feb 2015 15:24:46 -0800 Subject: [PATCH 519/601] mm: add fields for compound destructor and order into struct page Currently, we use lru.next/lru.prev plus cast to access or set destructor and order of compound page. Let's replace it with explicit fields in struct page. Signed-off-by: Kirill A. Shutemov Acked-by: Jerome Marchand Acked-by: Christoph Lameter Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 9 ++++----- include/linux/mm_types.h | 8 ++++++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 65db4aee738a1d..8dd4fde9d2e532 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -627,29 +627,28 @@ int split_free_page(struct page *page); * prototype for that function and accessor functions. * These are _only_ valid on the head of a PG_compound page. */ -typedef void compound_page_dtor(struct page *); static inline void set_compound_page_dtor(struct page *page, compound_page_dtor *dtor) { - page[1].lru.next = (void *)dtor; + page[1].compound_dtor = dtor; } static inline compound_page_dtor *get_compound_page_dtor(struct page *page) { - return (compound_page_dtor *)page[1].lru.next; + return page[1].compound_dtor; } static inline int compound_order(struct page *page) { if (!PageHead(page)) return 0; - return (unsigned long)page[1].lru.prev; + return page[1].compound_order; } static inline void set_compound_order(struct page *page, unsigned long order) { - page[1].lru.prev = (void *)order; + page[1].compound_order = order; } #ifdef CONFIG_MMU diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 07c8bd3f7b48b9..20ff2105b56415 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -28,6 +28,8 @@ struct mem_cgroup; IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) #define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) +typedef void compound_page_dtor(struct page *); + /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -142,6 +144,12 @@ struct page { struct rcu_head rcu_head; /* Used by SLAB * when destroying via RCU */ + /* First tail page of compound page */ + struct { + compound_page_dtor *compound_dtor; + unsigned long compound_order; + }; + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS pgtable_t pmd_huge_pte; /* protected by page->ptl */ #endif From 1d148e218a0d0566b1c06f2f45f1436d53b049b2 Mon Sep 17 00:00:00 2001 From: "Wang, Yalin" Date: Wed, 11 Feb 2015 15:24:48 -0800 Subject: [PATCH 520/601] mm: add VM_BUG_ON_PAGE() to page_mapcount() Add VM_BUG_ON_PAGE() for slab pages. _mapcount is an union with slab struct in struct page, so we must avoid accessing _mapcount if this page is a slab page. Also remove the unneeded bracket. Signed-off-by: Yalin Wang Acked-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 8dd4fde9d2e532..c6bf813a6b3d73 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -484,7 +484,8 @@ static inline void page_mapcount_reset(struct page *page) static inline int page_mapcount(struct page *page) { - return atomic_read(&(page)->_mapcount) + 1; + VM_BUG_ON_PAGE(PageSlab(page), page); + return atomic_read(&page->_mapcount) + 1; } static inline int page_count(struct page *page) From 56873f43abdcd574b25105867a990f067747b2f4 Mon Sep 17 00:00:00 2001 From: "Wang, Yalin" Date: Wed, 11 Feb 2015 15:24:51 -0800 Subject: [PATCH 521/601] mm:add KPF_ZERO_PAGE flag for /proc/kpageflags Add KPF_ZERO_PAGE flag for zero_page, so that userspace processes can detect zero_page in /proc/kpageflags, and then do memory analysis more accurately. Signed-off-by: Yalin Wang Acked-by: Kirill A. Shutemov Cc: Konstantin Khlebnikov Cc: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/pagemap.txt | 8 ++++++++ fs/proc/page.c | 16 +++++++++++++--- include/linux/huge_mm.h | 12 ++++++++++++ include/uapi/linux/kernel-page-flags.h | 1 + mm/huge_memory.c | 7 +------ tools/vm/page-types.c | 1 + 6 files changed, 36 insertions(+), 9 deletions(-) diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt index 5948e455c4d243..6fbd55ef6b4537 100644 --- a/Documentation/vm/pagemap.txt +++ b/Documentation/vm/pagemap.txt @@ -62,6 +62,8 @@ There are three components to pagemap: 20. NOPAGE 21. KSM 22. THP + 23. BALLOON + 24. ZERO_PAGE Short descriptions to the page flags: @@ -102,6 +104,12 @@ Short descriptions to the page flags: 22. THP contiguous pages which construct transparent hugepages +23. BALLOON + balloon compaction page + +24. ZERO_PAGE + zero page for pfn_zero or huge_zero page + [IO related page flags] 1. ERROR IO error occurred 3. UPTODATE page has up-to-date data diff --git a/fs/proc/page.c b/fs/proc/page.c index 1e3187da1fedbb..7eee2d8b97d978 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -121,9 +122,18 @@ u64 stable_page_flags(struct page *page) * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon * to make sure a given page is a thp, not a non-huge compound page. */ - else if (PageTransCompound(page) && (PageLRU(compound_head(page)) || - PageAnon(compound_head(page)))) - u |= 1 << KPF_THP; + else if (PageTransCompound(page)) { + struct page *head = compound_head(page); + + if (PageLRU(head) || PageAnon(head)) + u |= 1 << KPF_THP; + else if (is_huge_zero_page(head)) { + u |= 1 << KPF_ZERO_PAGE; + u |= 1 << KPF_THP; + } + } else if (is_zero_pfn(page_to_pfn(page))) + u |= 1 << KPF_ZERO_PAGE; + /* * Caveats on high order pages: page->_count will only be set diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ad9051bab267dc..f10b20f0515992 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -157,6 +157,13 @@ static inline int hpage_nr_pages(struct page *page) extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pmd_t pmd, pmd_t *pmdp); +extern struct page *huge_zero_page; + +static inline bool is_huge_zero_page(struct page *page) +{ + return ACCESS_ONCE(huge_zero_page) == page; +} + #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) @@ -206,6 +213,11 @@ static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_str return 0; } +static inline bool is_huge_zero_page(struct page *page) +{ + return false; +} + #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* _LINUX_HUGE_MM_H */ diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h index 2f96d233c980bd..a6c4962e5d4623 100644 --- a/include/uapi/linux/kernel-page-flags.h +++ b/include/uapi/linux/kernel-page-flags.h @@ -32,6 +32,7 @@ #define KPF_KSM 21 #define KPF_THP 22 #define KPF_BALLOON 23 +#define KPF_ZERO_PAGE 24 #endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 817a875f2b8c03..8897131809804e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -171,12 +171,7 @@ static int start_khugepaged(void) } static atomic_t huge_zero_refcount; -static struct page *huge_zero_page __read_mostly; - -static inline bool is_huge_zero_page(struct page *page) -{ - return ACCESS_ONCE(huge_zero_page) == page; -} +struct page *huge_zero_page __read_mostly; static inline bool is_huge_zero_pmd(pmd_t pmd) { diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 264fbc297e0b13..8bdf16b8ba6047 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -133,6 +133,7 @@ static const char * const page_flag_names[] = { [KPF_KSM] = "x:ksm", [KPF_THP] = "t:thp", [KPF_BALLOON] = "o:balloon", + [KPF_ZERO_PAGE] = "z:zero_page", [KPF_RESERVED] = "r:reserved", [KPF_MLOCKED] = "m:mlocked", From d7a94e7e11badf8404d40b41e008c3131a3cebe3 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 11 Feb 2015 15:24:54 -0800 Subject: [PATCH 522/601] oom: don't count on mm-less current process out_of_memory() doesn't trigger the OOM killer if the current task is already exiting or it has fatal signals pending, and gives the task access to memory reserves instead. However, doing so is wrong if out_of_memory() is called by an allocation (e.g. from exit_task_work()) after the current task has already released its memory and cleared TIF_MEMDIE at exit_mm(). If we again set TIF_MEMDIE to post-exit_mm() current task, the OOM killer will be blocked by the task sitting in the final schedule() waiting for its parent to reap it. It will trigger an OOM livelock if its parent is unable to reap it due to doing an allocation and waiting for the OOM killer to kill it. Signed-off-by: Tetsuo Handa Acked-by: Michal Hocko Cc: David Rientjes Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index d503e9ce1c7b7f..f82dd13cca68bd 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -643,8 +643,12 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, * If current has a pending SIGKILL or is exiting, then automatically * select it. The goal is to allow it to allocate so that it may * quickly exit and free its memory. + * + * But don't select if current has already released its mm and cleared + * TIF_MEMDIE flag at exit_mm(), otherwise an OOM livelock may occur. */ - if (fatal_signal_pending(current) || task_will_free_mem(current)) { + if (current->mm && + (fatal_signal_pending(current) || task_will_free_mem(current))) { set_thread_flag(TIF_MEMDIE); return; } From 83363b917a2982dd509a5e2125e905b6873505a3 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 11 Feb 2015 15:24:56 -0800 Subject: [PATCH 523/601] oom: make sure that TIF_MEMDIE is set under task_lock OOM killer tries to exclude tasks which do not have mm_struct associated because killing such a task wouldn't help much. The OOM victim gets TIF_MEMDIE set to disable OOM killer while the current victim releases the memory and then enables the OOM killer again by dropping the flag. oom_kill_process is currently prone to a race condition when the OOM victim is already exiting and TIF_MEMDIE is set after the task releases its address space. This might theoretically lead to OOM livelock if the OOM victim blocks on an allocation later during exiting because it wouldn't kill any other process and the exiting one won't be able to exit. The situation is highly unlikely because the OOM victim is expected to release some memory which should help to sort out OOM situation. Fix this by checking task->mm and setting TIF_MEMDIE flag under task_lock which will serialize the OOM killer with exit_mm which sets task->mm to NULL. Setting the flag for current is not necessary because check and set is not racy. Reported-by: Tetsuo Handa Signed-off-by: Michal Hocko Cc: David Rientjes Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index f82dd13cca68bd..294493a7ae4b0e 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -438,11 +438,14 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, * If the task is already exiting, don't alarm the sysadmin or kill * its children or threads, just set TIF_MEMDIE so it can die quickly */ - if (task_will_free_mem(p)) { + task_lock(p); + if (p->mm && task_will_free_mem(p)) { set_tsk_thread_flag(p, TIF_MEMDIE); + task_unlock(p); put_task_struct(p); return; } + task_unlock(p); if (__ratelimit(&oom_rs)) dump_header(p, gfp_mask, order, memcg, nodemask); @@ -492,6 +495,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, /* mm cannot safely be dereferenced after task_unlock(victim) */ mm = victim->mm; + set_tsk_thread_flag(victim, TIF_MEMDIE); pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n", task_pid_nr(victim), victim->comm, K(victim->mm->total_vm), K(get_mm_counter(victim->mm, MM_ANONPAGES)), @@ -522,7 +526,6 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, } rcu_read_unlock(); - set_tsk_thread_flag(victim, TIF_MEMDIE); do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true); put_task_struct(victim); } From 93aa7d95248d04b934eb8e89717c7b8d6400bf2b Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Wed, 11 Feb 2015 15:24:59 -0800 Subject: [PATCH 524/601] swap: remove unused mem_cgroup_uncharge_swapcache declaration The body of this function was removed by commit 0a31bc97c80c ("mm: memcontrol: rewrite uncharge API"). Signed-off-by: Vladimir Davydov Acked-by: Michal Hocko Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 15 --------------- mm/shmem.c | 2 +- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 34e8b60ab9736b..7067eca501e267 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -437,16 +437,6 @@ extern int reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; -#ifdef CONFIG_MEMCG -extern void -mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout); -#else -static inline void -mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) -{ -} -#endif - #else /* CONFIG_SWAP */ #define swap_address_space(entry) (NULL) @@ -547,11 +537,6 @@ static inline swp_entry_t get_swap_page(void) return entry; } -static inline void -mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) -{ -} - #endif /* CONFIG_SWAP */ #endif /* __KERNEL__*/ #endif /* _LINUX_SWAP_H */ diff --git a/mm/shmem.c b/mm/shmem.c index b3e4031819815b..864c878401e66a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1131,7 +1131,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, * truncated or holepunched since swap was confirmed. * shmem_undo_range() will have done some of the * unaccounting, now delete_from_swap_cache() will do - * the rest (including mem_cgroup_uncharge_swapcache). + * the rest. * Reset swap.val? No, leave it so "failed" goes back to * "repeat": reading a hole and writing should succeed. */ From 6de226191d12fce30331ebf024ca3ed24834f0ee Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 11 Feb 2015 15:25:01 -0800 Subject: [PATCH 525/601] mm: memcontrol: track move_lock state internally The complexity of memcg page stat synchronization is currently leaking into the callsites, forcing them to keep track of the move_lock state and the IRQ flags. Simplify the API by tracking it in the memcg. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Reviewed-by: Vladimir Davydov Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 12 +++---- mm/memcontrol.c | 68 ++++++++++++++++++++++---------------- mm/page-writeback.c | 12 +++---- mm/rmap.c | 12 +++---- 4 files changed, 51 insertions(+), 53 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index fb212e1d700d29..76b4084b8d082d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -138,12 +138,10 @@ static inline bool mem_cgroup_disabled(void) return false; } -struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, bool *locked, - unsigned long *flags); -void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool *locked, - unsigned long *flags); +struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page); void mem_cgroup_update_page_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx, int val); +void mem_cgroup_end_page_stat(struct mem_cgroup *memcg); static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) @@ -285,14 +283,12 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, - bool *locked, unsigned long *flags) +static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page) { return NULL; } -static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, - bool *locked, unsigned long *flags) +static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f3f8a4f52a0ca3..028d07c79104e8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -325,9 +325,11 @@ struct mem_cgroup { /* * set > 0 if pages under this cgroup are moving to other cgroup. */ - atomic_t moving_account; + atomic_t moving_account; /* taken only while moving_account > 0 */ - spinlock_t move_lock; + spinlock_t move_lock; + struct task_struct *move_lock_task; + unsigned long move_lock_flags; /* * percpu counter. */ @@ -1977,34 +1979,33 @@ bool mem_cgroup_oom_synchronize(bool handle) /** * mem_cgroup_begin_page_stat - begin a page state statistics transaction * @page: page that is going to change accounted state - * @locked: &memcg->move_lock slowpath was taken - * @flags: IRQ-state flags for &memcg->move_lock * * This function must mark the beginning of an accounted page state * change to prevent double accounting when the page is concurrently * being moved to another memcg: * - * memcg = mem_cgroup_begin_page_stat(page, &locked, &flags); + * memcg = mem_cgroup_begin_page_stat(page); * if (TestClearPageState(page)) * mem_cgroup_update_page_stat(memcg, state, -1); - * mem_cgroup_end_page_stat(memcg, locked, flags); - * - * The RCU lock is held throughout the transaction. The fast path can - * get away without acquiring the memcg->move_lock (@locked is false) - * because page moving starts with an RCU grace period. - * - * The RCU lock also protects the memcg from being freed when the page - * state that is going to change is the only thing preventing the page - * from being uncharged. E.g. end-writeback clearing PageWriteback(), - * which allows migration to go ahead and uncharge the page before the - * account transaction might be complete. + * mem_cgroup_end_page_stat(memcg); */ -struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, - bool *locked, - unsigned long *flags) +struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page) { struct mem_cgroup *memcg; + unsigned long flags; + /* + * The RCU lock is held throughout the transaction. The fast + * path can get away without acquiring the memcg->move_lock + * because page moving starts with an RCU grace period. + * + * The RCU lock also protects the memcg from being freed when + * the page state that is going to change is the only thing + * preventing the page from being uncharged. + * E.g. end-writeback clearing PageWriteback(), which allows + * migration to go ahead and uncharge the page before the + * account transaction might be complete. + */ rcu_read_lock(); if (mem_cgroup_disabled()) @@ -2014,16 +2015,22 @@ struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, if (unlikely(!memcg)) return NULL; - *locked = false; if (atomic_read(&memcg->moving_account) <= 0) return memcg; - spin_lock_irqsave(&memcg->move_lock, *flags); + spin_lock_irqsave(&memcg->move_lock, flags); if (memcg != page->mem_cgroup) { - spin_unlock_irqrestore(&memcg->move_lock, *flags); + spin_unlock_irqrestore(&memcg->move_lock, flags); goto again; } - *locked = true; + + /* + * When charge migration first begins, we can have locked and + * unlocked page stat updates happening concurrently. Track + * the task who has the lock for mem_cgroup_end_page_stat(). + */ + memcg->move_lock_task = current; + memcg->move_lock_flags = flags; return memcg; } @@ -2031,14 +2038,17 @@ struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, /** * mem_cgroup_end_page_stat - finish a page state statistics transaction * @memcg: the memcg that was accounted against - * @locked: value received from mem_cgroup_begin_page_stat() - * @flags: value received from mem_cgroup_begin_page_stat() */ -void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool *locked, - unsigned long *flags) +void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) { - if (memcg && *locked) - spin_unlock_irqrestore(&memcg->move_lock, *flags); + if (memcg && memcg->move_lock_task == current) { + unsigned long flags = memcg->move_lock_flags; + + memcg->move_lock_task = NULL; + memcg->move_lock_flags = 0; + + spin_unlock_irqrestore(&memcg->move_lock, flags); + } rcu_read_unlock(); } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 6f4335238e3331..fb71e9deca8569 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2308,12 +2308,10 @@ EXPORT_SYMBOL(clear_page_dirty_for_io); int test_clear_page_writeback(struct page *page) { struct address_space *mapping = page_mapping(page); - unsigned long memcg_flags; struct mem_cgroup *memcg; - bool locked; int ret; - memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags); + memcg = mem_cgroup_begin_page_stat(page); if (mapping) { struct backing_dev_info *bdi = mapping->backing_dev_info; unsigned long flags; @@ -2338,19 +2336,17 @@ int test_clear_page_writeback(struct page *page) dec_zone_page_state(page, NR_WRITEBACK); inc_zone_page_state(page, NR_WRITTEN); } - mem_cgroup_end_page_stat(memcg, &locked, &memcg_flags); + mem_cgroup_end_page_stat(memcg); return ret; } int __test_set_page_writeback(struct page *page, bool keep_write) { struct address_space *mapping = page_mapping(page); - unsigned long memcg_flags; struct mem_cgroup *memcg; - bool locked; int ret; - memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags); + memcg = mem_cgroup_begin_page_stat(page); if (mapping) { struct backing_dev_info *bdi = mapping->backing_dev_info; unsigned long flags; @@ -2380,7 +2376,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write) mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK); inc_zone_page_state(page, NR_WRITEBACK); } - mem_cgroup_end_page_stat(memcg, &locked, &memcg_flags); + mem_cgroup_end_page_stat(memcg); return ret; } diff --git a/mm/rmap.c b/mm/rmap.c index 70b32498d4f24e..5e3e09081164b8 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1085,24 +1085,20 @@ void page_add_new_anon_rmap(struct page *page, void page_add_file_rmap(struct page *page) { struct mem_cgroup *memcg; - unsigned long flags; - bool locked; - memcg = mem_cgroup_begin_page_stat(page, &locked, &flags); + memcg = mem_cgroup_begin_page_stat(page); if (atomic_inc_and_test(&page->_mapcount)) { __inc_zone_page_state(page, NR_FILE_MAPPED); mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED); } - mem_cgroup_end_page_stat(memcg, &locked, &flags); + mem_cgroup_end_page_stat(memcg); } static void page_remove_file_rmap(struct page *page) { struct mem_cgroup *memcg; - unsigned long flags; - bool locked; - memcg = mem_cgroup_begin_page_stat(page, &locked, &flags); + memcg = mem_cgroup_begin_page_stat(page); /* page still mapped by someone else? */ if (!atomic_add_negative(-1, &page->_mapcount)) @@ -1123,7 +1119,7 @@ static void page_remove_file_rmap(struct page *page) if (unlikely(PageMlocked(page))) clear_page_mlock(page); out: - mem_cgroup_end_page_stat(memcg, &locked, &flags); + mem_cgroup_end_page_stat(memcg); } /** From 91fbdc0f89807bb97792ea6893717a8d3154b871 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 11 Feb 2015 15:25:04 -0800 Subject: [PATCH 526/601] mm/page_alloc.c:__alloc_pages_nodemask(): don't alter arg gfp_mask __alloc_pages_nodemask() strips __GFP_IO when retrying the page allocation. But it does this by altering the function-wide variable gfp_mask. This will cause subsequent allocation attempts to inadvertently use the modified gfp_mask. Also, pass the correct mask (the mask we actually used) into trace_mm_page_alloc(). Cc: Ming Lei Cc: Mel Gorman Cc: Johannes Weiner Reviewed-by: Yasuaki Ishimatsu Cc: David Rientjes Acked-by: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f121050e8530b8..1c7d90f7a84ac6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2865,6 +2865,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, unsigned int cpuset_mems_cookie; int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR; int classzone_idx; + gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */ gfp_mask &= gfp_allowed_mask; @@ -2898,22 +2899,24 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, classzone_idx = zonelist_zone_idx(preferred_zoneref); /* First allocation attempt */ - page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order, - zonelist, high_zoneidx, alloc_flags, - preferred_zone, classzone_idx, migratetype); + alloc_mask = gfp_mask|__GFP_HARDWALL; + page = get_page_from_freelist(alloc_mask, nodemask, order, zonelist, + high_zoneidx, alloc_flags, preferred_zone, + classzone_idx, migratetype); if (unlikely(!page)) { /* * Runtime PM, block IO and its error handling path * can deadlock because I/O on the device might not * complete. */ - gfp_mask = memalloc_noio_flags(gfp_mask); - page = __alloc_pages_slowpath(gfp_mask, order, + alloc_mask = memalloc_noio_flags(gfp_mask); + + page = __alloc_pages_slowpath(alloc_mask, order, zonelist, high_zoneidx, nodemask, preferred_zone, classzone_idx, migratetype); } - trace_mm_page_alloc(page, order, gfp_mask, migratetype); + trace_mm_page_alloc(page, order, alloc_mask, migratetype); out: /* From 23f086f962e67a1b8a508c0d8e86b7833c941564 Mon Sep 17 00:00:00 2001 From: Xishi Qiu Date: Wed, 11 Feb 2015 15:25:07 -0800 Subject: [PATCH 527/601] kmemcheck: move hook into __alloc_pages_nodemask() for the page allocator Now kmemcheck_pagealloc_alloc() is only called by __alloc_pages_slowpath(). __alloc_pages_nodemask() __alloc_pages_slowpath() kmemcheck_pagealloc_alloc() And the page will not be tracked by kmemcheck in the following path. __alloc_pages_nodemask() get_page_from_freelist() So move kmemcheck_pagealloc_alloc() into __alloc_pages_nodemask(), like this: __alloc_pages_nodemask() ... get_page_from_freelist() if (!page) __alloc_pages_slowpath() kmemcheck_pagealloc_alloc() ... Signed-off-by: Xishi Qiu Cc: Vegard Nossum Cc: Pekka Enberg Cc: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1c7d90f7a84ac6..a88cb0cbf3526b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2842,11 +2842,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, nopage: warn_alloc_failed(gfp_mask, order, NULL); - return page; got_pg: - if (kmemcheck_enabled) - kmemcheck_pagealloc_alloc(page, order, gfp_mask); - return page; } @@ -2916,6 +2912,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, preferred_zone, classzone_idx, migratetype); } + if (kmemcheck_enabled && page) + kmemcheck_pagealloc_alloc(page, order, gfp_mask); + trace_mm_page_alloc(page, order, alloc_mask, migratetype); out: From 44628d9755e249aab9a6e1a17407d2f4278047ee Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 11 Feb 2015 15:25:10 -0800 Subject: [PATCH 528/601] mm: fix typo of MIGRATE_RESERVE in comment Found it when I want to jump to the definition of MIGRATE_RESERVE ctags. Signed-off-by: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2f0856d14b212d..b4182970133479 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -426,7 +426,7 @@ struct zone { const char *name; /* - * Number of MIGRATE_RESEVE page block. To maintain for just + * Number of MIGRATE_RESERVE page block. To maintain for just * optimization. Protected by zone->lock. */ int nr_migrate_reserve_block; From cfc511557945812280699a92f171ddd2d254aca6 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 11 Feb 2015 15:25:12 -0800 Subject: [PATCH 529/601] mm, vmscan: wake up all pfmemalloc-throttled processes at once Kswapd in balance_pgdate() currently uses wake_up() on processes waiting in throttle_direct_reclaim(), which only wakes up a single process. This might leave processes waiting for longer than necessary, until the check is reached in the next loop iteration. Processes might also be left waiting if zone was fully balanced in single iteration. Note that the comment in balance_pgdat() also says "Wake them", so waking up a single process does not seem intentional. Thus, replace wake_up() with wake_up_all(). Signed-off-by: Vlastimil Babka Cc: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Acked-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index dcd90c891d8e53..f756a202d5d5a4 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3175,7 +3175,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, */ if (waitqueue_active(&pgdat->pfmemalloc_wait) && pfmemalloc_watermark_ok(pgdat)) - wake_up(&pgdat->pfmemalloc_wait); + wake_up_all(&pgdat->pfmemalloc_wait); /* * Fragmentation may mean that the system cannot be rebalanced From 61f77eda9bbf0d2e922197ed2dcf88638a639ce5 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:25:15 -0800 Subject: [PATCH 530/601] mm/hugetlb: reduce arch dependent code around follow_huge_* Currently we have many duplicates in definitions around follow_huge_addr(), follow_huge_pmd(), and follow_huge_pud(), so this patch tries to remove the m. The basic idea is to put the default implementation for these functions in mm/hugetlb.c as weak symbols (regardless of CONFIG_ARCH_WANT_GENERAL_HUGETL B), and to implement arch-specific code only when the arch needs it. For follow_huge_addr(), only powerpc and ia64 have their own implementation, and in all other architectures this function just returns ERR_PTR(-EINVAL). So this patch sets returning ERR_PTR(-EINVAL) as default. As for follow_huge_(pmd|pud)(), if (pmd|pud)_huge() is implemented to always return 0 in your architecture (like in ia64 or sparc,) it's never called (the callsite is optimized away) no matter how implemented it is. So in such architectures, we don't need arch-specific implementation. In some architecture (like mips, s390 and tile,) their current arch-specific follow_huge_(pmd|pud)() are effectively identical with the common code, so this patch lets these architecture use the common code. One exception is metag, where pmd_huge() could return non-zero but it expects follow_huge_pmd() to always return NULL. This means that we need arch-specific implementation which returns NULL. This behavior looks strange to me (because non-zero pmd_huge() implies that the architecture supports PMD-based hugepage, so follow_huge_pmd() can/should return some relevant value,) but that's beyond this cleanup patch, so let's keep it. Justification of non-trivial changes: - in s390, follow_huge_pmd() checks !MACHINE_HAS_HPAGE at first, and this patch removes the check. This is OK because we can assume MACHINE_HAS_HPAGE is true when follow_huge_pmd() can be called (note that pmd_huge() has the same check and always returns 0 for !MACHINE_HAS_HPAGE.) - in s390 and mips, we use HPAGE_MASK instead of PMD_MASK as done in common code. This patch forces these archs use PMD_MASK, but it's OK because they are identical in both archs. In s390, both of HPAGE_SHIFT and PMD_SHIFT are 20. In mips, HPAGE_SHIFT is defined as (PAGE_SHIFT + PAGE_SHIFT - 3) and PMD_SHIFT is define as (PAGE_SHIFT + PAGE_SHIFT + PTE_ORDER - 3), but PTE_ORDER is always 0, so these are identical. Signed-off-by: Naoya Horiguchi Acked-by: Hugh Dickins Cc: James Hogan Cc: David Rientjes Cc: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Rik van Riel Cc: Andrea Arcangeli Cc: Luiz Capitulino Cc: Nishanth Aravamudan Cc: Lee Schermerhorn Cc: Steve Capper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/hugetlbpage.c | 6 ------ arch/arm64/mm/hugetlbpage.c | 6 ------ arch/ia64/mm/hugetlbpage.c | 6 ------ arch/metag/mm/hugetlbpage.c | 6 ------ arch/mips/mm/hugetlbpage.c | 18 ------------------ arch/powerpc/mm/hugetlbpage.c | 8 ++++++++ arch/s390/mm/hugetlbpage.c | 20 -------------------- arch/sh/mm/hugetlbpage.c | 12 ------------ arch/sparc/mm/hugetlbpage.c | 12 ------------ arch/tile/mm/hugetlbpage.c | 28 ---------------------------- arch/x86/mm/hugetlbpage.c | 12 ------------ mm/hugetlb.c | 30 +++++++++++++++--------------- 12 files changed, 23 insertions(+), 141 deletions(-) diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c index 66781bf34077cb..c724124150934f 100644 --- a/arch/arm/mm/hugetlbpage.c +++ b/arch/arm/mm/hugetlbpage.c @@ -36,12 +36,6 @@ * of type casting from pmd_t * to pte_t *. */ -struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, - int write) -{ - return ERR_PTR(-EINVAL); -} - int pud_huge(pud_t pud) { return 0; diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 023747bf4dd7dd..2de9d2e59d9680 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -38,12 +38,6 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) } #endif -struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, - int write) -{ - return ERR_PTR(-EINVAL); -} - int pmd_huge(pmd_t pmd) { return !(pmd_val(pmd) & PMD_TABLE_BIT); diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index 76069c18ee42c1..52b7604b5215f8 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -114,12 +114,6 @@ int pud_huge(pud_t pud) return 0; } -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) -{ - return NULL; -} - void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) diff --git a/arch/metag/mm/hugetlbpage.c b/arch/metag/mm/hugetlbpage.c index 3c32075d294528..7ca80ac42ed5a9 100644 --- a/arch/metag/mm/hugetlbpage.c +++ b/arch/metag/mm/hugetlbpage.c @@ -94,12 +94,6 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) return 0; } -struct page *follow_huge_addr(struct mm_struct *mm, - unsigned long address, int write) -{ - return ERR_PTR(-EINVAL); -} - int pmd_huge(pmd_t pmd) { return pmd_page_shift(pmd) > PAGE_SHIFT; diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c index 4ec8ee10d3718f..06e0f421b41b19 100644 --- a/arch/mips/mm/hugetlbpage.c +++ b/arch/mips/mm/hugetlbpage.c @@ -68,12 +68,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len) return 0; } -struct page * -follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) -{ - return ERR_PTR(-EINVAL); -} - int pmd_huge(pmd_t pmd) { return (pmd_val(pmd) & _PAGE_HUGE) != 0; @@ -83,15 +77,3 @@ int pud_huge(pud_t pud) { return (pud_val(pud) & _PAGE_HUGE) != 0; } - -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pmd); - if (page) - page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); - return page; -} diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 5ff4e07d920a36..cf0464f4284f22 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -714,6 +714,14 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, return NULL; } +struct page * +follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int write) +{ + BUG(); + return NULL; +} + static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, unsigned long sz) { diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index 3c80d2e38f03c4..210ffede015313 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -192,12 +192,6 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) return 0; } -struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, - int write) -{ - return ERR_PTR(-EINVAL); -} - int pmd_huge(pmd_t pmd) { if (!MACHINE_HAS_HPAGE) @@ -210,17 +204,3 @@ int pud_huge(pud_t pud) { return 0; } - -struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmdp, int write) -{ - struct page *page; - - if (!MACHINE_HAS_HPAGE) - return NULL; - - page = pmd_page(*pmdp); - if (page) - page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); - return page; -} diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c index d7762349ea4869..534bc978af8a58 100644 --- a/arch/sh/mm/hugetlbpage.c +++ b/arch/sh/mm/hugetlbpage.c @@ -67,12 +67,6 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) return 0; } -struct page *follow_huge_addr(struct mm_struct *mm, - unsigned long address, int write) -{ - return ERR_PTR(-EINVAL); -} - int pmd_huge(pmd_t pmd) { return 0; @@ -82,9 +76,3 @@ int pud_huge(pud_t pud) { return 0; } - -struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - return NULL; -} diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index d329537739c6f3..4242eab12e1073 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -215,12 +215,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, return entry; } -struct page *follow_huge_addr(struct mm_struct *mm, - unsigned long address, int write) -{ - return ERR_PTR(-EINVAL); -} - int pmd_huge(pmd_t pmd) { return 0; @@ -230,9 +224,3 @@ int pud_huge(pud_t pud) { return 0; } - -struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - return NULL; -} diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index 3270e001926633..8416240c322c92 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c @@ -150,12 +150,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return NULL; } -struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, - int write) -{ - return ERR_PTR(-EINVAL); -} - int pmd_huge(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_HUGE_PAGE); @@ -166,28 +160,6 @@ int pud_huge(pud_t pud) return !!(pud_val(pud) & _PAGE_HUGE_PAGE); } -struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pmd); - if (page) - page += ((address & ~PMD_MASK) >> PAGE_SHIFT); - return page; -} - -struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write) -{ - struct page *page; - - page = pte_page(*(pte_t *)pud); - if (page) - page += ((address & ~PUD_MASK) >> PAGE_SHIFT); - return page; -} - int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) { return 0; diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index bca0aa3a003f6b..f48423f1014115 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -52,20 +52,8 @@ int pud_huge(pud_t pud) return 0; } -struct page * -follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) -{ - return NULL; -} #else -struct page * -follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) -{ - return ERR_PTR(-EINVAL); -} - int pmd_huge(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_PSE); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index be0e5d0db5ec86..f533d336e569b1 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3660,7 +3660,20 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) return (pte_t *) pmd; } -struct page * +#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */ + +/* + * These functions are overwritable if your architecture needs its own + * behavior. + */ +struct page * __weak +follow_huge_addr(struct mm_struct *mm, unsigned long address, + int write) +{ + return ERR_PTR(-EINVAL); +} + +struct page * __weak follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) { @@ -3672,7 +3685,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, return page; } -struct page * +struct page * __weak follow_huge_pud(struct mm_struct *mm, unsigned long address, pud_t *pud, int write) { @@ -3684,19 +3697,6 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address, return page; } -#else /* !CONFIG_ARCH_WANT_GENERAL_HUGETLB */ - -/* Can be overriden by architectures */ -struct page * __weak -follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write) -{ - BUG(); - return NULL; -} - -#endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */ - #ifdef CONFIG_MEMORY_FAILURE /* Should be called in hugetlb_lock */ From cbef8478bee55775ac312a574aad48af7bb9cf9f Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:25:19 -0800 Subject: [PATCH 531/601] mm/hugetlb: pmd_huge() returns true for non-present hugepage Migrating hugepages and hwpoisoned hugepages are considered as non-present hugepages, and they are referenced via migration entries and hwpoison entries in their page table slots. This behavior causes race condition because pmd_huge() doesn't tell non-huge pages from migrating/hwpoisoned hugepages. follow_page_mask() is one example where the kernel would call follow_page_pte() for such hugepage while this function is supposed to handle only normal pages. To avoid this, this patch makes pmd_huge() return true when pmd_none() is true *and* pmd_present() is false. We don't have to worry about mixing up non-present pmd entry with normal pmd (pointing to leaf level pte entry) because pmd_present() is true in normal pmd. The same race condition could happen in (x86-specific) gup_pmd_range(), where this patch simply adds pmd_present() check instead of pmd_huge(). This is because gup_pmd_range() is fast path. If we have non-present hugepage in this function, we will go into gup_huge_pmd(), then return 0 at flag mask check, and finally fall back to the slow path. Fixes: 290408d4a2 ("hugetlb: hugepage migration core") Signed-off-by: Naoya Horiguchi Cc: Hugh Dickins Cc: James Hogan Cc: David Rientjes Cc: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Rik van Riel Cc: Andrea Arcangeli Cc: Luiz Capitulino Cc: Nishanth Aravamudan Cc: Lee Schermerhorn Cc: Steve Capper Cc: [2.6.36+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/gup.c | 2 +- arch/x86/mm/hugetlbpage.c | 8 +++++++- mm/hugetlb.c | 2 ++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index d7547824e76369..224b14235e967b 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -172,7 +172,7 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, */ if (pmd_none(pmd) || pmd_trans_splitting(pmd)) return 0; - if (unlikely(pmd_large(pmd))) { + if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) { /* * NUMA hinting faults need to be handled in the GUP * slowpath for accounting purposes and so that they diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index f48423f1014115..42982b26e32be6 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -54,9 +54,15 @@ int pud_huge(pud_t pud) #else +/* + * pmd_huge() returns 1 if @pmd is hugetlb related entry, that is normal + * hugetlb entry or non-present (migration or hwpoisoned) hugetlb entry. + * Otherwise, returns 0. + */ int pmd_huge(pmd_t pmd) { - return !!(pmd_val(pmd) & _PAGE_PSE); + return !pmd_none(pmd) && + (pmd_val(pmd) & (_PAGE_PRESENT|_PAGE_PSE)) != _PAGE_PRESENT; } int pud_huge(pud_t pud) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f533d336e569b1..d96b8bfa748f7b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3679,6 +3679,8 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, { struct page *page; + if (!pmd_present(*pmd)) + return NULL; page = pte_page(*(pte_t *)pmd); if (page) page += ((address & ~PMD_MASK) >> PAGE_SHIFT); From e66f17ff71772b209eed39de35aaa99ba819c93d Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:25:22 -0800 Subject: [PATCH 532/601] mm/hugetlb: take page table lock in follow_huge_pmd() We have a race condition between move_pages() and freeing hugepages, where move_pages() calls follow_page(FOLL_GET) for hugepages internally and tries to get its refcount without preventing concurrent freeing. This race crashes the kernel, so this patch fixes it by moving FOLL_GET code for hugepages into follow_huge_pmd() with taking the page table lock. This patch intentionally removes page==NULL check after pte_page. This is justified because pte_page() never returns NULL for any architectures or configurations. This patch changes the behavior of follow_huge_pmd() for tail pages and then tail pages can be pinned/returned. So the caller must be changed to properly handle the returned tail pages. We could have a choice to add the similar locking to follow_huge_(addr|pud) for consistency, but it's not necessary because currently these functions don't support FOLL_GET flag, so let's leave it for future development. Here is the reproducer: $ cat movepages.c #include #include #include #define ADDR_INPUT 0x700000000000UL #define HPS 0x200000 #define PS 0x1000 int main(int argc, char *argv[]) { int i; int nr_hp = strtol(argv[1], NULL, 0); int nr_p = nr_hp * HPS / PS; int ret; void **addrs; int *status; int *nodes; pid_t pid; pid = strtol(argv[2], NULL, 0); addrs = malloc(sizeof(char *) * nr_p + 1); status = malloc(sizeof(char *) * nr_p + 1); nodes = malloc(sizeof(char *) * nr_p + 1); while (1) { for (i = 0; i < nr_p; i++) { addrs[i] = (void *)ADDR_INPUT + i * PS; nodes[i] = 1; status[i] = 0; } ret = numa_move_pages(pid, nr_p, addrs, nodes, status, MPOL_MF_MOVE_ALL); if (ret == -1) err("move_pages"); for (i = 0; i < nr_p; i++) { addrs[i] = (void *)ADDR_INPUT + i * PS; nodes[i] = 0; status[i] = 0; } ret = numa_move_pages(pid, nr_p, addrs, nodes, status, MPOL_MF_MOVE_ALL); if (ret == -1) err("move_pages"); } return 0; } $ cat hugepage.c #include #include #include #define ADDR_INPUT 0x700000000000UL #define HPS 0x200000 int main(int argc, char *argv[]) { int nr_hp = strtol(argv[1], NULL, 0); char *p; while (1) { p = mmap((void *)ADDR_INPUT, nr_hp * HPS, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); if (p != (void *)ADDR_INPUT) { perror("mmap"); break; } memset(p, 0, nr_hp * HPS); munmap(p, nr_hp * HPS); } } $ sysctl vm.nr_hugepages=40 $ ./hugepage 10 & $ ./movepages 10 $(pgrep -f hugepage) Fixes: e632a938d914 ("mm: migrate: add hugepage migration code to move_pages()") Signed-off-by: Naoya Horiguchi Reported-by: Hugh Dickins Cc: James Hogan Cc: David Rientjes Cc: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Rik van Riel Cc: Andrea Arcangeli Cc: Luiz Capitulino Cc: Nishanth Aravamudan Cc: Lee Schermerhorn Cc: Steve Capper Cc: [3.12+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 8 +++---- include/linux/swapops.h | 4 ++++ mm/gup.c | 25 +++++++-------------- mm/hugetlb.c | 48 +++++++++++++++++++++++++++++------------ mm/migrate.c | 5 +++-- 5 files changed, 53 insertions(+), 37 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 7d785635992079..7b578503204918 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -99,9 +99,9 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write); struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write); + pmd_t *pmd, int flags); struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write); + pud_t *pud, int flags); int pmd_huge(pmd_t pmd); int pud_huge(pud_t pmd); unsigned long hugetlb_change_protection(struct vm_area_struct *vma, @@ -133,8 +133,8 @@ static inline void hugetlb_report_meminfo(struct seq_file *m) static inline void hugetlb_show_meminfo(void) { } -#define follow_huge_pmd(mm, addr, pmd, write) NULL -#define follow_huge_pud(mm, addr, pud, write) NULL +#define follow_huge_pmd(mm, addr, pmd, flags) NULL +#define follow_huge_pud(mm, addr, pud, flags) NULL #define prepare_hugepage_range(file, addr, len) (-EINVAL) #define pmd_huge(x) 0 #define pud_huge(x) 0 diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 50cbc876be56a2..831a3168ab35a6 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -135,6 +135,8 @@ static inline void make_migration_entry_read(swp_entry_t *entry) *entry = swp_entry(SWP_MIGRATION_READ, swp_offset(*entry)); } +extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, + spinlock_t *ptl); extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address); extern void migration_entry_wait_huge(struct vm_area_struct *vma, @@ -148,6 +150,8 @@ static inline int is_migration_entry(swp_entry_t swp) } #define migration_entry_to_page(swp) NULL static inline void make_migration_entry_read(swp_entry_t *entryp) { } +static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, + spinlock_t *ptl) { } static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { } static inline void migration_entry_wait_huge(struct vm_area_struct *vma, diff --git a/mm/gup.c b/mm/gup.c index 12bc2bc33da759..1a8ab05918e018 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -167,10 +167,10 @@ struct page *follow_page_mask(struct vm_area_struct *vma, if (pud_none(*pud)) return no_page_table(vma, flags); if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) { - if (flags & FOLL_GET) - return NULL; - page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE); - return page; + page = follow_huge_pud(mm, address, pud, flags); + if (page) + return page; + return no_page_table(vma, flags); } if (unlikely(pud_bad(*pud))) return no_page_table(vma, flags); @@ -179,19 +179,10 @@ struct page *follow_page_mask(struct vm_area_struct *vma, if (pmd_none(*pmd)) return no_page_table(vma, flags); if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) { - page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE); - if (flags & FOLL_GET) { - /* - * Refcount on tail pages are not well-defined and - * shouldn't be taken. The caller should handle a NULL - * return when trying to follow tail pages. - */ - if (PageHead(page)) - get_page(page); - else - page = NULL; - } - return page; + page = follow_huge_pmd(mm, address, pmd, flags); + if (page) + return page; + return no_page_table(vma, flags); } if ((flags & FOLL_NUMA) && pmd_numa(*pmd)) return no_page_table(vma, flags); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d96b8bfa748f7b..5aca3707450fe0 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3675,28 +3675,48 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, struct page * __weak follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write) + pmd_t *pmd, int flags) { - struct page *page; - - if (!pmd_present(*pmd)) - return NULL; - page = pte_page(*(pte_t *)pmd); - if (page) - page += ((address & ~PMD_MASK) >> PAGE_SHIFT); + struct page *page = NULL; + spinlock_t *ptl; +retry: + ptl = pmd_lockptr(mm, pmd); + spin_lock(ptl); + /* + * make sure that the address range covered by this pmd is not + * unmapped from other threads. + */ + if (!pmd_huge(*pmd)) + goto out; + if (pmd_present(*pmd)) { + page = pte_page(*(pte_t *)pmd) + + ((address & ~PMD_MASK) >> PAGE_SHIFT); + if (flags & FOLL_GET) + get_page(page); + } else { + if (is_hugetlb_entry_migration(huge_ptep_get((pte_t *)pmd))) { + spin_unlock(ptl); + __migration_entry_wait(mm, (pte_t *)pmd, ptl); + goto retry; + } + /* + * hwpoisoned entry is treated as no_page_table in + * follow_page_mask(). + */ + } +out: + spin_unlock(ptl); return page; } struct page * __weak follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write) + pud_t *pud, int flags) { - struct page *page; + if (flags & FOLL_GET) + return NULL; - page = pte_page(*(pte_t *)pud); - if (page) - page += ((address & ~PUD_MASK) >> PAGE_SHIFT); - return page; + return pte_page(*(pte_t *)pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT); } #ifdef CONFIG_MEMORY_FAILURE diff --git a/mm/migrate.c b/mm/migrate.c index 6e284bcca8bbfb..f98067e5d35338 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -197,7 +197,7 @@ static void remove_migration_ptes(struct page *old, struct page *new) * get to the page and wait until migration is finished. * When we return from this function the fault will be retried. */ -static void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, +void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, spinlock_t *ptl) { pte_t pte; @@ -1236,7 +1236,8 @@ static int do_move_page_to_node_array(struct mm_struct *mm, goto put_and_set; if (PageHuge(page)) { - isolate_huge_page(page, &pagelist); + if (PageHead(page)) + isolate_huge_page(page, &pagelist); goto put_and_set; } From 0f792cf949a0be506c2aa8bfac0605746b146dda Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:25:25 -0800 Subject: [PATCH 533/601] mm/hugetlb: fix getting refcount 0 page in hugetlb_fault() When running the test which causes the race as shown in the previous patch, we can hit the BUG "get_page() on refcount 0 page" in hugetlb_fault(). This race happens when pte turns into migration entry just after the first check of is_hugetlb_entry_migration() in hugetlb_fault() passed with false. To fix this, we need to check pte_present() again after huge_ptep_get(). This patch also reorders taking ptl and doing pte_page(), because pte_page() should be done in ptl. Due to this reordering, we need use trylock_page() in page != pagecache_page case to respect locking order. Fixes: 66aebce747ea ("hugetlb: fix race condition in hugetlb_fault()") Signed-off-by: Naoya Horiguchi Cc: Hugh Dickins Cc: James Hogan Cc: David Rientjes Cc: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Rik van Riel Cc: Andrea Arcangeli Cc: Luiz Capitulino Cc: Nishanth Aravamudan Cc: Lee Schermerhorn Cc: Steve Capper Cc: [3.2+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 52 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5aca3707450fe0..385c3a1aced78f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3134,6 +3134,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, struct page *pagecache_page = NULL; struct hstate *h = hstate_vma(vma); struct address_space *mapping; + int need_wait_lock = 0; address &= huge_page_mask(h); @@ -3171,6 +3172,16 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, ret = 0; + /* + * entry could be a migration/hwpoison entry at this point, so this + * check prevents the kernel from going below assuming that we have + * a active hugepage in pagecache. This goto expects the 2nd page fault, + * and is_hugetlb_entry_(migration|hwpoisoned) check will properly + * handle it. + */ + if (!pte_present(entry)) + goto out_mutex; + /* * If we are going to COW the mapping later, we examine the pending * reservations for this page now. This will ensure that any @@ -3190,30 +3201,31 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, vma, address); } + ptl = huge_pte_lock(h, mm, ptep); + + /* Check for a racing update before calling hugetlb_cow */ + if (unlikely(!pte_same(entry, huge_ptep_get(ptep)))) + goto out_ptl; + /* * hugetlb_cow() requires page locks of pte_page(entry) and * pagecache_page, so here we need take the former one * when page != pagecache_page or !pagecache_page. - * Note that locking order is always pagecache_page -> page, - * so no worry about deadlock. */ page = pte_page(entry); - get_page(page); if (page != pagecache_page) - lock_page(page); - - ptl = huge_pte_lockptr(h, mm, ptep); - spin_lock(ptl); - /* Check for a racing update before calling hugetlb_cow */ - if (unlikely(!pte_same(entry, huge_ptep_get(ptep)))) - goto out_ptl; + if (!trylock_page(page)) { + need_wait_lock = 1; + goto out_ptl; + } + get_page(page); if (flags & FAULT_FLAG_WRITE) { if (!huge_pte_write(entry)) { ret = hugetlb_cow(mm, vma, address, ptep, entry, pagecache_page, ptl); - goto out_ptl; + goto out_put_page; } entry = huge_pte_mkdirty(entry); } @@ -3221,7 +3233,10 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (huge_ptep_set_access_flags(vma, address, ptep, entry, flags & FAULT_FLAG_WRITE)) update_mmu_cache(vma, address, ptep); - +out_put_page: + if (page != pagecache_page) + unlock_page(page); + put_page(page); out_ptl: spin_unlock(ptl); @@ -3229,12 +3244,17 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, unlock_page(pagecache_page); put_page(pagecache_page); } - if (page != pagecache_page) - unlock_page(page); - put_page(page); - out_mutex: mutex_unlock(&htlb_fault_mutex_table[hash]); + /* + * Generally it's safe to hold refcount during waiting page lock. But + * here we just wait to defer the next page fault to avoid busy loop and + * the page is not used after unlocked before returning from the current + * page fault. So we are safe from accessing freed page, even if we wait + * here without taking refcount. + */ + if (need_wait_lock) + wait_on_page_locked(page); return ret; } From a8bda28d87c38c6aa93de28ba5d30cc18e865a11 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:25:28 -0800 Subject: [PATCH 534/601] mm/hugetlb: add migration/hwpoisoned entry check in hugetlb_change_protection There is a race condition between hugepage migration and change_protection(), where hugetlb_change_protection() doesn't care about migration entries and wrongly overwrites them. That causes unexpected results like kernel crash. HWPoison entries also can cause the same problem. This patch adds is_hugetlb_entry_(migration|hwpoisoned) check in this function to do proper actions. Fixes: 290408d4a2 ("hugetlb: hugepage migration core") Signed-off-by: Naoya Horiguchi Cc: Hugh Dickins Cc: James Hogan Cc: David Rientjes Cc: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Rik van Riel Cc: Andrea Arcangeli Cc: Luiz Capitulino Cc: Nishanth Aravamudan Cc: Lee Schermerhorn Cc: Steve Capper Cc: [2.6.36+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 385c3a1aced78f..c2970e7eb5271b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3384,7 +3384,26 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, spin_unlock(ptl); continue; } - if (!huge_pte_none(huge_ptep_get(ptep))) { + pte = huge_ptep_get(ptep); + if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) { + spin_unlock(ptl); + continue; + } + if (unlikely(is_hugetlb_entry_migration(pte))) { + swp_entry_t entry = pte_to_swp_entry(pte); + + if (is_write_migration_entry(entry)) { + pte_t newpte; + + make_migration_entry_read(&entry); + newpte = swp_entry_to_pte(entry); + set_huge_pte_at(mm, address, ptep, newpte); + pages++; + } + spin_unlock(ptl); + continue; + } + if (!huge_pte_none(pte)) { pte = huge_ptep_get_and_clear(mm, address, ptep); pte = pte_mkhuge(huge_pte_modify(pte, newprot)); pte = arch_make_huge_pte(pte, vma, NULL, 0); From 9fbc1f635fd0bd28cb32550211bf095753ac637a Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:25:32 -0800 Subject: [PATCH 535/601] mm/hugetlb: add migration entry check in __unmap_hugepage_range If __unmap_hugepage_range() tries to unmap the address range over which hugepage migration is on the way, we get the wrong page because pte_page() doesn't work for migration entries. This patch simply clears the pte for migration entries as we do for hwpoison entries. Fixes: 290408d4a2 ("hugetlb: hugepage migration core") Signed-off-by: Naoya Horiguchi Cc: Hugh Dickins Cc: James Hogan Cc: David Rientjes Cc: Mel Gorman Cc: Johannes Weiner Cc: Michal Hocko Cc: Rik van Riel Cc: Andrea Arcangeli Cc: Luiz Capitulino Cc: Nishanth Aravamudan Cc: Lee Schermerhorn Cc: Steve Capper Cc: [2.6.36+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index c2970e7eb5271b..fd28d6ba5e5db3 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2657,9 +2657,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, goto unlock; /* - * HWPoisoned hugepage is already unmapped and dropped reference + * Migrating hugepage or HWPoisoned hugepage is already + * unmapped and its refcount is dropped, so just clear pte here. */ - if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) { + if (unlikely(!pte_present(pte))) { huge_pte_clear(mm, address, ptep); goto unlock; } From 4ecf886045152d2ddf98ae74e39f789868ac1f98 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 11 Feb 2015 15:25:35 -0800 Subject: [PATCH 536/601] sparc32: fix broken set_pte() 32-bit sparc uses swap instruction to implement set_pte(). It called using GCC inline assembler. But it misses the "memory" clobber to indicate that pte value will be updated in memory. As result GCC doesn't know that it cannot postpone pte pointer dereference which occurs before set_pte() to post-set_pte() time. It leads to real-world bugs -- [1]. In this situation we have code: ptent = ptep_modify_prot_start(mm, addr, pte); ptent = pte_modify(ptent, newprot); ... ptep_modify_prot_commit(mm, addr, pte, ptent); ptep_modify_prot_start() in sparc case is just 'pte' dereference plus pte_clear(). pte_clear() calls broken set_pte(). GCC thinks it's valid to dereference 'pte' again on pte_modify() and gets cleared pte. ptep_modify_prot_commit() puts 'pteent' with pfn==0 back to page table, which eventually leads to the crash. [1] http://lkml.kernel.org/r/54C06B19.8060305@roeck-us.net Signed-off-by: Kirill A. Shutemov Reported-by: Guenter Roeck Tested-by: Guenter Roeck Cc: Paul Moore Cc: Joonsoo Kim Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/include/asm/pgtable_32.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index b2f7dc46a7d142..9912eb0b499ae5 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -102,7 +102,8 @@ extern unsigned long empty_zero_page; */ static inline unsigned long srmmu_swap(unsigned long *addr, unsigned long value) { - __asm__ __volatile__("swap [%2], %0" : "=&r" (value) : "0" (value), "r" (addr)); + __asm__ __volatile__("swap [%2], %0" : + "=&r" (value) : "0" (value), "r" (addr) : "memory"); return value; } From 753791910e23a95aade78f69e49713acddf8bb8c Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 11 Feb 2015 15:25:38 -0800 Subject: [PATCH 537/601] mm: set page->pfmemalloc in prep_new_page() The possibility of replacing the numerous parameters of alloc_pages* functions with a single structure has been discussed when Minchan proposed to expand the x86 kernel stack [1]. This series implements the change, along with few more cleanups/microoptimizations. The series is based on next-20150108 and I used gcc 4.8.3 20140627 on openSUSE 13.2 for compiling. Config includess NUMA and COMPACTION. The core change is the introduction of a new struct alloc_context, which looks like this: struct alloc_context { struct zonelist *zonelist; nodemask_t *nodemask; struct zone *preferred_zone; int classzone_idx; int migratetype; enum zone_type high_zoneidx; }; All the contents is mostly constant, except that __alloc_pages_slowpath() changes preferred_zone, classzone_idx and potentially zonelist. But that's not a problem in case control returns to retry_cpuset: in __alloc_pages_nodemask(), those will be reset to initial values again (although it's a bit subtle). On the other hand, gfp_flags and alloc_info mutate so much that it doesn't make sense to put them into alloc_context. Still, the result is one parameter instead of up to 7. This is all in Patch 2. Patch 3 is a step to expand alloc_context usage out of page_alloc.c itself. The function try_to_compact_pages() can also much benefit from the parameter reduction, but it means the struct definition has to be moved to a shared header. Patch 1 should IMHO be included even if the rest is deemed not useful enough. It improves maintainability and also has some code/stack reduction. Patch 4 is OTOH a tiny optimization. Overall bloat-o-meter results: add/remove: 0/0 grow/shrink: 0/4 up/down: 0/-460 (-460) function old new delta nr_free_zone_pages 129 115 -14 __alloc_pages_direct_compact 329 256 -73 get_page_from_freelist 2670 2576 -94 __alloc_pages_nodemask 2564 2285 -279 try_to_compact_pages 582 579 -3 Overall stack sizes per ./scripts/checkstack.pl: old new delta get_page_from_freelist: 184 184 0 __alloc_pages_nodemask 248 200 -48 __alloc_pages_direct_c 40 - -40 try_to_compact_pages 72 72 0 -88 [1] http://marc.info/?l=linux-mm&m=140142462528257&w=2 This patch (of 4): prep_new_page() sets almost everything in the struct page of the page being allocated, except page->pfmemalloc. This is not obvious and has at least once led to a bug where page->pfmemalloc was forgotten to be set correctly, see commit 8fb74b9fb2b1 ("mm: compaction: partially revert capture of suitable high-order page"). This patch moves the pfmemalloc setting to prep_new_page(), which means it needs to gain alloc_flags parameter. The call to prep_new_page is moved from buffered_rmqueue() to get_page_from_freelist(), which also leads to simpler code. An obsolete comment for buffered_rmqueue() is replaced. In addition to better maintainability there is a small reduction of code and stack usage for get_page_from_freelist(), which inlines the other functions involved. add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-145 (-145) function old new delta get_page_from_freelist 2670 2525 -145 Stack usage is reduced from 184 to 168 bytes. Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Mel Gorman Cc: Zhang Yanfei Cc: Minchan Kim Cc: David Rientjes Cc: Rik van Riel Cc: "Aneesh Kumar K.V" Cc: "Kirill A. Shutemov" Cc: Johannes Weiner Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a88cb0cbf3526b..30a3250c0a2105 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -970,7 +970,8 @@ static inline int check_new_page(struct page *page) return 0; } -static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags) +static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, + int alloc_flags) { int i; @@ -994,6 +995,14 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags) set_page_owner(page, order, gfp_flags); + /* + * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was necessary to + * allocate the page. The expectation is that the caller is taking + * steps that will free more memory. The caller should avoid the page + * being used for !PFMEMALLOC purposes. + */ + page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS); + return 0; } @@ -1642,9 +1651,7 @@ int split_free_page(struct page *page) } /* - * Really, prep_compound_page() should be called from __rmqueue_bulk(). But - * we cheat by calling it from here, in the order > 0 path. Saves a branch - * or two. + * Allocate a page from the given zone. Use pcplists for order-0 allocations. */ static inline struct page *buffered_rmqueue(struct zone *preferred_zone, @@ -1655,7 +1662,6 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, struct page *page; bool cold = ((gfp_flags & __GFP_COLD) != 0); -again: if (likely(order == 0)) { struct per_cpu_pages *pcp; struct list_head *list; @@ -1711,8 +1717,6 @@ struct page *buffered_rmqueue(struct zone *preferred_zone, local_irq_restore(flags); VM_BUG_ON_PAGE(bad_range(zone, page), page); - if (prep_new_page(page, order, gfp_flags)) - goto again; return page; failed: @@ -2177,25 +2181,16 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, try_this_zone: page = buffered_rmqueue(preferred_zone, zone, order, gfp_mask, migratetype); - if (page) - break; + if (page) { + if (prep_new_page(page, order, gfp_mask, alloc_flags)) + goto try_this_zone; + return page; + } this_zone_full: if (IS_ENABLED(CONFIG_NUMA) && zlc_active) zlc_mark_zone_full(zonelist, z); } - if (page) { - /* - * page->pfmemalloc is set when ALLOC_NO_WATERMARKS was - * necessary to allocate the page. The expectation is - * that the caller is taking steps that will free more - * memory. The caller should avoid the page being used - * for !PFMEMALLOC purposes. - */ - page->pfmemalloc = !!(alloc_flags & ALLOC_NO_WATERMARKS); - return page; - } - /* * The first pass makes sure allocations are spread fairly within the * local node. However, the local node might have free pages left From a9263751e11a07af40a98dba88021821cd430cfd Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 11 Feb 2015 15:25:41 -0800 Subject: [PATCH 538/601] mm, page_alloc: reduce number of alloc_pages* functions' parameters Introduce struct alloc_context to accumulate the numerous parameters passed between the alloc_pages* family of functions and get_page_from_freelist(). This excludes gfp_flags and alloc_info, which mutate too much along the way, and allocation order, which is conceptually different. The result is shorter function signatures, as well as overal code size and stack usage reductions. bloat-o-meter: add/remove: 0/0 grow/shrink: 1/2 up/down: 127/-310 (-183) function old new delta get_page_from_freelist 2525 2652 +127 __alloc_pages_direct_compact 329 283 -46 __alloc_pages_nodemask 2564 2300 -264 checkstack.pl: function old new __alloc_pages_nodemask 248 200 get_page_from_freelist 168 184 __alloc_pages_direct_compact 40 24 Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Mel Gorman Cc: Zhang Yanfei Cc: Minchan Kim Cc: David Rientjes Cc: Rik van Riel Cc: "Aneesh Kumar K.V" Cc: "Kirill A. Shutemov" Cc: Johannes Weiner Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 229 +++++++++++++++++++++++------------------------- 1 file changed, 108 insertions(+), 121 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 30a3250c0a2105..4aead0bd8d4443 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -232,6 +232,27 @@ EXPORT_SYMBOL(nr_node_ids); EXPORT_SYMBOL(nr_online_nodes); #endif +/* + * Structure for holding the mostly immutable allocation parameters passed + * between alloc_pages* family of functions. + * + * nodemask, migratetype and high_zoneidx are initialized only once in + * __alloc_pages_nodemask() and then never change. + * + * zonelist, preferred_zone and classzone_idx are set first in + * __alloc_pages_nodemask() for the fast path, and might be later changed + * in __alloc_pages_slowpath(). All other functions pass the whole strucure + * by a const pointer. + */ +struct alloc_context { + struct zonelist *zonelist; + nodemask_t *nodemask; + struct zone *preferred_zone; + int classzone_idx; + int migratetype; + enum zone_type high_zoneidx; +}; + int page_group_by_mobility_disabled __read_mostly; void set_pageblock_migratetype(struct page *page, int migratetype) @@ -2037,10 +2058,10 @@ static void reset_alloc_batches(struct zone *preferred_zone) * a page. */ static struct page * -get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, - struct zonelist *zonelist, int high_zoneidx, int alloc_flags, - struct zone *preferred_zone, int classzone_idx, int migratetype) +get_page_from_freelist(gfp_t gfp_mask, unsigned int order, int alloc_flags, + const struct alloc_context *ac) { + struct zonelist *zonelist = ac->zonelist; struct zoneref *z; struct page *page = NULL; struct zone *zone; @@ -2059,8 +2080,8 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, * Scan zonelist, looking for a zone with enough free. * See also __cpuset_node_allowed() comment in kernel/cpuset.c. */ - for_each_zone_zonelist_nodemask(zone, z, zonelist, - high_zoneidx, nodemask) { + for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->high_zoneidx, + ac->nodemask) { unsigned long mark; if (IS_ENABLED(CONFIG_NUMA) && zlc_active && @@ -2077,7 +2098,7 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, * time the page has in memory before being reclaimed. */ if (alloc_flags & ALLOC_FAIR) { - if (!zone_local(preferred_zone, zone)) + if (!zone_local(ac->preferred_zone, zone)) break; if (test_bit(ZONE_FAIR_DEPLETED, &zone->flags)) { nr_fair_skipped++; @@ -2115,7 +2136,7 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK]; if (!zone_watermark_ok(zone, order, mark, - classzone_idx, alloc_flags)) { + ac->classzone_idx, alloc_flags)) { int ret; /* Checked here to keep the fast path fast */ @@ -2136,7 +2157,7 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, } if (zone_reclaim_mode == 0 || - !zone_allows_reclaim(preferred_zone, zone)) + !zone_allows_reclaim(ac->preferred_zone, zone)) goto this_zone_full; /* @@ -2158,7 +2179,7 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, default: /* did we reclaim enough */ if (zone_watermark_ok(zone, order, mark, - classzone_idx, alloc_flags)) + ac->classzone_idx, alloc_flags)) goto try_this_zone; /* @@ -2179,8 +2200,8 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, } try_this_zone: - page = buffered_rmqueue(preferred_zone, zone, order, - gfp_mask, migratetype); + page = buffered_rmqueue(ac->preferred_zone, zone, order, + gfp_mask, ac->migratetype); if (page) { if (prep_new_page(page, order, gfp_mask, alloc_flags)) goto try_this_zone; @@ -2203,7 +2224,7 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order, alloc_flags &= ~ALLOC_FAIR; if (nr_fair_skipped) { zonelist_rescan = true; - reset_alloc_batches(preferred_zone); + reset_alloc_batches(ac->preferred_zone); } if (nr_online_nodes > 1) zonelist_rescan = true; @@ -2325,9 +2346,7 @@ should_alloc_retry(gfp_t gfp_mask, unsigned int order, static inline struct page * __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, - struct zonelist *zonelist, enum zone_type high_zoneidx, - nodemask_t *nodemask, struct zone *preferred_zone, - int classzone_idx, int migratetype, unsigned long *did_some_progress) + const struct alloc_context *ac, unsigned long *did_some_progress) { struct page *page; @@ -2340,7 +2359,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, * Acquire the per-zone oom lock for each zone. If that * fails, somebody else is making progress for us. */ - if (!oom_zonelist_trylock(zonelist, gfp_mask)) { + if (!oom_zonelist_trylock(ac->zonelist, gfp_mask)) { *did_some_progress = 1; schedule_timeout_uninterruptible(1); return NULL; @@ -2359,10 +2378,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, * here, this is only to catch a parallel oom killing, we must fail if * we're still under heavy pressure. */ - page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, - order, zonelist, high_zoneidx, - ALLOC_WMARK_HIGH|ALLOC_CPUSET, - preferred_zone, classzone_idx, migratetype); + page = get_page_from_freelist(gfp_mask | __GFP_HARDWALL, order, + ALLOC_WMARK_HIGH|ALLOC_CPUSET, ac); if (page) goto out; @@ -2374,7 +2391,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, if (order > PAGE_ALLOC_COSTLY_ORDER) goto out; /* The OOM killer does not needlessly kill tasks for lowmem */ - if (high_zoneidx < ZONE_NORMAL) + if (ac->high_zoneidx < ZONE_NORMAL) goto out; /* The OOM killer does not compensate for light reclaim */ if (!(gfp_mask & __GFP_FS)) @@ -2390,10 +2407,10 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, goto out; } /* Exhausted what can be done so it's blamo time */ - out_of_memory(zonelist, gfp_mask, order, nodemask, false); + out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false); *did_some_progress = 1; out: - oom_zonelist_unlock(zonelist, gfp_mask); + oom_zonelist_unlock(ac->zonelist, gfp_mask); return page; } @@ -2401,10 +2418,9 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, /* Try memory compaction for high-order allocations before reclaim */ static struct page * __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, - struct zonelist *zonelist, enum zone_type high_zoneidx, - nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, - int classzone_idx, int migratetype, enum migrate_mode mode, - int *contended_compaction, bool *deferred_compaction) + int alloc_flags, const struct alloc_context *ac, + enum migrate_mode mode, int *contended_compaction, + bool *deferred_compaction) { unsigned long compact_result; struct page *page; @@ -2413,10 +2429,10 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, return NULL; current->flags |= PF_MEMALLOC; - compact_result = try_to_compact_pages(zonelist, order, gfp_mask, - nodemask, mode, + compact_result = try_to_compact_pages(ac->zonelist, order, gfp_mask, + ac->nodemask, mode, contended_compaction, - alloc_flags, classzone_idx); + alloc_flags, ac->classzone_idx); current->flags &= ~PF_MEMALLOC; switch (compact_result) { @@ -2435,10 +2451,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, */ count_vm_event(COMPACTSTALL); - page = get_page_from_freelist(gfp_mask, nodemask, - order, zonelist, high_zoneidx, - alloc_flags & ~ALLOC_NO_WATERMARKS, - preferred_zone, classzone_idx, migratetype); + page = get_page_from_freelist(gfp_mask, order, + alloc_flags & ~ALLOC_NO_WATERMARKS, ac); if (page) { struct zone *zone = page_zone(page); @@ -2462,10 +2476,9 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, #else static inline struct page * __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, - struct zonelist *zonelist, enum zone_type high_zoneidx, - nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, - int classzone_idx, int migratetype, enum migrate_mode mode, - int *contended_compaction, bool *deferred_compaction) + int alloc_flags, const struct alloc_context *ac, + enum migrate_mode mode, int *contended_compaction, + bool *deferred_compaction) { return NULL; } @@ -2473,8 +2486,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, /* Perform direct synchronous page reclaim */ static int -__perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, - nodemask_t *nodemask) +__perform_reclaim(gfp_t gfp_mask, unsigned int order, + const struct alloc_context *ac) { struct reclaim_state reclaim_state; int progress; @@ -2488,7 +2501,8 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, reclaim_state.reclaimed_slab = 0; current->reclaim_state = &reclaim_state; - progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask); + progress = try_to_free_pages(ac->zonelist, order, gfp_mask, + ac->nodemask); current->reclaim_state = NULL; lockdep_clear_current_reclaim_state(); @@ -2502,28 +2516,23 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, /* The really slow allocator path where we enter direct reclaim */ static inline struct page * __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, - struct zonelist *zonelist, enum zone_type high_zoneidx, - nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, - int classzone_idx, int migratetype, unsigned long *did_some_progress) + int alloc_flags, const struct alloc_context *ac, + unsigned long *did_some_progress) { struct page *page = NULL; bool drained = false; - *did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, - nodemask); + *did_some_progress = __perform_reclaim(gfp_mask, order, ac); if (unlikely(!(*did_some_progress))) return NULL; /* After successful reclaim, reconsider all zones for allocation */ if (IS_ENABLED(CONFIG_NUMA)) - zlc_clear_zones_full(zonelist); + zlc_clear_zones_full(ac->zonelist); retry: - page = get_page_from_freelist(gfp_mask, nodemask, order, - zonelist, high_zoneidx, - alloc_flags & ~ALLOC_NO_WATERMARKS, - preferred_zone, classzone_idx, - migratetype); + page = get_page_from_freelist(gfp_mask, order, + alloc_flags & ~ALLOC_NO_WATERMARKS, ac); /* * If an allocation failed after direct reclaim, it could be because @@ -2544,36 +2553,30 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, */ static inline struct page * __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order, - struct zonelist *zonelist, enum zone_type high_zoneidx, - nodemask_t *nodemask, struct zone *preferred_zone, - int classzone_idx, int migratetype) + const struct alloc_context *ac) { struct page *page; do { - page = get_page_from_freelist(gfp_mask, nodemask, order, - zonelist, high_zoneidx, ALLOC_NO_WATERMARKS, - preferred_zone, classzone_idx, migratetype); + page = get_page_from_freelist(gfp_mask, order, + ALLOC_NO_WATERMARKS, ac); if (!page && gfp_mask & __GFP_NOFAIL) - wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50); + wait_iff_congested(ac->preferred_zone, BLK_RW_ASYNC, + HZ/50); } while (!page && (gfp_mask & __GFP_NOFAIL)); return page; } -static void wake_all_kswapds(unsigned int order, - struct zonelist *zonelist, - enum zone_type high_zoneidx, - struct zone *preferred_zone, - nodemask_t *nodemask) +static void wake_all_kswapds(unsigned int order, const struct alloc_context *ac) { struct zoneref *z; struct zone *zone; - for_each_zone_zonelist_nodemask(zone, z, zonelist, - high_zoneidx, nodemask) - wakeup_kswapd(zone, order, zone_idx(preferred_zone)); + for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, + ac->high_zoneidx, ac->nodemask) + wakeup_kswapd(zone, order, zone_idx(ac->preferred_zone)); } static inline int @@ -2632,9 +2635,7 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask) static inline struct page * __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, - struct zonelist *zonelist, enum zone_type high_zoneidx, - nodemask_t *nodemask, struct zone *preferred_zone, - int classzone_idx, int migratetype) + struct alloc_context *ac) { const gfp_t wait = gfp_mask & __GFP_WAIT; struct page *page = NULL; @@ -2670,8 +2671,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, retry: if (!(gfp_mask & __GFP_NO_KSWAPD)) - wake_all_kswapds(order, zonelist, high_zoneidx, - preferred_zone, nodemask); + wake_all_kswapds(order, ac); /* * OK, we're below the kswapd watermark and have kicked background @@ -2684,17 +2684,16 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, * Find the true preferred zone if the allocation is unconstrained by * cpusets. */ - if (!(alloc_flags & ALLOC_CPUSET) && !nodemask) { + if (!(alloc_flags & ALLOC_CPUSET) && !ac->nodemask) { struct zoneref *preferred_zoneref; - preferred_zoneref = first_zones_zonelist(zonelist, high_zoneidx, - NULL, &preferred_zone); - classzone_idx = zonelist_zone_idx(preferred_zoneref); + preferred_zoneref = first_zones_zonelist(ac->zonelist, + ac->high_zoneidx, NULL, &ac->preferred_zone); + ac->classzone_idx = zonelist_zone_idx(preferred_zoneref); } /* This is the last chance, in general, before the goto nopage. */ - page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, - high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS, - preferred_zone, classzone_idx, migratetype); + page = get_page_from_freelist(gfp_mask, order, + alloc_flags & ~ALLOC_NO_WATERMARKS, ac); if (page) goto got_pg; @@ -2705,11 +2704,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, * the allocation is high priority and these type of * allocations are system rather than user orientated */ - zonelist = node_zonelist(numa_node_id(), gfp_mask); + ac->zonelist = node_zonelist(numa_node_id(), gfp_mask); + + page = __alloc_pages_high_priority(gfp_mask, order, ac); - page = __alloc_pages_high_priority(gfp_mask, order, - zonelist, high_zoneidx, nodemask, - preferred_zone, classzone_idx, migratetype); if (page) { goto got_pg; } @@ -2738,11 +2736,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, * Try direct compaction. The first pass is asynchronous. Subsequent * attempts after direct reclaim are synchronous */ - page = __alloc_pages_direct_compact(gfp_mask, order, zonelist, - high_zoneidx, nodemask, alloc_flags, - preferred_zone, - classzone_idx, migratetype, - migration_mode, &contended_compaction, + page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags, ac, + migration_mode, + &contended_compaction, &deferred_compaction); if (page) goto got_pg; @@ -2788,12 +2784,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, migration_mode = MIGRATE_SYNC_LIGHT; /* Try direct reclaim and then allocating */ - page = __alloc_pages_direct_reclaim(gfp_mask, order, - zonelist, high_zoneidx, - nodemask, - alloc_flags, preferred_zone, - classzone_idx, migratetype, - &did_some_progress); + page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac, + &did_some_progress); if (page) goto got_pg; @@ -2807,17 +2799,15 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, * start OOM killing tasks. */ if (!did_some_progress) { - page = __alloc_pages_may_oom(gfp_mask, order, zonelist, - high_zoneidx, nodemask, - preferred_zone, classzone_idx, - migratetype,&did_some_progress); + page = __alloc_pages_may_oom(gfp_mask, order, ac, + &did_some_progress); if (page) goto got_pg; if (!did_some_progress) goto nopage; } /* Wait for some write requests to complete then retry */ - wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50); + wait_iff_congested(ac->preferred_zone, BLK_RW_ASYNC, HZ/50); goto retry; } else { /* @@ -2825,11 +2815,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, * direct reclaim and reclaim/compaction depends on compaction * being called after reclaim so call directly if necessary */ - page = __alloc_pages_direct_compact(gfp_mask, order, zonelist, - high_zoneidx, nodemask, alloc_flags, - preferred_zone, - classzone_idx, migratetype, - migration_mode, &contended_compaction, + page = __alloc_pages_direct_compact(gfp_mask, order, + alloc_flags, ac, migration_mode, + &contended_compaction, &deferred_compaction); if (page) goto got_pg; @@ -2848,15 +2836,16 @@ struct page * __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, nodemask_t *nodemask) { - enum zone_type high_zoneidx = gfp_zone(gfp_mask); - struct zone *preferred_zone; struct zoneref *preferred_zoneref; struct page *page = NULL; - int migratetype = gfpflags_to_migratetype(gfp_mask); unsigned int cpuset_mems_cookie; int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR; - int classzone_idx; gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */ + struct alloc_context ac = { + .high_zoneidx = gfp_zone(gfp_mask), + .nodemask = nodemask, + .migratetype = gfpflags_to_migratetype(gfp_mask), + }; gfp_mask &= gfp_allowed_mask; @@ -2875,25 +2864,25 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, if (unlikely(!zonelist->_zonerefs->zone)) return NULL; - if (IS_ENABLED(CONFIG_CMA) && migratetype == MIGRATE_MOVABLE) + if (IS_ENABLED(CONFIG_CMA) && ac.migratetype == MIGRATE_MOVABLE) alloc_flags |= ALLOC_CMA; retry_cpuset: cpuset_mems_cookie = read_mems_allowed_begin(); + /* We set it here, as __alloc_pages_slowpath might have changed it */ + ac.zonelist = zonelist; /* The preferred zone is used for statistics later */ - preferred_zoneref = first_zones_zonelist(zonelist, high_zoneidx, - nodemask ? : &cpuset_current_mems_allowed, - &preferred_zone); - if (!preferred_zone) + preferred_zoneref = first_zones_zonelist(ac.zonelist, ac.high_zoneidx, + ac.nodemask ? : &cpuset_current_mems_allowed, + &ac.preferred_zone); + if (!ac.preferred_zone) goto out; - classzone_idx = zonelist_zone_idx(preferred_zoneref); + ac.classzone_idx = zonelist_zone_idx(preferred_zoneref); /* First allocation attempt */ alloc_mask = gfp_mask|__GFP_HARDWALL; - page = get_page_from_freelist(alloc_mask, nodemask, order, zonelist, - high_zoneidx, alloc_flags, preferred_zone, - classzone_idx, migratetype); + page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac); if (unlikely(!page)) { /* * Runtime PM, block IO and its error handling path @@ -2902,15 +2891,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, */ alloc_mask = memalloc_noio_flags(gfp_mask); - page = __alloc_pages_slowpath(alloc_mask, order, - zonelist, high_zoneidx, nodemask, - preferred_zone, classzone_idx, migratetype); + page = __alloc_pages_slowpath(alloc_mask, order, &ac); } if (kmemcheck_enabled && page) kmemcheck_pagealloc_alloc(page, order, gfp_mask); - trace_mm_page_alloc(page, order, alloc_mask, migratetype); + trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype); out: /* From 1a6d53a105406d97396c87511afd6f09b4dc8ad2 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 11 Feb 2015 15:25:44 -0800 Subject: [PATCH 539/601] mm: reduce try_to_compact_pages parameters Expand the usage of the struct alloc_context introduced in the previous patch also for calling try_to_compact_pages(), to reduce the number of its parameters. Since the function is in different compilation unit, we need to move alloc_context definition in the shared mm/internal.h header. With this change we get simpler code and small savings of code size and stack usage: add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-27 (-27) function old new delta __alloc_pages_direct_compact 283 256 -27 add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-13 (-13) function old new delta try_to_compact_pages 582 569 -13 Stack usage of __alloc_pages_direct_compact goes from 24 to none (per scripts/checkstack.pl). Signed-off-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Mel Gorman Cc: Zhang Yanfei Cc: Minchan Kim Cc: David Rientjes Cc: Rik van Riel Cc: "Aneesh Kumar K.V" Cc: "Kirill A. Shutemov" Cc: Johannes Weiner Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 17 +++++++++-------- mm/compaction.c | 23 +++++++++++------------ mm/internal.h | 22 ++++++++++++++++++++++ mm/page_alloc.c | 27 ++------------------------- 4 files changed, 44 insertions(+), 45 deletions(-) diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 3238ffa33f68bb..f2efda2e6ac6dd 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -21,6 +21,8 @@ /* Zone lock or lru_lock was contended in async compaction */ #define COMPACT_CONTENDED_LOCK 2 +struct alloc_context; /* in mm/internal.h */ + #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; extern int sysctl_compaction_handler(struct ctl_table *table, int write, @@ -30,10 +32,9 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos); extern int fragmentation_index(struct zone *zone, unsigned int order); -extern unsigned long try_to_compact_pages(struct zonelist *zonelist, - int order, gfp_t gfp_mask, nodemask_t *mask, - enum migrate_mode mode, int *contended, - int alloc_flags, int classzone_idx); +extern unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, + int alloc_flags, const struct alloc_context *ac, + enum migrate_mode mode, int *contended); extern void compact_pgdat(pg_data_t *pgdat, int order); extern void reset_isolation_suitable(pg_data_t *pgdat); extern unsigned long compaction_suitable(struct zone *zone, int order, @@ -101,10 +102,10 @@ static inline bool compaction_restarting(struct zone *zone, int order) } #else -static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, - int order, gfp_t gfp_mask, nodemask_t *nodemask, - enum migrate_mode mode, int *contended, - int alloc_flags, int classzone_idx) +static inline unsigned long try_to_compact_pages(gfp_t gfp_mask, + unsigned int order, int alloc_flags, + const struct alloc_context *ac, + enum migrate_mode mode, int *contended) { return COMPACT_CONTINUE; } diff --git a/mm/compaction.c b/mm/compaction.c index 546e571e9d60d1..9c7e6909dd2908 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1335,22 +1335,20 @@ int sysctl_extfrag_threshold = 500; /** * try_to_compact_pages - Direct compact to satisfy a high-order allocation - * @zonelist: The zonelist used for the current allocation - * @order: The order of the current allocation * @gfp_mask: The GFP mask of the current allocation - * @nodemask: The allowed nodes to allocate from + * @order: The order of the current allocation + * @alloc_flags: The allocation flags of the current allocation + * @ac: The context of current allocation * @mode: The migration mode for async, sync light, or sync migration * @contended: Return value that determines if compaction was aborted due to * need_resched() or lock contention * * This is the main entry point for direct page compaction. */ -unsigned long try_to_compact_pages(struct zonelist *zonelist, - int order, gfp_t gfp_mask, nodemask_t *nodemask, - enum migrate_mode mode, int *contended, - int alloc_flags, int classzone_idx) +unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, + int alloc_flags, const struct alloc_context *ac, + enum migrate_mode mode, int *contended) { - enum zone_type high_zoneidx = gfp_zone(gfp_mask); int may_enter_fs = gfp_mask & __GFP_FS; int may_perform_io = gfp_mask & __GFP_IO; struct zoneref *z; @@ -1365,8 +1363,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, return COMPACT_SKIPPED; /* Compact each zone in the list */ - for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx, - nodemask) { + for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx, + ac->nodemask) { int status; int zone_contended; @@ -1374,7 +1372,8 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, continue; status = compact_zone_order(zone, order, gfp_mask, mode, - &zone_contended, alloc_flags, classzone_idx); + &zone_contended, alloc_flags, + ac->classzone_idx); rc = max(status, rc); /* * It takes at least one zone that wasn't lock contended @@ -1384,7 +1383,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, /* If a normal allocation would succeed, stop compacting */ if (zone_watermark_ok(zone, order, low_wmark_pages(zone), - classzone_idx, alloc_flags)) { + ac->classzone_idx, alloc_flags)) { /* * We think the allocation will succeed in this zone, * but it is not certain, hence the false. The caller diff --git a/mm/internal.h b/mm/internal.h index efad241f701480..c4d6c9b43491d5 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -109,6 +109,28 @@ extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address); * in mm/page_alloc.c */ +/* + * Structure for holding the mostly immutable allocation parameters passed + * between functions involved in allocations, including the alloc_pages* + * family of functions. + * + * nodemask, migratetype and high_zoneidx are initialized only once in + * __alloc_pages_nodemask() and then never change. + * + * zonelist, preferred_zone and classzone_idx are set first in + * __alloc_pages_nodemask() for the fast path, and might be later changed + * in __alloc_pages_slowpath(). All other functions pass the whole strucure + * by a const pointer. + */ +struct alloc_context { + struct zonelist *zonelist; + nodemask_t *nodemask; + struct zone *preferred_zone; + int classzone_idx; + int migratetype; + enum zone_type high_zoneidx; +}; + /* * Locate the struct page for both the matching buddy in our * pair (buddy1) and the combined O(n+1) page they form (page). diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4aead0bd8d4443..d664eb922a7df8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -232,27 +232,6 @@ EXPORT_SYMBOL(nr_node_ids); EXPORT_SYMBOL(nr_online_nodes); #endif -/* - * Structure for holding the mostly immutable allocation parameters passed - * between alloc_pages* family of functions. - * - * nodemask, migratetype and high_zoneidx are initialized only once in - * __alloc_pages_nodemask() and then never change. - * - * zonelist, preferred_zone and classzone_idx are set first in - * __alloc_pages_nodemask() for the fast path, and might be later changed - * in __alloc_pages_slowpath(). All other functions pass the whole strucure - * by a const pointer. - */ -struct alloc_context { - struct zonelist *zonelist; - nodemask_t *nodemask; - struct zone *preferred_zone; - int classzone_idx; - int migratetype; - enum zone_type high_zoneidx; -}; - int page_group_by_mobility_disabled __read_mostly; void set_pageblock_migratetype(struct page *page, int migratetype) @@ -2429,10 +2408,8 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, return NULL; current->flags |= PF_MEMALLOC; - compact_result = try_to_compact_pages(ac->zonelist, order, gfp_mask, - ac->nodemask, mode, - contended_compaction, - alloc_flags, ac->classzone_idx); + compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac, + mode, contended_compaction); current->flags &= ~PF_MEMALLOC; switch (compact_result) { From 05891fb06517d19ae5357c9dc44e96bbe0300a3c Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 11 Feb 2015 15:25:47 -0800 Subject: [PATCH 540/601] mm: microoptimize zonelist operations next_zones_zonelist() returns a zoneref pointer, as well as a zone pointer via extra parameter. Since the latter can be trivially obtained by dereferencing the former, the overhead of the extra parameter is unjustified. This patch thus removes the zone parameter from next_zones_zonelist(). Both callers happen to be in the same header file, so it's simple to add the zoneref dereference inline. We save some bytes of code size. add/remove: 0/0 grow/shrink: 0/3 up/down: 0/-105 (-105) function old new delta nr_free_zone_pages 129 115 -14 __alloc_pages_nodemask 2300 2285 -15 get_page_from_freelist 2652 2576 -76 add/remove: 0/0 grow/shrink: 1/0 up/down: 10/0 (10) function old new delta try_to_compact_pages 569 579 +10 Signed-off-by: Vlastimil Babka Cc: Mel Gorman Cc: Zhang Yanfei Cc: Minchan Kim Cc: David Rientjes Cc: Rik van Riel Cc: "Aneesh Kumar K.V" Cc: "Kirill A. Shutemov" Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 13 +++++++------ mm/mmzone.c | 4 +--- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b4182970133479..f279d9c158cd56 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -970,7 +970,6 @@ static inline int zonelist_node_idx(struct zoneref *zoneref) * @z - The cursor used as a starting point for the search * @highest_zoneidx - The zone index of the highest zone to return * @nodes - An optional nodemask to filter the zonelist with - * @zone - The first suitable zone found is returned via this parameter * * This function returns the next zone at or below a given zone index that is * within the allowed nodemask using a cursor as the starting point for the @@ -980,8 +979,7 @@ static inline int zonelist_node_idx(struct zoneref *zoneref) */ struct zoneref *next_zones_zonelist(struct zoneref *z, enum zone_type highest_zoneidx, - nodemask_t *nodes, - struct zone **zone); + nodemask_t *nodes); /** * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist @@ -1000,8 +998,10 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, nodemask_t *nodes, struct zone **zone) { - return next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, nodes, - zone); + struct zoneref *z = next_zones_zonelist(zonelist->_zonerefs, + highest_zoneidx, nodes); + *zone = zonelist_zone(z); + return z; } /** @@ -1018,7 +1018,8 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, #define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \ for (z = first_zones_zonelist(zlist, highidx, nodemask, &zone); \ zone; \ - z = next_zones_zonelist(++z, highidx, nodemask, &zone)) \ + z = next_zones_zonelist(++z, highidx, nodemask), \ + zone = zonelist_zone(z)) \ /** * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index diff --git a/mm/mmzone.c b/mm/mmzone.c index bf34fb8556db0a..7d87ebb0d63249 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -54,8 +54,7 @@ static inline int zref_in_nodemask(struct zoneref *zref, nodemask_t *nodes) /* Returns the next zone at or below highest_zoneidx in a zonelist */ struct zoneref *next_zones_zonelist(struct zoneref *z, enum zone_type highest_zoneidx, - nodemask_t *nodes, - struct zone **zone) + nodemask_t *nodes) { /* * Find the next suitable zone to use for the allocation. @@ -69,7 +68,6 @@ struct zoneref *next_zones_zonelist(struct zoneref *z, (z->zone && !zref_in_nodemask(z, nodes))) z++; - *zone = zonelist_zone(z); return z; } From 6e9f0d582dde095d971a3c6ce4685a218a0eac8e Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 11 Feb 2015 15:25:50 -0800 Subject: [PATCH 541/601] mm/page_alloc.c: drop dead destroy_compound_page() The only caller is __free_one_page(). By the time we should have page->flags to be cleared already: - for 0-order pages though PCP list: free_hot_cold_page() free_pages_prepare() free_pages_check() page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; free_pcppages_bulk() page = __free_one_page(page) - for non-0-order pages: __free_pages_ok() free_pages_prepare() free_pages_check() page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; free_one_page() __free_one_page() So there's no way PageCompound() will return true in __free_one_page(). Let's remove dead destroy_compound_page() and put assert for page->flags there instead. Signed-off-by: Kirill A. Shutemov Cc: Vlastimil Babka Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 35 +---------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d664eb922a7df8..12d55b859d3faf 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -381,36 +381,6 @@ void prep_compound_page(struct page *page, unsigned long order) } } -/* update __split_huge_page_refcount if you change this function */ -static int destroy_compound_page(struct page *page, unsigned long order) -{ - int i; - int nr_pages = 1 << order; - int bad = 0; - - if (unlikely(compound_order(page) != order)) { - bad_page(page, "wrong compound order", 0); - bad++; - } - - __ClearPageHead(page); - - for (i = 1; i < nr_pages; i++) { - struct page *p = page + i; - - if (unlikely(!PageTail(p))) { - bad_page(page, "PageTail not set", 0); - bad++; - } else if (unlikely(p->first_page != page)) { - bad_page(page, "first_page not consistent", 0); - bad++; - } - __ClearPageTail(p); - } - - return bad; -} - static inline void prep_zero_page(struct page *page, unsigned int order, gfp_t gfp_flags) { @@ -613,10 +583,7 @@ static inline void __free_one_page(struct page *page, int max_order = MAX_ORDER; VM_BUG_ON(!zone_is_initialized(zone)); - - if (unlikely(PageCompound(page))) - if (unlikely(destroy_compound_page(page, order))) - return; + VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page); VM_BUG_ON(migratetype == -1); if (is_migrate_isolate(migratetype)) { From 81422f29c5f4fb968023f465218c3d978c133ceb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 11 Feb 2015 15:25:52 -0800 Subject: [PATCH 542/601] mm: more checks on free_pages_prepare() for tail pages Although it was not called, destroy_compound_page() did some potentially useful checks. Let's re-introduce them in free_pages_prepare(), where they can be actually triggered when CONFIG_DEBUG_VM=y. compound_order() assert is already in free_pages_prepare(). We have few checks for tail pages left. Signed-off-by: Kirill A. Shutemov Cc: Vlastimil Babka Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 12d55b859d3faf..0081228d1caafe 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -764,21 +764,40 @@ static void free_one_page(struct zone *zone, spin_unlock(&zone->lock); } +static int free_tail_pages_check(struct page *head_page, struct page *page) +{ + if (!IS_ENABLED(CONFIG_DEBUG_VM)) + return 0; + if (unlikely(!PageTail(page))) { + bad_page(page, "PageTail not set", 0); + return 1; + } + if (unlikely(page->first_page != head_page)) { + bad_page(page, "first_page not consistent", 0); + return 1; + } + return 0; +} + static bool free_pages_prepare(struct page *page, unsigned int order) { - int i; - int bad = 0; + bool compound = PageCompound(page); + int i, bad = 0; VM_BUG_ON_PAGE(PageTail(page), page); - VM_BUG_ON_PAGE(PageHead(page) && compound_order(page) != order, page); + VM_BUG_ON_PAGE(compound && compound_order(page) != order, page); trace_mm_page_free(page, order); kmemcheck_free_shadow(page, order); if (PageAnon(page)) page->mapping = NULL; - for (i = 0; i < (1 << order); i++) + bad += free_pages_check(page); + for (i = 1; i < (1 << order); i++) { + if (compound) + bad += free_tail_pages_check(page, page + i); bad += free_pages_check(page + i); + } if (bad) return false; From 90cbc2508827e1e15dca23361c33cc26dd2b9e99 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Wed, 11 Feb 2015 15:25:55 -0800 Subject: [PATCH 543/601] vmscan: force scan offline memory cgroups Since commit b2052564e66d ("mm: memcontrol: continue cache reclaim from offlined groups") pages charged to a memory cgroup are not reparented when the cgroup is removed. Instead, they are supposed to be reclaimed in a regular way, along with pages accounted to online memory cgroups. However, an lruvec of an offline memory cgroup will sooner or later get so small that it will be scanned only at low scan priorities (see get_scan_count()). Therefore, if there are enough reclaimable pages in big lruvecs, pages accounted to offline memory cgroups will never be scanned at all, wasting memory. Fix this by unconditionally forcing scanning dead lruvecs from kswapd. [akpm@linux-foundation.org: fix build] Signed-off-by: Vladimir Davydov Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++++++ mm/memcontrol.c | 14 ++++++++++++++ mm/vmscan.c | 8 ++++++-- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 76b4084b8d082d..353537a5981a67 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -102,6 +102,7 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); * For memory reclaim. */ int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec); +bool mem_cgroup_lruvec_online(struct lruvec *lruvec); int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list); void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int); @@ -266,6 +267,11 @@ mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) return 1; } +static inline bool mem_cgroup_lruvec_online(struct lruvec *lruvec) +{ + return true; +} + static inline unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 028d07c79104e8..6187ca4d5dc214 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1367,6 +1367,20 @@ int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) return inactive * inactive_ratio < active; } +bool mem_cgroup_lruvec_online(struct lruvec *lruvec) +{ + struct mem_cgroup_per_zone *mz; + struct mem_cgroup *memcg; + + if (mem_cgroup_disabled()) + return true; + + mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); + memcg = mz->memcg; + + return !!(memcg->css.flags & CSS_ONLINE); +} + #define mem_cgroup_from_counter(counter, member) \ container_of(counter, struct mem_cgroup, member) diff --git a/mm/vmscan.c b/mm/vmscan.c index f756a202d5d5a4..b6dfa0081a8ecb 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1903,8 +1903,12 @@ static void get_scan_count(struct lruvec *lruvec, int swappiness, * latencies, so it's better to scan a minimum amount there as * well. */ - if (current_is_kswapd() && !zone_reclaimable(zone)) - force_scan = true; + if (current_is_kswapd()) { + if (!zone_reclaimable(zone)) + force_scan = true; + if (!mem_cgroup_lruvec_online(lruvec)) + force_scan = true; + } if (!global_reclaim(sc)) force_scan = true; From 0ca44b148ef2596882742dc03ef1c3adcd40f03b Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Wed, 11 Feb 2015 15:25:58 -0800 Subject: [PATCH 544/601] memcg: add BUILD_BUG_ON() for string tables Use BUILD_BUG_ON() to compile assert that memcg string tables are in sync with corresponding enums. There aren't currently any issues with these tables. This is just defensive. Signed-off-by: Greg Thelen Acked-by: Johannes Weiner Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6187ca4d5dc214..dc5c4cd0afdf60 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3568,6 +3568,10 @@ static int memcg_stat_show(struct seq_file *m, void *v) struct mem_cgroup *mi; unsigned int i; + BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_stat_names) != + MEM_CGROUP_STAT_NSTATS); + BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_events_names) != + MEM_CGROUP_EVENTS_NSTATS); BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS); for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) { From 8d29e18a459dfc2adeafc1acb9c4185ee6713116 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 11 Feb 2015 15:26:01 -0800 Subject: [PATCH 545/601] mm: use correct format specifiers when printing address ranges Especially on 32 bit kernels memory node ranges are printed with 32 bit wide addresses only. Use u64 types and %llx specifiers to print full width of addresses. Signed-off-by: Juergen Gross Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0081228d1caafe..641d5a9a861790 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4994,8 +4994,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size, pgdat->node_start_pfn = node_start_pfn; #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); - printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid, - (u64) start_pfn << PAGE_SHIFT, (u64) (end_pfn << PAGE_SHIFT) - 1); + pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid, + (u64)start_pfn << PAGE_SHIFT, ((u64)end_pfn << PAGE_SHIFT) - 1); #endif calculate_node_totalpages(pgdat, start_pfn, end_pfn, zones_size, zholes_size); @@ -5367,9 +5367,10 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) arch_zone_highest_possible_pfn[i]) pr_cont("empty\n"); else - pr_cont("[mem %0#10lx-%0#10lx]\n", - arch_zone_lowest_possible_pfn[i] << PAGE_SHIFT, - (arch_zone_highest_possible_pfn[i] + pr_cont("[mem %#018Lx-%#018Lx]\n", + (u64)arch_zone_lowest_possible_pfn[i] + << PAGE_SHIFT, + ((u64)arch_zone_highest_possible_pfn[i] << PAGE_SHIFT) - 1); } @@ -5377,15 +5378,16 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) pr_info("Movable zone start for each node\n"); for (i = 0; i < MAX_NUMNODES; i++) { if (zone_movable_pfn[i]) - pr_info(" Node %d: %#010lx\n", i, - zone_movable_pfn[i] << PAGE_SHIFT); + pr_info(" Node %d: %#018Lx\n", i, + (u64)zone_movable_pfn[i] << PAGE_SHIFT); } /* Print out the early node map */ pr_info("Early memory node ranges\n"); for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) - pr_info(" node %3d: [mem %#010lx-%#010lx]\n", nid, - start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1); + pr_info(" node %3d: [mem %#018Lx-%#018Lx]\n", nid, + (u64)start_pfn << PAGE_SHIFT, + ((u64)end_pfn << PAGE_SHIFT) - 1); /* Initialise every node */ mminit_verify_pageflags_layout(); From 650c5e565492f9092552bfe4d65935196c7d9567 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 11 Feb 2015 15:26:03 -0800 Subject: [PATCH 546/601] mm: page_counter: pull "-1" handling out of page_counter_memparse() The unified hierarchy interface for memory cgroups will no longer use "-1" to mean maximum possible resource value. In preparation for this, make the string an argument and let the caller supply it. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: Greg Thelen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_counter.h | 3 ++- mm/hugetlb_cgroup.c | 2 +- mm/memcontrol.c | 4 ++-- mm/page_counter.c | 7 ++++--- net/ipv4/tcp_memcontrol.c | 2 +- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 955421575d16aa..17fa4f8de3a679 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -41,7 +41,8 @@ int page_counter_try_charge(struct page_counter *counter, struct page_counter **fail); void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages); int page_counter_limit(struct page_counter *counter, unsigned long limit); -int page_counter_memparse(const char *buf, unsigned long *nr_pages); +int page_counter_memparse(const char *buf, const char *max, + unsigned long *nr_pages); static inline void page_counter_reset_watermark(struct page_counter *counter) { diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 037e1c00a5b7a8..6e0057439a469b 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -279,7 +279,7 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of, return -EINVAL; buf = strstrip(buf); - ret = page_counter_memparse(buf, &nr_pages); + ret = page_counter_memparse(buf, "-1", &nr_pages); if (ret) return ret; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index dc5c4cd0afdf60..6453ea5a27aace 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3414,7 +3414,7 @@ static ssize_t mem_cgroup_write(struct kernfs_open_file *of, int ret; buf = strstrip(buf); - ret = page_counter_memparse(buf, &nr_pages); + ret = page_counter_memparse(buf, "-1", &nr_pages); if (ret) return ret; @@ -3786,7 +3786,7 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg, unsigned long usage; int i, size, ret; - ret = page_counter_memparse(args, &threshold); + ret = page_counter_memparse(args, "-1", &threshold); if (ret) return ret; diff --git a/mm/page_counter.c b/mm/page_counter.c index a009574fbba997..11b4beda14ba99 100644 --- a/mm/page_counter.c +++ b/mm/page_counter.c @@ -166,18 +166,19 @@ int page_counter_limit(struct page_counter *counter, unsigned long limit) /** * page_counter_memparse - memparse() for page counter limits * @buf: string to parse + * @max: string meaning maximum possible value * @nr_pages: returns the result in number of pages * * Returns -EINVAL, or 0 and @nr_pages on success. @nr_pages will be * limited to %PAGE_COUNTER_MAX. */ -int page_counter_memparse(const char *buf, unsigned long *nr_pages) +int page_counter_memparse(const char *buf, const char *max, + unsigned long *nr_pages) { - char unlimited[] = "-1"; char *end; u64 bytes; - if (!strncmp(buf, unlimited, sizeof(unlimited))) { + if (!strcmp(buf, max)) { *nr_pages = PAGE_COUNTER_MAX; return 0; } diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 272327134a1b45..c2a75c6957a183 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -120,7 +120,7 @@ static ssize_t tcp_cgroup_write(struct kernfs_open_file *of, switch (of_cft(of)->private) { case RES_LIMIT: /* see memcontrol.c */ - ret = page_counter_memparse(buf, &nr_pages); + ret = page_counter_memparse(buf, "-1", &nr_pages); if (ret) break; mutex_lock(&tcp_limit_mutex); From 241994ed8649f7300667be8b13a9e04ae04e05a1 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 11 Feb 2015 15:26:06 -0800 Subject: [PATCH 547/601] mm: memcontrol: default hierarchy interface for memory Introduce the basic control files to account, partition, and limit memory using cgroups in default hierarchy mode. This interface versioning allows us to address fundamental design issues in the existing memory cgroup interface, further explained below. The old interface will be maintained indefinitely, but a clearer model and improved workload performance should encourage existing users to switch over to the new one eventually. The control files are thus: - memory.current shows the current consumption of the cgroup and its descendants, in bytes. - memory.low configures the lower end of the cgroup's expected memory consumption range. The kernel considers memory below that boundary to be a reserve - the minimum that the workload needs in order to make forward progress - and generally avoids reclaiming it, unless there is an imminent risk of entering an OOM situation. - memory.high configures the upper end of the cgroup's expected memory consumption range. A cgroup whose consumption grows beyond this threshold is forced into direct reclaim, to work off the excess and to throttle new allocations heavily, but is generally allowed to continue and the OOM killer is not invoked. - memory.max configures the hard maximum amount of memory that the cgroup is allowed to consume before the OOM killer is invoked. - memory.events shows event counters that indicate how often the cgroup was reclaimed while below memory.low, how often it was forced to reclaim excess beyond memory.high, how often it hit memory.max, and how often it entered OOM due to memory.max. This allows users to identify configuration problems when observing a degradation in workload performance. An overcommitted system will have an increased rate of low boundary breaches, whereas increased rates of high limit breaches, maximum hits, or even OOM situations will indicate internally overcommitted cgroups. For existing users of memory cgroups, the following deviations from the current interface are worth pointing out and explaining: - The original lower boundary, the soft limit, is defined as a limit that is per default unset. As a result, the set of cgroups that global reclaim prefers is opt-in, rather than opt-out. The costs for optimizing these mostly negative lookups are so high that the implementation, despite its enormous size, does not even provide the basic desirable behavior. First off, the soft limit has no hierarchical meaning. All configured groups are organized in a global rbtree and treated like equal peers, regardless where they are located in the hierarchy. This makes subtree delegation impossible. Second, the soft limit reclaim pass is so aggressive that it not just introduces high allocation latencies into the system, but also impacts system performance due to overreclaim, to the point where the feature becomes self-defeating. The memory.low boundary on the other hand is a top-down allocated reserve. A cgroup enjoys reclaim protection when it and all its ancestors are below their low boundaries, which makes delegation of subtrees possible. Secondly, new cgroups have no reserve per default and in the common case most cgroups are eligible for the preferred reclaim pass. This allows the new low boundary to be efficiently implemented with just a minor addition to the generic reclaim code, without the need for out-of-band data structures and reclaim passes. Because the generic reclaim code considers all cgroups except for the ones running low in the preferred first reclaim pass, overreclaim of individual groups is eliminated as well, resulting in much better overall workload performance. - The original high boundary, the hard limit, is defined as a strict limit that can not budge, even if the OOM killer has to be called. But this generally goes against the goal of making the most out of the available memory. The memory consumption of workloads varies during runtime, and that requires users to overcommit. But doing that with a strict upper limit requires either a fairly accurate prediction of the working set size or adding slack to the limit. Since working set size estimation is hard and error prone, and getting it wrong results in OOM kills, most users tend to err on the side of a looser limit and end up wasting precious resources. The memory.high boundary on the other hand can be set much more conservatively. When hit, it throttles allocations by forcing them into direct reclaim to work off the excess, but it never invokes the OOM killer. As a result, a high boundary that is chosen too aggressively will not terminate the processes, but instead it will lead to gradual performance degradation. The user can monitor this and make corrections until the minimal memory footprint that still gives acceptable performance is found. In extreme cases, with many concurrent allocations and a complete breakdown of reclaim progress within the group, the high boundary can be exceeded. But even then it's mostly better to satisfy the allocation from the slack available in other groups or the rest of the system than killing the group. Otherwise, memory.max is there to limit this type of spillover and ultimately contain buggy or even malicious applications. - The original control file names are unwieldy and inconsistent in many different ways. For example, the upper boundary hit count is exported in the memory.failcnt file, but an OOM event count has to be manually counted by listening to memory.oom_control events, and lower boundary / soft limit events have to be counted by first setting a threshold for that value and then counting those events. Also, usage and limit files encode their units in the filename. That makes the filenames very long, even though this is not information that a user needs to be reminded of every time they type out those names. To address these naming issues, as well as to signal clearly that the new interface carries a new configuration model, the naming conventions in it necessarily differ from the old interface. - The original limit files indicate the state of an unset limit with a very high number, and a configured limit can be unset by echoing -1 into those files. But that very high number is implementation and architecture dependent and not very descriptive. And while -1 can be understood as an underflow into the highest possible value, -2 or -10M etc. do not work, so it's not inconsistent. memory.low, memory.high, and memory.max will use the string "infinity" to indicate and set the highest possible value. [akpm@linux-foundation.org: use seq_puts() for basic strings] Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: Greg Thelen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/unified-hierarchy.txt | 79 +++++++ include/linux/memcontrol.h | 32 +++ mm/memcontrol.c | 229 +++++++++++++++++++- mm/vmscan.c | 22 +- 4 files changed, 348 insertions(+), 14 deletions(-) diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroups/unified-hierarchy.txt index 4f4563277864f4..71daa35ec2d920 100644 --- a/Documentation/cgroups/unified-hierarchy.txt +++ b/Documentation/cgroups/unified-hierarchy.txt @@ -327,6 +327,85 @@ supported and the interface files "release_agent" and - use_hierarchy is on by default and the cgroup file for the flag is not created. +- The original lower boundary, the soft limit, is defined as a limit + that is per default unset. As a result, the set of cgroups that + global reclaim prefers is opt-in, rather than opt-out. The costs + for optimizing these mostly negative lookups are so high that the + implementation, despite its enormous size, does not even provide the + basic desirable behavior. First off, the soft limit has no + hierarchical meaning. All configured groups are organized in a + global rbtree and treated like equal peers, regardless where they + are located in the hierarchy. This makes subtree delegation + impossible. Second, the soft limit reclaim pass is so aggressive + that it not just introduces high allocation latencies into the + system, but also impacts system performance due to overreclaim, to + the point where the feature becomes self-defeating. + + The memory.low boundary on the other hand is a top-down allocated + reserve. A cgroup enjoys reclaim protection when it and all its + ancestors are below their low boundaries, which makes delegation of + subtrees possible. Secondly, new cgroups have no reserve per + default and in the common case most cgroups are eligible for the + preferred reclaim pass. This allows the new low boundary to be + efficiently implemented with just a minor addition to the generic + reclaim code, without the need for out-of-band data structures and + reclaim passes. Because the generic reclaim code considers all + cgroups except for the ones running low in the preferred first + reclaim pass, overreclaim of individual groups is eliminated as + well, resulting in much better overall workload performance. + +- The original high boundary, the hard limit, is defined as a strict + limit that can not budge, even if the OOM killer has to be called. + But this generally goes against the goal of making the most out of + the available memory. The memory consumption of workloads varies + during runtime, and that requires users to overcommit. But doing + that with a strict upper limit requires either a fairly accurate + prediction of the working set size or adding slack to the limit. + Since working set size estimation is hard and error prone, and + getting it wrong results in OOM kills, most users tend to err on the + side of a looser limit and end up wasting precious resources. + + The memory.high boundary on the other hand can be set much more + conservatively. When hit, it throttles allocations by forcing them + into direct reclaim to work off the excess, but it never invokes the + OOM killer. As a result, a high boundary that is chosen too + aggressively will not terminate the processes, but instead it will + lead to gradual performance degradation. The user can monitor this + and make corrections until the minimal memory footprint that still + gives acceptable performance is found. + + In extreme cases, with many concurrent allocations and a complete + breakdown of reclaim progress within the group, the high boundary + can be exceeded. But even then it's mostly better to satisfy the + allocation from the slack available in other groups or the rest of + the system than killing the group. Otherwise, memory.max is there + to limit this type of spillover and ultimately contain buggy or even + malicious applications. + +- The original control file names are unwieldy and inconsistent in + many different ways. For example, the upper boundary hit count is + exported in the memory.failcnt file, but an OOM event count has to + be manually counted by listening to memory.oom_control events, and + lower boundary / soft limit events have to be counted by first + setting a threshold for that value and then counting those events. + Also, usage and limit files encode their units in the filename. + That makes the filenames very long, even though this is not + information that a user needs to be reminded of every time they type + out those names. + + To address these naming issues, as well as to signal clearly that + the new interface carries a new configuration model, the naming + conventions in it necessarily differ from the old interface. + +- The original limit files indicate the state of an unset limit with a + Very High Number, and a configured limit can be unset by echoing -1 + into those files. But that very high number is implementation and + architecture dependent and not very descriptive. And while -1 can + be understood as an underflow into the highest possible value, -2 or + -10M etc. do not work, so it's not consistent. + + memory.low, memory.high, and memory.max will use the string + "infinity" to indicate and set the highest possible value. 5. Planned Changes diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 353537a5981a67..6cfd934c7c9b3e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -52,7 +52,27 @@ struct mem_cgroup_reclaim_cookie { unsigned int generation; }; +enum mem_cgroup_events_index { + MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */ + MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */ + MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */ + MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */ + MEM_CGROUP_EVENTS_NSTATS, + /* default hierarchy events */ + MEMCG_LOW = MEM_CGROUP_EVENTS_NSTATS, + MEMCG_HIGH, + MEMCG_MAX, + MEMCG_OOM, + MEMCG_NR_EVENTS, +}; + #ifdef CONFIG_MEMCG +void mem_cgroup_events(struct mem_cgroup *memcg, + enum mem_cgroup_events_index idx, + unsigned int nr); + +bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg); + int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **memcgp); void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, @@ -175,6 +195,18 @@ void mem_cgroup_split_huge_fixup(struct page *head); #else /* CONFIG_MEMCG */ struct mem_cgroup; +static inline void mem_cgroup_events(struct mem_cgroup *memcg, + enum mem_cgroup_events_index idx, + unsigned int nr) +{ +} + +static inline bool mem_cgroup_low(struct mem_cgroup *root, + struct mem_cgroup *memcg) +{ + return false; +} + static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **memcgp) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6453ea5a27aace..ee97c9ac62c08c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -97,14 +97,6 @@ static const char * const mem_cgroup_stat_names[] = { "swap", }; -enum mem_cgroup_events_index { - MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */ - MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */ - MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */ - MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */ - MEM_CGROUP_EVENTS_NSTATS, -}; - static const char * const mem_cgroup_events_names[] = { "pgpgin", "pgpgout", @@ -138,7 +130,7 @@ enum mem_cgroup_events_target { struct mem_cgroup_stat_cpu { long count[MEM_CGROUP_STAT_NSTATS]; - unsigned long events[MEM_CGROUP_EVENTS_NSTATS]; + unsigned long events[MEMCG_NR_EVENTS]; unsigned long nr_page_events; unsigned long targets[MEM_CGROUP_NTARGETS]; }; @@ -284,6 +276,10 @@ struct mem_cgroup { struct page_counter memsw; struct page_counter kmem; + /* Normal memory consumption range */ + unsigned long low; + unsigned long high; + unsigned long soft_limit; /* vmpressure notifications */ @@ -2315,6 +2311,8 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, if (!(gfp_mask & __GFP_WAIT)) goto nomem; + mem_cgroup_events(mem_over_limit, MEMCG_MAX, 1); + nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages, gfp_mask, may_swap); @@ -2356,6 +2354,8 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, if (fatal_signal_pending(current)) goto bypass; + mem_cgroup_events(mem_over_limit, MEMCG_OOM, 1); + mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(nr_pages)); nomem: if (!(gfp_mask & __GFP_NOFAIL)) @@ -2367,6 +2367,16 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, css_get_many(&memcg->css, batch); if (batch > nr_pages) refill_stock(memcg, batch - nr_pages); + /* + * If the hierarchy is above the normal consumption range, + * make the charging task trim their excess contribution. + */ + do { + if (page_counter_read(&memcg->memory) <= memcg->high) + continue; + mem_cgroup_events(memcg, MEMCG_HIGH, 1); + try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true); + } while ((memcg = parent_mem_cgroup(memcg))); done: return ret; } @@ -4276,7 +4286,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, return ret; } -static struct cftype mem_cgroup_files[] = { +static struct cftype mem_cgroup_legacy_files[] = { { .name = "usage_in_bytes", .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), @@ -4552,6 +4562,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) if (parent_css == NULL) { root_mem_cgroup = memcg; page_counter_init(&memcg->memory, NULL); + memcg->high = PAGE_COUNTER_MAX; memcg->soft_limit = PAGE_COUNTER_MAX; page_counter_init(&memcg->memsw, NULL); page_counter_init(&memcg->kmem, NULL); @@ -4597,6 +4608,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) if (parent->use_hierarchy) { page_counter_init(&memcg->memory, &parent->memory); + memcg->high = PAGE_COUNTER_MAX; memcg->soft_limit = PAGE_COUNTER_MAX; page_counter_init(&memcg->memsw, &parent->memsw); page_counter_init(&memcg->kmem, &parent->kmem); @@ -4607,6 +4619,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) */ } else { page_counter_init(&memcg->memory, NULL); + memcg->high = PAGE_COUNTER_MAX; memcg->soft_limit = PAGE_COUNTER_MAX; page_counter_init(&memcg->memsw, NULL); page_counter_init(&memcg->kmem, NULL); @@ -4682,6 +4695,8 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css) mem_cgroup_resize_limit(memcg, PAGE_COUNTER_MAX); mem_cgroup_resize_memsw_limit(memcg, PAGE_COUNTER_MAX); memcg_update_kmem_limit(memcg, PAGE_COUNTER_MAX); + memcg->low = 0; + memcg->high = PAGE_COUNTER_MAX; memcg->soft_limit = PAGE_COUNTER_MAX; } @@ -5267,6 +5282,147 @@ static void mem_cgroup_bind(struct cgroup_subsys_state *root_css) mem_cgroup_from_css(root_css)->use_hierarchy = true; } +static u64 memory_current_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return mem_cgroup_usage(mem_cgroup_from_css(css), false); +} + +static int memory_low_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); + unsigned long low = ACCESS_ONCE(memcg->low); + + if (low == PAGE_COUNTER_MAX) + seq_puts(m, "infinity\n"); + else + seq_printf(m, "%llu\n", (u64)low * PAGE_SIZE); + + return 0; +} + +static ssize_t memory_low_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned long low; + int err; + + buf = strstrip(buf); + err = page_counter_memparse(buf, "infinity", &low); + if (err) + return err; + + memcg->low = low; + + return nbytes; +} + +static int memory_high_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); + unsigned long high = ACCESS_ONCE(memcg->high); + + if (high == PAGE_COUNTER_MAX) + seq_puts(m, "infinity\n"); + else + seq_printf(m, "%llu\n", (u64)high * PAGE_SIZE); + + return 0; +} + +static ssize_t memory_high_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned long high; + int err; + + buf = strstrip(buf); + err = page_counter_memparse(buf, "infinity", &high); + if (err) + return err; + + memcg->high = high; + + return nbytes; +} + +static int memory_max_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); + unsigned long max = ACCESS_ONCE(memcg->memory.limit); + + if (max == PAGE_COUNTER_MAX) + seq_puts(m, "infinity\n"); + else + seq_printf(m, "%llu\n", (u64)max * PAGE_SIZE); + + return 0; +} + +static ssize_t memory_max_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned long max; + int err; + + buf = strstrip(buf); + err = page_counter_memparse(buf, "infinity", &max); + if (err) + return err; + + err = mem_cgroup_resize_limit(memcg, max); + if (err) + return err; + + return nbytes; +} + +static int memory_events_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); + + seq_printf(m, "low %lu\n", mem_cgroup_read_events(memcg, MEMCG_LOW)); + seq_printf(m, "high %lu\n", mem_cgroup_read_events(memcg, MEMCG_HIGH)); + seq_printf(m, "max %lu\n", mem_cgroup_read_events(memcg, MEMCG_MAX)); + seq_printf(m, "oom %lu\n", mem_cgroup_read_events(memcg, MEMCG_OOM)); + + return 0; +} + +static struct cftype memory_files[] = { + { + .name = "current", + .read_u64 = memory_current_read, + }, + { + .name = "low", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = memory_low_show, + .write = memory_low_write, + }, + { + .name = "high", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = memory_high_show, + .write = memory_high_write, + }, + { + .name = "max", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = memory_max_show, + .write = memory_max_write, + }, + { + .name = "events", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = memory_events_show, + }, + { } /* terminate */ +}; + struct cgroup_subsys memory_cgrp_subsys = { .css_alloc = mem_cgroup_css_alloc, .css_online = mem_cgroup_css_online, @@ -5277,7 +5433,8 @@ struct cgroup_subsys memory_cgrp_subsys = { .cancel_attach = mem_cgroup_cancel_attach, .attach = mem_cgroup_move_task, .bind = mem_cgroup_bind, - .legacy_cftypes = mem_cgroup_files, + .dfl_cftypes = memory_files, + .legacy_cftypes = mem_cgroup_legacy_files, .early_init = 0, }; @@ -5312,6 +5469,56 @@ static void __init enable_swap_cgroup(void) } #endif +/** + * mem_cgroup_events - count memory events against a cgroup + * @memcg: the memory cgroup + * @idx: the event index + * @nr: the number of events to account for + */ +void mem_cgroup_events(struct mem_cgroup *memcg, + enum mem_cgroup_events_index idx, + unsigned int nr) +{ + this_cpu_add(memcg->stat->events[idx], nr); +} + +/** + * mem_cgroup_low - check if memory consumption is below the normal range + * @root: the highest ancestor to consider + * @memcg: the memory cgroup to check + * + * Returns %true if memory consumption of @memcg, and that of all + * configurable ancestors up to @root, is below the normal range. + */ +bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg) +{ + if (mem_cgroup_disabled()) + return false; + + /* + * The toplevel group doesn't have a configurable range, so + * it's never low when looked at directly, and it is not + * considered an ancestor when assessing the hierarchy. + */ + + if (memcg == root_mem_cgroup) + return false; + + if (page_counter_read(&memcg->memory) > memcg->low) + return false; + + while (memcg != root) { + memcg = parent_mem_cgroup(memcg); + + if (memcg == root_mem_cgroup) + break; + + if (page_counter_read(&memcg->memory) > memcg->low) + return false; + } + return true; +} + #ifdef CONFIG_MEMCG_SWAP /** * mem_cgroup_swapout - transfer a memsw charge to swap diff --git a/mm/vmscan.c b/mm/vmscan.c index b6dfa0081a8ecb..8e645ee5204554 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -91,6 +91,9 @@ struct scan_control { /* Can pages be swapped as part of reclaim? */ unsigned int may_swap:1; + /* Can cgroups be reclaimed below their normal consumption range? */ + unsigned int may_thrash:1; + unsigned int hibernation_mode:1; /* One of the zones is ready for compaction */ @@ -2294,6 +2297,12 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc, struct lruvec *lruvec; int swappiness; + if (mem_cgroup_low(root, memcg)) { + if (!sc->may_thrash) + continue; + mem_cgroup_events(memcg, MEMCG_LOW, 1); + } + lruvec = mem_cgroup_zone_lruvec(zone, memcg); swappiness = mem_cgroup_swappiness(memcg); @@ -2315,8 +2324,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc, mem_cgroup_iter_break(root, memcg); break; } - memcg = mem_cgroup_iter(root, memcg, &reclaim); - } while (memcg); + } while ((memcg = mem_cgroup_iter(root, memcg, &reclaim))); /* * Shrink the slab caches in the same proportion that @@ -2519,10 +2527,11 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct scan_control *sc) { + int initial_priority = sc->priority; unsigned long total_scanned = 0; unsigned long writeback_threshold; bool zones_reclaimable; - +retry: delayacct_freepages_start(); if (global_reclaim(sc)) @@ -2572,6 +2581,13 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, if (sc->compaction_ready) return 1; + /* Untapped cgroup reserves? Don't OOM, retry. */ + if (!sc->may_thrash) { + sc->priority = initial_priority; + sc->may_thrash = 1; + goto retry; + } + /* Any of the zones still reclaimable? Don't OOM. */ if (zones_reclaimable) return 1; From 1dfab5abcdd404fd04597c063d9f61a5b3247552 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 11 Feb 2015 15:26:09 -0800 Subject: [PATCH 548/601] mm: memcontrol: fold move_anon() and move_file() Turn the move type enum into flags and give the flags field a shorter name. Once that is done, move_anon() and move_file() are simple enough to just fold them into the callsites. [akpm@linux-foundation.org: tweak MOVE_MASK definition, per Michal] Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Reviewed-by: Vladimir Davydov Cc: Greg Thelen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 49 ++++++++++++++++++------------------------------- 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ee97c9ac62c08c..11c9e6a1dad5e5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -369,21 +369,18 @@ static bool memcg_kmem_is_active(struct mem_cgroup *memcg) /* Stuffs for move charges at task migration. */ /* - * Types of charges to be moved. "move_charge_at_immitgrate" and - * "immigrate_flags" are treated as a left-shifted bitmap of these types. + * Types of charges to be moved. */ -enum move_type { - MOVE_CHARGE_TYPE_ANON, /* private anonymous page and swap of it */ - MOVE_CHARGE_TYPE_FILE, /* file page(including tmpfs) and swap of it */ - NR_MOVE_TYPE, -}; +#define MOVE_ANON 0x1U +#define MOVE_FILE 0x2U +#define MOVE_MASK (MOVE_ANON | MOVE_FILE) /* "mc" and its members are protected by cgroup_mutex */ static struct move_charge_struct { spinlock_t lock; /* for from, to */ struct mem_cgroup *from; struct mem_cgroup *to; - unsigned long immigrate_flags; + unsigned long flags; unsigned long precharge; unsigned long moved_charge; unsigned long moved_swap; @@ -394,16 +391,6 @@ static struct move_charge_struct { .waitq = __WAIT_QUEUE_HEAD_INITIALIZER(mc.waitq), }; -static bool move_anon(void) -{ - return test_bit(MOVE_CHARGE_TYPE_ANON, &mc.immigrate_flags); -} - -static bool move_file(void) -{ - return test_bit(MOVE_CHARGE_TYPE_FILE, &mc.immigrate_flags); -} - /* * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft * limit reclaim to prevent infinite loops, if they ever occur. @@ -3500,7 +3487,7 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css, { struct mem_cgroup *memcg = mem_cgroup_from_css(css); - if (val >= (1 << NR_MOVE_TYPE)) + if (val & ~MOVE_MASK) return -EINVAL; /* @@ -4773,12 +4760,12 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma, if (!page || !page_mapped(page)) return NULL; if (PageAnon(page)) { - /* we don't move shared anon */ - if (!move_anon()) + if (!(mc.flags & MOVE_ANON)) return NULL; - } else if (!move_file()) - /* we ignore mapcount for file pages */ - return NULL; + } else { + if (!(mc.flags & MOVE_FILE)) + return NULL; + } if (!get_page_unless_zero(page)) return NULL; @@ -4792,7 +4779,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, struct page *page = NULL; swp_entry_t ent = pte_to_swp_entry(ptent); - if (!move_anon() || non_swap_entry(ent)) + if (!(mc.flags & MOVE_ANON) || non_swap_entry(ent)) return NULL; /* * Because lookup_swap_cache() updates some statistics counter, @@ -4821,7 +4808,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, if (!vma->vm_file) /* anonymous vma */ return NULL; - if (!move_file()) + if (!(mc.flags & MOVE_FILE)) return NULL; mapping = vma->vm_file->f_mapping; @@ -4900,7 +4887,7 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma, page = pmd_page(pmd); VM_BUG_ON_PAGE(!page || !PageHead(page), page); - if (!move_anon()) + if (!(mc.flags & MOVE_ANON)) return ret; if (page->mem_cgroup == mc.from) { ret = MC_TARGET_PAGE; @@ -5042,15 +5029,15 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css, struct task_struct *p = cgroup_taskset_first(tset); int ret = 0; struct mem_cgroup *memcg = mem_cgroup_from_css(css); - unsigned long move_charge_at_immigrate; + unsigned long move_flags; /* * We are now commited to this value whatever it is. Changes in this * tunable will only affect upcoming migrations, not the current one. * So we need to save it, and keep it going. */ - move_charge_at_immigrate = memcg->move_charge_at_immigrate; - if (move_charge_at_immigrate) { + move_flags = ACCESS_ONCE(memcg->move_charge_at_immigrate); + if (move_flags) { struct mm_struct *mm; struct mem_cgroup *from = mem_cgroup_from_task(p); @@ -5070,7 +5057,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css, spin_lock(&mc.lock); mc.from = from; mc.to = memcg; - mc.immigrate_flags = move_charge_at_immigrate; + mc.flags = move_flags; spin_unlock(&mc.lock); /* We set mc.moving_task later */ From 49550b605587924b3336386caae53200c68969d3 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 11 Feb 2015 15:26:12 -0800 Subject: [PATCH 549/601] oom: add helpers for setting and clearing TIF_MEMDIE This patchset addresses a race which was described in the changelog for 5695be142e20 ("OOM, PM: OOM killed task shouldn't escape PM suspend"): : PM freezer relies on having all tasks frozen by the time devices are : getting frozen so that no task will touch them while they are getting : frozen. But OOM killer is allowed to kill an already frozen task in order : to handle OOM situtation. In order to protect from late wake ups OOM : killer is disabled after all tasks are frozen. This, however, still keeps : a window open when a killed task didn't manage to die by the time : freeze_processes finishes. The original patch hasn't closed the race window completely because that would require a more complex solution as it can be seen by this patchset. The primary motivation was to close the race condition between OOM killer and PM freezer _completely_. As Tejun pointed out, even though the race condition is unlikely the harder it would be to debug weird bugs deep in the PM freezer when the debugging options are reduced considerably. I can only speculate what might happen when a task is still runnable unexpectedly. On a plus side and as a side effect the oom enable/disable has a better (full barrier) semantic without polluting hot paths. I have tested the series in KVM with 100M RAM: - many small tasks (20M anon mmap) which are triggering OOM continually - s2ram which resumes automatically is triggered in a loop echo processors > /sys/power/pm_test while true do echo mem > /sys/power/state sleep 1s done - simple module which allocates and frees 20M in 8K chunks. If it sees freezing(current) then it tries another round of allocation before calling try_to_freeze - debugging messages of PM stages and OOM killer enable/disable/fail added and unmark_oom_victim is delayed by 1s after it clears TIF_MEMDIE and before it wakes up waiters. - rebased on top of the current mmotm which means some necessary updates in mm/oom_kill.c. mark_tsk_oom_victim is now called under task_lock but I think this should be OK because __thaw_task shouldn't interfere with any locking down wake_up_process. Oleg? As expected there are no OOM killed tasks after oom is disabled and allocations requested by the kernel thread are failing after all the tasks are frozen and OOM disabled. I wasn't able to catch a race where oom_killer_disable would really have to wait but I kinda expected the race is really unlikely. [ 242.609330] Killed process 2992 (mem_eater) total-vm:24412kB, anon-rss:2164kB, file-rss:4kB [ 243.628071] Unmarking 2992 OOM victim. oom_victims: 1 [ 243.636072] (elapsed 2.837 seconds) done. [ 243.641985] Trying to disable OOM killer [ 243.643032] Waiting for concurent OOM victims [ 243.644342] OOM killer disabled [ 243.645447] Freezing remaining freezable tasks ... (elapsed 0.005 seconds) done. [ 243.652983] Suspending console(s) (use no_console_suspend to debug) [ 243.903299] kmem_eater: page allocation failure: order:1, mode:0x204010 [...] [ 243.992600] PM: suspend of devices complete after 336.667 msecs [ 243.993264] PM: late suspend of devices complete after 0.660 msecs [ 243.994713] PM: noirq suspend of devices complete after 1.446 msecs [ 243.994717] ACPI: Preparing to enter system sleep state S3 [ 243.994795] PM: Saving platform NVS memory [ 243.994796] Disabling non-boot CPUs ... The first 2 patches are simple cleanups for OOM. They should go in regardless the rest IMO. Patches 3 and 4 are trivial printk -> pr_info conversion and they should go in ditto. The main patch is the last one and I would appreciate acks from Tejun and Rafael. I think the OOM part should be OK (except for __thaw_task vs. task_lock where a look from Oleg would appreciated) but I am not so sure I haven't screwed anything in the freezer code. I have found several surprises there. This patch (of 5): This patch is just a preparatory and it doesn't introduce any functional change. Note: I am utterly unhappy about lowmemory killer abusing TIF_MEMDIE just to wait for the oom victim and to prevent from new killing. This is just a side effect of the flag. The primary meaning is to give the oom victim access to the memory reserves and that shouldn't be necessary here. Signed-off-by: Michal Hocko Cc: Tejun Heo Cc: David Rientjes Cc: Johannes Weiner Cc: Oleg Nesterov Cc: Cong Wang Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/staging/android/lowmemorykiller.c | 7 ++++++- include/linux/oom.h | 4 ++++ kernel/exit.c | 2 +- mm/memcontrol.c | 2 +- mm/oom_kill.c | 23 ++++++++++++++++++++--- 5 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index b545d3d1da3e5f..feafa172b155a2 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -160,7 +160,12 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) selected->pid, selected->comm, selected_oom_score_adj, selected_tasksize); lowmem_deathpending_timeout = jiffies + HZ; - set_tsk_thread_flag(selected, TIF_MEMDIE); + /* + * FIXME: lowmemorykiller shouldn't abuse global OOM killer + * infrastructure. There is no real reason why the selected + * task should have access to the memory reserves. + */ + mark_tsk_oom_victim(selected); send_sig(SIGKILL, selected, 0); rem += selected_tasksize; } diff --git a/include/linux/oom.h b/include/linux/oom.h index 76200984d1e220..b42b80f88c3ad6 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -47,6 +47,10 @@ static inline bool oom_task_origin(const struct task_struct *p) return !!(p->signal->oom_flags & OOM_FLAG_ORIGIN); } +extern void mark_tsk_oom_victim(struct task_struct *tsk); + +extern void unmark_oom_victim(void); + extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages); diff --git a/kernel/exit.c b/kernel/exit.c index 6806c55475eec1..02b3d1ab2ec0d7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -435,7 +435,7 @@ static void exit_mm(struct task_struct *tsk) task_unlock(tsk); mm_update_next_owner(mm); mmput(mm); - clear_thread_flag(TIF_MEMDIE); + unmark_oom_victim(); } static struct task_struct *find_alive_thread(struct task_struct *p) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 11c9e6a1dad5e5..fe4d258ef32bd5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1556,7 +1556,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, * quickly exit and free its memory. */ if (fatal_signal_pending(current) || task_will_free_mem(current)) { - set_thread_flag(TIF_MEMDIE); + mark_tsk_oom_victim(current); return; } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 294493a7ae4b0e..80b34e285f96ec 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -416,6 +416,23 @@ void note_oom_kill(void) atomic_inc(&oom_kills); } +/** + * mark_tsk_oom_victim - marks the given taks as OOM victim. + * @tsk: task to mark + */ +void mark_tsk_oom_victim(struct task_struct *tsk) +{ + set_tsk_thread_flag(tsk, TIF_MEMDIE); +} + +/** + * unmark_oom_victim - unmarks the current task as OOM victim. + */ +void unmark_oom_victim(void) +{ + clear_thread_flag(TIF_MEMDIE); +} + #define K(x) ((x) << (PAGE_SHIFT-10)) /* * Must be called while holding a reference to p, which will be released upon @@ -440,7 +457,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, */ task_lock(p); if (p->mm && task_will_free_mem(p)) { - set_tsk_thread_flag(p, TIF_MEMDIE); + mark_tsk_oom_victim(p); task_unlock(p); put_task_struct(p); return; @@ -495,7 +512,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, /* mm cannot safely be dereferenced after task_unlock(victim) */ mm = victim->mm; - set_tsk_thread_flag(victim, TIF_MEMDIE); + mark_tsk_oom_victim(victim); pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n", task_pid_nr(victim), victim->comm, K(victim->mm->total_vm), K(get_mm_counter(victim->mm, MM_ANONPAGES)), @@ -652,7 +669,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, */ if (current->mm && (fatal_signal_pending(current) || task_will_free_mem(current))) { - set_thread_flag(TIF_MEMDIE); + mark_tsk_oom_victim(current); return; } From 63a8ca9b2084fa5bd91aa380532f18e361764109 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 11 Feb 2015 15:26:15 -0800 Subject: [PATCH 550/601] oom: thaw the OOM victim if it is frozen oom_kill_process only sets TIF_MEMDIE flag and sends a signal to the victim. This is basically noop when the task is frozen though because the task sleeps in the uninterruptible sleep. The victim is eventually thawed later when oom_scan_process_thread meets the task again in a later OOM invocation so the OOM killer doesn't live lock. But this is less than optimal. Let's add __thaw_task into mark_tsk_oom_victim after we set TIF_MEMDIE to the victim. We are not checking whether the task is frozen because that would be racy and __thaw_task does that already. oom_scan_process_thread doesn't need to care about freezer anymore as TIF_MEMDIE and freezer are excluded completely now. Signed-off-by: Michal Hocko Cc: Tejun Heo Cc: David Rientjes Cc: Johannes Weiner Cc: Oleg Nesterov Cc: Cong Wang Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 80b34e285f96ec..3cbd76b8c13b59 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -266,8 +266,6 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task, * Don't allow any other task to have access to the reserves. */ if (test_tsk_thread_flag(task, TIF_MEMDIE)) { - if (unlikely(frozen(task))) - __thaw_task(task); if (!force_kill) return OOM_SCAN_ABORT; } @@ -423,6 +421,14 @@ void note_oom_kill(void) void mark_tsk_oom_victim(struct task_struct *tsk) { set_tsk_thread_flag(tsk, TIF_MEMDIE); + + /* + * Make sure that the task is woken up from uninterruptible sleep + * if it is frozen because OOM killer wouldn't be able to free + * any memory and livelock. freezing_slow_path will tell the freezer + * that TIF_MEMDIE tasks should be ignored. + */ + __thaw_task(tsk); } /** From 35536ae170f01fb7e5ca032d5324d03e9e5a36bd Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 11 Feb 2015 15:26:18 -0800 Subject: [PATCH 551/601] PM: convert printk to pr_* equivalent While touching this area let's convert printk to pr_*. This also makes the printing of continuation lines done properly. Signed-off-by: Michal Hocko Acked-by: Tejun Heo Cc: David Rientjes Cc: Johannes Weiner Cc: Oleg Nesterov Cc: Cong Wang Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/power/process.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/kernel/power/process.c b/kernel/power/process.c index 5a6ec8678b9a09..3ac45f192e9fef 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -84,8 +84,8 @@ static int try_to_freeze_tasks(bool user_only) elapsed_msecs = elapsed_msecs64; if (todo) { - printk("\n"); - printk(KERN_ERR "Freezing of tasks %s after %d.%03d seconds " + pr_cont("\n"); + pr_err("Freezing of tasks %s after %d.%03d seconds " "(%d tasks refusing to freeze, wq_busy=%d):\n", wakeup ? "aborted" : "failed", elapsed_msecs / 1000, elapsed_msecs % 1000, @@ -101,7 +101,7 @@ static int try_to_freeze_tasks(bool user_only) read_unlock(&tasklist_lock); } } else { - printk("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000, + pr_cont("(elapsed %d.%03d seconds) ", elapsed_msecs / 1000, elapsed_msecs % 1000); } @@ -155,7 +155,7 @@ int freeze_processes(void) atomic_inc(&system_freezing_cnt); pm_wakeup_clear(); - printk("Freezing user space processes ... "); + pr_info("Freezing user space processes ... "); pm_freezing = true; oom_kills_saved = oom_kills_count(); error = try_to_freeze_tasks(true); @@ -171,13 +171,13 @@ int freeze_processes(void) if (oom_kills_count() != oom_kills_saved && !check_frozen_processes()) { __usermodehelper_set_disable_depth(UMH_ENABLED); - printk("OOM in progress."); + pr_cont("OOM in progress."); error = -EBUSY; } else { - printk("done."); + pr_cont("done."); } } - printk("\n"); + pr_cont("\n"); BUG_ON(in_atomic()); if (error) @@ -197,13 +197,14 @@ int freeze_kernel_threads(void) { int error; - printk("Freezing remaining freezable tasks ... "); + pr_info("Freezing remaining freezable tasks ... "); + pm_nosig_freezing = true; error = try_to_freeze_tasks(false); if (!error) - printk("done."); + pr_cont("done."); - printk("\n"); + pr_cont("\n"); BUG_ON(in_atomic()); if (error) @@ -224,7 +225,7 @@ void thaw_processes(void) oom_killer_enable(); - printk("Restarting tasks ... "); + pr_info("Restarting tasks ... "); __usermodehelper_set_disable_depth(UMH_FREEZING); thaw_workqueues(); @@ -243,7 +244,7 @@ void thaw_processes(void) usermodehelper_enable(); schedule(); - printk("done.\n"); + pr_cont("done.\n"); trace_suspend_resume(TPS("thaw_processes"), 0, false); } @@ -252,7 +253,7 @@ void thaw_kernel_threads(void) struct task_struct *g, *p; pm_nosig_freezing = false; - printk("Restarting kernel threads ... "); + pr_info("Restarting kernel threads ... "); thaw_workqueues(); @@ -264,5 +265,5 @@ void thaw_kernel_threads(void) read_unlock(&tasklist_lock); schedule(); - printk("done.\n"); + pr_cont("done.\n"); } From 401e4a7cf67d993bae02efdf1a234d7e2dbd2df2 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 11 Feb 2015 15:26:21 -0800 Subject: [PATCH 552/601] sysrq: convert printk to pr_* equivalent While touching this area let's convert printk to pr_*. This also makes the printing of continuation lines done properly. Signed-off-by: Michal Hocko Acked-by: Tejun Heo Cc: David Rientjes Cc: Johannes Weiner Cc: Oleg Nesterov Cc: Cong Wang Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/tty/sysrq.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 42bad18c66c938..0071469ecbf18d 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -90,7 +90,7 @@ static void sysrq_handle_loglevel(int key) i = key - '0'; console_loglevel = CONSOLE_LOGLEVEL_DEFAULT; - printk("Loglevel set to %d\n", i); + pr_info("Loglevel set to %d\n", i); console_loglevel = i; } static struct sysrq_key_op sysrq_loglevel_op = { @@ -220,7 +220,7 @@ static void showacpu(void *dummy) return; spin_lock_irqsave(&show_lock, flags); - printk(KERN_INFO "CPU%d:\n", smp_processor_id()); + pr_info("CPU%d:\n", smp_processor_id()); show_stack(NULL, NULL); spin_unlock_irqrestore(&show_lock, flags); } @@ -243,7 +243,7 @@ static void sysrq_handle_showallcpus(int key) struct pt_regs *regs = get_irq_regs(); if (regs) { - printk(KERN_INFO "CPU%d:\n", smp_processor_id()); + pr_info("CPU%d:\n", smp_processor_id()); show_regs(regs); } schedule_work(&sysrq_showallcpus); @@ -522,7 +522,7 @@ void __handle_sysrq(int key, bool check_mask) */ orig_log_level = console_loglevel; console_loglevel = CONSOLE_LOGLEVEL_DEFAULT; - printk(KERN_INFO "SysRq : "); + pr_info("SysRq : "); op_p = __sysrq_get_key_op(key); if (op_p) { @@ -531,14 +531,14 @@ void __handle_sysrq(int key, bool check_mask) * should not) and is the invoked operation enabled? */ if (!check_mask || sysrq_on_mask(op_p->enable_mask)) { - printk("%s\n", op_p->action_msg); + pr_cont("%s\n", op_p->action_msg); console_loglevel = orig_log_level; op_p->handler(key); } else { - printk("This sysrq operation is disabled.\n"); + pr_cont("This sysrq operation is disabled.\n"); } } else { - printk("HELP : "); + pr_cont("HELP : "); /* Only print the help msg once per handler */ for (i = 0; i < ARRAY_SIZE(sysrq_key_table); i++) { if (sysrq_key_table[i]) { @@ -549,10 +549,10 @@ void __handle_sysrq(int key, bool check_mask) ; if (j != i) continue; - printk("%s ", sysrq_key_table[i]->help_msg); + pr_cont("%s ", sysrq_key_table[i]->help_msg); } } - printk("\n"); + pr_cont("\n"); console_loglevel = orig_log_level; } rcu_read_unlock(); From c32b3cbe0d067a9cfae85aa70ba1e97ceba0ced7 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 11 Feb 2015 15:26:24 -0800 Subject: [PATCH 553/601] oom, PM: make OOM detection in the freezer path raceless Commit 5695be142e20 ("OOM, PM: OOM killed task shouldn't escape PM suspend") has left a race window when OOM killer manages to note_oom_kill after freeze_processes checks the counter. The race window is quite small and really unlikely and partial solution deemed sufficient at the time of submission. Tejun wasn't happy about this partial solution though and insisted on a full solution. That requires the full OOM and freezer's task freezing exclusion, though. This is done by this patch which introduces oom_sem RW lock and turns oom_killer_disable() into a full OOM barrier. oom_killer_disabled check is moved from the allocation path to the OOM level and we take oom_sem for reading for both the check and the whole OOM invocation. oom_killer_disable() takes oom_sem for writing so it waits for all currently running OOM killer invocations. Then it disable all the further OOMs by setting oom_killer_disabled and checks for any oom victims. Victims are counted via mark_tsk_oom_victim resp. unmark_oom_victim. The last victim wakes up all waiters enqueued by oom_killer_disable(). Therefore this function acts as the full OOM barrier. The page fault path is covered now as well although it was assumed to be safe before. As per Tejun, "We used to have freezing points deep in file system code which may be reacheable from page fault." so it would be better and more robust to not rely on freezing points here. Same applies to the memcg OOM killer. out_of_memory tells the caller whether the OOM was allowed to trigger and the callers are supposed to handle the situation. The page allocation path simply fails the allocation same as before. The page fault path will retry the fault (more on that later) and Sysrq OOM trigger will simply complain to the log. Normally there wouldn't be any unfrozen user tasks after try_to_freeze_tasks so the function will not block. But if there was an OOM killer racing with try_to_freeze_tasks and the OOM victim didn't finish yet then we have to wait for it. This should complete in a finite time, though, because - the victim cannot loop in the page fault handler (it would die on the way out from the exception) - it cannot loop in the page allocator because all the further allocation would fail and __GFP_NOFAIL allocations are not acceptable at this stage - it shouldn't be blocked on any locks held by frozen tasks (try_to_freeze expects lockless context) and kernel threads and work queues are not frozen yet Signed-off-by: Michal Hocko Suggested-by: Tejun Heo Cc: David Rientjes Cc: Johannes Weiner Cc: Oleg Nesterov Cc: Cong Wang Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/tty/sysrq.c | 5 +- include/linux/oom.h | 14 +---- kernel/exit.c | 3 +- kernel/power/process.c | 50 +++------------- mm/memcontrol.c | 2 +- mm/oom_kill.c | 132 ++++++++++++++++++++++++++++++++++------- mm/page_alloc.c | 17 +----- 7 files changed, 132 insertions(+), 91 deletions(-) diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 0071469ecbf18d..259a4d5a4e8f4c 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -355,8 +355,9 @@ static struct sysrq_key_op sysrq_term_op = { static void moom_callback(struct work_struct *ignored) { - out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL), GFP_KERNEL, - 0, NULL, true); + if (!out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL), + GFP_KERNEL, 0, NULL, true)) + pr_info("OOM request ignored because killer is disabled\n"); } static DECLARE_WORK(moom_work, moom_callback); diff --git a/include/linux/oom.h b/include/linux/oom.h index b42b80f88c3ad6..d5771bed59c92d 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -72,22 +72,14 @@ extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task, unsigned long totalpages, const nodemask_t *nodemask, bool force_kill); -extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, +extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order, nodemask_t *mask, bool force_kill); extern int register_oom_notifier(struct notifier_block *nb); extern int unregister_oom_notifier(struct notifier_block *nb); extern bool oom_killer_disabled; - -static inline void oom_killer_disable(void) -{ - oom_killer_disabled = true; -} - -static inline void oom_killer_enable(void) -{ - oom_killer_disabled = false; -} +extern bool oom_killer_disable(void); +extern void oom_killer_enable(void); extern struct task_struct *find_lock_task_mm(struct task_struct *p); diff --git a/kernel/exit.c b/kernel/exit.c index 02b3d1ab2ec0d7..feff10bbb30777 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -435,7 +435,8 @@ static void exit_mm(struct task_struct *tsk) task_unlock(tsk); mm_update_next_owner(mm); mmput(mm); - unmark_oom_victim(); + if (test_thread_flag(TIF_MEMDIE)) + unmark_oom_victim(); } static struct task_struct *find_alive_thread(struct task_struct *p) diff --git a/kernel/power/process.c b/kernel/power/process.c index 3ac45f192e9fef..564f786df4701a 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -108,30 +108,6 @@ static int try_to_freeze_tasks(bool user_only) return todo ? -EBUSY : 0; } -static bool __check_frozen_processes(void) -{ - struct task_struct *g, *p; - - for_each_process_thread(g, p) - if (p != current && !freezer_should_skip(p) && !frozen(p)) - return false; - - return true; -} - -/* - * Returns true if all freezable tasks (except for current) are frozen already - */ -static bool check_frozen_processes(void) -{ - bool ret; - - read_lock(&tasklist_lock); - ret = __check_frozen_processes(); - read_unlock(&tasklist_lock); - return ret; -} - /** * freeze_processes - Signal user space processes to enter the refrigerator. * The current thread will not be frozen. The same process that calls @@ -142,7 +118,6 @@ static bool check_frozen_processes(void) int freeze_processes(void) { int error; - int oom_kills_saved; error = __usermodehelper_disable(UMH_FREEZING); if (error) @@ -157,29 +132,22 @@ int freeze_processes(void) pm_wakeup_clear(); pr_info("Freezing user space processes ... "); pm_freezing = true; - oom_kills_saved = oom_kills_count(); error = try_to_freeze_tasks(true); if (!error) { __usermodehelper_set_disable_depth(UMH_DISABLED); - oom_killer_disable(); - - /* - * There might have been an OOM kill while we were - * freezing tasks and the killed task might be still - * on the way out so we have to double check for race. - */ - if (oom_kills_count() != oom_kills_saved && - !check_frozen_processes()) { - __usermodehelper_set_disable_depth(UMH_ENABLED); - pr_cont("OOM in progress."); - error = -EBUSY; - } else { - pr_cont("done."); - } + pr_cont("done."); } pr_cont("\n"); BUG_ON(in_atomic()); + /* + * Now that the whole userspace is frozen we need to disbale + * the OOM killer to disallow any further interference with + * killable tasks. + */ + if (!error && !oom_killer_disable()) + error = -EBUSY; + if (error) thaw_processes(); return error; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fe4d258ef32bd5..fbf64e6f64e42c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1930,7 +1930,7 @@ bool mem_cgroup_oom_synchronize(bool handle) if (!memcg) return false; - if (!handle) + if (!handle || oom_killer_disabled) goto cleanup; owait.memcg = memcg; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 3cbd76b8c13b59..b8df76ee2be380 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -398,30 +398,27 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, } /* - * Number of OOM killer invocations (including memcg OOM killer). - * Primarily used by PM freezer to check for potential races with - * OOM killed frozen task. + * Number of OOM victims in flight */ -static atomic_t oom_kills = ATOMIC_INIT(0); +static atomic_t oom_victims = ATOMIC_INIT(0); +static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait); -int oom_kills_count(void) -{ - return atomic_read(&oom_kills); -} - -void note_oom_kill(void) -{ - atomic_inc(&oom_kills); -} +bool oom_killer_disabled __read_mostly; +static DECLARE_RWSEM(oom_sem); /** * mark_tsk_oom_victim - marks the given taks as OOM victim. * @tsk: task to mark + * + * Has to be called with oom_sem taken for read and never after + * oom has been disabled already. */ void mark_tsk_oom_victim(struct task_struct *tsk) { - set_tsk_thread_flag(tsk, TIF_MEMDIE); - + WARN_ON(oom_killer_disabled); + /* OOM killer might race with memcg OOM */ + if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE)) + return; /* * Make sure that the task is woken up from uninterruptible sleep * if it is frozen because OOM killer wouldn't be able to free @@ -429,14 +426,70 @@ void mark_tsk_oom_victim(struct task_struct *tsk) * that TIF_MEMDIE tasks should be ignored. */ __thaw_task(tsk); + atomic_inc(&oom_victims); } /** * unmark_oom_victim - unmarks the current task as OOM victim. + * + * Wakes up all waiters in oom_killer_disable() */ void unmark_oom_victim(void) { - clear_thread_flag(TIF_MEMDIE); + if (!test_and_clear_thread_flag(TIF_MEMDIE)) + return; + + down_read(&oom_sem); + /* + * There is no need to signal the lasst oom_victim if there + * is nobody who cares. + */ + if (!atomic_dec_return(&oom_victims) && oom_killer_disabled) + wake_up_all(&oom_victims_wait); + up_read(&oom_sem); +} + +/** + * oom_killer_disable - disable OOM killer + * + * Forces all page allocations to fail rather than trigger OOM killer. + * Will block and wait until all OOM victims are killed. + * + * The function cannot be called when there are runnable user tasks because + * the userspace would see unexpected allocation failures as a result. Any + * new usage of this function should be consulted with MM people. + * + * Returns true if successful and false if the OOM killer cannot be + * disabled. + */ +bool oom_killer_disable(void) +{ + /* + * Make sure to not race with an ongoing OOM killer + * and that the current is not the victim. + */ + down_write(&oom_sem); + if (test_thread_flag(TIF_MEMDIE)) { + up_write(&oom_sem); + return false; + } + + oom_killer_disabled = true; + up_write(&oom_sem); + + wait_event(oom_victims_wait, !atomic_read(&oom_victims)); + + return true; +} + +/** + * oom_killer_enable - enable OOM killer + */ +void oom_killer_enable(void) +{ + down_write(&oom_sem); + oom_killer_disabled = false; + up_write(&oom_sem); } #define K(x) ((x) << (PAGE_SHIFT-10)) @@ -637,7 +690,7 @@ void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask) } /** - * out_of_memory - kill the "best" process when we run out of memory + * __out_of_memory - kill the "best" process when we run out of memory * @zonelist: zonelist pointer * @gfp_mask: memory allocation flags * @order: amount of memory being requested as a power of 2 @@ -649,7 +702,7 @@ void oom_zonelist_unlock(struct zonelist *zonelist, gfp_t gfp_mask) * OR try to be smart about which process to kill. Note that we * don't have to be perfect here, we just have to be good. */ -void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, +static void __out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order, nodemask_t *nodemask, bool force_kill) { const nodemask_t *mpol_mask; @@ -718,6 +771,32 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, schedule_timeout_killable(1); } +/** + * out_of_memory - tries to invoke OOM killer. + * @zonelist: zonelist pointer + * @gfp_mask: memory allocation flags + * @order: amount of memory being requested as a power of 2 + * @nodemask: nodemask passed to page allocator + * @force_kill: true if a task must be killed, even if others are exiting + * + * invokes __out_of_memory if the OOM is not disabled by oom_killer_disable() + * when it returns false. Otherwise returns true. + */ +bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, + int order, nodemask_t *nodemask, bool force_kill) +{ + bool ret = false; + + down_read(&oom_sem); + if (!oom_killer_disabled) { + __out_of_memory(zonelist, gfp_mask, order, nodemask, force_kill); + ret = true; + } + up_read(&oom_sem); + + return ret; +} + /* * The pagefault handler calls here because it is out of memory, so kill a * memory-hogging task. If any populated zone has ZONE_OOM_LOCKED set, a @@ -727,12 +806,25 @@ void pagefault_out_of_memory(void) { struct zonelist *zonelist; + down_read(&oom_sem); if (mem_cgroup_oom_synchronize(true)) - return; + goto unlock; zonelist = node_zonelist(first_memory_node, GFP_KERNEL); if (oom_zonelist_trylock(zonelist, GFP_KERNEL)) { - out_of_memory(NULL, 0, 0, NULL, false); + if (!oom_killer_disabled) + __out_of_memory(NULL, 0, 0, NULL, false); + else + /* + * There shouldn't be any user tasks runable while the + * OOM killer is disabled so the current task has to + * be a racing OOM victim for which oom_killer_disable() + * is waiting for. + */ + WARN_ON(test_thread_flag(TIF_MEMDIE)); + oom_zonelist_unlock(zonelist, GFP_KERNEL); } +unlock: + up_read(&oom_sem); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 641d5a9a861790..134e255250447e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -244,8 +244,6 @@ void set_pageblock_migratetype(struct page *page, int migratetype) PB_migrate, PB_migrate_end); } -bool oom_killer_disabled __read_mostly; - #ifdef CONFIG_DEBUG_VM static int page_outside_zone_boundaries(struct zone *zone, struct page *page) { @@ -2317,9 +2315,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, *did_some_progress = 0; - if (oom_killer_disabled) - return NULL; - /* * Acquire the per-zone oom lock for each zone. If that * fails, somebody else is making progress for us. @@ -2330,14 +2325,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, return NULL; } - /* - * PM-freezer should be notified that there might be an OOM killer on - * its way to kill and wake somebody up. This is too early and we might - * end up not killing anything but false positives are acceptable. - * See freeze_processes. - */ - note_oom_kill(); - /* * Go through the zonelist yet one more time, keep very high watermark * here, this is only to catch a parallel oom killing, we must fail if @@ -2372,8 +2359,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, goto out; } /* Exhausted what can be done so it's blamo time */ - out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false); - *did_some_progress = 1; + if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false)) + *did_some_progress = 1; out: oom_zonelist_unlock(ac->zonelist, gfp_mask); return page; From 94737a85f332aee75255960eaa16e89ddfa4c75a Mon Sep 17 00:00:00 2001 From: "George G. Davis" Date: Wed, 11 Feb 2015 15:26:27 -0800 Subject: [PATCH 554/601] mm: cma: fix totalcma_pages to include DT defined CMA regions The totalcma_pages variable is not updated to account for CMA regions defined via device tree reserved-memory sub-nodes. Fix this omission by moving the calculation of totalcma_pages into cma_init_reserved_mem() instead of cma_declare_contiguous() such that it will include reserved memory used by all CMA regions. Signed-off-by: George G. Davis Cc: Marek Szyprowski Acked-by: Michal Nazarewicz Cc: Joonsoo Kim Cc: "Aneesh Kumar K.V" Cc: Laurent Pinchart Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/cma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/cma.c b/mm/cma.c index a85ae28709a330..75016fd1de9062 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -199,6 +199,7 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, cma->order_per_bit = order_per_bit; *res_cma = cma; cma_area_count++; + totalcma_pages += (size / PAGE_SIZE); return 0; } @@ -337,7 +338,6 @@ int __init cma_declare_contiguous(phys_addr_t base, if (ret) goto err; - totalcma_pages += (size / PAGE_SIZE); pr_info("Reserved %ld MiB at %pa\n", (unsigned long)size / SZ_1M, &base); return 0; From 9c608dbe6a0d137f78498a5181eb0cd309f8f067 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 11 Feb 2015 15:26:30 -0800 Subject: [PATCH 555/601] mm: memcontrol: simplify soft limit tree init code - No need to test the node for N_MEMORY. node_online() is enough for node fallback to work in slab, use NUMA_NO_NODE for everything else. - Remove the BUG_ON() for allocation failure. A NULL pointer crash is just as descriptive, and the absent return value check is obvious. - Move local variables to the inner-most blocks. - Point to the tree structure after its initialized, not before, it's just more logical that way. Signed-off-by: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Cc: Guenter Roeck Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fbf64e6f64e42c..2efec685793b6e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4509,24 +4509,23 @@ EXPORT_SYMBOL(parent_mem_cgroup); static void __init mem_cgroup_soft_limit_tree_init(void) { - struct mem_cgroup_tree_per_node *rtpn; - struct mem_cgroup_tree_per_zone *rtpz; - int tmp, node, zone; + int node; for_each_node(node) { - tmp = node; - if (!node_state(node, N_NORMAL_MEMORY)) - tmp = -1; - rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp); - BUG_ON(!rtpn); + struct mem_cgroup_tree_per_node *rtpn; + int zone; - soft_limit_tree.rb_tree_per_node[node] = rtpn; + rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, + node_online(node) ? node : NUMA_NO_NODE); for (zone = 0; zone < MAX_NR_ZONES; zone++) { + struct mem_cgroup_tree_per_zone *rtpz; + rtpz = &rtpn->rb_tree_per_zone[zone]; rtpz->rb_root = RB_ROOT; spin_lock_init(&rtpz->lock); } + soft_limit_tree.rb_tree_per_node[node] = rtpn; } } From 95a045f63d9868ee189fe24ee61689df5a133d5b Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 11 Feb 2015 15:26:33 -0800 Subject: [PATCH 556/601] mm: memcontrol: consolidate memory controller initialization The initialization code for the per-cpu charge stock and the soft limit tree is compact enough to inline it into mem_cgroup_init(). Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Reviewed-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 60 +++++++++++++++++++++---------------------------- 1 file changed, 25 insertions(+), 35 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2efec685793b6e..ebf1139f323e44 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2138,17 +2138,6 @@ static void drain_local_stock(struct work_struct *dummy) clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags); } -static void __init memcg_stock_init(void) -{ - int cpu; - - for_each_possible_cpu(cpu) { - struct memcg_stock_pcp *stock = - &per_cpu(memcg_stock, cpu); - INIT_WORK(&stock->work, drain_local_stock); - } -} - /* * Cache charges(val) to local per_cpu area. * This will be consumed by consume_stock() function, later. @@ -4507,28 +4496,6 @@ struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) } EXPORT_SYMBOL(parent_mem_cgroup); -static void __init mem_cgroup_soft_limit_tree_init(void) -{ - int node; - - for_each_node(node) { - struct mem_cgroup_tree_per_node *rtpn; - int zone; - - rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, - node_online(node) ? node : NUMA_NO_NODE); - - for (zone = 0; zone < MAX_NR_ZONES; zone++) { - struct mem_cgroup_tree_per_zone *rtpz; - - rtpz = &rtpn->rb_tree_per_zone[zone]; - rtpz->rb_root = RB_ROOT; - spin_lock_init(&rtpz->lock); - } - soft_limit_tree.rb_tree_per_node[node] = rtpn; - } -} - static struct cgroup_subsys_state * __ref mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) { @@ -5905,10 +5872,33 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage, */ static int __init mem_cgroup_init(void) { + int cpu, node; + hotcpu_notifier(memcg_cpu_hotplug_callback, 0); + + for_each_possible_cpu(cpu) + INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work, + drain_local_stock); + + for_each_node(node) { + struct mem_cgroup_tree_per_node *rtpn; + int zone; + + rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, + node_online(node) ? node : NUMA_NO_NODE); + + for (zone = 0; zone < MAX_NR_ZONES; zone++) { + struct mem_cgroup_tree_per_zone *rtpz; + + rtpz = &rtpn->rb_tree_per_zone[zone]; + rtpz->rb_root = RB_ROOT; + spin_lock_init(&rtpz->lock); + } + soft_limit_tree.rb_tree_per_node[node] = rtpn; + } + enable_swap_cgroup(); - mem_cgroup_soft_limit_tree_init(); - memcg_stock_init(); + return 0; } subsys_initcall(mem_cgroup_init); From 21afa38eed655def15475b76681fa006c435b9de Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 11 Feb 2015 15:26:36 -0800 Subject: [PATCH 557/601] mm: memcontrol: consolidate swap controller code The swap controller code is scattered all over the file. Gather all the code that isn't directly needed by the memory controller at the end of the file in its own CONFIG_MEMCG_SWAP section. Signed-off-by: Johannes Weiner Cc: Michal Hocko Reviewed-by: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 264 +++++++++++++++++++++++------------------------- 1 file changed, 125 insertions(+), 139 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ebf1139f323e44..c7a9cb627180ca 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -72,22 +72,13 @@ EXPORT_SYMBOL(memory_cgrp_subsys); #define MEM_CGROUP_RECLAIM_RETRIES 5 static struct mem_cgroup *root_mem_cgroup __read_mostly; +/* Whether the swap controller is active */ #ifdef CONFIG_MEMCG_SWAP -/* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ int do_swap_account __read_mostly; - -/* for remember boot option*/ -#ifdef CONFIG_MEMCG_SWAP_ENABLED -static int really_do_swap_account __initdata = 1; -#else -static int really_do_swap_account __initdata; -#endif - #else #define do_swap_account 0 #endif - static const char * const mem_cgroup_stat_names[] = { "cache", "rss", @@ -4373,34 +4364,6 @@ static struct cftype mem_cgroup_legacy_files[] = { { }, /* terminate */ }; -#ifdef CONFIG_MEMCG_SWAP -static struct cftype memsw_cgroup_files[] = { - { - .name = "memsw.usage_in_bytes", - .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), - .read_u64 = mem_cgroup_read_u64, - }, - { - .name = "memsw.max_usage_in_bytes", - .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), - .write = mem_cgroup_reset, - .read_u64 = mem_cgroup_read_u64, - }, - { - .name = "memsw.limit_in_bytes", - .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), - .write = mem_cgroup_write, - .read_u64 = mem_cgroup_read_u64, - }, - { - .name = "memsw.failcnt", - .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), - .write = mem_cgroup_reset, - .read_u64 = mem_cgroup_read_u64, - }, - { }, /* terminate */ -}; -#endif static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) { struct mem_cgroup_per_node *pn; @@ -5391,37 +5354,6 @@ struct cgroup_subsys memory_cgrp_subsys = { .early_init = 0, }; -#ifdef CONFIG_MEMCG_SWAP -static int __init enable_swap_account(char *s) -{ - if (!strcmp(s, "1")) - really_do_swap_account = 1; - else if (!strcmp(s, "0")) - really_do_swap_account = 0; - return 1; -} -__setup("swapaccount=", enable_swap_account); - -static void __init memsw_file_init(void) -{ - WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, - memsw_cgroup_files)); -} - -static void __init enable_swap_cgroup(void) -{ - if (!mem_cgroup_disabled() && really_do_swap_account) { - do_swap_account = 1; - memsw_file_init(); - } -} - -#else -static void __init enable_swap_cgroup(void) -{ -} -#endif - /** * mem_cgroup_events - count memory events against a cgroup * @memcg: the memory cgroup @@ -5472,74 +5404,6 @@ bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg) return true; } -#ifdef CONFIG_MEMCG_SWAP -/** - * mem_cgroup_swapout - transfer a memsw charge to swap - * @page: page whose memsw charge to transfer - * @entry: swap entry to move the charge to - * - * Transfer the memsw charge of @page to @entry. - */ -void mem_cgroup_swapout(struct page *page, swp_entry_t entry) -{ - struct mem_cgroup *memcg; - unsigned short oldid; - - VM_BUG_ON_PAGE(PageLRU(page), page); - VM_BUG_ON_PAGE(page_count(page), page); - - if (!do_swap_account) - return; - - memcg = page->mem_cgroup; - - /* Readahead page, never charged */ - if (!memcg) - return; - - oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg)); - VM_BUG_ON_PAGE(oldid, page); - mem_cgroup_swap_statistics(memcg, true); - - page->mem_cgroup = NULL; - - if (!mem_cgroup_is_root(memcg)) - page_counter_uncharge(&memcg->memory, 1); - - /* XXX: caller holds IRQ-safe mapping->tree_lock */ - VM_BUG_ON(!irqs_disabled()); - - mem_cgroup_charge_statistics(memcg, page, -1); - memcg_check_events(memcg, page); -} - -/** - * mem_cgroup_uncharge_swap - uncharge a swap entry - * @entry: swap entry to uncharge - * - * Drop the memsw charge associated with @entry. - */ -void mem_cgroup_uncharge_swap(swp_entry_t entry) -{ - struct mem_cgroup *memcg; - unsigned short id; - - if (!do_swap_account) - return; - - id = swap_cgroup_record(entry, 0); - rcu_read_lock(); - memcg = mem_cgroup_lookup(id); - if (memcg) { - if (!mem_cgroup_is_root(memcg)) - page_counter_uncharge(&memcg->memsw, 1); - mem_cgroup_swap_statistics(memcg, false); - css_put(&memcg->css); - } - rcu_read_unlock(); -} -#endif - /** * mem_cgroup_try_charge - try charging a page * @page: page to charge @@ -5897,8 +5761,130 @@ static int __init mem_cgroup_init(void) soft_limit_tree.rb_tree_per_node[node] = rtpn; } - enable_swap_cgroup(); - return 0; } subsys_initcall(mem_cgroup_init); + +#ifdef CONFIG_MEMCG_SWAP +/** + * mem_cgroup_swapout - transfer a memsw charge to swap + * @page: page whose memsw charge to transfer + * @entry: swap entry to move the charge to + * + * Transfer the memsw charge of @page to @entry. + */ +void mem_cgroup_swapout(struct page *page, swp_entry_t entry) +{ + struct mem_cgroup *memcg; + unsigned short oldid; + + VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_PAGE(page_count(page), page); + + if (!do_swap_account) + return; + + memcg = page->mem_cgroup; + + /* Readahead page, never charged */ + if (!memcg) + return; + + oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg)); + VM_BUG_ON_PAGE(oldid, page); + mem_cgroup_swap_statistics(memcg, true); + + page->mem_cgroup = NULL; + + if (!mem_cgroup_is_root(memcg)) + page_counter_uncharge(&memcg->memory, 1); + + /* XXX: caller holds IRQ-safe mapping->tree_lock */ + VM_BUG_ON(!irqs_disabled()); + + mem_cgroup_charge_statistics(memcg, page, -1); + memcg_check_events(memcg, page); +} + +/** + * mem_cgroup_uncharge_swap - uncharge a swap entry + * @entry: swap entry to uncharge + * + * Drop the memsw charge associated with @entry. + */ +void mem_cgroup_uncharge_swap(swp_entry_t entry) +{ + struct mem_cgroup *memcg; + unsigned short id; + + if (!do_swap_account) + return; + + id = swap_cgroup_record(entry, 0); + rcu_read_lock(); + memcg = mem_cgroup_lookup(id); + if (memcg) { + if (!mem_cgroup_is_root(memcg)) + page_counter_uncharge(&memcg->memsw, 1); + mem_cgroup_swap_statistics(memcg, false); + css_put(&memcg->css); + } + rcu_read_unlock(); +} + +/* for remember boot option*/ +#ifdef CONFIG_MEMCG_SWAP_ENABLED +static int really_do_swap_account __initdata = 1; +#else +static int really_do_swap_account __initdata; +#endif + +static int __init enable_swap_account(char *s) +{ + if (!strcmp(s, "1")) + really_do_swap_account = 1; + else if (!strcmp(s, "0")) + really_do_swap_account = 0; + return 1; +} +__setup("swapaccount=", enable_swap_account); + +static struct cftype memsw_cgroup_files[] = { + { + .name = "memsw.usage_in_bytes", + .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), + .read_u64 = mem_cgroup_read_u64, + }, + { + .name = "memsw.max_usage_in_bytes", + .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), + .write = mem_cgroup_reset, + .read_u64 = mem_cgroup_read_u64, + }, + { + .name = "memsw.limit_in_bytes", + .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), + .write = mem_cgroup_write, + .read_u64 = mem_cgroup_read_u64, + }, + { + .name = "memsw.failcnt", + .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), + .write = mem_cgroup_reset, + .read_u64 = mem_cgroup_read_u64, + }, + { }, /* terminate */ +}; + +static int __init mem_cgroup_swap_init(void) +{ + if (!mem_cgroup_disabled() && really_do_swap_account) { + do_swap_account = 1; + WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys, + memsw_cgroup_files)); + } + return 0; +} +subsys_initcall(mem_cgroup_swap_init); + +#endif /* CONFIG_MEMCG_SWAP */ From 3ae3ad4e639234a43fd3997887524d2e5345fa76 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 11 Feb 2015 15:26:38 -0800 Subject: [PATCH 558/601] microblaze: define __PAGETABLE_PMD_FOLDED Microblaze uses custom implementation of PMD folding, but doesn't define __PAGETABLE_PMD_FOLDED, which generic code expects to see. Let's fix it. Defining __PAGETABLE_PMD_FOLDED will drop out unused __pmd_alloc(). It also fixes problems with recently-introduced pmd accounting. Signed-off-by: Kirill A. Shutemov Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/microblaze/include/asm/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index 91b9b46fbb5d5f..c6b6af4ca2a0be 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -61,6 +61,8 @@ extern int mem_init_done; #include +#define __PAGETABLE_PMD_FOLDED + #ifdef __KERNEL__ #ifndef __ASSEMBLY__ From d016bf7ece53b2b947bfd769e0842fd2feb7556b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 11 Feb 2015 15:26:41 -0800 Subject: [PATCH 559/601] mm: make FIRST_USER_ADDRESS unsigned long on all archs LKP has triggered a compiler warning after my recent patch "mm: account pmd page tables to the process": mm/mmap.c: In function 'exit_mmap': >> mm/mmap.c:2857:2: warning: right shift count >= width of type [enabled by default] The code: > 2857 WARN_ON(mm_nr_pmds(mm) > 2858 round_up(FIRST_USER_ADDRESS, PUD_SIZE) >> PUD_SHIFT); In this, on tile, we have FIRST_USER_ADDRESS defined as 0. round_up() has the same type -- int. PUD_SHIFT. I think the best way to fix it is to define FIRST_USER_ADDRESS as unsigned long. On every arch for consistency. Signed-off-by: Kirill A. Shutemov Reported-by: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/pgtable.h | 2 +- arch/arc/include/asm/pgtable.h | 2 +- arch/arm/include/asm/pgtable-nommu.h | 2 +- arch/arm64/include/asm/pgtable.h | 2 +- arch/avr32/include/asm/pgtable.h | 2 +- arch/cris/include/asm/pgtable.h | 2 +- arch/frv/include/asm/pgtable.h | 2 +- arch/hexagon/include/asm/pgtable.h | 2 +- arch/ia64/include/asm/pgtable.h | 2 +- arch/m32r/include/asm/pgtable.h | 2 +- arch/m68k/include/asm/pgtable_mm.h | 2 +- arch/microblaze/include/asm/pgtable.h | 2 +- arch/mips/include/asm/pgtable-32.h | 2 +- arch/mn10300/include/asm/pgtable.h | 2 +- arch/nios2/include/asm/pgtable.h | 2 +- arch/openrisc/include/asm/pgtable.h | 2 +- arch/parisc/include/asm/pgtable.h | 2 +- arch/powerpc/include/asm/pgtable-ppc32.h | 2 +- arch/powerpc/include/asm/pgtable-ppc64.h | 2 +- arch/s390/include/asm/pgtable.h | 2 +- arch/score/include/asm/pgtable.h | 2 +- arch/sh/include/asm/pgtable.h | 2 +- arch/sparc/include/asm/pgtable_32.h | 2 +- arch/sparc/include/asm/pgtable_64.h | 2 +- arch/tile/include/asm/pgtable.h | 2 +- arch/um/include/asm/pgtable-2level.h | 2 +- arch/um/include/asm/pgtable-3level.h | 2 +- arch/x86/include/asm/pgtable_types.h | 2 +- arch/xtensa/include/asm/pgtable.h | 2 +- 29 files changed, 29 insertions(+), 29 deletions(-) diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index fce22cf88ee9ca..a9a1195923722f 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -45,7 +45,7 @@ struct vm_area_struct; #define PTRS_PER_PMD (1UL << (PAGE_SHIFT-3)) #define PTRS_PER_PGD (1UL << (PAGE_SHIFT-3)) #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL /* Number of pointers that fit on a page: this will go away. */ #define PTRS_PER_PAGE (1UL << (PAGE_SHIFT-3)) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index bdc8ccaf390d5a..ffed3b2cf31392 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -211,7 +211,7 @@ * No special requirements for lowest virtual address we permit any user space * mapping to be mapped at. */ -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL /**************************************************************** diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index c35e53ee66630d..add094d09e3e2c 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -85,7 +85,7 @@ extern unsigned int kobjsize(const void *objp); #define VMALLOC_START 0UL #define VMALLOC_END 0xffffffffUL -#define FIRST_USER_ADDRESS (0) +#define FIRST_USER_ADDRESS 0UL #include diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 4c445057169d47..3e4d3c43632a57 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -45,7 +45,7 @@ #define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #ifndef __ASSEMBLY__ extern void __pte_error(const char *file, int line, unsigned long val); diff --git a/arch/avr32/include/asm/pgtable.h b/arch/avr32/include/asm/pgtable.h index ac7a817e21267f..35800664076e7d 100644 --- a/arch/avr32/include/asm/pgtable.h +++ b/arch/avr32/include/asm/pgtable.h @@ -30,7 +30,7 @@ #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #ifndef __ASSEMBLY__ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; diff --git a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h index e824257971c4dc..ceefc314d64d08 100644 --- a/arch/cris/include/asm/pgtable.h +++ b/arch/cris/include/asm/pgtable.h @@ -67,7 +67,7 @@ extern void paging_init(void); */ #define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL /* zero page used for uninitialized stuff */ #ifndef __ASSEMBLY__ diff --git a/arch/frv/include/asm/pgtable.h b/arch/frv/include/asm/pgtable.h index c49699d5902d21..93bcf2abd1a15d 100644 --- a/arch/frv/include/asm/pgtable.h +++ b/arch/frv/include/asm/pgtable.h @@ -140,7 +140,7 @@ extern unsigned long empty_zero_page; #define PTRS_PER_PTE 4096 #define USER_PGDS_IN_LAST_PML4 (TASK_SIZE / PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) #define KERNEL_PGD_PTRS (PTRS_PER_PGD - USER_PGD_PTRS) diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index 6e35e71d2aeaa3..49eab8136ec307 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -171,7 +171,7 @@ extern unsigned long _dflt_cache_att; extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; /* located in head.S */ /* Seems to be zero even in architectures where the zero page is firewalled? */ -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #define pte_special(pte) 0 #define pte_mkspecial(pte) (pte) diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 2f07bb3dda9122..7b6f8801df5765 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -127,7 +127,7 @@ #define PTRS_PER_PGD_SHIFT PTRS_PER_PTD_SHIFT #define PTRS_PER_PGD (1UL << PTRS_PER_PGD_SHIFT) #define USER_PTRS_PER_PGD (5*PTRS_PER_PGD/8) /* regions 0-4 are user regions */ -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL /* * All the normal masks have the "page accessed" bits on, as any time diff --git a/arch/m32r/include/asm/pgtable.h b/arch/m32r/include/asm/pgtable.h index 050f7a686e3d53..8c1fb902a9ceae 100644 --- a/arch/m32r/include/asm/pgtable.h +++ b/arch/m32r/include/asm/pgtable.h @@ -53,7 +53,7 @@ extern unsigned long empty_zero_page[1024]; #define PGDIR_MASK (~(PGDIR_SIZE - 1)) #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #ifndef __ASSEMBLY__ /* Just any arbitrary offset to the start of the vmalloc VM area: the diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h index 9f5abbda1ea72f..28a145bfbb7151 100644 --- a/arch/m68k/include/asm/pgtable_mm.h +++ b/arch/m68k/include/asm/pgtable_mm.h @@ -66,7 +66,7 @@ #define PTRS_PER_PGD 128 #endif #define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL /* Virtual address region for use by kernel_map() */ #ifdef CONFIG_SUN3 diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h index c6b6af4ca2a0be..e53b8532353c5b 100644 --- a/arch/microblaze/include/asm/pgtable.h +++ b/arch/microblaze/include/asm/pgtable.h @@ -72,7 +72,7 @@ extern int mem_init_done; #include #include -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL extern unsigned long va_to_phys(unsigned long address); extern pte_t *va_to_pte(unsigned long address); diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index 16aa9f23e17b1e..a6be006b6f7540 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -57,7 +57,7 @@ extern int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1, #define PTRS_PER_PTE ((PAGE_SIZE << PTE_ORDER) / sizeof(pte_t)) #define USER_PTRS_PER_PGD (0x80000000UL/PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #define VMALLOC_START MAP_BASE diff --git a/arch/mn10300/include/asm/pgtable.h b/arch/mn10300/include/asm/pgtable.h index 629181ae111e5d..afab728ab65ecd 100644 --- a/arch/mn10300/include/asm/pgtable.h +++ b/arch/mn10300/include/asm/pgtable.h @@ -65,7 +65,7 @@ extern void paging_init(void); #define PGDIR_MASK (~(PGDIR_SIZE - 1)) #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) #define KERNEL_PGD_PTRS (PTRS_PER_PGD - USER_PGD_PTRS) diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index 7b292e3a3138cc..a213e8c9aad059 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -24,7 +24,7 @@ #include #include -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #define VMALLOC_START CONFIG_NIOS2_KERNEL_MMU_REGION_BASE #define VMALLOC_END (CONFIG_NIOS2_KERNEL_REGION_BASE - 1) diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 18994ccb1185db..69c7df0e142047 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -77,7 +77,7 @@ extern void paging_init(void); */ #define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL /* * Kernels own virtual memory area. diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 1d49a4a7749b44..8c966b2270aa66 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -134,7 +134,7 @@ extern void purge_tlb_entries(struct mm_struct *, unsigned long); * pgd entries used up by user/kernel: */ -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL /* NB: The tlb miss handlers make certain assumptions about the order */ /* of the following bits, so be careful (One example, bits 25-31 */ diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 234e07c4780327..e48e3292e7135c 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -45,7 +45,7 @@ extern int icache_44x_need_flush; #define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT)) #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \ diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index b9dcc936e2d1d2..d46532ccc386a3 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -12,7 +12,7 @@ #endif #include -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL /* * Size of EA range mapped by our pagetables. diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index ffb1d8ce97aeac..aabcd3f62d3b8e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -99,7 +99,7 @@ extern unsigned long zero_page_mask; #endif /* CONFIG_64BIT */ #define PTRS_PER_PGD 2048 -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #define pte_ERROR(e) \ printk("%s:%d: bad pte %p.\n", __FILE__, __LINE__, (void *) pte_val(e)) diff --git a/arch/score/include/asm/pgtable.h b/arch/score/include/asm/pgtable.h index 5170ffdea64350..0553e5cd5985a0 100644 --- a/arch/score/include/asm/pgtable.h +++ b/arch/score/include/asm/pgtable.h @@ -27,7 +27,7 @@ extern pte_t invalid_pte_table[PAGE_SIZE/sizeof(pte_t)]; #define PTRS_PER_PTE 1024 #define USER_PTRS_PER_PGD (0x80000000UL/PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #define VMALLOC_START (0xc0000000UL) diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h index cf434c64408dd4..89c513a982fcbb 100644 --- a/arch/sh/include/asm/pgtable.h +++ b/arch/sh/include/asm/pgtable.h @@ -62,7 +62,7 @@ static inline unsigned long long neff_sign_extend(unsigned long val) /* Entries per level */ #define PTRS_PER_PTE (PAGE_SIZE / (1 << PTE_MAGNITUDE)) -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #define PHYS_ADDR_MASK29 0x1fffffff #define PHYS_ADDR_MASK32 0xffffffff diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h index 9912eb0b499ae5..f06b36a00a3b1e 100644 --- a/arch/sparc/include/asm/pgtable_32.h +++ b/arch/sparc/include/asm/pgtable_32.h @@ -44,7 +44,7 @@ unsigned long __init bootmem_init(unsigned long *pages_avail); #define PTRS_PER_PMD SRMMU_PTRS_PER_PMD #define PTRS_PER_PGD SRMMU_PTRS_PER_PGD #define USER_PTRS_PER_PGD PAGE_OFFSET / SRMMU_PGDIR_SIZE -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #define PTE_SIZE (PTRS_PER_PTE*4) #define PAGE_NONE SRMMU_PAGE_NONE diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 2ac7873ad6fd80..dc165ebdf05aef 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -93,7 +93,7 @@ bool kern_addr_valid(unsigned long addr); #define PTRS_PER_PGD (1UL << PGDIR_BITS) /* Kernel has a separate 44bit address space. */ -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %p(%016lx) seen at (%pS)\n", \ diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h index bc75b6ef2e7933..95a4f19d16c526 100644 --- a/arch/tile/include/asm/pgtable.h +++ b/arch/tile/include/asm/pgtable.h @@ -67,7 +67,7 @@ extern void pgtable_cache_init(void); extern void paging_init(void); extern void set_page_homes(void); -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #define _PAGE_PRESENT HV_PTE_PRESENT #define _PAGE_HUGE_PAGE HV_PTE_PAGE diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h index 7afe86035fa768..cfbe597524698c 100644 --- a/arch/um/include/asm/pgtable-2level.h +++ b/arch/um/include/asm/pgtable-2level.h @@ -23,7 +23,7 @@ #define PTRS_PER_PTE 1024 #define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE) #define PTRS_PER_PGD 1024 -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #define pte_ERROR(e) \ printk("%s:%d: bad pte %p(%08lx).\n", __FILE__, __LINE__, &(e), \ diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h index 344c559c0a1787..2b4274e7c0955f 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-3level.h @@ -41,7 +41,7 @@ #endif #define USER_PTRS_PER_PGD ((TASK_SIZE + (PGDIR_SIZE - 1)) / PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #define pte_ERROR(e) \ printk("%s:%d: bad pte %p(%016lx).\n", __FILE__, __LINE__, &(e), \ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 5185a4f599ec03..3e0230c94cff1e 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -4,7 +4,7 @@ #include #include -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #define _PAGE_BIT_PRESENT 0 /* is present */ #define _PAGE_BIT_RW 1 /* writeable */ diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 01b80dce9d65d2..a5e929a10c20e4 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -57,7 +57,7 @@ #define PTRS_PER_PGD 1024 #define PGD_ORDER 0 #define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) -#define FIRST_USER_ADDRESS 0 +#define FIRST_USER_ADDRESS 0UL #define FIRST_USER_PGD_NR (FIRST_USER_ADDRESS >> PGDIR_SHIFT) /* From 4155b8e0a79570d41ae77b5bf7df375bd4c36610 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 11 Feb 2015 15:26:44 -0800 Subject: [PATCH 560/601] mm, asm-generic: define PUD_SHIFT in If an architecure uses , build fails if we try to use PUD_SHIFT in generic code: In file included from arch/microblaze/include/asm/bug.h:1:0, from include/linux/bug.h:4, from include/linux/thread_info.h:11, from include/asm-generic/preempt.h:4, from arch/microblaze/include/generated/asm/preempt.h:1, from include/linux/preempt.h:18, from include/linux/spinlock.h:50, from include/linux/mmzone.h:7, from include/linux/gfp.h:5, from include/linux/slab.h:14, from mm/mmap.c:12: mm/mmap.c: In function 'exit_mmap': >> mm/mmap.c:2858:46: error: 'PUD_SHIFT' undeclared (first use in this function) round_up(FIRST_USER_ADDRESS, PUD_SIZE) >> PUD_SHIFT); ^ include/asm-generic/bug.h:86:25: note: in definition of macro 'WARN_ON' int __ret_warn_on = !!(condition); \ ^ mm/mmap.c:2858:46: note: each undeclared identifier is reported only once for each function it appears in round_up(FIRST_USER_ADDRESS, PUD_SIZE) >> PUD_SHIFT); ^ include/asm-generic/bug.h:86:25: note: in definition of macro 'WARN_ON' int __ret_warn_on = !!(condition); \ ^ As with , let's define PUD_SHIFT to PGDIR_SHIFT. Signed-off-by: Kirill A. Shutemov Reported-by: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/4level-fixup.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h index 77ff547730af53..5bdab6bffd23cf 100644 --- a/include/asm-generic/4level-fixup.h +++ b/include/asm-generic/4level-fixup.h @@ -4,6 +4,7 @@ #define __ARCH_HAS_4LEVEL_HACK #define __PAGETABLE_PUD_FOLDED +#define PUD_SHIFT PGDIR_SHIFT #define PUD_SIZE PGDIR_SIZE #define PUD_MASK PGDIR_MASK #define PTRS_PER_PUD 1 From 8aa76875dc15b2dd21fa74eb7c12dc3c75f4b6b6 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 11 Feb 2015 15:26:47 -0800 Subject: [PATCH 561/601] arm: define __PAGETABLE_PMD_FOLDED for !LPAE ARM uses custom implementation of PMD folding in 2-level page table case. Generic code expects to see __PAGETABLE_PMD_FOLDED to be defined if PMD is folded, but ARM doesn't do this. Let's fix it. Defining __PAGETABLE_PMD_FOLDED will drop out unused __pmd_alloc(). It also fixes problems with recently-introduced pmd accounting on ARM without LPAE. Signed-off-by: Kirill A. Shutemov Reported-by: Nishanth Menon Reported-by: Simon Horman Tested-by: Simon Horman Tested-by: Fabio Estevam Tested-by: Felipe Balbi Tested-by: Nishanth Menon Tested-by: Peter Ujfalusi Tested-by: Krzysztof Kozlowski Tested-by: Geert Uytterhoeven Cc: Dave Hansen Cc: Hugh Dickins Cc: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: David Rientjes Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/include/asm/pgtable-2level.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index bcc5e300413f46..bfd662e49a25ef 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -10,6 +10,8 @@ #ifndef _ASM_PGTABLE_2LEVEL_H #define _ASM_PGTABLE_2LEVEL_H +#define __PAGETABLE_PMD_FOLDED + /* * Hardware-wise, we have a two level page table structure, where the first * level has 4096 entries, and the second level has 256 entries. Each entry From dc6c9a35b66b520cf67e05d8ca60ebecad3b0479 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 11 Feb 2015 15:26:50 -0800 Subject: [PATCH 562/601] mm: account pmd page tables to the process Dave noticed that unprivileged process can allocate significant amount of memory -- >500 MiB on x86_64 -- and stay unnoticed by oom-killer and memory cgroup. The trick is to allocate a lot of PMD page tables. Linux kernel doesn't account PMD tables to the process, only PTE. The use-cases below use few tricks to allocate a lot of PMD page tables while keeping VmRSS and VmPTE low. oom_score for the process will be 0. #include #include #include #include #include #include #define PUD_SIZE (1UL << 30) #define PMD_SIZE (1UL << 21) #define NR_PUD 130000 int main(void) { char *addr = NULL; unsigned long i; prctl(PR_SET_THP_DISABLE); for (i = 0; i < NR_PUD ; i++) { addr = mmap(addr + PUD_SIZE, PUD_SIZE, PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); if (addr == MAP_FAILED) { perror("mmap"); break; } *addr = 'x'; munmap(addr, PMD_SIZE); mmap(addr, PMD_SIZE, PROT_WRITE|PROT_READ, MAP_ANONYMOUS|MAP_PRIVATE|MAP_FIXED, -1, 0); if (addr == MAP_FAILED) perror("re-mmap"), exit(1); } printf("PID %d consumed %lu KiB in PMD page tables\n", getpid(), i * 4096 >> 10); return pause(); } The patch addresses the issue by account PMD tables to the process the same way we account PTE. The main place where PMD tables is accounted is __pmd_alloc() and free_pmd_range(). But there're few corner cases: - HugeTLB can share PMD page tables. The patch handles by accounting the table to all processes who share it. - x86 PAE pre-allocates few PMD tables on fork. - Architectures with FIRST_USER_ADDRESS > 0. We need to adjust sanity check on exit(2). Accounting only happens on configuration where PMD page table's level is present (PMD is not folded). As with nr_ptes we use per-mm counter. The counter value is used to calculate baseline for badness score by oom-killer. Signed-off-by: Kirill A. Shutemov Reported-by: Dave Hansen Cc: Hugh Dickins Reviewed-by: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: David Rientjes Tested-by: Sedat Dilek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/vm.txt | 12 ++++++------ arch/x86/mm/pgtable.c | 14 +++++++++----- fs/proc/task_mmu.c | 9 ++++++--- include/linux/mm.h | 24 ++++++++++++++++++++++++ include/linux/mm_types.h | 3 ++- kernel/fork.c | 3 +++ mm/debug.c | 3 ++- mm/hugetlb.c | 8 ++++++-- mm/memory.c | 15 +++++++++------ mm/mmap.c | 4 +++- mm/oom_kill.c | 9 +++++---- 11 files changed, 75 insertions(+), 29 deletions(-) diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 4415aa91568104..e9c706e4627a86 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -555,12 +555,12 @@ this is causing problems for your system/application. oom_dump_tasks -Enables a system-wide task dump (excluding kernel threads) to be -produced when the kernel performs an OOM-killing and includes such -information as pid, uid, tgid, vm size, rss, nr_ptes, swapents, -oom_score_adj score, and name. This is helpful to determine why the -OOM killer was invoked, to identify the rogue task that caused it, -and to determine why the OOM killer chose the task it did to kill. +Enables a system-wide task dump (excluding kernel threads) to be produced +when the kernel performs an OOM-killing and includes such information as +pid, uid, tgid, vm size, rss, nr_ptes, nr_pmds, swapents, oom_score_adj +score, and name. This is helpful to determine why the OOM killer was +invoked, to identify the rogue task that caused it, and to determine why +the OOM killer chose the task it did to kill. If this is set to zero, this information is suppressed. On very large systems with thousands of tasks it may not be feasible to dump diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 6fb6927f9e76f3..7b22adaad4f137 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -190,7 +190,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) #endif /* CONFIG_X86_PAE */ -static void free_pmds(pmd_t *pmds[]) +static void free_pmds(struct mm_struct *mm, pmd_t *pmds[]) { int i; @@ -198,10 +198,11 @@ static void free_pmds(pmd_t *pmds[]) if (pmds[i]) { pgtable_pmd_page_dtor(virt_to_page(pmds[i])); free_page((unsigned long)pmds[i]); + mm_dec_nr_pmds(mm); } } -static int preallocate_pmds(pmd_t *pmds[]) +static int preallocate_pmds(struct mm_struct *mm, pmd_t *pmds[]) { int i; bool failed = false; @@ -215,11 +216,13 @@ static int preallocate_pmds(pmd_t *pmds[]) pmd = NULL; failed = true; } + if (pmd) + mm_inc_nr_pmds(mm); pmds[i] = pmd; } if (failed) { - free_pmds(pmds); + free_pmds(mm, pmds); return -ENOMEM; } @@ -246,6 +249,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp) paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT); pmd_free(mm, pmd); + mm_dec_nr_pmds(mm); } } } @@ -283,7 +287,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) mm->pgd = pgd; - if (preallocate_pmds(pmds) != 0) + if (preallocate_pmds(mm, pmds) != 0) goto out_free_pgd; if (paravirt_pgd_alloc(mm) != 0) @@ -304,7 +308,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) return pgd; out_free_pmds: - free_pmds(pmds); + free_pmds(mm, pmds); out_free_pgd: free_page((unsigned long)pgd); out: diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 6396f88c668738..e6e0abeb5d1240 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -21,7 +21,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) { - unsigned long data, text, lib, swap; + unsigned long data, text, lib, swap, ptes, pmds; unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss; /* @@ -42,6 +42,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10; lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text; swap = get_mm_counter(mm, MM_SWAPENTS); + ptes = PTRS_PER_PTE * sizeof(pte_t) * atomic_long_read(&mm->nr_ptes); + pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm); seq_printf(m, "VmPeak:\t%8lu kB\n" "VmSize:\t%8lu kB\n" @@ -54,6 +56,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) "VmExe:\t%8lu kB\n" "VmLib:\t%8lu kB\n" "VmPTE:\t%8lu kB\n" + "VmPMD:\t%8lu kB\n" "VmSwap:\t%8lu kB\n", hiwater_vm << (PAGE_SHIFT-10), total_vm << (PAGE_SHIFT-10), @@ -63,8 +66,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) total_rss << (PAGE_SHIFT-10), data << (PAGE_SHIFT-10), mm->stack_vm << (PAGE_SHIFT-10), text, lib, - (PTRS_PER_PTE * sizeof(pte_t) * - atomic_long_read(&mm->nr_ptes)) >> 10, + ptes >> 10, + pmds >> 10, swap << (PAGE_SHIFT-10)); } diff --git a/include/linux/mm.h b/include/linux/mm.h index c6bf813a6b3d73..644990b83cda68 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1438,8 +1438,32 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud, { return 0; } + +static inline unsigned long mm_nr_pmds(struct mm_struct *mm) +{ + return 0; +} + +static inline void mm_inc_nr_pmds(struct mm_struct *mm) {} +static inline void mm_dec_nr_pmds(struct mm_struct *mm) {} + #else int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); + +static inline unsigned long mm_nr_pmds(struct mm_struct *mm) +{ + return atomic_long_read(&mm->nr_pmds); +} + +static inline void mm_inc_nr_pmds(struct mm_struct *mm) +{ + atomic_long_inc(&mm->nr_pmds); +} + +static inline void mm_dec_nr_pmds(struct mm_struct *mm) +{ + atomic_long_dec(&mm->nr_pmds); +} #endif int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 20ff2105b56415..199a03aab8dc9c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -363,7 +363,8 @@ struct mm_struct { pgd_t * pgd; atomic_t mm_users; /* How many users with user space? */ atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ - atomic_long_t nr_ptes; /* Page table pages */ + atomic_long_t nr_ptes; /* PTE page table pages */ + atomic_long_t nr_pmds; /* PMD page table pages */ int map_count; /* number of VMAs */ spinlock_t page_table_lock; /* Protects page tables and some counters */ diff --git a/kernel/fork.c b/kernel/fork.c index b379d9abddc74c..c99098c5264125 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -555,6 +555,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) INIT_LIST_HEAD(&mm->mmlist); mm->core_state = NULL; atomic_long_set(&mm->nr_ptes, 0); +#ifndef __PAGETABLE_PMD_FOLDED + atomic_long_set(&mm->nr_pmds, 0); +#endif mm->map_count = 0; mm->locked_vm = 0; mm->pinned_vm = 0; diff --git a/mm/debug.c b/mm/debug.c index d69cb5a7ba9af3..3eb3ac2fcee7d1 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -173,7 +173,7 @@ void dump_mm(const struct mm_struct *mm) "get_unmapped_area %p\n" #endif "mmap_base %lu mmap_legacy_base %lu highest_vm_end %lu\n" - "pgd %p mm_users %d mm_count %d nr_ptes %lu map_count %d\n" + "pgd %p mm_users %d mm_count %d nr_ptes %lu nr_pmds %lu map_count %d\n" "hiwater_rss %lx hiwater_vm %lx total_vm %lx locked_vm %lx\n" "pinned_vm %lx shared_vm %lx exec_vm %lx stack_vm %lx\n" "start_code %lx end_code %lx start_data %lx end_data %lx\n" @@ -206,6 +206,7 @@ void dump_mm(const struct mm_struct *mm) mm->pgd, atomic_read(&mm->mm_users), atomic_read(&mm->mm_count), atomic_long_read((atomic_long_t *)&mm->nr_ptes), + mm_nr_pmds((struct mm_struct *)mm), mm->map_count, mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm, mm->pinned_vm, mm->shared_vm, mm->exec_vm, mm->stack_vm, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index fd28d6ba5e5db3..0a9ac6c268325a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3598,6 +3598,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) if (saddr) { spte = huge_pte_offset(svma->vm_mm, saddr); if (spte) { + mm_inc_nr_pmds(mm); get_page(virt_to_page(spte)); break; } @@ -3609,11 +3610,13 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) ptl = huge_pte_lockptr(hstate_vma(vma), mm, spte); spin_lock(ptl); - if (pud_none(*pud)) + if (pud_none(*pud)) { pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK)); - else + } else { put_page(virt_to_page(spte)); + mm_inc_nr_pmds(mm); + } spin_unlock(ptl); out: pte = (pte_t *)pmd_alloc(mm, pud, addr); @@ -3644,6 +3647,7 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) pud_clear(pud); put_page(virt_to_page(ptep)); + mm_dec_nr_pmds(mm); *addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE; return 1; } diff --git a/mm/memory.c b/mm/memory.c index d63849b5188fe7..bbe6a73a899d2b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -428,6 +428,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, pmd = pmd_offset(pud, start); pud_clear(pud); pmd_free_tlb(tlb, pmd, start); + mm_dec_nr_pmds(tlb->mm); } static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, @@ -3322,15 +3323,17 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) spin_lock(&mm->page_table_lock); #ifndef __ARCH_HAS_4LEVEL_HACK - if (pud_present(*pud)) /* Another has populated it */ - pmd_free(mm, new); - else + if (!pud_present(*pud)) { + mm_inc_nr_pmds(mm); pud_populate(mm, pud, new); -#else - if (pgd_present(*pud)) /* Another has populated it */ + } else /* Another has populated it */ pmd_free(mm, new); - else +#else + if (!pgd_present(*pud)) { + mm_inc_nr_pmds(mm); pgd_populate(mm, pud, new); + } else /* Another has populated it */ + pmd_free(mm, new); #endif /* __ARCH_HAS_4LEVEL_HACK */ spin_unlock(&mm->page_table_lock); return 0; diff --git a/mm/mmap.c b/mm/mmap.c index 14d84666e8ba6d..6a7d36d133fbd4 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2853,7 +2853,9 @@ void exit_mmap(struct mm_struct *mm) vm_unacct_memory(nr_accounted); WARN_ON(atomic_long_read(&mm->nr_ptes) > - (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); + round_up(FIRST_USER_ADDRESS, PMD_SIZE) >> PMD_SHIFT); + WARN_ON(mm_nr_pmds(mm) > + round_up(FIRST_USER_ADDRESS, PUD_SIZE) >> PUD_SHIFT); } /* Insert vm structure into process list sorted by address diff --git a/mm/oom_kill.c b/mm/oom_kill.c index b8df76ee2be380..642f38cb175aa8 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -169,8 +169,8 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, * The baseline for the badness score is the proportion of RAM that each * task's rss, pagetable and swap space use. */ - points = get_mm_rss(p->mm) + atomic_long_read(&p->mm->nr_ptes) + - get_mm_counter(p->mm, MM_SWAPENTS); + points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) + + atomic_long_read(&p->mm->nr_ptes) + mm_nr_pmds(p->mm); task_unlock(p); /* @@ -351,7 +351,7 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask) struct task_struct *p; struct task_struct *task; - pr_info("[ pid ] uid tgid total_vm rss nr_ptes swapents oom_score_adj name\n"); + pr_info("[ pid ] uid tgid total_vm rss nr_ptes nr_pmds swapents oom_score_adj name\n"); rcu_read_lock(); for_each_process(p) { if (oom_unkillable_task(p, memcg, nodemask)) @@ -367,10 +367,11 @@ static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask) continue; } - pr_info("[%5d] %5d %5d %8lu %8lu %7ld %8lu %5hd %s\n", + pr_info("[%5d] %5d %5d %8lu %8lu %7ld %7ld %8lu %5hd %s\n", task->pid, from_kuid(&init_user_ns, task_uid(task)), task->tgid, task->mm->total_vm, get_mm_rss(task->mm), atomic_long_read(&task->mm->nr_ptes), + mm_nr_pmds(task->mm), get_mm_counter(task->mm, MM_SWAPENTS), task->signal->oom_score_adj, task->comm); task_unlock(task); From b30fe6c7ced70f62862c3d09357e7e8084e98d9f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 11 Feb 2015 15:26:53 -0800 Subject: [PATCH 563/601] mm: fix false-positive warning on exit due mm_nr_pmds(mm) The problem is that we check nr_ptes/nr_pmds in exit_mmap() which happens *before* pgd_free(). And if an arch does pte/pmd allocation in pgd_alloc() and frees them in pgd_free() we see offset in counters by the time of the checks. We tried to workaround this by offsetting expected counter value according to FIRST_USER_ADDRESS for both nr_pte and nr_pmd in exit_mmap(). But it doesn't work in some cases: 1. ARM with LPAE enabled also has non-zero USER_PGTABLES_CEILING, but upper addresses occupied with huge pmd entries, so the trick with offsetting expected counter value will get really ugly: we will have to apply it nr_pmds, but not nr_ptes. 2. Metag has non-zero FIRST_USER_ADDRESS, but doesn't do allocation pte/pmd page tables allocation in pgd_alloc(), just setup a pgd entry which is allocated at boot and shared accross all processes. The proposal is to move the check to check_mm() which happens *after* pgd_free() and do proper accounting during pgd_alloc() and pgd_free() which would bring counters to zero if nothing leaked. Signed-off-by: Kirill A. Shutemov Reported-by: Tyler Baker Tested-by: Tyler Baker Tested-by: Nishanth Menon Cc: Russell King Cc: James Hogan Cc: Guan Xuetao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/pgd.c | 4 ++++ arch/unicore32/mm/pgd.c | 3 +++ kernel/fork.c | 8 ++++++++ mm/mmap.c | 5 ----- 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index 249379535be2c1..a3681f11dd9f12 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -97,6 +97,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) no_pte: pmd_free(mm, new_pmd); + mm_dec_nr_pmds(mm); no_pmd: pud_free(mm, new_pud); no_pud: @@ -130,9 +131,11 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd_base) pte = pmd_pgtable(*pmd); pmd_clear(pmd); pte_free(mm, pte); + atomic_long_dec(&mm->nr_ptes); no_pmd: pud_clear(pud); pmd_free(mm, pmd); + mm_dec_nr_pmds(mm); no_pud: pgd_clear(pgd); pud_free(mm, pud); @@ -152,6 +155,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd_base) pmd = pmd_offset(pud, 0); pud_clear(pud); pmd_free(mm, pmd); + mm_dec_nr_pmds(mm); pgd_clear(pgd); pud_free(mm, pud); } diff --git a/arch/unicore32/mm/pgd.c b/arch/unicore32/mm/pgd.c index 08b8d4295e7003..2ade20d8eab396 100644 --- a/arch/unicore32/mm/pgd.c +++ b/arch/unicore32/mm/pgd.c @@ -69,6 +69,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) no_pte: pmd_free(mm, new_pmd); + mm_dec_nr_pmds(mm); no_pmd: free_pages((unsigned long)new_pgd, 0); no_pgd: @@ -96,7 +97,9 @@ void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd) pte = pmd_pgtable(*pmd); pmd_clear(pmd); pte_free(mm, pte); + atomic_long_dec(&mm->nr_ptes); pmd_free(mm, pmd); + mm_dec_nr_pmds(mm); free: free_pages((unsigned long) pgd, 0); } diff --git a/kernel/fork.c b/kernel/fork.c index c99098c5264125..66e19c251581d1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -606,6 +606,14 @@ static void check_mm(struct mm_struct *mm) printk(KERN_ALERT "BUG: Bad rss-counter state " "mm:%p idx:%d val:%ld\n", mm, i, x); } + + if (atomic_long_read(&mm->nr_ptes)) + pr_alert("BUG: non-zero nr_ptes on freeing mm: %ld\n", + atomic_long_read(&mm->nr_ptes)); + if (mm_nr_pmds(mm)) + pr_alert("BUG: non-zero nr_pmds on freeing mm: %ld\n", + mm_nr_pmds(mm)); + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS VM_BUG_ON_MM(mm->pmd_huge_pte, mm); #endif diff --git a/mm/mmap.c b/mm/mmap.c index 6a7d36d133fbd4..c5f44682c0d175 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2851,11 +2851,6 @@ void exit_mmap(struct mm_struct *mm) vma = remove_vma(vma); } vm_unacct_memory(nr_accounted); - - WARN_ON(atomic_long_read(&mm->nr_ptes) > - round_up(FIRST_USER_ADDRESS, PMD_SIZE) >> PMD_SHIFT); - WARN_ON(mm_nr_pmds(mm) > - round_up(FIRST_USER_ADDRESS, PUD_SIZE) >> PUD_SHIFT); } /* Insert vm structure into process list sorted by address From 8d38633c3b4093aca7524945f1e9249d7d3a44da Mon Sep 17 00:00:00 2001 From: Konstantin Khebnikov Date: Wed, 11 Feb 2015 15:26:55 -0800 Subject: [PATCH 564/601] page_writeback: put account_page_redirty() after set_page_dirty() Helper account_page_redirty() fixes dirty pages counter for redirtied pages. This patch puts it after dirtying and prevents temporary underflows of dirtied pages counters on zone/bdi and current->nr_dirtied. Signed-off-by: Konstantin Khebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/btrfs/extent_io.c | 2 +- mm/page-writeback.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 790dbae3343c4f..c73df6a7c9b6ce 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1407,8 +1407,8 @@ int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) while (index <= end_index) { page = find_get_page(inode->i_mapping, index); BUG_ON(!page); /* Pages should be in the extent_io_tree */ - account_page_redirty(page); __set_page_dirty_nobuffers(page); + account_page_redirty(page); page_cache_release(page); index++; } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index fb71e9deca8569..6a73e47e81c67f 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2168,9 +2168,12 @@ EXPORT_SYMBOL(account_page_redirty); */ int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page) { + int ret; + wbc->pages_skipped++; + ret = __set_page_dirty_nobuffers(page); account_page_redirty(page); - return __set_page_dirty_nobuffers(page); + return ret; } EXPORT_SYMBOL(redirty_page_for_writepage); From 4645f06334be1ad0eb61aa182c7999fe51bc1ba6 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 11 Feb 2015 15:26:58 -0800 Subject: [PATCH 565/601] mm/compaction: change tracepoint format from decimal to hexadecimal To check the range that compaction is working, tracepoint print start/end pfn of zone and start pfn of both scanner with decimal format. Since we manage all pages in order of 2 and it is well represented by hexadecimal, this patch change the tracepoint format from decimal to hexadecimal. This would improve readability. For example, it makes us easily notice whether current scanner try to compact previously attempted pageblock or not. Signed-off-by: Joonsoo Kim Acked-by: Vlastimil Babka Cc: Mel Gorman Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/compaction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index c6814b917bdfb1..1337d9e01e3d31 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -104,7 +104,7 @@ TRACE_EVENT(mm_compaction_begin, __entry->zone_end = zone_end; ), - TP_printk("zone_start=%lu migrate_start=%lu free_start=%lu zone_end=%lu", + TP_printk("zone_start=0x%lx migrate_start=0x%lx free_start=0x%lx zone_end=0x%lx", __entry->zone_start, __entry->migrate_start, __entry->free_start, From 16c4a097a035c01809aa0c0abd458ca1fe4ff3d0 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 11 Feb 2015 15:27:01 -0800 Subject: [PATCH 566/601] mm/compaction: enhance tracepoint output for compaction begin/end We now have tracepoint for begin event of compaction and it prints start position of both scanners, but, tracepoint for end event of compaction doesn't print finish position of both scanners. It'd be also useful to know finish position of both scanners so this patch add it. It will help to find odd behavior or problem on compaction internal logic. And mode is added to both begin/end tracepoint output, since according to mode, compaction behavior is quite different. And lastly, status format is changed to string rather than status number for readability. [akpm@linux-foundation.org: fix sparse warning] Signed-off-by: Joonsoo Kim Acked-by: Vlastimil Babka Cc: Mel Gorman Cc: David Rientjes Cc: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 1 + include/trace/events/compaction.h | 49 ++++++++++++++++++++++--------- mm/compaction.c | 15 ++++++++-- 3 files changed, 49 insertions(+), 16 deletions(-) diff --git a/include/linux/compaction.h b/include/linux/compaction.h index f2efda2e6ac6dd..db64cae065305f 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -12,6 +12,7 @@ #define COMPACT_PARTIAL 3 /* The full zone was compacted */ #define COMPACT_COMPLETE 4 +/* When adding new state, please change compaction_status_string, too */ /* Used to signal whether compaction detected need_sched() or lock contention */ /* No contention detected */ diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index 1337d9e01e3d31..839f6fac921a40 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -85,46 +85,67 @@ TRACE_EVENT(mm_compaction_migratepages, ); TRACE_EVENT(mm_compaction_begin, - TP_PROTO(unsigned long zone_start, unsigned long migrate_start, - unsigned long free_start, unsigned long zone_end), + TP_PROTO(unsigned long zone_start, unsigned long migrate_pfn, + unsigned long free_pfn, unsigned long zone_end, bool sync), - TP_ARGS(zone_start, migrate_start, free_start, zone_end), + TP_ARGS(zone_start, migrate_pfn, free_pfn, zone_end, sync), TP_STRUCT__entry( __field(unsigned long, zone_start) - __field(unsigned long, migrate_start) - __field(unsigned long, free_start) + __field(unsigned long, migrate_pfn) + __field(unsigned long, free_pfn) __field(unsigned long, zone_end) + __field(bool, sync) ), TP_fast_assign( __entry->zone_start = zone_start; - __entry->migrate_start = migrate_start; - __entry->free_start = free_start; + __entry->migrate_pfn = migrate_pfn; + __entry->free_pfn = free_pfn; __entry->zone_end = zone_end; + __entry->sync = sync; ), - TP_printk("zone_start=0x%lx migrate_start=0x%lx free_start=0x%lx zone_end=0x%lx", + TP_printk("zone_start=0x%lx migrate_pfn=0x%lx free_pfn=0x%lx zone_end=0x%lx, mode=%s", __entry->zone_start, - __entry->migrate_start, - __entry->free_start, - __entry->zone_end) + __entry->migrate_pfn, + __entry->free_pfn, + __entry->zone_end, + __entry->sync ? "sync" : "async") ); TRACE_EVENT(mm_compaction_end, - TP_PROTO(int status), + TP_PROTO(unsigned long zone_start, unsigned long migrate_pfn, + unsigned long free_pfn, unsigned long zone_end, bool sync, + int status), - TP_ARGS(status), + TP_ARGS(zone_start, migrate_pfn, free_pfn, zone_end, sync, status), TP_STRUCT__entry( + __field(unsigned long, zone_start) + __field(unsigned long, migrate_pfn) + __field(unsigned long, free_pfn) + __field(unsigned long, zone_end) + __field(bool, sync) __field(int, status) ), TP_fast_assign( + __entry->zone_start = zone_start; + __entry->migrate_pfn = migrate_pfn; + __entry->free_pfn = free_pfn; + __entry->zone_end = zone_end; + __entry->sync = sync; __entry->status = status; ), - TP_printk("status=%d", __entry->status) + TP_printk("zone_start=0x%lx migrate_pfn=0x%lx free_pfn=0x%lx zone_end=0x%lx, mode=%s status=%s", + __entry->zone_start, + __entry->migrate_pfn, + __entry->free_pfn, + __entry->zone_end, + __entry->sync ? "sync" : "async", + compaction_status_string[__entry->status]) ); #endif /* _TRACE_COMPACTION_H */ diff --git a/mm/compaction.c b/mm/compaction.c index 9c7e6909dd2908..66f7c365e88893 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -34,6 +34,15 @@ static inline void count_compact_events(enum vm_event_item item, long delta) #endif #if defined CONFIG_COMPACTION || defined CONFIG_CMA +#ifdef CONFIG_TRACEPOINTS +static const char *const compaction_status_string[] = { + "deferred", + "skipped", + "continue", + "partial", + "complete", +}; +#endif #define CREATE_TRACE_POINTS #include @@ -1197,7 +1206,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn; } - trace_mm_compaction_begin(start_pfn, cc->migrate_pfn, cc->free_pfn, end_pfn); + trace_mm_compaction_begin(start_pfn, cc->migrate_pfn, + cc->free_pfn, end_pfn, sync); migrate_prep_local(); @@ -1299,7 +1309,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) zone->compact_cached_free_pfn = free_pfn; } - trace_mm_compaction_end(ret); + trace_mm_compaction_end(start_pfn, cc->migrate_pfn, + cc->free_pfn, end_pfn, sync, ret); return ret; } From e34d85f0e3c60f7226e5589898b7c7c5cd2a4f02 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 11 Feb 2015 15:27:04 -0800 Subject: [PATCH 567/601] mm/compaction: print current range where compaction work It'd be useful to know current range where compaction work for detailed analysis. With it, we can know pageblock where we actually scan and isolate, and, how much pages we try in that pageblock and can guess why it doesn't become freepage with pageblock order roughly. Signed-off-by: Joonsoo Kim Acked-by: Vlastimil Babka Cc: Mel Gorman Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/compaction.h | 30 +++++++++++++++++++++++------- mm/compaction.c | 9 ++++++--- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index 839f6fac921a40..139020b55612a5 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -11,39 +11,55 @@ DECLARE_EVENT_CLASS(mm_compaction_isolate_template, - TP_PROTO(unsigned long nr_scanned, + TP_PROTO( + unsigned long start_pfn, + unsigned long end_pfn, + unsigned long nr_scanned, unsigned long nr_taken), - TP_ARGS(nr_scanned, nr_taken), + TP_ARGS(start_pfn, end_pfn, nr_scanned, nr_taken), TP_STRUCT__entry( + __field(unsigned long, start_pfn) + __field(unsigned long, end_pfn) __field(unsigned long, nr_scanned) __field(unsigned long, nr_taken) ), TP_fast_assign( + __entry->start_pfn = start_pfn; + __entry->end_pfn = end_pfn; __entry->nr_scanned = nr_scanned; __entry->nr_taken = nr_taken; ), - TP_printk("nr_scanned=%lu nr_taken=%lu", + TP_printk("range=(0x%lx ~ 0x%lx) nr_scanned=%lu nr_taken=%lu", + __entry->start_pfn, + __entry->end_pfn, __entry->nr_scanned, __entry->nr_taken) ); DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_migratepages, - TP_PROTO(unsigned long nr_scanned, + TP_PROTO( + unsigned long start_pfn, + unsigned long end_pfn, + unsigned long nr_scanned, unsigned long nr_taken), - TP_ARGS(nr_scanned, nr_taken) + TP_ARGS(start_pfn, end_pfn, nr_scanned, nr_taken) ); DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_freepages, - TP_PROTO(unsigned long nr_scanned, + + TP_PROTO( + unsigned long start_pfn, + unsigned long end_pfn, + unsigned long nr_scanned, unsigned long nr_taken), - TP_ARGS(nr_scanned, nr_taken) + TP_ARGS(start_pfn, end_pfn, nr_scanned, nr_taken) ); TRACE_EVENT(mm_compaction_migratepages, diff --git a/mm/compaction.c b/mm/compaction.c index 66f7c365e88893..b12df9fe10b4e5 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -430,11 +430,12 @@ static unsigned long isolate_freepages_block(struct compact_control *cc, } + trace_mm_compaction_isolate_freepages(*start_pfn, blockpfn, + nr_scanned, total_isolated); + /* Record how far we have got within the block */ *start_pfn = blockpfn; - trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated); - /* * If strict isolation is requested by CMA then check that all the * pages requested were isolated. If there were any failures, 0 is @@ -590,6 +591,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, unsigned long flags = 0; bool locked = false; struct page *page = NULL, *valid_page = NULL; + unsigned long start_pfn = low_pfn; /* * Ensure that there are not too many pages isolated from the LRU @@ -750,7 +752,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, if (low_pfn == end_pfn) update_pageblock_skip(cc, valid_page, nr_isolated, true); - trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); + trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn, + nr_scanned, nr_isolated); count_compact_events(COMPACTMIGRATE_SCANNED, nr_scanned); if (nr_isolated) From 837d026d560c5ef26abeca0441713d82e4e82cad Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 11 Feb 2015 15:27:06 -0800 Subject: [PATCH 568/601] mm/compaction: more trace to understand when/why compaction start/finish It is not well analyzed that when/why compaction start/finish or not. With these new tracepoints, we can know much more about start/finish reason of compaction. I can find following bug with these tracepoint. http://www.spinics.net/lists/linux-mm/msg81582.html Signed-off-by: Joonsoo Kim Cc: Vlastimil Babka Cc: Mel Gorman Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 3 ++ include/trace/events/compaction.h | 74 +++++++++++++++++++++++++++++++ mm/compaction.c | 38 ++++++++++++++-- 3 files changed, 111 insertions(+), 4 deletions(-) diff --git a/include/linux/compaction.h b/include/linux/compaction.h index db64cae065305f..501d7513aac1bb 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -12,6 +12,9 @@ #define COMPACT_PARTIAL 3 /* The full zone was compacted */ #define COMPACT_COMPLETE 4 +/* For more detailed tracepoint output */ +#define COMPACT_NO_SUITABLE_PAGE 5 +#define COMPACT_NOT_SUITABLE_ZONE 6 /* When adding new state, please change compaction_status_string, too */ /* Used to signal whether compaction detected need_sched() or lock contention */ diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index 139020b55612a5..d46535801f6303 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -164,6 +164,80 @@ TRACE_EVENT(mm_compaction_end, compaction_status_string[__entry->status]) ); +TRACE_EVENT(mm_compaction_try_to_compact_pages, + + TP_PROTO( + int order, + gfp_t gfp_mask, + enum migrate_mode mode), + + TP_ARGS(order, gfp_mask, mode), + + TP_STRUCT__entry( + __field(int, order) + __field(gfp_t, gfp_mask) + __field(enum migrate_mode, mode) + ), + + TP_fast_assign( + __entry->order = order; + __entry->gfp_mask = gfp_mask; + __entry->mode = mode; + ), + + TP_printk("order=%d gfp_mask=0x%x mode=%d", + __entry->order, + __entry->gfp_mask, + (int)__entry->mode) +); + +DECLARE_EVENT_CLASS(mm_compaction_suitable_template, + + TP_PROTO(struct zone *zone, + int order, + int ret), + + TP_ARGS(zone, order, ret), + + TP_STRUCT__entry( + __field(int, nid) + __field(char *, name) + __field(int, order) + __field(int, ret) + ), + + TP_fast_assign( + __entry->nid = zone_to_nid(zone); + __entry->name = (char *)zone->name; + __entry->order = order; + __entry->ret = ret; + ), + + TP_printk("node=%d zone=%-8s order=%d ret=%s", + __entry->nid, + __entry->name, + __entry->order, + compaction_status_string[__entry->ret]) +); + +DEFINE_EVENT(mm_compaction_suitable_template, mm_compaction_finished, + + TP_PROTO(struct zone *zone, + int order, + int ret), + + TP_ARGS(zone, order, ret) +); + +DEFINE_EVENT(mm_compaction_suitable_template, mm_compaction_suitable, + + TP_PROTO(struct zone *zone, + int order, + int ret), + + TP_ARGS(zone, order, ret) +); + #endif /* _TRACE_COMPACTION_H */ /* This part must be outside protection */ diff --git a/mm/compaction.c b/mm/compaction.c index b12df9fe10b4e5..b6ede459c1bbf5 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -41,6 +41,8 @@ static const char *const compaction_status_string[] = { "continue", "partial", "complete", + "no_suitable_page", + "not_suitable_zone", }; #endif @@ -1049,7 +1051,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE; } -static int compact_finished(struct zone *zone, struct compact_control *cc, +static int __compact_finished(struct zone *zone, struct compact_control *cc, const int migratetype) { unsigned int order; @@ -1104,7 +1106,20 @@ static int compact_finished(struct zone *zone, struct compact_control *cc, return COMPACT_PARTIAL; } - return COMPACT_CONTINUE; + return COMPACT_NO_SUITABLE_PAGE; +} + +static int compact_finished(struct zone *zone, struct compact_control *cc, + const int migratetype) +{ + int ret; + + ret = __compact_finished(zone, cc, migratetype); + trace_mm_compaction_finished(zone, cc->order, ret); + if (ret == COMPACT_NO_SUITABLE_PAGE) + ret = COMPACT_CONTINUE; + + return ret; } /* @@ -1114,7 +1129,7 @@ static int compact_finished(struct zone *zone, struct compact_control *cc, * COMPACT_PARTIAL - If the allocation would succeed without compaction * COMPACT_CONTINUE - If compaction should run now */ -unsigned long compaction_suitable(struct zone *zone, int order, +static unsigned long __compaction_suitable(struct zone *zone, int order, int alloc_flags, int classzone_idx) { int fragindex; @@ -1158,11 +1173,24 @@ unsigned long compaction_suitable(struct zone *zone, int order, */ fragindex = fragmentation_index(zone, order); if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold) - return COMPACT_SKIPPED; + return COMPACT_NOT_SUITABLE_ZONE; return COMPACT_CONTINUE; } +unsigned long compaction_suitable(struct zone *zone, int order, + int alloc_flags, int classzone_idx) +{ + unsigned long ret; + + ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx); + trace_mm_compaction_suitable(zone, order, ret); + if (ret == COMPACT_NOT_SUITABLE_ZONE) + ret = COMPACT_SKIPPED; + + return ret; +} + static int compact_zone(struct zone *zone, struct compact_control *cc) { int ret; @@ -1376,6 +1404,8 @@ unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, if (!order || !may_enter_fs || !may_perform_io) return COMPACT_SKIPPED; + trace_mm_compaction_try_to_compact_pages(order, gfp_mask, mode); + /* Compact each zone in the list */ for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx, ac->nodemask) { From 24e2716f63e613cf15d3beba3faa0711bcacc427 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 11 Feb 2015 15:27:09 -0800 Subject: [PATCH 569/601] mm/compaction: add tracepoint to observe behaviour of compaction defer Compaction deferring logic is heavy hammer that block the way to the compaction. It doesn't consider overall system state, so it could prevent user from doing compaction falsely. In other words, even if system has enough range of memory to compact, compaction would be skipped due to compaction deferring logic. This patch add new tracepoint to understand work of deferring logic. This will also help to check compaction success and fail. Signed-off-by: Joonsoo Kim Cc: Vlastimil Babka Cc: Mel Gorman Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 65 +++------------------------- include/trace/events/compaction.h | 56 ++++++++++++++++++++++++ mm/compaction.c | 71 +++++++++++++++++++++++++++++++ 3 files changed, 132 insertions(+), 60 deletions(-) diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 501d7513aac1bb..a014559e4a49d7 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -44,66 +44,11 @@ extern void reset_isolation_suitable(pg_data_t *pgdat); extern unsigned long compaction_suitable(struct zone *zone, int order, int alloc_flags, int classzone_idx); -/* Do not skip compaction more than 64 times */ -#define COMPACT_MAX_DEFER_SHIFT 6 - -/* - * Compaction is deferred when compaction fails to result in a page - * allocation success. 1 << compact_defer_limit compactions are skipped up - * to a limit of 1 << COMPACT_MAX_DEFER_SHIFT - */ -static inline void defer_compaction(struct zone *zone, int order) -{ - zone->compact_considered = 0; - zone->compact_defer_shift++; - - if (order < zone->compact_order_failed) - zone->compact_order_failed = order; - - if (zone->compact_defer_shift > COMPACT_MAX_DEFER_SHIFT) - zone->compact_defer_shift = COMPACT_MAX_DEFER_SHIFT; -} - -/* Returns true if compaction should be skipped this time */ -static inline bool compaction_deferred(struct zone *zone, int order) -{ - unsigned long defer_limit = 1UL << zone->compact_defer_shift; - - if (order < zone->compact_order_failed) - return false; - - /* Avoid possible overflow */ - if (++zone->compact_considered > defer_limit) - zone->compact_considered = defer_limit; - - return zone->compact_considered < defer_limit; -} - -/* - * Update defer tracking counters after successful compaction of given order, - * which means an allocation either succeeded (alloc_success == true) or is - * expected to succeed. - */ -static inline void compaction_defer_reset(struct zone *zone, int order, - bool alloc_success) -{ - if (alloc_success) { - zone->compact_considered = 0; - zone->compact_defer_shift = 0; - } - if (order >= zone->compact_order_failed) - zone->compact_order_failed = order + 1; -} - -/* Returns true if restarting compaction after many failures */ -static inline bool compaction_restarting(struct zone *zone, int order) -{ - if (order < zone->compact_order_failed) - return false; - - return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT && - zone->compact_considered >= 1UL << zone->compact_defer_shift; -} +extern void defer_compaction(struct zone *zone, int order); +extern bool compaction_deferred(struct zone *zone, int order); +extern void compaction_defer_reset(struct zone *zone, int order, + bool alloc_success); +extern bool compaction_restarting(struct zone *zone, int order); #else static inline unsigned long try_to_compact_pages(gfp_t gfp_mask, diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index d46535801f6303..9a6a3fe0fb51ad 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -238,6 +238,62 @@ DEFINE_EVENT(mm_compaction_suitable_template, mm_compaction_suitable, TP_ARGS(zone, order, ret) ); +#ifdef CONFIG_COMPACTION +DECLARE_EVENT_CLASS(mm_compaction_defer_template, + + TP_PROTO(struct zone *zone, int order), + + TP_ARGS(zone, order), + + TP_STRUCT__entry( + __field(int, nid) + __field(char *, name) + __field(int, order) + __field(unsigned int, considered) + __field(unsigned int, defer_shift) + __field(int, order_failed) + ), + + TP_fast_assign( + __entry->nid = zone_to_nid(zone); + __entry->name = (char *)zone->name; + __entry->order = order; + __entry->considered = zone->compact_considered; + __entry->defer_shift = zone->compact_defer_shift; + __entry->order_failed = zone->compact_order_failed; + ), + + TP_printk("node=%d zone=%-8s order=%d order_failed=%d consider=%u limit=%lu", + __entry->nid, + __entry->name, + __entry->order, + __entry->order_failed, + __entry->considered, + 1UL << __entry->defer_shift) +); + +DEFINE_EVENT(mm_compaction_defer_template, mm_compaction_deferred, + + TP_PROTO(struct zone *zone, int order), + + TP_ARGS(zone, order) +); + +DEFINE_EVENT(mm_compaction_defer_template, mm_compaction_defer_compaction, + + TP_PROTO(struct zone *zone, int order), + + TP_ARGS(zone, order) +); + +DEFINE_EVENT(mm_compaction_defer_template, mm_compaction_defer_reset, + + TP_PROTO(struct zone *zone, int order), + + TP_ARGS(zone, order) +); +#endif + #endif /* _TRACE_COMPACTION_H */ /* This part must be outside protection */ diff --git a/mm/compaction.c b/mm/compaction.c index b6ede459c1bbf5..b68736c8a1ce0d 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -124,6 +124,77 @@ static struct page *pageblock_pfn_to_page(unsigned long start_pfn, } #ifdef CONFIG_COMPACTION + +/* Do not skip compaction more than 64 times */ +#define COMPACT_MAX_DEFER_SHIFT 6 + +/* + * Compaction is deferred when compaction fails to result in a page + * allocation success. 1 << compact_defer_limit compactions are skipped up + * to a limit of 1 << COMPACT_MAX_DEFER_SHIFT + */ +void defer_compaction(struct zone *zone, int order) +{ + zone->compact_considered = 0; + zone->compact_defer_shift++; + + if (order < zone->compact_order_failed) + zone->compact_order_failed = order; + + if (zone->compact_defer_shift > COMPACT_MAX_DEFER_SHIFT) + zone->compact_defer_shift = COMPACT_MAX_DEFER_SHIFT; + + trace_mm_compaction_defer_compaction(zone, order); +} + +/* Returns true if compaction should be skipped this time */ +bool compaction_deferred(struct zone *zone, int order) +{ + unsigned long defer_limit = 1UL << zone->compact_defer_shift; + + if (order < zone->compact_order_failed) + return false; + + /* Avoid possible overflow */ + if (++zone->compact_considered > defer_limit) + zone->compact_considered = defer_limit; + + if (zone->compact_considered >= defer_limit) + return false; + + trace_mm_compaction_deferred(zone, order); + + return true; +} + +/* + * Update defer tracking counters after successful compaction of given order, + * which means an allocation either succeeded (alloc_success == true) or is + * expected to succeed. + */ +void compaction_defer_reset(struct zone *zone, int order, + bool alloc_success) +{ + if (alloc_success) { + zone->compact_considered = 0; + zone->compact_defer_shift = 0; + } + if (order >= zone->compact_order_failed) + zone->compact_order_failed = order + 1; + + trace_mm_compaction_defer_reset(zone, order); +} + +/* Returns true if restarting compaction after many failures */ +bool compaction_restarting(struct zone *zone, int order) +{ + if (order < zone->compact_order_failed) + return false; + + return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT && + zone->compact_considered >= 1UL << zone->compact_defer_shift; +} + /* Returns true if the pageblock should be scanned for pages to isolate. */ static inline bool isolation_suitable(struct compact_control *cc, struct page *page) From 077fcf116c8c2bd7ee9487b645aa3b50368db7e1 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 11 Feb 2015 15:27:12 -0800 Subject: [PATCH 570/601] mm/thp: allocate transparent hugepages on local node This make sure that we try to allocate hugepages from local node if allowed by mempolicy. If we can't, we fallback to small page allocation based on mempolicy. This is based on the observation that allocating pages on local node is more beneficial than allocating hugepages on remote node. With this patch applied we may find transparent huge page allocation failures if the current node doesn't have enough freee hugepages. Before this patch such failures result in us retrying the allocation on other nodes in the numa node mask. [akpm@linux-foundation.org: fix comment, add CONFIG_TRANSPARENT_HUGEPAGE dependency] Signed-off-by: Aneesh Kumar K.V Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Cc: David Rientjes Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 4 +++ mm/huge_memory.c | 24 ++++++--------- mm/mempolicy.c | 72 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 15 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index b840e3b2770dd7..60110e06419dd2 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -335,11 +335,15 @@ alloc_pages(gfp_t gfp_mask, unsigned int order) extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, int node); +extern struct page *alloc_hugepage_vma(gfp_t gfp, struct vm_area_struct *vma, + unsigned long addr, int order); #else #define alloc_pages(gfp_mask, order) \ alloc_pages_node(numa_node_id(), gfp_mask, order) #define alloc_pages_vma(gfp_mask, order, vma, addr, node) \ alloc_pages(gfp_mask, order) +#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ + alloc_pages(gfp_mask, order) #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) #define alloc_page_vma(gfp_mask, vma, addr) \ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8897131809804e..0531ea7dd7cf49 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -761,15 +761,6 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp) return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT)) | extra_gfp; } -static inline struct page *alloc_hugepage_vma(int defrag, - struct vm_area_struct *vma, - unsigned long haddr, int nd, - gfp_t extra_gfp) -{ - return alloc_pages_vma(alloc_hugepage_gfpmask(defrag, extra_gfp), - HPAGE_PMD_ORDER, vma, haddr, nd); -} - /* Caller must hold page table lock. */ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd, @@ -790,6 +781,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, unsigned int flags) { + gfp_t gfp; struct page *page; unsigned long haddr = address & HPAGE_PMD_MASK; @@ -824,8 +816,8 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, } return 0; } - page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), - vma, haddr, numa_node_id(), 0); + gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0); + page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER); if (unlikely(!page)) { count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; @@ -1113,10 +1105,12 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, spin_unlock(ptl); alloc: if (transparent_hugepage_enabled(vma) && - !transparent_hugepage_debug_cow()) - new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), - vma, haddr, numa_node_id(), 0); - else + !transparent_hugepage_debug_cow()) { + gfp_t gfp; + + gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0); + new_page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER); + } else new_page = NULL; if (unlikely(!new_page)) { diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0e0961b8c39ceb..8a32873fdbf714 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2030,6 +2030,78 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, return page; } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +/** + * alloc_hugepage_vma: Allocate a hugepage for a VMA + * @gfp: + * %GFP_USER user allocation. + * %GFP_KERNEL kernel allocations, + * %GFP_HIGHMEM highmem/user allocations, + * %GFP_FS allocation should not call back into a file system. + * %GFP_ATOMIC don't sleep. + * + * @vma: Pointer to VMA or NULL if not available. + * @addr: Virtual Address of the allocation. Must be inside the VMA. + * @order: Order of the hugepage for gfp allocation. + * + * This functions allocate a huge page from the kernel page pool and applies + * a NUMA policy associated with the VMA or the current process. + * For policy other than %MPOL_INTERLEAVE, we make sure we allocate hugepage + * only from the current node if the current node is part of the node mask. + * If we can't allocate a hugepage we fail the allocation and don' try to fallback + * to other nodes in the node mask. If the current node is not part of node mask + * or if the NUMA policy is MPOL_INTERLEAVE we use the allocator that can + * fallback to nodes in the policy node mask. + * + * When VMA is not NULL caller must hold down_read on the mmap_sem of the + * mm_struct of the VMA to prevent it from going away. Should be used for + * all allocations for pages that will be mapped into + * user space. Returns NULL when no page can be allocated. + * + * Should be called with vma->vm_mm->mmap_sem held. + * + */ +struct page *alloc_hugepage_vma(gfp_t gfp, struct vm_area_struct *vma, + unsigned long addr, int order) +{ + struct page *page; + nodemask_t *nmask; + struct mempolicy *pol; + int node = numa_node_id(); + unsigned int cpuset_mems_cookie; + +retry_cpuset: + pol = get_vma_policy(vma, addr); + cpuset_mems_cookie = read_mems_allowed_begin(); + /* + * For interleave policy, we don't worry about + * current node. Otherwise if current node is + * in nodemask, try to allocate hugepage from + * the current node. Don't fall back to other nodes + * for THP. + */ + if (unlikely(pol->mode == MPOL_INTERLEAVE)) + goto alloc_with_fallback; + nmask = policy_nodemask(gfp, pol); + if (!nmask || node_isset(node, *nmask)) { + mpol_cond_put(pol); + page = alloc_pages_exact_node(node, gfp, order); + if (unlikely(!page && + read_mems_allowed_retry(cpuset_mems_cookie))) + goto retry_cpuset; + return page; + } +alloc_with_fallback: + mpol_cond_put(pol); + /* + * if current node is not part of node mask, try + * the allocation from any node, and we can do retry + * in that case. + */ + return alloc_pages_vma(gfp, order, vma, addr, node); +} +#endif + /** * alloc_pages_current - Allocate pages. * From be97a41b291e495d6cb767b3ee0f84ed05804892 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 11 Feb 2015 15:27:15 -0800 Subject: [PATCH 571/601] mm/mempolicy.c: merge alloc_hugepage_vma to alloc_pages_vma The previous commit ("mm/thp: Allocate transparent hugepages on local node") introduced alloc_hugepage_vma() to mm/mempolicy.c to perform a special policy for THP allocations. The function has the same interface as alloc_pages_vma(), shares a lot of boilerplate code and a long comment. This patch merges the hugepage special case into alloc_pages_vma. The extra if condition should be cheap enough price to pay. We also prevent a (however unlikely) race with parallel mems_allowed update, which could make hugepage allocation restart only within the fallback call to alloc_hugepage_vma() and not reconsider the special rule in alloc_hugepage_vma(). Also by making sure mpol_cond_put(pol) is always called before actual allocation attempt, we can use a single exit path within the function. Also update the comment for missing node parameter and obsolete reference to mm_sem. Signed-off-by: Vlastimil Babka Cc: Aneesh Kumar K.V Cc: Kirill A. Shutemov Cc: Vlastimil Babka Cc: David Rientjes Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 12 ++--- mm/mempolicy.c | 118 +++++++++++++------------------------------- 2 files changed, 39 insertions(+), 91 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 60110e06419dd2..51bd1e72a91710 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -334,22 +334,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order) } extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, - int node); -extern struct page *alloc_hugepage_vma(gfp_t gfp, struct vm_area_struct *vma, - unsigned long addr, int order); + int node, bool hugepage); +#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ + alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true) #else #define alloc_pages(gfp_mask, order) \ alloc_pages_node(numa_node_id(), gfp_mask, order) -#define alloc_pages_vma(gfp_mask, order, vma, addr, node) \ +#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ alloc_pages(gfp_mask, order) #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ alloc_pages(gfp_mask, order) #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) #define alloc_page_vma(gfp_mask, vma, addr) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id()) + alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false) #define alloc_page_vma_node(gfp_mask, vma, addr, node) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, node) + alloc_pages_vma(gfp_mask, 0, vma, addr, node, false) extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order); extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 8a32873fdbf714..acbbf4c821e214 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1988,120 +1988,68 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, * @order:Order of the GFP allocation. * @vma: Pointer to VMA or NULL if not available. * @addr: Virtual Address of the allocation. Must be inside the VMA. + * @node: Which node to prefer for allocation (modulo policy). + * @hugepage: for hugepages try only the preferred node if possible * * This function allocates a page from the kernel page pool and applies * a NUMA policy associated with the VMA or the current process. * When VMA is not NULL caller must hold down_read on the mmap_sem of the * mm_struct of the VMA to prevent it from going away. Should be used for - * all allocations for pages that will be mapped into - * user space. Returns NULL when no page can be allocated. - * - * Should be called with the mm_sem of the vma hold. + * all allocations for pages that will be mapped into user space. Returns + * NULL when no page can be allocated. */ struct page * alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, - unsigned long addr, int node) + unsigned long addr, int node, bool hugepage) { struct mempolicy *pol; struct page *page; unsigned int cpuset_mems_cookie; + struct zonelist *zl; + nodemask_t *nmask; retry_cpuset: pol = get_vma_policy(vma, addr); cpuset_mems_cookie = read_mems_allowed_begin(); - if (unlikely(pol->mode == MPOL_INTERLEAVE)) { + if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage && + pol->mode != MPOL_INTERLEAVE)) { + /* + * For hugepage allocation and non-interleave policy which + * allows the current node, we only try to allocate from the + * current node and don't fall back to other nodes, as the + * cost of remote accesses would likely offset THP benefits. + * + * If the policy is interleave, or does not allow the current + * node in its nodemask, we allocate the standard way. + */ + nmask = policy_nodemask(gfp, pol); + if (!nmask || node_isset(node, *nmask)) { + mpol_cond_put(pol); + page = alloc_pages_exact_node(node, gfp, order); + goto out; + } + } + + if (pol->mode == MPOL_INTERLEAVE) { unsigned nid; nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order); mpol_cond_put(pol); page = alloc_page_interleave(gfp, order, nid); - if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) - goto retry_cpuset; - - return page; + goto out; } - page = __alloc_pages_nodemask(gfp, order, - policy_zonelist(gfp, pol, node), - policy_nodemask(gfp, pol)); + + nmask = policy_nodemask(gfp, pol); + zl = policy_zonelist(gfp, pol, node); mpol_cond_put(pol); + page = __alloc_pages_nodemask(gfp, order, zl, nmask); +out: if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) goto retry_cpuset; return page; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -/** - * alloc_hugepage_vma: Allocate a hugepage for a VMA - * @gfp: - * %GFP_USER user allocation. - * %GFP_KERNEL kernel allocations, - * %GFP_HIGHMEM highmem/user allocations, - * %GFP_FS allocation should not call back into a file system. - * %GFP_ATOMIC don't sleep. - * - * @vma: Pointer to VMA or NULL if not available. - * @addr: Virtual Address of the allocation. Must be inside the VMA. - * @order: Order of the hugepage for gfp allocation. - * - * This functions allocate a huge page from the kernel page pool and applies - * a NUMA policy associated with the VMA or the current process. - * For policy other than %MPOL_INTERLEAVE, we make sure we allocate hugepage - * only from the current node if the current node is part of the node mask. - * If we can't allocate a hugepage we fail the allocation and don' try to fallback - * to other nodes in the node mask. If the current node is not part of node mask - * or if the NUMA policy is MPOL_INTERLEAVE we use the allocator that can - * fallback to nodes in the policy node mask. - * - * When VMA is not NULL caller must hold down_read on the mmap_sem of the - * mm_struct of the VMA to prevent it from going away. Should be used for - * all allocations for pages that will be mapped into - * user space. Returns NULL when no page can be allocated. - * - * Should be called with vma->vm_mm->mmap_sem held. - * - */ -struct page *alloc_hugepage_vma(gfp_t gfp, struct vm_area_struct *vma, - unsigned long addr, int order) -{ - struct page *page; - nodemask_t *nmask; - struct mempolicy *pol; - int node = numa_node_id(); - unsigned int cpuset_mems_cookie; - -retry_cpuset: - pol = get_vma_policy(vma, addr); - cpuset_mems_cookie = read_mems_allowed_begin(); - /* - * For interleave policy, we don't worry about - * current node. Otherwise if current node is - * in nodemask, try to allocate hugepage from - * the current node. Don't fall back to other nodes - * for THP. - */ - if (unlikely(pol->mode == MPOL_INTERLEAVE)) - goto alloc_with_fallback; - nmask = policy_nodemask(gfp, pol); - if (!nmask || node_isset(node, *nmask)) { - mpol_cond_put(pol); - page = alloc_pages_exact_node(node, gfp, order); - if (unlikely(!page && - read_mems_allowed_retry(cpuset_mems_cookie))) - goto retry_cpuset; - return page; - } -alloc_with_fallback: - mpol_cond_put(pol); - /* - * if current node is not part of node mask, try - * the allocation from any node, and we can do retry - * in that case. - */ - return alloc_pages_vma(gfp, order, vma, addr, node); -} -#endif - /** * alloc_pages_current - Allocate pages. * From f0818f472d8d527a96ec9cc2c3a56223497f9dd3 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 11 Feb 2015 15:27:17 -0800 Subject: [PATCH 572/601] mm: gup: add get_user_pages_locked and get_user_pages_unlocked FAULT_FOLL_ALLOW_RETRY allows the page fault to drop the mmap_sem for reading to reduce the mmap_sem contention (for writing), like while waiting for I/O completion. The problem is that right now practically no get_user_pages call uses FAULT_FOLL_ALLOW_RETRY, so we're not leveraging that nifty feature. Andres fixed it for the KVM page fault. However get_user_pages_fast remains uncovered, and 99% of other get_user_pages aren't using it either (the only exception being FOLL_NOWAIT in KVM which is really nonblocking and in fact it doesn't even release the mmap_sem). So this patchsets extends the optimization Andres did in the KVM page fault to the whole kernel. It makes most important places (including gup_fast) to use FAULT_FOLL_ALLOW_RETRY to reduce the mmap_sem hold times during I/O. The only few places that remains uncovered are drivers like v4l and other exceptions that tends to work on their own memory and they're not working on random user memory (for example like O_DIRECT that uses gup_fast and is fully covered by this patch). A follow up patch should probably also add a printk_once warning to get_user_pages that should go obsolete and be phased out eventually. The "vmas" parameter of get_user_pages makes it fundamentally incompatible with FAULT_FOLL_ALLOW_RETRY (vmas array becomes meaningless the moment the mmap_sem is released). While this is just an optimization, this becomes an absolute requirement for the userfaultfd feature http://lwn.net/Articles/615086/ . The userfaultfd allows to block the page fault, and in order to do so I need to drop the mmap_sem first. So this patch also ensures that all memory where userfaultfd could be registered by KVM, the very first fault (no matter if it is a regular page fault, or a get_user_pages) always has FAULT_FOLL_ALLOW_RETRY set. Then the userfaultfd blocks and it is waken only when the pagetable is already mapped. The second fault attempt after the wakeup doesn't need FAULT_FOLL_ALLOW_RETRY, so it's ok to retry without it. This patch (of 5): We can leverage the VM_FAULT_RETRY functionality in the page fault paths better by using either get_user_pages_locked or get_user_pages_unlocked. The former allows conversion of get_user_pages invocations that will have to pass a "&locked" parameter to know if the mmap_sem was dropped during the call. Example from: down_read(&mm->mmap_sem); do_something() get_user_pages(tsk, mm, ..., pages, NULL); up_read(&mm->mmap_sem); to: int locked = 1; down_read(&mm->mmap_sem); do_something() get_user_pages_locked(tsk, mm, ..., pages, &locked); if (locked) up_read(&mm->mmap_sem); The latter is suitable only as a drop in replacement of the form: down_read(&mm->mmap_sem); get_user_pages(tsk, mm, ..., pages, NULL); up_read(&mm->mmap_sem); into: get_user_pages_unlocked(tsk, mm, ..., pages); Where tsk, mm, the intermediate "..." paramters and "pages" can be any value as before. Just the last parameter of get_user_pages (vmas) must be NULL for get_user_pages_locked|unlocked to be usable (the latter original form wouldn't have been safe anyway if vmas wasn't null, for the former we just make it explicit by dropping the parameter). If vmas is not NULL these two methods cannot be used. Signed-off-by: Andrea Arcangeli Reviewed-by: Andres Lagar-Cavilla Reviewed-by: Peter Feiner Reviewed-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 7 ++ mm/gup.c | 177 ++++++++++++++++++++++++++++++++++++++++++--- mm/nommu.c | 23 ++++++ 3 files changed, 196 insertions(+), 11 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 644990b83cda68..fc499e67547566 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1261,6 +1261,13 @@ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); +long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + int *locked); +long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); struct kvec; diff --git a/mm/gup.c b/mm/gup.c index 1a8ab05918e018..71a37738a3269a 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -575,6 +575,165 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, return 0; } +static __always_inline long __get_user_pages_locked(struct task_struct *tsk, + struct mm_struct *mm, + unsigned long start, + unsigned long nr_pages, + int write, int force, + struct page **pages, + struct vm_area_struct **vmas, + int *locked, bool notify_drop) +{ + int flags = FOLL_TOUCH; + long ret, pages_done; + bool lock_dropped; + + if (locked) { + /* if VM_FAULT_RETRY can be returned, vmas become invalid */ + BUG_ON(vmas); + /* check caller initialized locked */ + BUG_ON(*locked != 1); + } + + if (pages) + flags |= FOLL_GET; + if (write) + flags |= FOLL_WRITE; + if (force) + flags |= FOLL_FORCE; + + pages_done = 0; + lock_dropped = false; + for (;;) { + ret = __get_user_pages(tsk, mm, start, nr_pages, flags, pages, + vmas, locked); + if (!locked) + /* VM_FAULT_RETRY couldn't trigger, bypass */ + return ret; + + /* VM_FAULT_RETRY cannot return errors */ + if (!*locked) { + BUG_ON(ret < 0); + BUG_ON(ret >= nr_pages); + } + + if (!pages) + /* If it's a prefault don't insist harder */ + return ret; + + if (ret > 0) { + nr_pages -= ret; + pages_done += ret; + if (!nr_pages) + break; + } + if (*locked) { + /* VM_FAULT_RETRY didn't trigger */ + if (!pages_done) + pages_done = ret; + break; + } + /* VM_FAULT_RETRY triggered, so seek to the faulting offset */ + pages += ret; + start += ret << PAGE_SHIFT; + + /* + * Repeat on the address that fired VM_FAULT_RETRY + * without FAULT_FLAG_ALLOW_RETRY but with + * FAULT_FLAG_TRIED. + */ + *locked = 1; + lock_dropped = true; + down_read(&mm->mmap_sem); + ret = __get_user_pages(tsk, mm, start, 1, flags | FOLL_TRIED, + pages, NULL, NULL); + if (ret != 1) { + BUG_ON(ret > 1); + if (!pages_done) + pages_done = ret; + break; + } + nr_pages--; + pages_done++; + if (!nr_pages) + break; + pages++; + start += PAGE_SIZE; + } + if (notify_drop && lock_dropped && *locked) { + /* + * We must let the caller know we temporarily dropped the lock + * and so the critical section protected by it was lost. + */ + up_read(&mm->mmap_sem); + *locked = 0; + } + return pages_done; +} + +/* + * We can leverage the VM_FAULT_RETRY functionality in the page fault + * paths better by using either get_user_pages_locked() or + * get_user_pages_unlocked(). + * + * get_user_pages_locked() is suitable to replace the form: + * + * down_read(&mm->mmap_sem); + * do_something() + * get_user_pages(tsk, mm, ..., pages, NULL); + * up_read(&mm->mmap_sem); + * + * to: + * + * int locked = 1; + * down_read(&mm->mmap_sem); + * do_something() + * get_user_pages_locked(tsk, mm, ..., pages, &locked); + * if (locked) + * up_read(&mm->mmap_sem); + */ +long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + int *locked) +{ + return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, + pages, NULL, locked, true); +} +EXPORT_SYMBOL(get_user_pages_locked); + +/* + * get_user_pages_unlocked() is suitable to replace the form: + * + * down_read(&mm->mmap_sem); + * get_user_pages(tsk, mm, ..., pages, NULL); + * up_read(&mm->mmap_sem); + * + * with: + * + * get_user_pages_unlocked(tsk, mm, ..., pages); + * + * It is functionally equivalent to get_user_pages_fast so + * get_user_pages_fast should be used instead, if the two parameters + * "tsk" and "mm" are respectively equal to current and current->mm, + * or if "force" shall be set to 1 (get_user_pages_fast misses the + * "force" parameter). + */ +long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages) +{ + long ret; + int locked = 1; + down_read(&mm->mmap_sem); + ret = __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, + pages, NULL, &locked, false); + if (locked) + up_read(&mm->mmap_sem); + return ret; +} +EXPORT_SYMBOL(get_user_pages_unlocked); + /* * get_user_pages() - pin user pages in memory * @tsk: the task_struct to use for page fault accounting, or @@ -624,22 +783,18 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, * use the correct cache flushing APIs. * * See also get_user_pages_fast, for performance critical applications. + * + * get_user_pages should be phased out in favor of + * get_user_pages_locked|unlocked or get_user_pages_fast. Nothing + * should use get_user_pages because it cannot pass + * FAULT_FLAG_ALLOW_RETRY to handle_mm_fault. */ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas) { - int flags = FOLL_TOUCH; - - if (pages) - flags |= FOLL_GET; - if (write) - flags |= FOLL_WRITE; - if (force) - flags |= FOLL_FORCE; - - return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas, - NULL); + return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, + pages, vmas, NULL, false); } EXPORT_SYMBOL(get_user_pages); diff --git a/mm/nommu.c b/mm/nommu.c index 541bed64e34870..bfb690b0f986e5 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -214,6 +214,29 @@ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, } EXPORT_SYMBOL(get_user_pages); +long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + int *locked) +{ + return get_user_pages(tsk, mm, start, nr_pages, write, force, + pages, NULL); +} +EXPORT_SYMBOL(get_user_pages_locked); + +long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages) +{ + long ret; + down_read(&mm->mmap_sem); + ret = get_user_pages(tsk, mm, start, nr_pages, write, force, + pages, NULL); + up_read(&mm->mmap_sem); + return ret; +} +EXPORT_SYMBOL(get_user_pages_unlocked); + /** * follow_pfn - look up PFN at a user virtual address * @vma: memory mapping From 0fd71a56f41d4ffabeda1dae9ff5ed4f34d4e935 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 11 Feb 2015 15:27:20 -0800 Subject: [PATCH 573/601] mm: gup: add __get_user_pages_unlocked to customize gup_flags Some callers (like KVM) may want to set the gup_flags like FOLL_HWPOSION to get a proper -EHWPOSION retval instead of -EFAULT to take a more appropriate action if get_user_pages runs into a memory failure. Signed-off-by: Andrea Arcangeli Reviewed-by: Kirill A. Shutemov Cc: Andres Lagar-Cavilla Cc: Peter Feiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++++ mm/gup.c | 44 ++++++++++++++++++++++++++++++++------------ mm/nommu.c | 16 +++++++++++++--- 3 files changed, 49 insertions(+), 15 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index fc499e67547566..3696b3bd1d7e24 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1265,6 +1265,10 @@ long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, int *locked); +long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + unsigned int gup_flags); long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages); diff --git a/mm/gup.c b/mm/gup.c index 71a37738a3269a..dad5875fb766b5 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -582,9 +582,9 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, int write, int force, struct page **pages, struct vm_area_struct **vmas, - int *locked, bool notify_drop) + int *locked, bool notify_drop, + unsigned int flags) { - int flags = FOLL_TOUCH; long ret, pages_done; bool lock_dropped; @@ -698,10 +698,36 @@ long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, int *locked) { return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, - pages, NULL, locked, true); + pages, NULL, locked, true, FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages_locked); +/* + * Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows to + * pass additional gup_flags as last parameter (like FOLL_HWPOISON). + * + * NOTE: here FOLL_TOUCH is not set implicitly and must be set by the + * caller if required (just like with __get_user_pages). "FOLL_GET", + * "FOLL_WRITE" and "FOLL_FORCE" are set implicitly as needed + * according to the parameters "pages", "write", "force" + * respectively. + */ +__always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + unsigned int gup_flags) +{ + long ret; + int locked = 1; + down_read(&mm->mmap_sem); + ret = __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, + pages, NULL, &locked, false, gup_flags); + if (locked) + up_read(&mm->mmap_sem); + return ret; +} +EXPORT_SYMBOL(__get_user_pages_unlocked); + /* * get_user_pages_unlocked() is suitable to replace the form: * @@ -723,14 +749,8 @@ long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages) { - long ret; - int locked = 1; - down_read(&mm->mmap_sem); - ret = __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, - pages, NULL, &locked, false); - if (locked) - up_read(&mm->mmap_sem); - return ret; + return __get_user_pages_unlocked(tsk, mm, start, nr_pages, write, + force, pages, FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages_unlocked); @@ -794,7 +814,7 @@ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, int force, struct page **pages, struct vm_area_struct **vmas) { return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, - pages, vmas, NULL, false); + pages, vmas, NULL, false, FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages); diff --git a/mm/nommu.c b/mm/nommu.c index bfb690b0f986e5..4d1b8a19986770 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -224,9 +224,10 @@ long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, } EXPORT_SYMBOL(get_user_pages_locked); -long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages) +long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + unsigned int gup_flags) { long ret; down_read(&mm->mmap_sem); @@ -235,6 +236,15 @@ long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, up_read(&mm->mmap_sem); return ret; } +EXPORT_SYMBOL(__get_user_pages_unlocked); + +long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages) +{ + return __get_user_pages_unlocked(tsk, mm, start, nr_pages, write, + force, pages, 0); +} EXPORT_SYMBOL(get_user_pages_unlocked); /** From a7b780750e1a1c7833812681e1f8fa30bbb06802 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 11 Feb 2015 15:27:23 -0800 Subject: [PATCH 574/601] mm: gup: use get_user_pages_unlocked within get_user_pages_fast This allows the get_user_pages_fast slow path to release the mmap_sem before blocking. Signed-off-by: Andrea Arcangeli Reviewed-by: Kirill A. Shutemov Cc: Andres Lagar-Cavilla Cc: Peter Feiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/mm/gup.c | 8 +++----- arch/s390/mm/gup.c | 6 ++---- arch/sh/mm/gup.c | 6 ++---- arch/sparc/mm/gup.c | 6 ++---- arch/x86/mm/gup.c | 7 +++---- mm/gup.c | 6 ++---- mm/util.c | 10 ++-------- 7 files changed, 16 insertions(+), 33 deletions(-) diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index 70795a67a27622..349995d19c7f2c 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -301,11 +301,9 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, start += nr << PAGE_SHIFT; pages += nr; - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, - (end - start) >> PAGE_SHIFT, - write, 0, pages, NULL); - up_read(&mm->mmap_sem); + ret = get_user_pages_unlocked(current, mm, start, + (end - start) >> PAGE_SHIFT, + write, 0, pages); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 639fce46400885..5c586c78ca8deb 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -235,10 +235,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, /* Try to get the remaining pages with get_user_pages */ start += nr << PAGE_SHIFT; pages += nr; - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, - nr_pages - nr, write, 0, pages, NULL); - up_read(&mm->mmap_sem); + ret = get_user_pages_unlocked(current, mm, start, + nr_pages - nr, write, 0, pages); /* Have to be a bit careful with return values */ if (nr > 0) ret = (ret < 0) ? nr : ret + nr; diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c index 37458f38b22093..e15f52a17b6c1c 100644 --- a/arch/sh/mm/gup.c +++ b/arch/sh/mm/gup.c @@ -257,10 +257,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, start += nr << PAGE_SHIFT; pages += nr; - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, - (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); - up_read(&mm->mmap_sem); + ret = get_user_pages_unlocked(current, mm, start, + (end - start) >> PAGE_SHIFT, write, 0, pages); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index ae6ce383d4df6e..2e5c4fc2daa91e 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -249,10 +249,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, start += nr << PAGE_SHIFT; pages += nr; - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, - (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); - up_read(&mm->mmap_sem); + ret = get_user_pages_unlocked(current, mm, start, + (end - start) >> PAGE_SHIFT, write, 0, pages); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 224b14235e967b..89df70e0caa6b9 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -388,10 +388,9 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, start += nr << PAGE_SHIFT; pages += nr; - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, - (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); - up_read(&mm->mmap_sem); + ret = get_user_pages_unlocked(current, mm, start, + (end - start) >> PAGE_SHIFT, + write, 0, pages); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/mm/gup.c b/mm/gup.c index dad5875fb766b5..c2da1163986aa5 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1243,10 +1243,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, start += nr << PAGE_SHIFT; pages += nr; - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, - nr_pages - nr, write, 0, pages, NULL); - up_read(&mm->mmap_sem); + ret = get_user_pages_unlocked(current, mm, start, + nr_pages - nr, write, 0, pages); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/mm/util.c b/mm/util.c index fec39d4509a958..f3ef639c4857e5 100644 --- a/mm/util.c +++ b/mm/util.c @@ -240,14 +240,8 @@ int __weak get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { struct mm_struct *mm = current->mm; - int ret; - - down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, start, nr_pages, - write, 0, pages, NULL); - up_read(&mm->mmap_sem); - - return ret; + return get_user_pages_unlocked(current, mm, start, nr_pages, + write, 0, pages); } EXPORT_SYMBOL_GPL(get_user_pages_fast); From 7e339128496284cc21977fba5416166ee81f5172 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 11 Feb 2015 15:27:26 -0800 Subject: [PATCH 575/601] mm: gup: use get_user_pages_unlocked This allows those get_user_pages calls to pass FAULT_FLAG_ALLOW_RETRY to the page fault in order to release the mmap_sem during the I/O. Signed-off-by: Andrea Arcangeli Reviewed-by: Kirill A. Shutemov Cc: Andres Lagar-Cavilla Cc: Peter Feiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/media/pci/ivtv/ivtv-udma.c | 6 ++---- drivers/scsi/st.c | 7 ++----- drivers/video/fbdev/pvr2fb.c | 6 ++---- mm/process_vm_access.c | 7 ++----- net/ceph/pagevec.c | 6 ++---- 5 files changed, 10 insertions(+), 22 deletions(-) diff --git a/drivers/media/pci/ivtv/ivtv-udma.c b/drivers/media/pci/ivtv/ivtv-udma.c index bee2329e0b2e20..24152accc66c75 100644 --- a/drivers/media/pci/ivtv/ivtv-udma.c +++ b/drivers/media/pci/ivtv/ivtv-udma.c @@ -124,10 +124,8 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr, } /* Get user pages for DMA Xfer */ - down_read(¤t->mm->mmap_sem); - err = get_user_pages(current, current->mm, - user_dma.uaddr, user_dma.page_count, 0, 1, dma->map, NULL); - up_read(¤t->mm->mmap_sem); + err = get_user_pages_unlocked(current, current->mm, + user_dma.uaddr, user_dma.page_count, 0, 1, dma->map); if (user_dma.page_count != err) { IVTV_DEBUG_WARN("failed to map user pages, returned %d instead of %d\n", diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 128d3b55bdd9fc..9a1c34205254f6 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -4551,18 +4551,15 @@ static int sgl_map_user_pages(struct st_buffer *STbp, return -ENOMEM; /* Try to fault in all of the necessary pages */ - down_read(¤t->mm->mmap_sem); /* rw==READ means read from drive, write into memory area */ - res = get_user_pages( + res = get_user_pages_unlocked( current, current->mm, uaddr, nr_pages, rw == READ, 0, /* don't force */ - pages, - NULL); - up_read(¤t->mm->mmap_sem); + pages); /* Errors and no page mapped should return here */ if (res < nr_pages) diff --git a/drivers/video/fbdev/pvr2fb.c b/drivers/video/fbdev/pvr2fb.c index 7c74f58fc10194..0e24eb9c219c04 100644 --- a/drivers/video/fbdev/pvr2fb.c +++ b/drivers/video/fbdev/pvr2fb.c @@ -686,10 +686,8 @@ static ssize_t pvr2fb_write(struct fb_info *info, const char *buf, if (!pages) return -ENOMEM; - down_read(¤t->mm->mmap_sem); - ret = get_user_pages(current, current->mm, (unsigned long)buf, - nr_pages, WRITE, 0, pages, NULL); - up_read(¤t->mm->mmap_sem); + ret = get_user_pages_unlocked(current, current->mm, (unsigned long)buf, + nr_pages, WRITE, 0, pages); if (ret < nr_pages) { nr_pages = ret; diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 5077afcd9e116b..b1597690530ce8 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -99,11 +99,8 @@ static int process_vm_rw_single_vec(unsigned long addr, size_t bytes; /* Get the pages we're interested in */ - down_read(&mm->mmap_sem); - pages = get_user_pages(task, mm, pa, pages, - vm_write, 0, process_pages, NULL); - up_read(&mm->mmap_sem); - + pages = get_user_pages_unlocked(task, mm, pa, pages, + vm_write, 0, process_pages); if (pages <= 0) return -EFAULT; diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 555013034f7a89..096d91447e06e8 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -23,17 +23,15 @@ struct page **ceph_get_direct_page_vector(const void __user *data, if (!pages) return ERR_PTR(-ENOMEM); - down_read(¤t->mm->mmap_sem); while (got < num_pages) { - rc = get_user_pages(current, current->mm, + rc = get_user_pages_unlocked(current, current->mm, (unsigned long)data + ((unsigned long)got * PAGE_SIZE), - num_pages - got, write_page, 0, pages + got, NULL); + num_pages - got, write_page, 0, pages + got); if (rc < 0) break; BUG_ON(rc == 0); got += rc; } - up_read(¤t->mm->mmap_sem); if (rc < 0) goto fail; return pages; From 0664e57ff0c68cbca012a45a38288fa277eb6795 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 11 Feb 2015 15:27:28 -0800 Subject: [PATCH 576/601] mm: gup: kvm use get_user_pages_unlocked Use the more generic get_user_pages_unlocked which has the additional benefit of passing FAULT_FLAG_ALLOW_RETRY at the very first page fault (which allows the first page fault in an unmapped area to be always able to block indefinitely by being allowed to release the mmap_sem). Signed-off-by: Andrea Arcangeli Reviewed-by: Andres Lagar-Cavilla Reviewed-by: Kirill A. Shutemov Cc: Peter Feiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kvm_host.h | 11 --------- virt/kvm/async_pf.c | 2 +- virt/kvm/kvm_main.c | 50 ++++------------------------------------ 3 files changed, 5 insertions(+), 58 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 26f106022c8869..d189ee098aa2dd 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -200,17 +200,6 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif -/* - * Carry out a gup that requires IO. Allow the mm to relinquish the mmap - * semaphore if the filemap/swap has to wait on a page lock. pagep == NULL - * controls whether we retry the gup one more time to completion in that case. - * Typically this is called after a FAULT_FLAG_RETRY_NOWAIT in the main tdp - * handler. - */ -int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm, - unsigned long addr, bool write_fault, - struct page **pagep); - enum { OUTSIDE_GUEST_MODE, IN_GUEST_MODE, diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 5ff7f7f2689afe..44660aee335f93 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -80,7 +80,7 @@ static void async_pf_execute(struct work_struct *work) might_sleep(); - kvm_get_user_page_io(NULL, mm, addr, 1, NULL); + get_user_pages_unlocked(NULL, mm, addr, 1, 1, 0, NULL); kvm_async_page_present_sync(vcpu, apf); spin_lock(&vcpu->async_pf.lock); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1cc6e2e1998270..458b9b14b15c52 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1128,43 +1128,6 @@ static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL); } -int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm, - unsigned long addr, bool write_fault, - struct page **pagep) -{ - int npages; - int locked = 1; - int flags = FOLL_TOUCH | FOLL_HWPOISON | - (pagep ? FOLL_GET : 0) | - (write_fault ? FOLL_WRITE : 0); - - /* - * If retrying the fault, we get here *not* having allowed the filemap - * to wait on the page lock. We should now allow waiting on the IO with - * the mmap semaphore released. - */ - down_read(&mm->mmap_sem); - npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL, - &locked); - if (!locked) { - VM_BUG_ON(npages); - - if (!pagep) - return 0; - - /* - * The previous call has now waited on the IO. Now we can - * retry and complete. Pass TRIED to ensure we do not re - * schedule async IO (see e.g. filemap_fault). - */ - down_read(&mm->mmap_sem); - npages = __get_user_pages(tsk, mm, addr, 1, flags | FOLL_TRIED, - pagep, NULL, NULL); - } - up_read(&mm->mmap_sem); - return npages; -} - static inline int check_user_page_hwpoison(unsigned long addr) { int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE; @@ -1227,15 +1190,10 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, npages = get_user_page_nowait(current, current->mm, addr, write_fault, page); up_read(¤t->mm->mmap_sem); - } else { - /* - * By now we have tried gup_fast, and possibly async_pf, and we - * are certainly not atomic. Time to retry the gup, allowing - * mmap semaphore to be relinquished in the case of IO. - */ - npages = kvm_get_user_page_io(current, current->mm, addr, - write_fault, page); - } + } else + npages = __get_user_pages_unlocked(current, current->mm, addr, 1, + write_fault, 0, page, + FOLL_TOUCH|FOLL_HWPOISON); if (npages != 1) return npages; From 05fbf357d94152171bc50f8a369390f1f16efd89 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 11 Feb 2015 15:27:31 -0800 Subject: [PATCH 577/601] proc/pagemap: walk page tables under pte lock Lockless access to pte in pagemap_pte_range() might race with page migration and trigger BUG_ON(!PageLocked()) in migration_entry_to_page(): CPU A (pagemap) CPU B (migration) lock_page() try_to_unmap(page, TTU_MIGRATION...) make_migration_entry() set_pte_at() pte_to_pagemap_entry() remove_migration_ptes() unlock_page() if(is_migration_entry()) migration_entry_to_page() BUG_ON(!PageLocked(page)) Also lockless read might be non-atomic if pte is larger than wordsize. Other pte walkers (smaps, numa_maps, clear_refs) already lock ptes. Fixes: 052fb0d635df ("proc: report file/anon bit in /proc/pid/pagemap") Signed-off-by: Konstantin Khlebnikov Reported-by: Andrey Ryabinin Reviewed-by: Cyrill Gorcunov Acked-by: Naoya Horiguchi Acked-by: Kirill A. Shutemov Cc: [3.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index e6e0abeb5d1240..eeab30fcffcc1d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1056,7 +1056,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct vm_area_struct *vma; struct pagemapread *pm = walk->private; spinlock_t *ptl; - pte_t *pte; + pte_t *pte, *orig_pte; int err = 0; /* find the first VMA at or above 'addr' */ @@ -1117,15 +1117,19 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, BUG_ON(is_vm_hugetlb_page(vma)); /* Addresses in the VMA. */ - for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) { + orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + for (; addr < min(end, vma->vm_end); pte++, addr += PAGE_SIZE) { pagemap_entry_t pme; - pte = pte_offset_map(pmd, addr); + pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); - pte_unmap(pte); err = add_to_pagemap(addr, &pme, pm); if (err) - return err; + break; } + pte_unmap_unlock(orig_pte, ptl); + + if (err) + return err; if (addr == end) break; From 0b1fbfe50006c41014cc25660c0e735d21c34939 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:27:34 -0800 Subject: [PATCH 578/601] mm/pagewalk: remove pgd_entry() and pud_entry() Currently no user of page table walker sets ->pgd_entry() or ->pud_entry(), so checking their existence in each loop is just wasting CPU cycle. So let's remove it to reduce overhead. Signed-off-by: Naoya Horiguchi Acked-by: Kirill A. Shutemov Cc: Andrea Arcangeli Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: Kirill A. Shutemov Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 6 ------ mm/pagewalk.c | 9 ++------- 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 3696b3bd1d7e24..f6106d3f3dab55 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1164,8 +1164,6 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, /** * mm_walk - callbacks for walk_page_range - * @pgd_entry: if set, called for each non-empty PGD (top-level) entry - * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry * this handler is required to be able to handle * pmd_trans_huge() pmds. They may simply choose to @@ -1179,10 +1177,6 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, * (see walk_page_range for more details) */ struct mm_walk { - int (*pgd_entry)(pgd_t *pgd, unsigned long addr, - unsigned long next, struct mm_walk *walk); - int (*pud_entry)(pud_t *pud, unsigned long addr, - unsigned long next, struct mm_walk *walk); int (*pmd_entry)(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk); int (*pte_entry)(pte_t *pte, unsigned long addr, diff --git a/mm/pagewalk.c b/mm/pagewalk.c index b264bda46e1be6..b793ef149da20c 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -86,9 +86,7 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, break; continue; } - if (walk->pud_entry) - err = walk->pud_entry(pud, addr, next, walk); - if (!err && (walk->pmd_entry || walk->pte_entry)) + if (walk->pmd_entry || walk->pte_entry) err = walk_pmd_range(pud, addr, next, walk); if (err) break; @@ -237,10 +235,7 @@ int walk_page_range(unsigned long addr, unsigned long end, pgd++; continue; } - if (walk->pgd_entry) - err = walk->pgd_entry(pgd, addr, next, walk); - if (!err && - (walk->pud_entry || walk->pmd_entry || walk->pte_entry)) + if (walk->pmd_entry || walk->pte_entry) err = walk_pud_range(pgd, addr, next, walk); if (err) break; From fafaa4264eba49fd10695c193a82760558d093f4 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:27:37 -0800 Subject: [PATCH 579/601] pagewalk: improve vma handling Current implementation of page table walker has a fundamental problem in vma handling, which started when we tried to handle vma(VM_HUGETLB). Because it's done in pgd loop, considering vma boundary makes code complicated and bug-prone. From the users viewpoint, some user checks some vma-related condition to determine whether the user really does page walk over the vma. In order to solve these, this patch moves vma check outside pgd loop and introduce a new callback ->test_walk(). Signed-off-by: Naoya Horiguchi Acked-by: Kirill A. Shutemov Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 15 +++- mm/pagewalk.c | 206 +++++++++++++++++++++++++-------------------- 2 files changed, 129 insertions(+), 92 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index f6106d3f3dab55..3891a368e5e041 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1171,10 +1171,16 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, * @pte_entry: if set, called for each non-empty PTE (4th-level) entry * @pte_hole: if set, called for each hole at all levels * @hugetlb_entry: if set, called for each hugetlb entry - * *Caution*: The caller must hold mmap_sem() if @hugetlb_entry - * is used. + * @test_walk: caller specific callback function to determine whether + * we walk over the current vma or not. A positive returned + * value means "do page table walk over the current vma," + * and a negative one means "abort current page table walk + * right now." 0 means "skip the current vma." + * @mm: mm_struct representing the target process of page table walk + * @vma: vma currently walked (NULL if walking outside vmas) + * @private: private data for callbacks' usage * - * (see walk_page_range for more details) + * (see the comment on walk_page_range() for more details) */ struct mm_walk { int (*pmd_entry)(pmd_t *pmd, unsigned long addr, @@ -1186,7 +1192,10 @@ struct mm_walk { int (*hugetlb_entry)(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long next, struct mm_walk *walk); + int (*test_walk)(unsigned long addr, unsigned long next, + struct mm_walk *walk); struct mm_struct *mm; + struct vm_area_struct *vma; void *private; }; diff --git a/mm/pagewalk.c b/mm/pagewalk.c index b793ef149da20c..d9cc3caae80296 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -59,7 +59,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, continue; split_huge_page_pmd_mm(walk->mm, addr, pmd); - if (pmd_none_or_trans_huge_or_clear_bad(pmd)) + if (pmd_trans_unstable(pmd)) goto again; err = walk_pte_range(pmd, addr, next, walk); if (err) @@ -95,6 +95,32 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, return err; } +static int walk_pgd_range(unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + pgd_t *pgd; + unsigned long next; + int err = 0; + + pgd = pgd_offset(walk->mm, addr); + do { + next = pgd_addr_end(addr, end); + if (pgd_none_or_clear_bad(pgd)) { + if (walk->pte_hole) + err = walk->pte_hole(addr, next, walk); + if (err) + break; + continue; + } + if (walk->pmd_entry || walk->pte_entry) + err = walk_pud_range(pgd, addr, next, walk); + if (err) + break; + } while (pgd++, addr = next, addr != end); + + return err; +} + #ifdef CONFIG_HUGETLB_PAGE static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr, unsigned long end) @@ -103,10 +129,10 @@ static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr, return boundary < end ? boundary : end; } -static int walk_hugetlb_range(struct vm_area_struct *vma, - unsigned long addr, unsigned long end, +static int walk_hugetlb_range(unsigned long addr, unsigned long end, struct mm_walk *walk) { + struct vm_area_struct *vma = walk->vma; struct hstate *h = hstate_vma(vma); unsigned long next; unsigned long hmask = huge_page_mask(h); @@ -119,15 +145,14 @@ static int walk_hugetlb_range(struct vm_area_struct *vma, if (pte && walk->hugetlb_entry) err = walk->hugetlb_entry(pte, hmask, addr, next, walk); if (err) - return err; + break; } while (addr = next, addr != end); - return 0; + return err; } #else /* CONFIG_HUGETLB_PAGE */ -static int walk_hugetlb_range(struct vm_area_struct *vma, - unsigned long addr, unsigned long end, +static int walk_hugetlb_range(unsigned long addr, unsigned long end, struct mm_walk *walk) { return 0; @@ -135,112 +160,115 @@ static int walk_hugetlb_range(struct vm_area_struct *vma, #endif /* CONFIG_HUGETLB_PAGE */ +/* + * Decide whether we really walk over the current vma on [@start, @end) + * or skip it via the returned value. Return 0 if we do walk over the + * current vma, and return 1 if we skip the vma. Negative values means + * error, where we abort the current walk. + * + * Default check (only VM_PFNMAP check for now) is used when the caller + * doesn't define test_walk() callback. + */ +static int walk_page_test(unsigned long start, unsigned long end, + struct mm_walk *walk) +{ + struct vm_area_struct *vma = walk->vma; + if (walk->test_walk) + return walk->test_walk(start, end, walk); + + /* + * Do not walk over vma(VM_PFNMAP), because we have no valid struct + * page backing a VM_PFNMAP range. See also commit a9ff785e4437. + */ + if (vma->vm_flags & VM_PFNMAP) + return 1; + return 0; +} + +static int __walk_page_range(unsigned long start, unsigned long end, + struct mm_walk *walk) +{ + int err = 0; + struct vm_area_struct *vma = walk->vma; + + if (vma && is_vm_hugetlb_page(vma)) { + if (walk->hugetlb_entry) + err = walk_hugetlb_range(start, end, walk); + } else + err = walk_pgd_range(start, end, walk); + + return err; +} /** - * walk_page_range - walk a memory map's page tables with a callback - * @addr: starting address - * @end: ending address - * @walk: set of callbacks to invoke for each level of the tree - * - * Recursively walk the page table for the memory area in a VMA, - * calling supplied callbacks. Callbacks are called in-order (first - * PGD, first PUD, first PMD, first PTE, second PTE... second PMD, - * etc.). If lower-level callbacks are omitted, walking depth is reduced. + * walk_page_range - walk page table with caller specific callbacks * - * Each callback receives an entry pointer and the start and end of the - * associated range, and a copy of the original mm_walk for access to - * the ->private or ->mm fields. + * Recursively walk the page table tree of the process represented by @walk->mm + * within the virtual address range [@start, @end). During walking, we can do + * some caller-specific works for each entry, by setting up pmd_entry(), + * pte_entry(), and/or hugetlb_entry(). If you don't set up for some of these + * callbacks, the associated entries/pages are just ignored. + * The return values of these callbacks are commonly defined like below: + * - 0 : succeeded to handle the current entry, and if you don't reach the + * end address yet, continue to walk. + * - >0 : succeeded to handle the current entry, and return to the caller + * with caller specific value. + * - <0 : failed to handle the current entry, and return to the caller + * with error code. * - * Usually no locks are taken, but splitting transparent huge page may - * take page table lock. And the bottom level iterator will map PTE - * directories from highmem if necessary. + * Before starting to walk page table, some callers want to check whether + * they really want to walk over the current vma, typically by checking + * its vm_flags. walk_page_test() and @walk->test_walk() are used for this + * purpose. * - * If any callback returns a non-zero value, the walk is aborted and - * the return value is propagated back to the caller. Otherwise 0 is returned. + * struct mm_walk keeps current values of some common data like vma and pmd, + * which are useful for the access from callbacks. If you want to pass some + * caller-specific data to callbacks, @walk->private should be helpful. * - * walk->mm->mmap_sem must be held for at least read if walk->hugetlb_entry - * is !NULL. + * Locking: + * Callers of walk_page_range() and walk_page_vma() should hold + * @walk->mm->mmap_sem, because these function traverse vma list and/or + * access to vma's data. */ -int walk_page_range(unsigned long addr, unsigned long end, +int walk_page_range(unsigned long start, unsigned long end, struct mm_walk *walk) { - pgd_t *pgd; - unsigned long next; int err = 0; + unsigned long next; + struct vm_area_struct *vma; - if (addr >= end) - return err; + if (start >= end) + return -EINVAL; if (!walk->mm) return -EINVAL; VM_BUG_ON_MM(!rwsem_is_locked(&walk->mm->mmap_sem), walk->mm); - pgd = pgd_offset(walk->mm, addr); + vma = find_vma(walk->mm, start); do { - struct vm_area_struct *vma = NULL; + if (!vma) { /* after the last vma */ + walk->vma = NULL; + next = end; + } else if (start < vma->vm_start) { /* outside vma */ + walk->vma = NULL; + next = min(end, vma->vm_start); + } else { /* inside vma */ + walk->vma = vma; + next = min(end, vma->vm_end); + vma = vma->vm_next; - next = pgd_addr_end(addr, end); - - /* - * This function was not intended to be vma based. - * But there are vma special cases to be handled: - * - hugetlb vma's - * - VM_PFNMAP vma's - */ - vma = find_vma(walk->mm, addr); - if (vma) { - /* - * There are no page structures backing a VM_PFNMAP - * range, so do not allow split_huge_page_pmd(). - */ - if ((vma->vm_start <= addr) && - (vma->vm_flags & VM_PFNMAP)) { - if (walk->pte_hole) - err = walk->pte_hole(addr, next, walk); - if (err) - break; - pgd = pgd_offset(walk->mm, next); + err = walk_page_test(start, next, walk); + if (err > 0) continue; - } - /* - * Handle hugetlb vma individually because pagetable - * walk for the hugetlb page is dependent on the - * architecture and we can't handled it in the same - * manner as non-huge pages. - */ - if (walk->hugetlb_entry && (vma->vm_start <= addr) && - is_vm_hugetlb_page(vma)) { - if (vma->vm_end < next) - next = vma->vm_end; - /* - * Hugepage is very tightly coupled with vma, - * so walk through hugetlb entries within a - * given vma. - */ - err = walk_hugetlb_range(vma, addr, next, walk); - if (err) - break; - pgd = pgd_offset(walk->mm, next); - continue; - } - } - - if (pgd_none_or_clear_bad(pgd)) { - if (walk->pte_hole) - err = walk->pte_hole(addr, next, walk); - if (err) + if (err < 0) break; - pgd++; - continue; } - if (walk->pmd_entry || walk->pte_entry) - err = walk_pud_range(pgd, addr, next, walk); + if (walk->vma || walk->pte_hole) + err = __walk_page_range(start, next, walk); if (err) break; - pgd++; - } while (addr = next, addr < end); - + } while (start = next, start < end); return err; } From 900fc5f197b05253ae9433fb9a066c3f37d08f69 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:27:40 -0800 Subject: [PATCH 580/601] pagewalk: add walk_page_vma() Introduce walk_page_vma(), which is useful for the callers which want to walk over a given vma. It's used by later patches. Signed-off-by: Naoya Horiguchi Acked-by: Kirill A. Shutemov Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + mm/pagewalk.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 3891a368e5e041..a4d24f3c5430fe 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1201,6 +1201,7 @@ struct mm_walk { int walk_page_range(unsigned long addr, unsigned long end, struct mm_walk *walk); +int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk); void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); int copy_page_range(struct mm_struct *dst, struct mm_struct *src, diff --git a/mm/pagewalk.c b/mm/pagewalk.c index d9cc3caae80296..4c9a653ba563fd 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -272,3 +272,21 @@ int walk_page_range(unsigned long start, unsigned long end, } while (start = next, start < end); return err; } + +int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk) +{ + int err; + + if (!walk->mm) + return -EINVAL; + + VM_BUG_ON(!rwsem_is_locked(&walk->mm->mmap_sem)); + VM_BUG_ON(!vma); + walk->vma = vma; + err = walk_page_test(vma->vm_start, vma->vm_end, walk); + if (err > 0) + return 0; + if (err < 0) + return err; + return __walk_page_range(vma->vm_start, vma->vm_end, walk); +} From 14eb6fdd4204d215a14ecd9f84a1ca66faabcc4d Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:27:43 -0800 Subject: [PATCH 581/601] smaps: remove mem_size_stats->vma and use walk_page_vma() pagewalk.c can handle vma in itself, so we don't have to pass vma via walk->private. And show_smap() walks pages on vma basis, so using walk_page_vma() is preferable. Signed-off-by: Naoya Horiguchi Acked-by: Kirill A. Shutemov Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index eeab30fcffcc1d..73425398b38ca1 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -436,7 +436,6 @@ const struct file_operations proc_tid_maps_operations = { #ifdef CONFIG_PROC_PAGE_MONITOR struct mem_size_stats { - struct vm_area_struct *vma; unsigned long resident; unsigned long shared_clean; unsigned long shared_dirty; @@ -485,7 +484,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; - struct vm_area_struct *vma = mss->vma; + struct vm_area_struct *vma = walk->vma; struct page *page = NULL; if (pte_present(*pte)) { @@ -509,7 +508,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, struct mm_walk *walk) { struct mem_size_stats *mss = walk->private; - struct vm_area_struct *vma = mss->vma; + struct vm_area_struct *vma = walk->vma; struct page *page; /* FOLL_DUMP will return -EFAULT on huge zero page */ @@ -530,8 +529,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { - struct mem_size_stats *mss = walk->private; - struct vm_area_struct *vma = mss->vma; + struct vm_area_struct *vma = walk->vma; pte_t *pte; spinlock_t *ptl; @@ -623,10 +621,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid) }; memset(&mss, 0, sizeof mss); - mss.vma = vma; /* mmap_sem is held in m_start */ - if (vma->vm_mm && !is_vm_hugetlb_page(vma)) - walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); + walk_page_vma(vma, &smaps_walk); show_map_vma(m, vma, is_pid); From 5c64f52acdbc615e3ef58692f42ee00b83d0225d Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:27:46 -0800 Subject: [PATCH 582/601] clear_refs: remove clear_refs_private->vma and introduce clear_refs_test_walk() clear_refs_write() has some prechecks to determine if we really walk over a given vma. Now we have a test_walk() callback to filter vmas, so let's utilize it. Signed-off-by: Naoya Horiguchi Acked-by: Kirill A. Shutemov Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 46 ++++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 73425398b38ca1..bed0834715a50d 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -736,7 +736,6 @@ enum clear_refs_types { }; struct clear_refs_private { - struct vm_area_struct *vma; enum clear_refs_types type; }; @@ -767,7 +766,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct clear_refs_private *cp = walk->private; - struct vm_area_struct *vma = cp->vma; + struct vm_area_struct *vma = walk->vma; pte_t *pte, ptent; spinlock_t *ptl; struct page *page; @@ -801,6 +800,25 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, return 0; } +static int clear_refs_test_walk(unsigned long start, unsigned long end, + struct mm_walk *walk) +{ + struct clear_refs_private *cp = walk->private; + struct vm_area_struct *vma = walk->vma; + + /* + * Writing 1 to /proc/pid/clear_refs affects all pages. + * Writing 2 to /proc/pid/clear_refs only affects anonymous pages. + * Writing 3 to /proc/pid/clear_refs only affects file mapped pages. + * Writing 4 to /proc/pid/clear_refs affects all pages. + */ + if (cp->type == CLEAR_REFS_ANON && vma->vm_file) + return 1; + if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file) + return 1; + return 0; +} + static ssize_t clear_refs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { @@ -841,6 +859,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, }; struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range, + .test_walk = clear_refs_test_walk, .mm = mm, .private = &cp, }; @@ -860,28 +879,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, } mmu_notifier_invalidate_range_start(mm, 0, -1); } - for (vma = mm->mmap; vma; vma = vma->vm_next) { - cp.vma = vma; - if (is_vm_hugetlb_page(vma)) - continue; - /* - * Writing 1 to /proc/pid/clear_refs affects all pages. - * - * Writing 2 to /proc/pid/clear_refs only affects - * Anonymous pages. - * - * Writing 3 to /proc/pid/clear_refs only affects file - * mapped pages. - * - * Writing 4 to /proc/pid/clear_refs affects all pages. - */ - if (type == CLEAR_REFS_ANON && vma->vm_file) - continue; - if (type == CLEAR_REFS_MAPPED && !vma->vm_file) - continue; - walk_page_range(vma->vm_start, vma->vm_end, - &clear_refs_walk); - } + walk_page_range(0, ~0UL, &clear_refs_walk); if (type == CLEAR_REFS_SOFT_DIRTY) mmu_notifier_invalidate_range_end(mm, 0, -1); flush_tlb_mm(mm); From f995ece24dfecb3614468befbe4e6e777b854cc0 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:27:48 -0800 Subject: [PATCH 583/601] pagemap: use walk->vma instead of calling find_vma() Page table walker has the information of the current vma in mm_walk, so we don't have to call find_vma() in each pagemap_(pte|hugetlb)_range() call any longer. Currently pagemap_pte_range() does vma loop itself, so this patch reduces many lines of code. NULL-vma check is omitted because we assume that we never run these callbacks on any address outside vma. And even if it were broken, NULL pointer dereference would be detected, so we can get enough information for debugging. Signed-off-by: Naoya Horiguchi Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: Kirill A. Shutemov Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 68 ++++++++++------------------------------------ 1 file changed, 14 insertions(+), 54 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index bed0834715a50d..4206706dd92a7e 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1047,15 +1047,13 @@ static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemap static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { - struct vm_area_struct *vma; + struct vm_area_struct *vma = walk->vma; struct pagemapread *pm = walk->private; spinlock_t *ptl; pte_t *pte, *orig_pte; int err = 0; - /* find the first VMA at or above 'addr' */ - vma = find_vma(walk->mm, addr); - if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { + if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { int pmd_flags2; if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd)) @@ -1081,55 +1079,20 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (pmd_trans_unstable(pmd)) return 0; - while (1) { - /* End of address space hole, which we mark as non-present. */ - unsigned long hole_end; - - if (vma) - hole_end = min(end, vma->vm_start); - else - hole_end = end; - - for (; addr < hole_end; addr += PAGE_SIZE) { - pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2)); - - err = add_to_pagemap(addr, &pme, pm); - if (err) - return err; - } - - if (!vma || vma->vm_start >= end) - break; - /* - * We can't possibly be in a hugetlb VMA. In general, - * for a mm_walk with a pmd_entry and a hugetlb_entry, - * the pmd_entry can only be called on addresses in a - * hugetlb if the walk starts in a non-hugetlb VMA and - * spans a hugepage VMA. Since pagemap_read walks are - * PMD-sized and PMD-aligned, this will never be true. - */ - BUG_ON(is_vm_hugetlb_page(vma)); - - /* Addresses in the VMA. */ - orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); - for (; addr < min(end, vma->vm_end); pte++, addr += PAGE_SIZE) { - pagemap_entry_t pme; - - pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); - err = add_to_pagemap(addr, &pme, pm); - if (err) - break; - } - pte_unmap_unlock(orig_pte, ptl); + /* + * We can assume that @vma always points to a valid one and @end never + * goes beyond vma->vm_end. + */ + orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + for (; addr < end; pte++, addr += PAGE_SIZE) { + pagemap_entry_t pme; + pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); + err = add_to_pagemap(addr, &pme, pm); if (err) - return err; - - if (addr == end) break; - - vma = find_vma(walk->mm, addr); } + pte_unmap_unlock(orig_pte, ptl); cond_resched(); @@ -1155,15 +1118,12 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, struct mm_walk *walk) { struct pagemapread *pm = walk->private; - struct vm_area_struct *vma; + struct vm_area_struct *vma = walk->vma; int err = 0; int flags2; pagemap_entry_t pme; - vma = find_vma(walk->mm, addr); - WARN_ON_ONCE(!vma); - - if (vma && (vma->vm_flags & VM_SOFTDIRTY)) + if (vma->vm_flags & VM_SOFTDIRTY) flags2 = __PM_SOFT_DIRTY; else flags2 = 0; From 632fd60fe46f9159f059ed7612eb529e475302a9 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:27:51 -0800 Subject: [PATCH 584/601] numa_maps: fix typo in gather_hugetbl_stats Just doing s/gather_hugetbl_stats/gather_hugetlb_stats/g, this makes code grep-friendly. Signed-off-by: Naoya Horiguchi Acked-by: Kirill A. Shutemov Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4206706dd92a7e..ae4bc296007773 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1385,7 +1385,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, return 0; } #ifdef CONFIG_HUGETLB_PAGE -static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, +static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { struct numa_maps *md; @@ -1404,7 +1404,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, } #else -static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, +static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long end, struct mm_walk *walk) { return 0; @@ -1435,7 +1435,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) md->vma = vma; - walk.hugetlb_entry = gather_hugetbl_stats; + walk.hugetlb_entry = gather_hugetlb_stats; walk.pmd_entry = gather_pte_stats; walk.private = md; walk.mm = mm; From d85f4d6d3bfe3b82e2903ac51a2f837eab7115d7 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:27:54 -0800 Subject: [PATCH 585/601] numa_maps: remove numa_maps->vma pagewalk.c can handle vma in itself, so we don't have to pass vma via walk->private. And show_numa_map() walks pages on vma basis, so using walk_page_vma() is preferable. Signed-off-by: Naoya Horiguchi Acked-by: Kirill A. Shutemov Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ae4bc296007773..a36db4ad140b69 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1283,7 +1283,6 @@ const struct file_operations proc_pagemap_operations = { #ifdef CONFIG_NUMA struct numa_maps { - struct vm_area_struct *vma; unsigned long pages; unsigned long anon; unsigned long active; @@ -1352,18 +1351,17 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma, static int gather_pte_stats(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { - struct numa_maps *md; + struct numa_maps *md = walk->private; + struct vm_area_struct *vma = walk->vma; spinlock_t *ptl; pte_t *orig_pte; pte_t *pte; - md = walk->private; - - if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) { + if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { pte_t huge_pte = *(pte_t *)pmd; struct page *page; - page = can_gather_numa_stats(huge_pte, md->vma, addr); + page = can_gather_numa_stats(huge_pte, vma, addr); if (page) gather_stats(page, md, pte_dirty(huge_pte), HPAGE_PMD_SIZE/PAGE_SIZE); @@ -1375,7 +1373,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, return 0; orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); do { - struct page *page = can_gather_numa_stats(*pte, md->vma, addr); + struct page *page = can_gather_numa_stats(*pte, vma, addr); if (!page) continue; gather_stats(page, md, pte_dirty(*pte), 1); @@ -1422,7 +1420,12 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) struct numa_maps *md = &numa_priv->md; struct file *file = vma->vm_file; struct mm_struct *mm = vma->vm_mm; - struct mm_walk walk = {}; + struct mm_walk walk = { + .hugetlb_entry = gather_hugetlb_stats, + .pmd_entry = gather_pte_stats, + .private = md, + .mm = mm, + }; struct mempolicy *pol; char buffer[64]; int nid; @@ -1433,13 +1436,6 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) /* Ensure we start with an empty set of numa_maps statistics. */ memset(md, 0, sizeof(*md)); - md->vma = vma; - - walk.hugetlb_entry = gather_hugetlb_stats; - walk.pmd_entry = gather_pte_stats; - walk.private = md; - walk.mm = mm; - pol = __get_vma_policy(vma, vma->vm_start); if (pol) { mpol_to_str(buffer, sizeof(buffer), pol); @@ -1473,7 +1469,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) if (is_vm_hugetlb_page(vma)) seq_puts(m, " huge"); - walk_page_range(vma->vm_start, vma->vm_end, &walk); + /* mmap_sem is held by m_start */ + walk_page_vma(vma, &walk); if (!md->pages) goto out; From 26bcd64aa9a4ded25f0dd1848759081422a14d80 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:27:57 -0800 Subject: [PATCH 586/601] memcg: cleanup preparation for page table walk pagewalk.c can handle vma in itself, so we don't have to pass vma via walk->private. And both of mem_cgroup_count_precharge() and mem_cgroup_move_charge() do for each vma loop themselves, but now it's done in pagewalk.c, so let's clean up them. Signed-off-by: Naoya Horiguchi Acked-by: Johannes Weiner Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: Kirill A. Shutemov Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 49 ++++++++++++++++--------------------------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c7a9cb627180ca..095c1f96fbecf3 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4839,7 +4839,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { - struct vm_area_struct *vma = walk->private; + struct vm_area_struct *vma = walk->vma; pte_t *pte; spinlock_t *ptl; @@ -4865,20 +4865,13 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd, static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm) { unsigned long precharge; - struct vm_area_struct *vma; + struct mm_walk mem_cgroup_count_precharge_walk = { + .pmd_entry = mem_cgroup_count_precharge_pte_range, + .mm = mm, + }; down_read(&mm->mmap_sem); - for (vma = mm->mmap; vma; vma = vma->vm_next) { - struct mm_walk mem_cgroup_count_precharge_walk = { - .pmd_entry = mem_cgroup_count_precharge_pte_range, - .mm = mm, - .private = vma, - }; - if (is_vm_hugetlb_page(vma)) - continue; - walk_page_range(vma->vm_start, vma->vm_end, - &mem_cgroup_count_precharge_walk); - } + walk_page_range(0, ~0UL, &mem_cgroup_count_precharge_walk); up_read(&mm->mmap_sem); precharge = mc.precharge; @@ -5011,7 +5004,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, struct mm_walk *walk) { int ret = 0; - struct vm_area_struct *vma = walk->private; + struct vm_area_struct *vma = walk->vma; pte_t *pte; spinlock_t *ptl; enum mc_target_type target_type; @@ -5107,7 +5100,10 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, static void mem_cgroup_move_charge(struct mm_struct *mm) { - struct vm_area_struct *vma; + struct mm_walk mem_cgroup_move_charge_walk = { + .pmd_entry = mem_cgroup_move_charge_pte_range, + .mm = mm, + }; lru_add_drain_all(); /* @@ -5130,24 +5126,11 @@ static void mem_cgroup_move_charge(struct mm_struct *mm) cond_resched(); goto retry; } - for (vma = mm->mmap; vma; vma = vma->vm_next) { - int ret; - struct mm_walk mem_cgroup_move_charge_walk = { - .pmd_entry = mem_cgroup_move_charge_pte_range, - .mm = mm, - .private = vma, - }; - if (is_vm_hugetlb_page(vma)) - continue; - ret = walk_page_range(vma->vm_start, vma->vm_end, - &mem_cgroup_move_charge_walk); - if (ret) - /* - * means we have consumed all precharges and failed in - * doing additional charge. Just abandon here. - */ - break; - } + /* + * When we have consumed all precharges and failed in doing + * additional charge, the page walk just aborts. + */ + walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk); up_read(&mm->mmap_sem); atomic_dec(&mc.from->moving_account); } From 1757bbd9c5918a9c1a8757141763a23f2e446caa Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:28:00 -0800 Subject: [PATCH 587/601] arch/powerpc/mm/subpage-prot.c: use walk->vma and walk_page_vma() We don't have to use mm_walk->private to pass vma to the callback function because of mm_walk->vma. And walk_page_vma() is useful if we walk over a single vma. Signed-off-by: Naoya Horiguchi Acked-by: Kirill A. Shutemov Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/mm/subpage-prot.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index 6c0b1f5f8d2cce..fa9fb5b4c66cf8 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -134,7 +134,7 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len) static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { - struct vm_area_struct *vma = walk->private; + struct vm_area_struct *vma = walk->vma; split_huge_page_pmd(vma, addr, pmd); return 0; } @@ -163,9 +163,7 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr, if (vma->vm_start >= (addr + len)) break; vma->vm_flags |= VM_NOHUGEPAGE; - subpage_proto_walk.private = vma; - walk_page_range(vma->vm_start, vma->vm_end, - &subpage_proto_walk); + walk_page_vma(vma, &subpage_proto_walk); vma = vma->vm_next; } } From 6f4576e3687b1f93145b89fce49d6a8fec9e7dc2 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:28:03 -0800 Subject: [PATCH 588/601] mempolicy: apply page table walker on queue_pages_range() queue_pages_range() does page table walking in its own way now, but there is some code duplicate. This patch applies page table walker to reduce lines of code. queue_pages_range() has to do some precheck to determine whether we really walk over the vma or just skip it. Now we have test_walk() callback in mm_walk for this purpose, so we can do this replacement cleanly. queue_pages_test_walk() depends on not only the current vma but also the previous one, so queue_pages->prev is introduced to remember it. Signed-off-by: Naoya Horiguchi Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: Kirill A. Shutemov Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 228 ++++++++++++++++++++----------------------------- 1 file changed, 92 insertions(+), 136 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index acbbf4c821e214..b1dcd11d867a97 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -471,24 +471,34 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = { static void migrate_page_add(struct page *page, struct list_head *pagelist, unsigned long flags); +struct queue_pages { + struct list_head *pagelist; + unsigned long flags; + nodemask_t *nmask; + struct vm_area_struct *prev; +}; + /* * Scan through pages checking if pages follow certain conditions, * and move them to the pagelist if they do. */ -static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end, - const nodemask_t *nodes, unsigned long flags, - void *private) +static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, struct mm_walk *walk) { - pte_t *orig_pte; + struct vm_area_struct *vma = walk->vma; + struct page *page; + struct queue_pages *qp = walk->private; + unsigned long flags = qp->flags; + int nid; pte_t *pte; spinlock_t *ptl; - orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); - do { - struct page *page; - int nid; + split_huge_page_pmd(vma, addr, pmd); + if (pmd_trans_unstable(pmd)) + return 0; + pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + for (; addr != end; pte++, addr += PAGE_SIZE) { if (!pte_present(*pte)) continue; page = vm_normal_page(vma, addr, *pte); @@ -501,114 +511,46 @@ static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd, if (PageReserved(page)) continue; nid = page_to_nid(page); - if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT)) + if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT)) continue; if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) - migrate_page_add(page, private, flags); - else - break; - } while (pte++, addr += PAGE_SIZE, addr != end); - pte_unmap_unlock(orig_pte, ptl); - return addr != end; + migrate_page_add(page, qp->pagelist, flags); + } + pte_unmap_unlock(pte - 1, ptl); + cond_resched(); + return 0; } -static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma, - pmd_t *pmd, const nodemask_t *nodes, unsigned long flags, - void *private) +static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask, + unsigned long addr, unsigned long end, + struct mm_walk *walk) { #ifdef CONFIG_HUGETLB_PAGE + struct queue_pages *qp = walk->private; + unsigned long flags = qp->flags; int nid; struct page *page; spinlock_t *ptl; pte_t entry; - ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, (pte_t *)pmd); - entry = huge_ptep_get((pte_t *)pmd); + ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte); + entry = huge_ptep_get(pte); if (!pte_present(entry)) goto unlock; page = pte_page(entry); nid = page_to_nid(page); - if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT)) + if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT)) goto unlock; /* With MPOL_MF_MOVE, we migrate only unshared hugepage. */ if (flags & (MPOL_MF_MOVE_ALL) || (flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) - isolate_huge_page(page, private); + isolate_huge_page(page, qp->pagelist); unlock: spin_unlock(ptl); #else BUG(); #endif -} - -static inline int queue_pages_pmd_range(struct vm_area_struct *vma, pud_t *pud, - unsigned long addr, unsigned long end, - const nodemask_t *nodes, unsigned long flags, - void *private) -{ - pmd_t *pmd; - unsigned long next; - - pmd = pmd_offset(pud, addr); - do { - next = pmd_addr_end(addr, end); - if (!pmd_present(*pmd)) - continue; - if (pmd_huge(*pmd) && is_vm_hugetlb_page(vma)) { - queue_pages_hugetlb_pmd_range(vma, pmd, nodes, - flags, private); - continue; - } - split_huge_page_pmd(vma, addr, pmd); - if (pmd_none_or_trans_huge_or_clear_bad(pmd)) - continue; - if (queue_pages_pte_range(vma, pmd, addr, next, nodes, - flags, private)) - return -EIO; - } while (pmd++, addr = next, addr != end); - return 0; -} - -static inline int queue_pages_pud_range(struct vm_area_struct *vma, pgd_t *pgd, - unsigned long addr, unsigned long end, - const nodemask_t *nodes, unsigned long flags, - void *private) -{ - pud_t *pud; - unsigned long next; - - pud = pud_offset(pgd, addr); - do { - next = pud_addr_end(addr, end); - if (pud_huge(*pud) && is_vm_hugetlb_page(vma)) - continue; - if (pud_none_or_clear_bad(pud)) - continue; - if (queue_pages_pmd_range(vma, pud, addr, next, nodes, - flags, private)) - return -EIO; - } while (pud++, addr = next, addr != end); - return 0; -} - -static inline int queue_pages_pgd_range(struct vm_area_struct *vma, - unsigned long addr, unsigned long end, - const nodemask_t *nodes, unsigned long flags, - void *private) -{ - pgd_t *pgd; - unsigned long next; - - pgd = pgd_offset(vma->vm_mm, addr); - do { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - continue; - if (queue_pages_pud_range(vma, pgd, addr, next, nodes, - flags, private)) - return -EIO; - } while (pgd++, addr = next, addr != end); return 0; } @@ -641,6 +583,46 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma, } #endif /* CONFIG_NUMA_BALANCING */ +static int queue_pages_test_walk(unsigned long start, unsigned long end, + struct mm_walk *walk) +{ + struct vm_area_struct *vma = walk->vma; + struct queue_pages *qp = walk->private; + unsigned long endvma = vma->vm_end; + unsigned long flags = qp->flags; + + if (endvma > end) + endvma = end; + if (vma->vm_start > start) + start = vma->vm_start; + + if (!(flags & MPOL_MF_DISCONTIG_OK)) { + if (!vma->vm_next && vma->vm_end < end) + return -EFAULT; + if (qp->prev && qp->prev->vm_end < vma->vm_start) + return -EFAULT; + } + + qp->prev = vma; + + if (vma->vm_flags & VM_PFNMAP) + return 1; + + if (flags & MPOL_MF_LAZY) { + /* Similar to task_numa_work, skip inaccessible VMAs */ + if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)) + change_prot_numa(vma, start, endvma); + return 1; + } + + if ((flags & MPOL_MF_STRICT) || + ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) && + vma_migratable(vma))) + /* queue pages from current vma */ + return 0; + return 1; +} + /* * Walk through page tables and collect pages to be migrated. * @@ -650,50 +632,24 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma, */ static int queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, - const nodemask_t *nodes, unsigned long flags, void *private) -{ - int err = 0; - struct vm_area_struct *vma, *prev; - - vma = find_vma(mm, start); - if (!vma) - return -EFAULT; - prev = NULL; - for (; vma && vma->vm_start < end; vma = vma->vm_next) { - unsigned long endvma = vma->vm_end; - - if (endvma > end) - endvma = end; - if (vma->vm_start > start) - start = vma->vm_start; - - if (!(flags & MPOL_MF_DISCONTIG_OK)) { - if (!vma->vm_next && vma->vm_end < end) - return -EFAULT; - if (prev && prev->vm_end < vma->vm_start) - return -EFAULT; - } - - if (flags & MPOL_MF_LAZY) { - /* Similar to task_numa_work, skip inaccessible VMAs */ - if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)) - change_prot_numa(vma, start, endvma); - goto next; - } - - if ((flags & MPOL_MF_STRICT) || - ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) && - vma_migratable(vma))) { - - err = queue_pages_pgd_range(vma, start, endvma, nodes, - flags, private); - if (err) - break; - } -next: - prev = vma; - } - return err; + nodemask_t *nodes, unsigned long flags, + struct list_head *pagelist) +{ + struct queue_pages qp = { + .pagelist = pagelist, + .flags = flags, + .nmask = nodes, + .prev = NULL, + }; + struct mm_walk queue_pages_walk = { + .hugetlb_entry = queue_pages_hugetlb, + .pmd_entry = queue_pages_pte_range, + .test_walk = queue_pages_test_walk, + .mm = mm, + .private = &qp, + }; + + return walk_page_range(start, end, &queue_pages_walk); } /* From 48684a65b4e3ff544d62532c1b78962c9677b632 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:28:06 -0800 Subject: [PATCH 589/601] mm: pagewalk: fix misbehavior of walk_page_range for vma(VM_PFNMAP) walk_page_range() silently skips vma having VM_PFNMAP set, which leads to undesirable behaviour at client end (who called walk_page_range). For example for pagemap_read(), when no callbacks are called against VM_PFNMAP vma, pagemap_read() may prepare pagemap data for next virtual address range at wrong index. That could confuse and/or break userspace applications. This patch avoid this misbehavior caused by vma(VM_PFNMAP) like follows: - for pagemap_read() which has its own ->pte_hole(), call the ->pte_hole() over vma(VM_PFNMAP), - for clear_refs and queue_pages which have their own ->tests_walk, just return 1 and skip vma(VM_PFNMAP). This is no problem because these are not interested in hole regions, - for other callers, just skip the vma(VM_PFNMAP) as a default behavior. Signed-off-by: Naoya Horiguchi Signed-off-by: Shiraz Hashim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 3 +++ mm/mempolicy.c | 3 +++ mm/pagewalk.c | 21 +++++++++++++-------- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index a36db4ad140b69..f5ca96524f5f9f 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -806,6 +806,9 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end, struct clear_refs_private *cp = walk->private; struct vm_area_struct *vma = walk->vma; + if (vma->vm_flags & VM_PFNMAP) + return 1; + /* * Writing 1 to /proc/pid/clear_refs affects all pages. * Writing 2 to /proc/pid/clear_refs only affects anonymous pages. diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b1dcd11d867a97..f1bd23803576fe 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -591,6 +591,9 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, unsigned long endvma = vma->vm_end; unsigned long flags = qp->flags; + if (vma->vm_flags & VM_PFNMAP) + return 1; + if (endvma > end) endvma = end; if (vma->vm_start > start) diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 4c9a653ba563fd..75c1f287851917 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -35,7 +35,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, do { again: next = pmd_addr_end(addr, end); - if (pmd_none(*pmd)) { + if (pmd_none(*pmd) || !walk->vma) { if (walk->pte_hole) err = walk->pte_hole(addr, next, walk); if (err) @@ -165,9 +165,6 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end, * or skip it via the returned value. Return 0 if we do walk over the * current vma, and return 1 if we skip the vma. Negative values means * error, where we abort the current walk. - * - * Default check (only VM_PFNMAP check for now) is used when the caller - * doesn't define test_walk() callback. */ static int walk_page_test(unsigned long start, unsigned long end, struct mm_walk *walk) @@ -178,11 +175,19 @@ static int walk_page_test(unsigned long start, unsigned long end, return walk->test_walk(start, end, walk); /* - * Do not walk over vma(VM_PFNMAP), because we have no valid struct - * page backing a VM_PFNMAP range. See also commit a9ff785e4437. + * vma(VM_PFNMAP) doesn't have any valid struct pages behind VM_PFNMAP + * range, so we don't walk over it as we do for normal vmas. However, + * Some callers are interested in handling hole range and they don't + * want to just ignore any single address range. Such users certainly + * define their ->pte_hole() callbacks, so let's delegate them to handle + * vma(VM_PFNMAP). */ - if (vma->vm_flags & VM_PFNMAP) - return 1; + if (vma->vm_flags & VM_PFNMAP) { + int err = 1; + if (walk->pte_hole) + err = walk->pte_hole(start, end, walk); + return err ? err : 1; + } return 0; } From 7d5b3bfaa2da150ce2dc45546f2125b854f962ef Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 11 Feb 2015 15:28:08 -0800 Subject: [PATCH 590/601] mm: /proc/pid/clear_refs: avoid split_huge_page() Currently pagewalker splits all THP pages on any clear_refs request. It's not necessary. We can handle this on PMD level. One side effect is that soft dirty will potentially see more dirty memory, since we will mark whole THP page dirty at once. Sanity checked with CRIU test suite. More testing is required. Signed-off-by: Kirill A. Shutemov Signed-off-by: Naoya Horiguchi Reviewed-by: Cyrill Gorcunov Cc: Pavel Emelyanov Cc: Andrea Arcangeli Cc: Dave Hansen Cc: "Kirill A. Shutemov" Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 47 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index f5ca96524f5f9f..0e36c1e49fe30b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -739,10 +739,10 @@ struct clear_refs_private { enum clear_refs_types type; }; +#ifdef CONFIG_MEM_SOFT_DIRTY static inline void clear_soft_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { -#ifdef CONFIG_MEM_SOFT_DIRTY /* * The soft-dirty tracker uses #PF-s to catch writes * to pages, so write-protect the pte as well. See the @@ -759,9 +759,35 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, } set_pte_at(vma->vm_mm, addr, pte, ptent); -#endif } +static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) +{ + pmd_t pmd = *pmdp; + + pmd = pmd_wrprotect(pmd); + pmd = pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY); + + if (vma->vm_flags & VM_SOFTDIRTY) + vma->vm_flags &= ~VM_SOFTDIRTY; + + set_pmd_at(vma->vm_mm, addr, pmdp, pmd); +} + +#else + +static inline void clear_soft_dirty(struct vm_area_struct *vma, + unsigned long addr, pte_t *pte) +{ +} + +static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) +{ +} +#endif + static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -771,7 +797,22 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, spinlock_t *ptl; struct page *page; - split_huge_page_pmd(vma, addr, pmd); + if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { + if (cp->type == CLEAR_REFS_SOFT_DIRTY) { + clear_soft_dirty_pmd(vma, addr, pmd); + goto out; + } + + page = pmd_page(*pmd); + + /* Clear accessed and referenced bits. */ + pmdp_test_and_clear_young(vma, addr, pmd); + ClearPageReferenced(page); +out: + spin_unlock(ptl); + return 0; + } + if (pmd_trans_unstable(pmd)) return 0; From 1e25a271c8ac1c9faebf4eb3fa609189e4e7b1b6 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 11 Feb 2015 15:28:11 -0800 Subject: [PATCH 591/601] mincore: apply page table walker on do_mincore() This patch makes do_mincore() use walk_page_vma(), which reduces many lines of code by using common page table walk code. [daeseok.youn@gmail.com: remove unneeded variable 'err'] Signed-off-by: Naoya Horiguchi Acked-by: Johannes Weiner Cc: "Kirill A. Shutemov" Cc: Andrea Arcangeli Cc: Cyrill Gorcunov Cc: Dave Hansen Cc: Kirill A. Shutemov Cc: Pavel Emelyanov Cc: Benjamin Herrenschmidt Signed-off-by: Daeseok Youn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 20 ------ mm/mincore.c | 166 +++++++++++++++++------------------------------ 2 files changed, 60 insertions(+), 126 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 0531ea7dd7cf49..29bc6e471df467 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1412,26 +1412,6 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, return ret; } -int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end, - unsigned char *vec) -{ - spinlock_t *ptl; - int ret = 0; - - if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { - /* - * All logical pages in the range are present - * if backed by a huge page. - */ - spin_unlock(ptl); - memset(vec, 1, (end - addr) >> PAGE_SHIFT); - ret = 1; - } - - return ret; -} - int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, unsigned long old_addr, unsigned long new_addr, unsigned long old_end, diff --git a/mm/mincore.c b/mm/mincore.c index 46527c023e0c84..be25efde64a409 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -19,38 +19,25 @@ #include #include -static void mincore_hugetlb_page_range(struct vm_area_struct *vma, - unsigned long addr, unsigned long end, - unsigned char *vec) +static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr, + unsigned long end, struct mm_walk *walk) { #ifdef CONFIG_HUGETLB_PAGE - struct hstate *h; + unsigned char present; + unsigned char *vec = walk->private; - h = hstate_vma(vma); - while (1) { - unsigned char present; - pte_t *ptep; - /* - * Huge pages are always in RAM for now, but - * theoretically it needs to be checked. - */ - ptep = huge_pte_offset(current->mm, - addr & huge_page_mask(h)); - present = ptep && !huge_pte_none(huge_ptep_get(ptep)); - while (1) { - *vec = present; - vec++; - addr += PAGE_SIZE; - if (addr == end) - return; - /* check hugepage border */ - if (!(addr & ~huge_page_mask(h))) - break; - } - } + /* + * Hugepages under user process are always in RAM and never + * swapped out, but theoretically it needs to be checked. + */ + present = pte && !huge_pte_none(huge_ptep_get(pte)); + for (; addr != end; vec++, addr += PAGE_SIZE) + *vec = present; + walk->private = vec; #else BUG(); #endif + return 0; } /* @@ -94,9 +81,8 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) return present; } -static void mincore_unmapped_range(struct vm_area_struct *vma, - unsigned long addr, unsigned long end, - unsigned char *vec) +static int __mincore_unmapped_range(unsigned long addr, unsigned long end, + struct vm_area_struct *vma, unsigned char *vec) { unsigned long nr = (end - addr) >> PAGE_SHIFT; int i; @@ -111,23 +97,44 @@ static void mincore_unmapped_range(struct vm_area_struct *vma, for (i = 0; i < nr; i++) vec[i] = 0; } + return nr; +} + +static int mincore_unmapped_range(unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + walk->private += __mincore_unmapped_range(addr, end, + walk->vma, walk->private); + return 0; } -static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end, - unsigned char *vec) +static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, + struct mm_walk *walk) { - unsigned long next; spinlock_t *ptl; + struct vm_area_struct *vma = walk->vma; pte_t *ptep; + unsigned char *vec = walk->private; + int nr = (end - addr) >> PAGE_SHIFT; + + if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) { + memset(vec, 1, nr); + spin_unlock(ptl); + goto out; + } + + if (pmd_trans_unstable(pmd)) { + __mincore_unmapped_range(addr, end, vma, vec); + goto out; + } - ptep = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); - do { + ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + for (; addr != end; ptep++, addr += PAGE_SIZE) { pte_t pte = *ptep; - next = addr + PAGE_SIZE; if (pte_none(pte)) - mincore_unmapped_range(vma, addr, next, vec); + __mincore_unmapped_range(addr, addr + PAGE_SIZE, + vma, vec); else if (pte_present(pte)) *vec = 1; else { /* pte is a swap entry */ @@ -150,69 +157,12 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd, } } vec++; - } while (ptep++, addr = next, addr != end); + } pte_unmap_unlock(ptep - 1, ptl); -} - -static void mincore_pmd_range(struct vm_area_struct *vma, pud_t *pud, - unsigned long addr, unsigned long end, - unsigned char *vec) -{ - unsigned long next; - pmd_t *pmd; - - pmd = pmd_offset(pud, addr); - do { - next = pmd_addr_end(addr, end); - if (pmd_trans_huge(*pmd)) { - if (mincore_huge_pmd(vma, pmd, addr, next, vec)) { - vec += (next - addr) >> PAGE_SHIFT; - continue; - } - /* fall through */ - } - if (pmd_none_or_trans_huge_or_clear_bad(pmd)) - mincore_unmapped_range(vma, addr, next, vec); - else - mincore_pte_range(vma, pmd, addr, next, vec); - vec += (next - addr) >> PAGE_SHIFT; - } while (pmd++, addr = next, addr != end); -} - -static void mincore_pud_range(struct vm_area_struct *vma, pgd_t *pgd, - unsigned long addr, unsigned long end, - unsigned char *vec) -{ - unsigned long next; - pud_t *pud; - - pud = pud_offset(pgd, addr); - do { - next = pud_addr_end(addr, end); - if (pud_none_or_clear_bad(pud)) - mincore_unmapped_range(vma, addr, next, vec); - else - mincore_pmd_range(vma, pud, addr, next, vec); - vec += (next - addr) >> PAGE_SHIFT; - } while (pud++, addr = next, addr != end); -} - -static void mincore_page_range(struct vm_area_struct *vma, - unsigned long addr, unsigned long end, - unsigned char *vec) -{ - unsigned long next; - pgd_t *pgd; - - pgd = pgd_offset(vma->vm_mm, addr); - do { - next = pgd_addr_end(addr, end); - if (pgd_none_or_clear_bad(pgd)) - mincore_unmapped_range(vma, addr, next, vec); - else - mincore_pud_range(vma, pgd, addr, next, vec); - vec += (next - addr) >> PAGE_SHIFT; - } while (pgd++, addr = next, addr != end); +out: + walk->private += nr; + cond_resched(); + return 0; } /* @@ -224,18 +174,22 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v { struct vm_area_struct *vma; unsigned long end; + int err; + struct mm_walk mincore_walk = { + .pmd_entry = mincore_pte_range, + .pte_hole = mincore_unmapped_range, + .hugetlb_entry = mincore_hugetlb, + .private = vec, + }; vma = find_vma(current->mm, addr); if (!vma || addr < vma->vm_start) return -ENOMEM; - + mincore_walk.mm = vma->vm_mm; end = min(vma->vm_end, addr + (pages << PAGE_SHIFT)); - - if (is_vm_hugetlb_page(vma)) - mincore_hugetlb_page_range(vma, addr, end, vec); - else - mincore_page_range(vma, addr, end, vec); - + err = walk_page_range(addr, end, &mincore_walk); + if (err < 0) + return err; return (end - addr) >> PAGE_SHIFT; } From 99592d598eca62bdbbf62b59941c189176dfc614 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 11 Feb 2015 15:28:15 -0800 Subject: [PATCH 592/601] mm: when stealing freepages, also take pages created by splitting buddy page When studying page stealing, I noticed some weird looking decisions in try_to_steal_freepages(). The first I assume is a bug (Patch 1), the following two patches were driven by evaluation. Testing was done with stress-highalloc of mmtests, using the mm_page_alloc_extfrag tracepoint and postprocessing to get counts of how often page stealing occurs for individual migratetypes, and what migratetypes are used for fallbacks. Arguably, the worst case of page stealing is when UNMOVABLE allocation steals from MOVABLE pageblock. RECLAIMABLE allocation stealing from MOVABLE allocation is also not ideal, so the goal is to minimize these two cases. The evaluation of v2 wasn't always clear win and Joonsoo questioned the results. Here I used different baseline which includes RFC compaction improvements from [1]. I found that the compaction improvements reduce variability of stress-highalloc, so there's less noise in the data. First, let's look at stress-highalloc configured to do sync compaction, and how these patches reduce page stealing events during the test. First column is after fresh reboot, other two are reiterations of test without reboot. That was all accumulater over 5 re-iterations (so the benchmark was run 5x3 times with 5 fresh restarts). Baseline: 3.19-rc4 3.19-rc4 3.19-rc4 5-nothp-1 5-nothp-2 5-nothp-3 Page alloc extfrag event 10264225 8702233 10244125 Extfrag fragmenting 10263271 8701552 10243473 Extfrag fragmenting for unmovable 13595 17616 15960 Extfrag fragmenting unmovable placed with movable 7989 12193 8447 Extfrag fragmenting for reclaimable 658 1840 1817 Extfrag fragmenting reclaimable placed with movable 558 1677 1679 Extfrag fragmenting for movable 10249018 8682096 10225696 With Patch 1: 3.19-rc4 3.19-rc4 3.19-rc4 6-nothp-1 6-nothp-2 6-nothp-3 Page alloc extfrag event 11834954 9877523 9774860 Extfrag fragmenting 11833993 9876880 9774245 Extfrag fragmenting for unmovable 7342 16129 11712 Extfrag fragmenting unmovable placed with movable 4191 10547 6270 Extfrag fragmenting for reclaimable 373 1130 923 Extfrag fragmenting reclaimable placed with movable 302 906 738 Extfrag fragmenting for movable 11826278 9859621 9761610 With Patch 2: 3.19-rc4 3.19-rc4 3.19-rc4 7-nothp-1 7-nothp-2 7-nothp-3 Page alloc extfrag event 4725990 3668793 3807436 Extfrag fragmenting 4725104 3668252 3806898 Extfrag fragmenting for unmovable 6678 7974 7281 Extfrag fragmenting unmovable placed with movable 2051 3829 4017 Extfrag fragmenting for reclaimable 429 1208 1278 Extfrag fragmenting reclaimable placed with movable 369 976 1034 Extfrag fragmenting for movable 4717997 3659070 3798339 With Patch 3: 3.19-rc4 3.19-rc4 3.19-rc4 8-nothp-1 8-nothp-2 8-nothp-3 Page alloc extfrag event 5016183 4700142 3850633 Extfrag fragmenting 5015325 4699613 3850072 Extfrag fragmenting for unmovable 1312 3154 3088 Extfrag fragmenting unmovable placed with movable 1115 2777 2714 Extfrag fragmenting for reclaimable 437 1193 1097 Extfrag fragmenting reclaimable placed with movable 330 969 879 Extfrag fragmenting for movable 5013576 4695266 3845887 In v2 we've seen apparent regression with Patch 1 for unmovable events, this is now gone, suggesting it was indeed noise. Here, each patch improves the situation for unmovable events. Reclaimable is improved by patch 1 and then either the same modulo noise, or perhaps sligtly worse - a small price for unmovable improvements, IMHO. The number of movable allocations falling back to other migratetypes is most noisy, but it's reduced to half at Patch 2 nevertheless. These are least critical as compaction can move them around. If we look at success rates, the patches don't affect them, that didn't change. Baseline: 3.19-rc4 3.19-rc4 3.19-rc4 5-nothp-1 5-nothp-2 5-nothp-3 Success 1 Min 49.00 ( 0.00%) 42.00 ( 14.29%) 41.00 ( 16.33%) Success 1 Mean 51.00 ( 0.00%) 45.00 ( 11.76%) 42.60 ( 16.47%) Success 1 Max 55.00 ( 0.00%) 51.00 ( 7.27%) 46.00 ( 16.36%) Success 2 Min 53.00 ( 0.00%) 47.00 ( 11.32%) 44.00 ( 16.98%) Success 2 Mean 59.60 ( 0.00%) 50.80 ( 14.77%) 48.20 ( 19.13%) Success 2 Max 64.00 ( 0.00%) 56.00 ( 12.50%) 52.00 ( 18.75%) Success 3 Min 84.00 ( 0.00%) 82.00 ( 2.38%) 78.00 ( 7.14%) Success 3 Mean 85.60 ( 0.00%) 82.80 ( 3.27%) 79.40 ( 7.24%) Success 3 Max 86.00 ( 0.00%) 83.00 ( 3.49%) 80.00 ( 6.98%) Patch 1: 3.19-rc4 3.19-rc4 3.19-rc4 6-nothp-1 6-nothp-2 6-nothp-3 Success 1 Min 49.00 ( 0.00%) 44.00 ( 10.20%) 44.00 ( 10.20%) Success 1 Mean 51.80 ( 0.00%) 46.00 ( 11.20%) 45.80 ( 11.58%) Success 1 Max 54.00 ( 0.00%) 49.00 ( 9.26%) 49.00 ( 9.26%) Success 2 Min 58.00 ( 0.00%) 49.00 ( 15.52%) 48.00 ( 17.24%) Success 2 Mean 60.40 ( 0.00%) 51.80 ( 14.24%) 50.80 ( 15.89%) Success 2 Max 63.00 ( 0.00%) 54.00 ( 14.29%) 55.00 ( 12.70%) Success 3 Min 84.00 ( 0.00%) 81.00 ( 3.57%) 79.00 ( 5.95%) Success 3 Mean 85.00 ( 0.00%) 81.60 ( 4.00%) 79.80 ( 6.12%) Success 3 Max 86.00 ( 0.00%) 82.00 ( 4.65%) 82.00 ( 4.65%) Patch 2: 3.19-rc4 3.19-rc4 3.19-rc4 7-nothp-1 7-nothp-2 7-nothp-3 Success 1 Min 50.00 ( 0.00%) 44.00 ( 12.00%) 39.00 ( 22.00%) Success 1 Mean 52.80 ( 0.00%) 45.60 ( 13.64%) 42.40 ( 19.70%) Success 1 Max 55.00 ( 0.00%) 46.00 ( 16.36%) 47.00 ( 14.55%) Success 2 Min 52.00 ( 0.00%) 48.00 ( 7.69%) 45.00 ( 13.46%) Success 2 Mean 53.40 ( 0.00%) 49.80 ( 6.74%) 48.80 ( 8.61%) Success 2 Max 57.00 ( 0.00%) 52.00 ( 8.77%) 52.00 ( 8.77%) Success 3 Min 84.00 ( 0.00%) 81.00 ( 3.57%) 79.00 ( 5.95%) Success 3 Mean 85.00 ( 0.00%) 82.40 ( 3.06%) 79.60 ( 6.35%) Success 3 Max 86.00 ( 0.00%) 83.00 ( 3.49%) 80.00 ( 6.98%) Patch 3: 3.19-rc4 3.19-rc4 3.19-rc4 8-nothp-1 8-nothp-2 8-nothp-3 Success 1 Min 46.00 ( 0.00%) 44.00 ( 4.35%) 42.00 ( 8.70%) Success 1 Mean 50.20 ( 0.00%) 45.60 ( 9.16%) 44.00 ( 12.35%) Success 1 Max 52.00 ( 0.00%) 47.00 ( 9.62%) 47.00 ( 9.62%) Success 2 Min 53.00 ( 0.00%) 49.00 ( 7.55%) 48.00 ( 9.43%) Success 2 Mean 55.80 ( 0.00%) 50.60 ( 9.32%) 49.00 ( 12.19%) Success 2 Max 59.00 ( 0.00%) 52.00 ( 11.86%) 51.00 ( 13.56%) Success 3 Min 84.00 ( 0.00%) 80.00 ( 4.76%) 79.00 ( 5.95%) Success 3 Mean 85.40 ( 0.00%) 81.60 ( 4.45%) 80.40 ( 5.85%) Success 3 Max 87.00 ( 0.00%) 83.00 ( 4.60%) 82.00 ( 5.75%) While there's no improvement here, I consider reduced fragmentation events to be worth on its own. Patch 2 also seems to reduce scanning for free pages, and migrations in compaction, suggesting it has somewhat less work to do: Patch 1: Compaction stalls 4153 3959 3978 Compaction success 1523 1441 1446 Compaction failures 2630 2517 2531 Page migrate success 4600827 4943120 5104348 Page migrate failure 19763 16656 17806 Compaction pages isolated 9597640 10305617 10653541 Compaction migrate scanned 77828948 86533283 87137064 Compaction free scanned 517758295 521312840 521462251 Compaction cost 5503 5932 6110 Patch 2: Compaction stalls 3800 3450 3518 Compaction success 1421 1316 1317 Compaction failures 2379 2134 2201 Page migrate success 4160421 4502708 4752148 Page migrate failure 19705 14340 14911 Compaction pages isolated 8731983 9382374 9910043 Compaction migrate scanned 98362797 96349194 98609686 Compaction free scanned 496512560 469502017 480442545 Compaction cost 5173 5526 5811 As with v2, /proc/pagetypeinfo appears unaffected with respect to numbers of unmovable and reclaimable pageblocks. Configuring the benchmark to allocate like THP page fault (i.e. no sync compaction) gives much noisier results for iterations 2 and 3 after reboot. This is not so surprising given how [1] offers lower improvements in this scenario due to less restarts after deferred compaction which would change compaction pivot. Baseline: 3.19-rc4 3.19-rc4 3.19-rc4 5-thp-1 5-thp-2 5-thp-3 Page alloc extfrag event 8148965 6227815 6646741 Extfrag fragmenting 8147872 6227130 6646117 Extfrag fragmenting for unmovable 10324 12942 15975 Extfrag fragmenting unmovable placed with movable 5972 8495 10907 Extfrag fragmenting for reclaimable 601 1707 2210 Extfrag fragmenting reclaimable placed with movable 520 1570 2000 Extfrag fragmenting for movable 8136947 6212481 6627932 Patch 1: 3.19-rc4 3.19-rc4 3.19-rc4 6-thp-1 6-thp-2 6-thp-3 Page alloc extfrag event 8345457 7574471 7020419 Extfrag fragmenting 8343546 7573777 7019718 Extfrag fragmenting for unmovable 10256 18535 30716 Extfrag fragmenting unmovable placed with movable 6893 11726 22181 Extfrag fragmenting for reclaimable 465 1208 1023 Extfrag fragmenting reclaimable placed with movable 353 996 843 Extfrag fragmenting for movable 8332825 7554034 6987979 Patch 2: 3.19-rc4 3.19-rc4 3.19-rc4 7-thp-1 7-thp-2 7-thp-3 Page alloc extfrag event 3512847 3020756 2891625 Extfrag fragmenting 3511940 3020185 2891059 Extfrag fragmenting for unmovable 9017 6892 6191 Extfrag fragmenting unmovable placed with movable 1524 3053 2435 Extfrag fragmenting for reclaimable 445 1081 1160 Extfrag fragmenting reclaimable placed with movable 375 918 986 Extfrag fragmenting for movable 3502478 3012212 2883708 Patch 3: 3.19-rc4 3.19-rc4 3.19-rc4 8-thp-1 8-thp-2 8-thp-3 Page alloc extfrag event 3181699 3082881 2674164 Extfrag fragmenting 3180812 3082303 2673611 Extfrag fragmenting for unmovable 1201 4031 4040 Extfrag fragmenting unmovable placed with movable 974 3611 3645 Extfrag fragmenting for reclaimable 478 1165 1294 Extfrag fragmenting reclaimable placed with movable 387 985 1030 Extfrag fragmenting for movable 3179133 3077107 2668277 The improvements for first iteration are clear, the rest is much noisier and can appear like regression for Patch 1. Anyway, patch 2 rectifies it. Allocation success rates are again unaffected so there's no point in making this e-mail any longer. [1] http://marc.info/?l=linux-mm&m=142166196321125&w=2 This patch (of 3): When __rmqueue_fallback() is called to allocate a page of order X, it will find a page of order Y >= X of a fallback migratetype, which is different from the desired migratetype. With the help of try_to_steal_freepages(), it may change the migratetype (to the desired one) also of: 1) all currently free pages in the pageblock containing the fallback page 2) the fallback pageblock itself 3) buddy pages created by splitting the fallback page (when Y > X) These decisions take the order Y into account, as well as the desired migratetype, with the goal of preventing multiple fallback allocations that could e.g. distribute UNMOVABLE allocations among multiple pageblocks. Originally, decision for 1) has implied the decision for 3). Commit 47118af076f6 ("mm: mmzone: MIGRATE_CMA migration type added") changed that (probably unintentionally) so that the buddy pages in case 3) are always changed to the desired migratetype, except for CMA pageblocks. Commit fef903efcf0c ("mm/page_allo.c: restructure free-page stealing code and fix a bug") did some refactoring and added a comment that the case of 3) is intended. Commit 0cbef29a7821 ("mm: __rmqueue_fallback() should respect pageblock type") removed the comment and tried to restore the original behavior where 1) implies 3), but due to the previous refactoring, the result is instead that only 2) implies 3) - and the conditions for 2) are less frequently met than conditions for 1). This may increase fragmentation in situations where the code decides to steal all free pages from the pageblock (case 1)), but then gives back the buddy pages produced by splitting. This patch restores the original intended logic where 1) implies 3). During testing with stress-highalloc from mmtests, this has shown to decrease the number of events where UNMOVABLE and RECLAIMABLE allocations steal from MOVABLE pageblocks, which can lead to permanent fragmentation. In some cases it has increased the number of events when MOVABLE allocations steal from UNMOVABLE or RECLAIMABLE pageblocks, but these are fixable by sync compaction and thus less harmful. Note that evaluation has shown that the behavior introduced by 47118af076f6 for buddy pages in case 3) is actually even better than the original logic, so the following patch will introduce it properly once again. For stable backports of this patch it makes thus sense to only fix versions containing 0cbef29a7821. [iamjoonsoo.kim@lge.com: tracepoint fix] Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Cc: Zhang Yanfei Acked-by: Minchan Kim Cc: David Rientjes Cc: Rik van Riel Cc: "Aneesh Kumar K.V" Cc: "Kirill A. Shutemov" Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Michal Hocko Cc: KOSAKI Motohiro Cc: [3.13+ containing 0cbef29a7821] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/kmem.h | 7 ++++--- mm/page_alloc.c | 12 +++++------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index aece1346ceb741..4ad10baecd4dcd 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -268,11 +268,11 @@ TRACE_EVENT(mm_page_alloc_extfrag, TP_PROTO(struct page *page, int alloc_order, int fallback_order, - int alloc_migratetype, int fallback_migratetype, int new_migratetype), + int alloc_migratetype, int fallback_migratetype), TP_ARGS(page, alloc_order, fallback_order, - alloc_migratetype, fallback_migratetype, new_migratetype), + alloc_migratetype, fallback_migratetype), TP_STRUCT__entry( __field( struct page *, page ) @@ -289,7 +289,8 @@ TRACE_EVENT(mm_page_alloc_extfrag, __entry->fallback_order = fallback_order; __entry->alloc_migratetype = alloc_migratetype; __entry->fallback_migratetype = fallback_migratetype; - __entry->change_ownership = (new_migratetype == alloc_migratetype); + __entry->change_ownership = (alloc_migratetype == + get_pageblock_migratetype(page)); ), TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d", diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 134e255250447e..b7a881019352da 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1131,8 +1131,8 @@ static void change_pageblock_range(struct page *pageblock_page, * nor move CMA pages to different free lists. We don't want unmovable pages * to be allocated from MIGRATE_CMA areas. * - * Returns the new migratetype of the pageblock (or the same old migratetype - * if it was unchanged). + * Returns the allocation migratetype if free pages were stolen, or the + * fallback migratetype if it was decided not to steal. */ static int try_to_steal_freepages(struct zone *zone, struct page *page, int start_type, int fallback_type) @@ -1163,12 +1163,10 @@ static int try_to_steal_freepages(struct zone *zone, struct page *page, /* Claim the whole block if over half of it is free */ if (pages >= (1 << (pageblock_order-1)) || - page_group_by_mobility_disabled) { - + page_group_by_mobility_disabled) set_pageblock_migratetype(page, start_type); - return start_type; - } + return start_type; } return fallback_type; @@ -1220,7 +1218,7 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) set_freepage_migratetype(page, new_type); trace_mm_page_alloc_extfrag(page, order, current_order, - start_migratetype, migratetype, new_type); + start_migratetype, migratetype); return page; } From 3a1086fba92b6e2311b6a342f68bc380beb240fe Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 11 Feb 2015 15:28:18 -0800 Subject: [PATCH 593/601] mm: always steal split buddies in fallback allocations When allocation falls back to another migratetype, it will steal a page with highest available order, and (depending on this order and desired migratetype), it might also steal the rest of free pages from the same pageblock. Given the preference of highest available order, it is likely that it will be higher than the desired order, and result in the stolen buddy page being split. The remaining pages after split are currently stolen only when the rest of the free pages are stolen. This can however lead to situations where for MOVABLE allocations we split e.g. order-4 fallback UNMOVABLE page, but steal only order-0 page. Then on the next MOVABLE allocation (which may be batched to fill the pcplists) we split another order-3 or higher page, etc. By stealing all pages that we have split, we can avoid further stealing. This patch therefore adjusts the page stealing so that buddy pages created by split are always stolen. This has effect only on MOVABLE allocations, as RECLAIMABLE and UNMOVABLE allocations already always do that in addition to stealing the rest of free pages from the pageblock. The change also allows to simplify try_to_steal_freepages() and factor out CMA handling. According to Mel, it has been intended since the beginning that buddy pages after split would be stolen always, but it doesn't seem like it was ever the case until commit 47118af076f6 ("mm: mmzone: MIGRATE_CMA migration type added"). The commit has unintentionally introduced this behavior, but was reverted by commit 0cbef29a7821 ("mm: __rmqueue_fallback() should respect pageblock type"). Neither included evaluation. My evaluation with stress-highalloc from mmtests shows about 2.5x reduction of page stealing events for MOVABLE allocations, without affecting the page stealing events for other allocation migratetypes. Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Cc: Zhang Yanfei Acked-by: Minchan Kim Cc: David Rientjes Cc: Rik van Riel Cc: "Aneesh Kumar K.V" Cc: "Kirill A. Shutemov" Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Michal Hocko Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 62 +++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 33 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b7a881019352da..7c44b49cec40c3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1125,33 +1125,18 @@ static void change_pageblock_range(struct page *pageblock_page, /* * If breaking a large block of pages, move all free pages to the preferred * allocation list. If falling back for a reclaimable kernel allocation, be - * more aggressive about taking ownership of free pages. - * - * On the other hand, never change migration type of MIGRATE_CMA pageblocks - * nor move CMA pages to different free lists. We don't want unmovable pages - * to be allocated from MIGRATE_CMA areas. - * - * Returns the allocation migratetype if free pages were stolen, or the - * fallback migratetype if it was decided not to steal. + * more aggressive about taking ownership of free pages. If we claim more than + * half of the pageblock, change pageblock's migratetype as well. */ -static int try_to_steal_freepages(struct zone *zone, struct page *page, +static void try_to_steal_freepages(struct zone *zone, struct page *page, int start_type, int fallback_type) { int current_order = page_order(page); - /* - * When borrowing from MIGRATE_CMA, we need to release the excess - * buddy pages to CMA itself. We also ensure the freepage_migratetype - * is set to CMA so it is returned to the correct freelist in case - * the page ends up being not actually allocated from the pcp lists. - */ - if (is_migrate_cma(fallback_type)) - return fallback_type; - /* Take ownership for orders >= pageblock_order */ if (current_order >= pageblock_order) { change_pageblock_range(page, current_order, start_type); - return start_type; + return; } if (current_order >= pageblock_order / 2 || @@ -1165,11 +1150,7 @@ static int try_to_steal_freepages(struct zone *zone, struct page *page, if (pages >= (1 << (pageblock_order-1)) || page_group_by_mobility_disabled) set_pageblock_migratetype(page, start_type); - - return start_type; } - - return fallback_type; } /* Remove an element from the buddy allocator from the fallback list */ @@ -1179,14 +1160,15 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) struct free_area *area; unsigned int current_order; struct page *page; - int migratetype, new_type, i; /* Find the largest possible block of pages in the other list */ for (current_order = MAX_ORDER-1; current_order >= order && current_order <= MAX_ORDER-1; --current_order) { + int i; for (i = 0;; i++) { - migratetype = fallbacks[start_migratetype][i]; + int migratetype = fallbacks[start_migratetype][i]; + int buddy_type = start_migratetype; /* MIGRATE_RESERVE handled later if necessary */ if (migratetype == MIGRATE_RESERVE) @@ -1200,22 +1182,36 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype) struct page, lru); area->nr_free--; - new_type = try_to_steal_freepages(zone, page, - start_migratetype, - migratetype); + if (!is_migrate_cma(migratetype)) { + try_to_steal_freepages(zone, page, + start_migratetype, + migratetype); + } else { + /* + * When borrowing from MIGRATE_CMA, we need to + * release the excess buddy pages to CMA + * itself, and we do not try to steal extra + * free pages. + */ + buddy_type = migratetype; + } /* Remove the page from the freelists */ list_del(&page->lru); rmv_page_order(page); expand(zone, page, order, current_order, area, - new_type); - /* The freepage_migratetype may differ from pageblock's + buddy_type); + + /* + * The freepage_migratetype may differ from pageblock's * migratetype depending on the decisions in - * try_to_steal_freepages. This is OK as long as it does - * not differ for MIGRATE_CMA type. + * try_to_steal_freepages(). This is OK as long as it + * does not differ for MIGRATE_CMA pageblocks. For CMA + * we need to make sure unallocated pages flushed from + * pcp lists are returned to the correct freelist. */ - set_freepage_migratetype(page, new_type); + set_freepage_migratetype(page, buddy_type); trace_mm_page_alloc_extfrag(page, order, current_order, start_migratetype, migratetype); From 9c0415eb8cbf0c8fd043b6c0f0354308ab099df5 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 11 Feb 2015 15:28:21 -0800 Subject: [PATCH 594/601] mm: more aggressive page stealing for UNMOVABLE allocations When allocation falls back to stealing free pages of another migratetype, it can decide to steal extra pages, or even the whole pageblock in order to reduce fragmentation, which could happen if further allocation fallbacks pick a different pageblock. In try_to_steal_freepages(), one of the situations where extra pages are stolen happens when we are trying to allocate a MIGRATE_RECLAIMABLE page. However, MIGRATE_UNMOVABLE allocations are not treated the same way, although spreading such allocation over multiple fallback pageblocks is arguably even worse than it is for RECLAIMABLE allocations. To minimize fragmentation, we should minimize the number of such fallbacks, and thus steal as much as is possible from each fallback pageblock. Note that in theory this might put more pressure on movable pageblocks and cause movable allocations to steal back from unmovable pageblocks. However, movable allocations are not as aggressive with stealing, and do not cause permanent fragmentation, so the tradeoff is reasonable, and evaluation seems to support the change. This patch thus adds a check for MIGRATE_UNMOVABLE to the decision to steal extra free pages. When evaluating with stress-highalloc from mmtests, this has reduced the number of MIGRATE_UNMOVABLE fallbacks to roughly 1/6. The number of these fallbacks stealing from MIGRATE_MOVABLE block is reduced to 1/3. There was no observation of growing number of unmovable pageblocks over time, and also not of increased movable allocation fallbacks. Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Cc: Zhang Yanfei Cc: Minchan Kim Cc: David Rientjes Cc: Rik van Riel Cc: "Aneesh Kumar K.V" Cc: "Kirill A. Shutemov" Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Michal Hocko Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7c44b49cec40c3..8d52ab18fe0dac 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1123,10 +1123,19 @@ static void change_pageblock_range(struct page *pageblock_page, } /* - * If breaking a large block of pages, move all free pages to the preferred - * allocation list. If falling back for a reclaimable kernel allocation, be - * more aggressive about taking ownership of free pages. If we claim more than - * half of the pageblock, change pageblock's migratetype as well. + * When we are falling back to another migratetype during allocation, try to + * steal extra free pages from the same pageblocks to satisfy further + * allocations, instead of polluting multiple pageblocks. + * + * If we are stealing a relatively large buddy page, it is likely there will + * be more free pages in the pageblock, so try to steal them all. For + * reclaimable and unmovable allocations, we steal regardless of page size, + * as fragmentation caused by those allocations polluting movable pageblocks + * is worse than movable allocations stealing from unmovable and reclaimable + * pageblocks. + * + * If we claim more than half of the pageblock, change pageblock's migratetype + * as well. */ static void try_to_steal_freepages(struct zone *zone, struct page *page, int start_type, int fallback_type) @@ -1141,6 +1150,7 @@ static void try_to_steal_freepages(struct zone *zone, struct page *page, if (current_order >= pageblock_order / 2 || start_type == MIGRATE_RECLAIMABLE || + start_type == MIGRATE_UNMOVABLE || page_group_by_mobility_disabled) { int pages; From ba4877b9ca51f80b5d30f304a46762f0509e1635 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 11 Feb 2015 15:28:24 -0800 Subject: [PATCH 595/601] vmstat: do not use deferrable delayed work for vmstat_update Vinayak Menon has reported that an excessive number of tasks was throttled in the direct reclaim inside too_many_isolated() because NR_ISOLATED_FILE was relatively high compared to NR_INACTIVE_FILE. However it turned out that the real number of NR_ISOLATED_FILE was 0 and the per-cpu vm_stat_diff wasn't transferred into the global counter. vmstat_work which is responsible for the sync is defined as deferrable delayed work which means that the defined timeout doesn't wake up an idle CPU. A CPU might stay in an idle state for a long time and general effort is to keep such a CPU in this state as long as possible which might lead to all sorts of troubles for vmstat consumers as can be seen with the excessive direct reclaim throttling. This patch basically reverts 39bf6270f524 ("VM statistics: Make timer deferrable") but it shouldn't cause any problems for idle CPUs because only CPUs with an active per-cpu drift are woken up since 7cc36bbddde5 ("vmstat: on-demand vmstat workers v8") and CPUs which are idle for a longer time shouldn't have per-cpu drift. Fixes: 39bf6270f524 (VM statistics: Make timer deferrable) Signed-off-by: Michal Hocko Reported-by: Vinayak Menon Acked-by: Christoph Lameter Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Mel Gorman Cc: Minchan Kim Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmstat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 9943e5fd74e622..470cdd5b924b05 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1452,7 +1452,7 @@ static void __init start_shepherd_timer(void) int cpu; for_each_possible_cpu(cpu) - INIT_DEFERRABLE_WORK(per_cpu_ptr(&vmstat_work, cpu), + INIT_DELAYED_WORK(per_cpu_ptr(&vmstat_work, cpu), vmstat_update); if (!alloc_cpumask_var(&cpu_stat_off, GFP_KERNEL)) From 10359213d05acf804558bda7cc9b8422a828d1cd Mon Sep 17 00:00:00 2001 From: Ebru Akagunduz Date: Wed, 11 Feb 2015 15:28:28 -0800 Subject: [PATCH 596/601] mm: incorporate read-only pages into transparent huge pages This patch aims to improve THP collapse rates, by allowing THP collapse in the presence of read-only ptes, like those left in place by do_swap_page after a read fault. Currently THP can collapse 4kB pages into a THP when there are up to khugepaged_max_ptes_none pte_none ptes in a 2MB range. This patch applies the same limit for read-only ptes. The patch was tested with a test program that allocates 800MB of memory, writes to it, and then sleeps. I force the system to swap out all but 190MB of the program by touching other memory. Afterwards, the test program does a mix of reads and writes to its memory, and the memory gets swapped back in. Without the patch, only the memory that did not get swapped out remained in THPs, which corresponds to 24% of the memory of the program. The percentage did not increase over time. With this patch, after 5 minutes of waiting khugepaged had collapsed 50% of the program's memory back into THPs. Test results: With the patch: After swapped out: cat /proc/pid/smaps: Anonymous: 100464 kB AnonHugePages: 100352 kB Swap: 699540 kB Fraction: 99,88 cat /proc/meminfo: AnonPages: 1754448 kB AnonHugePages: 1716224 kB Fraction: 97,82 After swapped in: In a few seconds: cat /proc/pid/smaps: Anonymous: 800004 kB AnonHugePages: 145408 kB Swap: 0 kB Fraction: 18,17 cat /proc/meminfo: AnonPages: 2455016 kB AnonHugePages: 1761280 kB Fraction: 71,74 In 5 minutes: cat /proc/pid/smaps Anonymous: 800004 kB AnonHugePages: 407552 kB Swap: 0 kB Fraction: 50,94 cat /proc/meminfo: AnonPages: 2456872 kB AnonHugePages: 2023424 kB Fraction: 82,35 Without the patch: After swapped out: cat /proc/pid/smaps: Anonymous: 190660 kB AnonHugePages: 190464 kB Swap: 609344 kB Fraction: 99,89 cat /proc/meminfo: AnonPages: 1740456 kB AnonHugePages: 1667072 kB Fraction: 95,78 After swapped in: cat /proc/pid/smaps: Anonymous: 800004 kB AnonHugePages: 190464 kB Swap: 0 kB Fraction: 23,80 cat /proc/meminfo: AnonPages: 2350032 kB AnonHugePages: 1667072 kB Fraction: 70,93 I waited 10 minutes the fractions did not change without the patch. Signed-off-by: Ebru Akagunduz Reviewed-by: Rik van Riel Acked-by: Vlastimil Babka Acked-by: Zhang Yanfei Acked-by: Andrea Arcangeli Acked-by: Kirill A. Shutemov Cc: Michal Hocko Cc: Mel Gorman Cc: David Rientjes Cc: Sasha Levin Cc: Hugh Dickins Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 55 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 29bc6e471df467..cb7be110cad33a 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2117,7 +2117,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, { struct page *page; pte_t *_pte; - int referenced = 0, none = 0; + int none = 0; + bool referenced = false, writable = false; for (_pte = pte; _pte < pte+HPAGE_PMD_NR; _pte++, address += PAGE_SIZE) { pte_t pteval = *_pte; @@ -2127,7 +2128,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, else goto out; } - if (!pte_present(pteval) || !pte_write(pteval)) + if (!pte_present(pteval)) goto out; page = vm_normal_page(vma, address, pteval); if (unlikely(!page)) @@ -2137,9 +2138,6 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, VM_BUG_ON_PAGE(!PageAnon(page), page); VM_BUG_ON_PAGE(!PageSwapBacked(page), page); - /* cannot use mapcount: can't collapse if there's a gup pin */ - if (page_count(page) != 1) - goto out; /* * We can do it before isolate_lru_page because the * page can't be freed from under us. NOTE: PG_lock @@ -2148,6 +2146,29 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, */ if (!trylock_page(page)) goto out; + + /* + * cannot use mapcount: can't collapse if there's a gup pin. + * The page must only be referenced by the scanned process + * and page swap cache. + */ + if (page_count(page) != 1 + !!PageSwapCache(page)) { + unlock_page(page); + goto out; + } + if (pte_write(pteval)) { + writable = true; + } else { + if (PageSwapCache(page) && !reuse_swap_page(page)) { + unlock_page(page); + goto out; + } + /* + * Page is not in the swap cache. It can be collapsed + * into a THP. + */ + } + /* * Isolate the page to avoid collapsing an hugepage * currently in use by the VM. @@ -2164,9 +2185,9 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, /* If there is no mapped pte young don't collapse the page */ if (pte_young(pteval) || PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm, address)) - referenced = 1; + referenced = true; } - if (likely(referenced)) + if (likely(referenced && writable)) return 1; out: release_pte_pages(pte, _pte); @@ -2519,11 +2540,12 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, { pmd_t *pmd; pte_t *pte, *_pte; - int ret = 0, referenced = 0, none = 0; + int ret = 0, none = 0; struct page *page; unsigned long _address; spinlock_t *ptl; int node = NUMA_NO_NODE; + bool writable = false, referenced = false; VM_BUG_ON(address & ~HPAGE_PMD_MASK); @@ -2542,8 +2564,11 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, else goto out_unmap; } - if (!pte_present(pteval) || !pte_write(pteval)) + if (!pte_present(pteval)) goto out_unmap; + if (pte_write(pteval)) + writable = true; + page = vm_normal_page(vma, _address, pteval); if (unlikely(!page)) goto out_unmap; @@ -2560,14 +2585,18 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, VM_BUG_ON_PAGE(PageCompound(page), page); if (!PageLRU(page) || PageLocked(page) || !PageAnon(page)) goto out_unmap; - /* cannot use mapcount: can't collapse if there's a gup pin */ - if (page_count(page) != 1) + /* + * cannot use mapcount: can't collapse if there's a gup pin. + * The page must only be referenced by the scanned process + * and page swap cache. + */ + if (page_count(page) != 1 + !!PageSwapCache(page)) goto out_unmap; if (pte_young(pteval) || PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm, address)) - referenced = 1; + referenced = true; } - if (referenced) + if (referenced && writable) ret = 1; out_unmap: pte_unmap_unlock(pte, ptl); From 740a5ddb0e0d2ef3bd0a80ae027bf9d211b8c82d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 11 Feb 2015 15:28:31 -0800 Subject: [PATCH 597/601] Documentation/filesystems/proc.txt: describe /proc//map_files [akpm@linux-foundation.org: tweaks] Signed-off-by: Cyrill Gorcunov Cc: Kees Cook Cc: "Kirill A. Shutemov" Cc: Calvin Owens Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index aae9dd13c91f5d..6d59ffe791ada7 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -42,6 +42,7 @@ Table of Contents 3.6 /proc//comm & /proc//task//comm 3.7 /proc//task//children - Information about task children 3.8 /proc//fdinfo/ - Information about opened file + 3.9 /proc//map_files - Information about memory mapped files 4 Configuring procfs 4.1 Mount options @@ -1763,6 +1764,28 @@ pair provide additional information particular to the objects they represent. with TIMER_ABSTIME option which will be shown in 'settime flags', but 'it_value' still exhibits timer's remaining time. +3.9 /proc//map_files - Information about memory mapped files +--------------------------------------------------------------------- +This directory contains symbolic links which represent memory mapped files +the process is maintaining. Example output: + + | lr-------- 1 root root 64 Jan 27 11:24 333c600000-333c620000 -> /usr/lib64/ld-2.18.so + | lr-------- 1 root root 64 Jan 27 11:24 333c81f000-333c820000 -> /usr/lib64/ld-2.18.so + | lr-------- 1 root root 64 Jan 27 11:24 333c820000-333c821000 -> /usr/lib64/ld-2.18.so + | ... + | lr-------- 1 root root 64 Jan 27 11:24 35d0421000-35d0422000 -> /usr/lib64/libselinux.so.1 + | lr-------- 1 root root 64 Jan 27 11:24 400000-41a000 -> /usr/bin/ls + +The name of a link represents the virtual memory bounds of a mapping, i.e. +vm_area_struct::vm_start-vm_area_struct::vm_end. + +The main purpose of the map_files is to retrieve a set of memory mapped +files in a fast way instead of parsing /proc//maps or +/proc//smaps, both of which contain many more records. At the same +time one can open(2) mappings from the listings of two processes and +comparing their inode numbers to figure out which anonymous memory areas +are actually shared. + ------------------------------------------------------------------------------ Configuring procfs ------------------------------------------------------------------------------ From 94f759d62b2c6a9d124b0622077b1ddcfac43fb5 Mon Sep 17 00:00:00 2001 From: Sergei Rogachev Date: Wed, 11 Feb 2015 15:28:34 -0800 Subject: [PATCH 598/601] mm/page_owner.c: remove unnecessary stack_trace field Page owner uses the page_ext structure to keep meta-information for every page in the system. The structure also contains a field of type 'struct stack_trace', page owner uses this field during invocation of the function save_stack_trace. It is easy to notice that keeping a copy of this structure for every page in the system is very inefficiently in terms of memory. The patch removes this unnecessary field of page_ext and forces page owner to use a stack_trace structure allocated on the stack. [akpm@linux-foundation.org: use struct initializers] Signed-off-by: Sergei Rogachev Acked-by: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page_ext.h | 2 +- mm/page_owner.c | 26 ++++++++++++++------------ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index d2a2c84c72d04a..c42981cd99aae9 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -40,7 +40,7 @@ struct page_ext { #ifdef CONFIG_PAGE_OWNER unsigned int order; gfp_t gfp_mask; - struct stack_trace trace; + unsigned int nr_entries; unsigned long trace_entries[8]; #endif }; diff --git a/mm/page_owner.c b/mm/page_owner.c index 9ab4a9b5bc091b..0993f5f36b011e 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -59,20 +59,19 @@ void __reset_page_owner(struct page *page, unsigned int order) void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask) { - struct page_ext *page_ext; - struct stack_trace *trace; - - page_ext = lookup_page_ext(page); + struct page_ext *page_ext = lookup_page_ext(page); + struct stack_trace trace = { + .nr_entries = 0, + .max_entries = ARRAY_SIZE(page_ext->trace_entries), + .entries = &page_ext->trace_entries[0], + .skip = 3, + }; - trace = &page_ext->trace; - trace->nr_entries = 0; - trace->max_entries = ARRAY_SIZE(page_ext->trace_entries); - trace->entries = &page_ext->trace_entries[0]; - trace->skip = 3; - save_stack_trace(&page_ext->trace); + save_stack_trace(&trace); page_ext->order = order; page_ext->gfp_mask = gfp_mask; + page_ext->nr_entries = trace.nr_entries; __set_bit(PAGE_EXT_OWNER, &page_ext->flags); } @@ -84,6 +83,10 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, int ret; int pageblock_mt, page_mt; char *kbuf; + struct stack_trace trace = { + .nr_entries = page_ext->nr_entries, + .entries = &page_ext->trace_entries[0], + }; kbuf = kmalloc(count, GFP_KERNEL); if (!kbuf) @@ -121,8 +124,7 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, if (ret >= count) goto err; - ret += snprint_stack_trace(kbuf + ret, count - ret, - &page_ext->trace, 0); + ret += snprint_stack_trace(kbuf + ret, count - ret, &trace, 0); if (ret >= count) goto err; From 57c2e36b6f4dd52e7e90f4c748a665b13fa228d2 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Wed, 11 Feb 2015 15:28:36 -0800 Subject: [PATCH 599/601] vmstat: Reduce time interval to stat update on idle cpu It was noted that the vm stat shepherd runs every 2 seconds and that the vmstat update is then scheduled 2 seconds in the future. This yields an interval of double the time interval which is not desired. Change the shepherd so that it does not delay the vmstat update on the other cpu. We stil have to use schedule_delayed_work since we are using a delayed_work_struct but we can set the delay to 0. Signed-off-by: Christoph Lameter Acked-by: Michal Hocko Cc: Vinayak Menon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmstat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 470cdd5b924b05..4f5cd974e11a0a 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1437,8 +1437,8 @@ static void vmstat_shepherd(struct work_struct *w) if (need_update(cpu) && cpumask_test_and_clear_cpu(cpu, cpu_stat_off)) - schedule_delayed_work_on(cpu, &per_cpu(vmstat_work, cpu), - __round_jiffies_relative(sysctl_stat_interval, cpu)); + schedule_delayed_work_on(cpu, + &per_cpu(vmstat_work, cpu), 0); put_online_cpus(); From 5703b087dc8eaf47bfb399d6cf512d471beff405 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 11 Feb 2015 15:28:39 -0800 Subject: [PATCH 600/601] mm/mmap.c: fix arithmetic overflow in __vm_enough_memory() I noticed, that "allowed" can easily overflow by falling below 0, because (total_vm / 32) can be larger than "allowed". The problem occurs in OVERCOMMIT_NONE mode. In this case, a huge allocation can success and overcommit the system (despite OVERCOMMIT_NONE mode). All subsequent allocations will fall (system-wide), so system become unusable. The problem was masked out by commit c9b1d0981fcc ("mm: limit growth of 3% hardcoded other user reserve"), but it's easy to reproduce it on older kernels: 1) set overcommit_memory sysctl to 2 2) mmap() large file multiple times (with VM_SHARED flag) 3) try to malloc() large amount of memory It also can be reproduced on newer kernels, but miss-configured sysctl_user_reserve_kbytes is required. Fix this issue by switching to signed arithmetic here. [akpm@linux-foundation.org: use min_t] Signed-off-by: Roman Gushchin Cc: Andrew Shewmaker Cc: Rik van Riel Cc: Konstantin Khlebnikov Reviewed-by: Michal Hocko Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/mmap.c b/mm/mmap.c index c5f44682c0d175..da9990acc08b2d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -152,7 +152,7 @@ EXPORT_SYMBOL_GPL(vm_memory_committed); */ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) { - unsigned long free, allowed, reserve; + long free, allowed, reserve; VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) < -(s64)vm_committed_as_batch * num_online_cpus(), @@ -220,7 +220,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) */ if (mm) { reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10); - allowed -= min(mm->total_vm / 32, reserve); + allowed -= min_t(long, mm->total_vm / 32, reserve); } if (percpu_counter_read_positive(&vm_committed_as) < allowed) From 8138a67a5557ffea3a21dfd6f037842d4e748513 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Wed, 11 Feb 2015 15:28:42 -0800 Subject: [PATCH 601/601] mm/nommu.c: fix arithmetic overflow in __vm_enough_memory() I noticed that "allowed" can easily overflow by falling below 0, because (total_vm / 32) can be larger than "allowed". The problem occurs in OVERCOMMIT_NONE mode. In this case, a huge allocation can success and overcommit the system (despite OVERCOMMIT_NONE mode). All subsequent allocations will fall (system-wide), so system become unusable. The problem was masked out by commit c9b1d0981fcc ("mm: limit growth of 3% hardcoded other user reserve"), but it's easy to reproduce it on older kernels: 1) set overcommit_memory sysctl to 2 2) mmap() large file multiple times (with VM_SHARED flag) 3) try to malloc() large amount of memory It also can be reproduced on newer kernels, but miss-configured sysctl_user_reserve_kbytes is required. Fix this issue by switching to signed arithmetic here. Signed-off-by: Roman Gushchin Cc: Andrew Shewmaker Cc: Rik van Riel Cc: Konstantin Khlebnikov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/nommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/nommu.c b/mm/nommu.c index 4d1b8a19986770..1a19fb3b046355 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1928,7 +1928,7 @@ EXPORT_SYMBOL(unmap_mapping_range); */ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) { - unsigned long free, allowed, reserve; + long free, allowed, reserve; vm_acct_memory(pages); @@ -1992,7 +1992,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) */ if (mm) { reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10); - allowed -= min(mm->total_vm / 32, reserve); + allowed -= min_t(long, mm->total_vm / 32, reserve); } if (percpu_counter_read_positive(&vm_committed_as) < allowed)