diff options
| author | Eric Dumazet <eric.dumazet@gmail.com> | 2013-05-01 05:24:03 +0000 | 
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2013-05-01 15:13:49 -0400 | 
| commit | 60bc851ae59bfe99be6ee89d6bc50008c85ec75d (patch) | |
| tree | 5046b97e73431933b0205f5b9381fe09979ef2f4 | |
| parent | c3b28ea36946a22469a5519977a3b79428ded4af (diff) | |
| download | olio-linux-3.10-60bc851ae59bfe99be6ee89d6bc50008c85ec75d.tar.xz olio-linux-3.10-60bc851ae59bfe99be6ee89d6bc50008c85ec75d.zip  | |
af_unix: fix a fatal race with bit fields
Using bit fields is dangerous on ppc64/sparc64, as the compiler [1]
uses 64bit instructions to manipulate them.
If the 64bit word includes any atomic_t or spinlock_t, we can lose
critical concurrent changes.
This is happening in af_unix, where unix_sk(sk)->gc_candidate/
gc_maybe_cycle/lock share the same 64bit word.
This leads to fatal deadlock, as one/several cpus spin forever
on a spinlock that will never be available again.
A safer way would be to use a long to store flags.
This way we are sure compiler/arch wont do bad things.
As we own unix_gc_lock spinlock when clearing or setting bits,
we can use the non atomic __set_bit()/__clear_bit().
recursion_level can share the same 64bit location with the spinlock,
as it is set only with this spinlock held.
[1] bug fixed in gcc-4.8.0 :
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=52080
Reported-by: Ambrose Feinstein <ambrose@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
| -rw-r--r-- | include/net/af_unix.h | 5 | ||||
| -rw-r--r-- | net/unix/garbage.c | 12 | 
2 files changed, 9 insertions, 8 deletions
diff --git a/include/net/af_unix.h b/include/net/af_unix.h index a8836e8445c..dbdfd2b0f3b 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -57,9 +57,10 @@ struct unix_sock {  	struct list_head	link;  	atomic_long_t		inflight;  	spinlock_t		lock; -	unsigned int		gc_candidate : 1; -	unsigned int		gc_maybe_cycle : 1;  	unsigned char		recursion_level; +	unsigned long		gc_flags; +#define UNIX_GC_CANDIDATE	0 +#define UNIX_GC_MAYBE_CYCLE	1  	struct socket_wq	peer_wq;  };  #define unix_sk(__sk) ((struct unix_sock *)__sk) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index d0f6545b001..9bc73f87f64 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -185,7 +185,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),  					 * have been added to the queues after  					 * starting the garbage collection  					 */ -					if (u->gc_candidate) { +					if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {  						hit = true;  						func(u);  					} @@ -254,7 +254,7 @@ static void inc_inflight_move_tail(struct unix_sock *u)  	 * of the list, so that it's checked even if it was already  	 * passed over  	 */ -	if (u->gc_maybe_cycle) +	if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags))  		list_move_tail(&u->link, &gc_candidates);  } @@ -315,8 +315,8 @@ void unix_gc(void)  		BUG_ON(total_refs < inflight_refs);  		if (total_refs == inflight_refs) {  			list_move_tail(&u->link, &gc_candidates); -			u->gc_candidate = 1; -			u->gc_maybe_cycle = 1; +			__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); +			__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);  		}  	} @@ -344,7 +344,7 @@ void unix_gc(void)  		if (atomic_long_read(&u->inflight) > 0) {  			list_move_tail(&u->link, ¬_cycle_list); -			u->gc_maybe_cycle = 0; +			__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);  			scan_children(&u->sk, inc_inflight_move_tail, NULL);  		}  	} @@ -356,7 +356,7 @@ void unix_gc(void)  	 */  	while (!list_empty(¬_cycle_list)) {  		u = list_entry(not_cycle_list.next, struct unix_sock, link); -		u->gc_candidate = 0; +		__clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags);  		list_move_tail(&u->link, &gc_inflight_list);  	}  |