Diffstat (limited to 'include/net/sock.h')
-rw-r--r--	include/net/sock.h	132
1 file changed, 107 insertions(+), 25 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 56df440a950..328e03f47dd 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -74,7 +74,7 @@
 					printk(KERN_DEBUG msg); } while (0)
 #else
 /* Validate arguments and do nothing */
-static void inline int __attribute__ ((format (printf, 2, 3)))
+static inline void __attribute__ ((format (printf, 2, 3)))
 SOCK_DEBUG(struct sock *sk, const char *msg, ...)
 {
 }
@@ -159,7 +159,7 @@ struct sock_common {
   *	@sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
   *	@sk_lock:	synchronizer
   *	@sk_rcvbuf: size of receive buffer in bytes
-  *	@sk_sleep: sock wait queue
+  *	@sk_wq: sock wait queue and async head
   *	@sk_dst_cache: destination cache
   *	@sk_dst_lock: destination cache lock
   *	@sk_policy: flow policy
@@ -198,6 +198,7 @@ struct sock_common {
   *	@sk_rcvlowat: %SO_RCVLOWAT setting
   *	@sk_rcvtimeo: %SO_RCVTIMEO setting
   *	@sk_sndtimeo: %SO_SNDTIMEO setting
+  *	@sk_rxhash: flow hash received from netif layer
   *	@sk_filter: socket filtering instructions
   *	@sk_protinfo: private area, net family specific, when not using slab
   *	@sk_timer: sock cleanup timer
@@ -255,9 +256,8 @@ struct sock {
 		struct sk_buff *head;
 		struct sk_buff *tail;
 		int len;
-		int limit;
 	} sk_backlog;
-	wait_queue_head_t	*sk_sleep;
+	struct socket_wq	*sk_wq;
 	struct dst_entry	*sk_dst_cache;
 #ifdef CONFIG_XFRM
 	struct xfrm_policy	*sk_policy[2];
@@ -279,6 +279,9 @@ struct sock {
 	int			sk_gso_type;
 	unsigned int		sk_gso_max_size;
 	int			sk_rcvlowat;
+#ifdef CONFIG_RPS
+	__u32			sk_rxhash;
+#endif
 	unsigned long 		sk_flags;
 	unsigned long	        sk_lingertime;
 	struct sk_buff_head	sk_error_queue;
@@ -604,10 +607,20 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 	skb->next = NULL;
 }
 
+/*
+ * Take into account size of receive queue and backlog queue
+ */
+static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb)
+{
+	unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);
+
+	return qsize + skb->truesize > sk->sk_rcvbuf;
+}
+
 /* The per-socket spinlock must be held here. */
 static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
-	if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1))
+	if (sk_rcvqueues_full(sk, skb))
 		return -ENOBUFS;
 
 	__sk_add_backlog(sk, skb);
@@ -620,6 +633,40 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 	return sk->sk_backlog_rcv(sk, skb);
 }
 
+static inline void sock_rps_record_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+	struct rps_sock_flow_table *sock_flow_table;
+
+	rcu_read_lock();
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	rps_record_sock_flow(sock_flow_table, sk->sk_rxhash);
+	rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_reset_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+	struct rps_sock_flow_table *sock_flow_table;
+
+	rcu_read_lock();
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	rps_reset_sock_flow(sock_flow_table, sk->sk_rxhash);
+	rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_save_rxhash(struct sock *sk, u32 rxhash)
+{
+#ifdef CONFIG_RPS
+	if (unlikely(sk->sk_rxhash != rxhash)) {
+		sock_rps_reset_flow(sk);
+		sk->sk_rxhash = rxhash;
+	}
+#endif
+}
+
 #define sk_wait_event(__sk, __timeo, __condition)			\
 	({	int __rc;						\
 		release_sock(__sk);					\
@@ -974,6 +1021,16 @@ extern void release_sock(struct sock *sk);
 				SINGLE_DEPTH_NESTING)
 #define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->sk_lock.slock))
 
+static inline void lock_sock_bh(struct sock *sk)
+{
+	spin_lock_bh(&sk->sk_lock.slock);
+}
+
+static inline void unlock_sock_bh(struct sock *sk)
+{
+	spin_unlock_bh(&sk->sk_lock.slock);
+}
+
 extern struct sock		*sk_alloc(struct net *net, int family,
 					  gfp_t priority,
 					  struct proto *prot);
@@ -1160,6 +1217,10 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 	sk->sk_socket = sock;
 }
 
+static inline wait_queue_head_t *sk_sleep(struct sock *sk)
+{
+	return &sk->sk_wq->wait;
+}
 /* Detach socket from process context.
  * Announce socket dead, detach it from wait queue and inode.
  * Note that parent inode held reference count on this struct sock,
@@ -1172,14 +1233,14 @@ static inline void sock_orphan(struct sock *sk)
 	write_lock_bh(&sk->sk_callback_lock);
 	sock_set_flag(sk, SOCK_DEAD);
 	sk_set_socket(sk, NULL);
-	sk->sk_sleep  = NULL;
+	sk->sk_wq  = NULL;
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
 static inline void sock_graft(struct sock *sk, struct socket *parent)
 {
 	write_lock_bh(&sk->sk_callback_lock);
-	sk->sk_sleep = &parent->wait;
+	rcu_assign_pointer(sk->sk_wq, parent->wq);
 	parent->sk = sk;
 	sk_set_socket(sk, parent);
 	security_sock_graft(sk, parent);
@@ -1193,7 +1254,8 @@ static inline struct dst_entry *
 __sk_dst_get(struct sock *sk)
 {
 	return rcu_dereference_check(sk->sk_dst_cache, rcu_read_lock_held() ||
-						       sock_owned_by_user(sk));
+						       sock_owned_by_user(sk) ||
+						       lockdep_is_held(&sk->sk_lock.slock));
 }
 
 static inline struct dst_entry *
@@ -1231,8 +1293,11 @@ __sk_dst_set(struct sock *sk, struct dst_entry *dst)
 	struct dst_entry *old_dst;
 
 	sk_tx_queue_clear(sk);
-	old_dst = rcu_dereference_check(sk->sk_dst_cache,
-					lockdep_is_held(&sk->sk_dst_lock));
+	/*
+	 * This can be called while sk is owned by the caller only,
+	 * with no state that can be checked in a rcu_dereference_check() cond
+	 */
+	old_dst = rcu_dereference_raw(sk->sk_dst_cache);
 	rcu_assign_pointer(sk->sk_dst_cache, dst);
 	dst_release(old_dst);
 }
@@ -1327,12 +1392,12 @@ static inline int sk_has_allocations(const struct sock *sk)
 }
 
 /**
- * sk_has_sleeper - check if there are any waiting processes
- * @sk: socket
+ * wq_has_sleeper - check if there are any waiting processes
+ * @sk: struct socket_wq
  *
- * Returns true if socket has waiting processes
+ * Returns true if socket_wq has waiting processes
  *
- * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory
+ * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory
  * barrier call. They were added due to the race found within the tcp code.
  *
  * Consider following tcp code paths:
@@ -1345,9 +1410,10 @@ static inline int sk_has_allocations(const struct sock *sk)
  *   ...                 ...
  *   tp->rcv_nxt check   sock_def_readable
  *   ...                 {
- *   schedule               ...
- *                          if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- *                              wake_up_interruptible(sk->sk_sleep)
+ *   schedule               rcu_read_lock();
+ *                          wq = rcu_dereference(sk->sk_wq);
+ *                          if (wq && waitqueue_active(&wq->wait))
+ *                              wake_up_interruptible(&wq->wait)
 *                          ...
 *                       }
 *
@@ -1356,19 +1422,18 @@ static inline int sk_has_allocations(const struct sock *sk)
 * could then endup calling schedule and sleep forever if there are no more
 * data on the socket.
 *
- * The sk_has_sleeper is always called right after a call to read_lock, so we
- * can use smp_mb__after_lock barrier.
 */
-static inline int sk_has_sleeper(struct sock *sk)
+static inline bool wq_has_sleeper(struct socket_wq *wq)
 {
+
 	/*
 	 * We need to be sure we are in sync with the
 	 * add_wait_queue modifications to the wait queue.
 	 *
 	 * This memory barrier is paired in the sock_poll_wait.
 	 */
-	smp_mb__after_lock();
-	return sk->sk_sleep && waitqueue_active(sk->sk_sleep);
+	smp_mb();
+	return wq && waitqueue_active(&wq->wait);
 }
 
 /**
@@ -1377,7 +1442,7 @@ static inline int sk_has_sleeper(struct sock *sk)
 * @wait_address:   socket wait queue
 * @p:              poll_table
 *
- * See the comments in the sk_has_sleeper function.
+ * See the comments in the wq_has_sleeper function.
 */
 static inline void sock_poll_wait(struct file *filp,
 		wait_queue_head_t *wait_address, poll_table *p)
@@ -1388,7 +1453,7 @@ static inline void sock_poll_wait(struct file *filp,
 		 * We need to be sure we are in sync with the
 		 * socket flags modification.
 		 *
-		 * This memory barrier is paired in the sk_has_sleeper.
+		 * This memory barrier is paired in the wq_has_sleeper.
 		*/
 		smp_mb();
 	}
@@ -1570,7 +1635,24 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 		sk->sk_stamp = kt;
 }
 
-extern void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb);
+extern void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+				     struct sk_buff *skb);
+
+static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+					  struct sk_buff *skb)
+{
+#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL)			| \
+			   (1UL << SOCK_RCVTSTAMP)			| \
+			   (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)	| \
+			   (1UL << SOCK_TIMESTAMPING_SOFTWARE)		| \
+			   (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE)	| \
+			   (1UL << SOCK_TIMESTAMPING_SYS_HARDWARE))
+
+	if (sk->sk_flags & FLAGS_TS_OR_DROPS)
+		__sock_recv_ts_and_drops(msg, sk, skb);
+	else
+		sk->sk_stamp = skb->tstamp;
+}
 
 /**
 * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
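
Note (not part of the patch): with sk_sleep replaced by the RCU-managed sk_wq pointer, sleepers reach the wait queue through sk_sleep()/sock_poll_wait(), while wakers dereference sk->sk_wq under rcu_read_lock() and test it with wq_has_sleeper(), whose smp_mb() pairs with the barrier in sock_poll_wait(). The following is a minimal, illustrative sketch of that pairing, loosely modeled on the default socket callbacks of this kernel generation; the example_* names are hypothetical and nothing below is added by this diff.

#include <linux/poll.h>
#include <net/sock.h>

/* Wakeup side: look up the wait queue through the RCU pointer and only
 * wake if wq_has_sleeper() says someone is (or may be) waiting. */
static void example_data_ready(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);	/* may be NULL after sock_orphan() */
	if (wq_has_sleeper(wq))			/* smp_mb() pairs with sock_poll_wait() */
		wake_up_interruptible_poll(&wq->wait, POLLIN | POLLRDNORM);
	rcu_read_unlock();
}

/* Sleep/poll side: register on the queue behind sk->sk_wq; sock_poll_wait()
 * issues the paired barrier before the flag/queue checks below. */
static unsigned int example_poll(struct file *file, struct socket *sock,
				 poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask = 0;

	sock_poll_wait(file, sk_sleep(sk), wait);

	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;
	return mask;
}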
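
Likewise, the new RPS/RFS helpers are meant to be called from protocol code: the receive path saves the netif-computed flow hash with sock_rps_save_rxhash(), the recvmsg()/sendmsg() paths record the flow with sock_rps_record_flow() so Receive Flow Steering can steer later packets toward the consuming CPU, and sk_rcvqueues_full() lets the receive path drop early once receive queue plus backlog exceed sk_rcvbuf. A hypothetical sketch of those call sites (example_* names are illustrative, not from this diff):

#include <net/sock.h>

/* Receive side: remember the flow hash carried in skb->rxhash and enforce
 * the combined receive-queue + backlog limit before queueing. */
static int example_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	sock_rps_save_rxhash(sk, skb->rxhash);

	if (sk_rcvqueues_full(sk, skb)) {
		atomic_inc(&sk->sk_drops);	/* account the drop */
		kfree_skb(skb);
		return -ENOBUFS;
	}
	return sock_queue_rcv_skb(sk, skb);
}

/* recvmsg()/sendmsg() entry: note which CPU the consuming task runs on so
 * RFS can steer later packets of this flow toward it. */
static void example_note_consumer(struct sock *sk)
{
	sock_rps_record_flow(sk);
}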