Diffstat (limited to 'net')
422 files changed, 22549 insertions, 15230 deletions
diff --git a/net/802/garp.c b/net/802/garp.c
index 8456f5d98b8..5d9630a0eb9 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -609,8 +609,12 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl
 	/* Delete timer and generate a final TRANSMIT_PDU event to flush out
 	 * all pending messages before the applicant is gone. */
 	del_timer_sync(&app->join_timer);
+
+	spin_lock_bh(&app->lock);
 	garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU);
 	garp_pdu_queue(app);
+	spin_unlock_bh(&app->lock);
+
 	garp_queue_xmit(app);
 
 	dev_mc_del(dev, appl->proto.group_address);
diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig
index 8f7517df41a..b85a91fa61f 100644
--- a/net/8021q/Kconfig
+++ b/net/8021q/Kconfig
@@ -3,7 +3,7 @@
 #
 
 config VLAN_8021Q
-	tristate "802.1Q VLAN Support"
+	tristate "802.1Q/802.1ad VLAN Support"
 	---help---
 	  Select this and you will be able to create 802.1Q VLAN interfaces
 	  on your ethernet interfaces.  802.1Q VLAN supports almost
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 85addcd9372..9424f3718ea 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -51,14 +51,18 @@ const char vlan_version[] = DRV_VERSION;
 
 /* End of global variables definitions. */
 
-static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
+static int vlan_group_prealloc_vid(struct vlan_group *vg,
+				   __be16 vlan_proto, u16 vlan_id)
 {
 	struct net_device **array;
+	unsigned int pidx, vidx;
 	unsigned int size;
 
 	ASSERT_RTNL();
 
-	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+	pidx  = vlan_proto_idx(vlan_proto);
+	vidx  = vlan_id / VLAN_GROUP_ARRAY_PART_LEN;
+	array = vg->vlan_devices_arrays[pidx][vidx];
 	if (array != NULL)
 		return 0;
@@ -67,7 +71,7 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
 	if (array == NULL)
 		return -ENOBUFS;
 
-	vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN] = array;
+	vg->vlan_devices_arrays[pidx][vidx] = array;
 	return 0;
 }
@@ -93,7 +97,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	if (vlan->flags & VLAN_FLAG_GVRP)
 		vlan_gvrp_request_leave(dev);
 
-	vlan_group_set_device(grp, vlan_id, NULL);
+	vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, NULL);
 	/* Because unregister_netdevice_queue() makes sure at least one rcu
 	 * grace period is respected before device freeing,
 	 * we dont need to call synchronize_net() here.
@@ -112,13 +116,14 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	 * VLAN is not 0 (leave it there for 802.1p).
 	 */
 	if (vlan_id)
-		vlan_vid_del(real_dev, vlan_id);
+		vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
 
 	/* Get rid of the vlan's reference to real_dev */
 	dev_put(real_dev);
 }
 
-int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
+int vlan_check_real_dev(struct net_device *real_dev,
+			__be16 protocol, u16 vlan_id)
 {
 	const char *name = real_dev->name;
@@ -127,7 +132,7 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
 		return -EOPNOTSUPP;
 	}
 
-	if (vlan_find_dev(real_dev, vlan_id) != NULL)
+	if (vlan_find_dev(real_dev, protocol, vlan_id) != NULL)
 		return -EEXIST;
 
 	return 0;
@@ -142,7 +147,7 @@ int register_vlan_dev(struct net_device *dev)
 	struct vlan_group *grp;
 	int err;
 
-	err = vlan_vid_add(real_dev, vlan_id);
+	err = vlan_vid_add(real_dev, vlan->vlan_proto, vlan_id);
 	if (err)
 		return err;
@@ -160,7 +165,7 @@ int register_vlan_dev(struct net_device *dev)
 			goto out_uninit_gvrp;
 	}
 
-	err = vlan_group_prealloc_vid(grp, vlan_id);
+	err = vlan_group_prealloc_vid(grp, vlan->vlan_proto, vlan_id);
 	if (err < 0)
 		goto out_uninit_mvrp;
@@ -181,7 +186,7 @@ int register_vlan_dev(struct net_device *dev)
 	/* So, got the sucker initialized, now lets place
 	 * it into our local structure.
 	 */
-	vlan_group_set_device(grp, vlan_id, dev);
+	vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, dev);
 	grp->nr_vlan_devs++;
 
 	return 0;
@@ -195,7 +200,7 @@ out_uninit_gvrp:
 	if (grp->nr_vlan_devs == 0)
 		vlan_gvrp_uninit_applicant(real_dev);
 out_vid_del:
-	vlan_vid_del(real_dev, vlan_id);
+	vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id);
 	return err;
 }
@@ -213,7 +218,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	if (vlan_id >= VLAN_VID_MASK)
 		return -ERANGE;
 
-	err = vlan_check_real_dev(real_dev, vlan_id);
+	err = vlan_check_real_dev(real_dev, htons(ETH_P_8021Q), vlan_id);
 	if (err < 0)
 		return err;
@@ -255,6 +260,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	new_dev->mtu = real_dev->mtu;
 	new_dev->priv_flags |= (real_dev->priv_flags & IFF_UNICAST_FLT);
 
+	vlan_dev_priv(new_dev)->vlan_proto = htons(ETH_P_8021Q);
 	vlan_dev_priv(new_dev)->vlan_id = vlan_id;
 	vlan_dev_priv(new_dev)->real_dev = real_dev;
 	vlan_dev_priv(new_dev)->dent = NULL;
@@ -301,7 +307,7 @@ static void vlan_transfer_features(struct net_device *dev,
 {
 	vlandev->gso_max_size = dev->gso_max_size;
 
-	if (dev->features & NETIF_F_HW_VLAN_TX)
+	if (dev->features & NETIF_F_HW_VLAN_CTAG_TX)
 		vlandev->hard_header_len = dev->hard_header_len;
 	else
 		vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN;
@@ -341,16 +347,17 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	int i, flgs;
 	struct net_device *vlandev;
 	struct vlan_dev_priv *vlan;
+	bool last = false;
 	LIST_HEAD(list);
 
 	if (is_vlan_dev(dev))
 		__vlan_device_event(dev, event);
 
 	if ((event == NETDEV_UP) &&
-	    (dev->features & NETIF_F_HW_VLAN_FILTER)) {
+	    (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
 		pr_info("adding VLAN 0 to HW filter on device %s\n",
 			dev->name);
-		vlan_vid_add(dev, 0);
+		vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
 	}
 
 	vlan_info = rtnl_dereference(dev->vlan_info);
@@ -365,22 +372,13 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	switch (event) {
 	case NETDEV_CHANGE:
 		/* Propagate real device state to vlan devices */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev)
 			netif_stacked_transfer_operstate(dev, vlandev);
-		}
 		break;
 
 	case NETDEV_CHANGEADDR:
 		/* Adjust unicast filters on underlying device */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev) {
 			flgs = vlandev->flags;
 			if (!(flgs & IFF_UP))
 				continue;
@@ -390,11 +388,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		break;
 
 	case NETDEV_CHANGEMTU:
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev) {
 			if (vlandev->mtu <= dev->mtu)
 				continue;
@@ -404,26 +398,16 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_FEAT_CHANGE:
 		/* Propagate device features to underlying device */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev)
 			vlan_transfer_features(dev, vlandev);
-		}
-
 		break;
 
 	case NETDEV_DOWN:
-		if (dev->features & NETIF_F_HW_VLAN_FILTER)
-			vlan_vid_del(dev, 0);
+		if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
+			vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
 
 		/* Put all VLANs for this dev in the down state too.  */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev) {
 			flgs = vlandev->flags;
 			if (!(flgs & IFF_UP))
 				continue;
@@ -437,11 +421,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_UP:
 		/* Put all VLANs for this dev in the up state too.  */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev) {
 			flgs = vlandev->flags;
 			if (flgs & IFF_UP)
 				continue;
@@ -458,17 +438,15 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		if (dev->reg_state != NETREG_UNREGISTERING)
 			break;
 
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev) {
 			/* removal of last vid destroys vlan_info, abort
 			 * afterwards */
 			if (vlan_info->nr_vids == 1)
-				i = VLAN_N_VID;
+				last = true;
 
 			unregister_vlan_dev(vlandev, &list);
+			if (last)
+				break;
 		}
 		unregister_netdevice_many(&list);
 		break;
@@ -482,13 +460,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	case NETDEV_NOTIFY_PEERS:
 	case NETDEV_BONDING_FAILOVER:
 		/* Propagate to vlan devices */
-		for (i = 0; i < VLAN_N_VID; i++) {
-			vlandev = vlan_group_get_device(grp, i);
-			if (!vlandev)
-				continue;
-
+		vlan_group_for_each_dev(grp, i, vlandev)
 			call_netdevice_notifiers(event, vlandev);
-		}
 		break;
 	}
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 670f1e8cfc0..ba5983f34c4 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -49,6 +49,7 @@ struct netpoll;
  *	@ingress_priority_map: ingress priority mappings
  *	@nr_egress_mappings: number of egress priority mappings
  *	@egress_priority_map: hash of egress priority mappings
+ *	@vlan_proto: VLAN encapsulation protocol
  *	@vlan_id: VLAN identifier
  *	@flags: device flags
  *	@real_dev: underlying netdevice
@@ -62,6 +63,7 @@ struct vlan_dev_priv {
 	unsigned int				nr_egress_mappings;
 	struct vlan_priority_tci_mapping	*egress_priority_map[16];
 
+	__be16					vlan_proto;
 	u16					vlan_id;
 	u16					flags;
@@ -87,10 +89,17 @@ static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
 #define VLAN_GROUP_ARRAY_SPLIT_PARTS  8
 #define VLAN_GROUP_ARRAY_PART_LEN     (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS)
 
+enum vlan_protos {
+	VLAN_PROTO_8021Q	= 0,
+	VLAN_PROTO_8021AD,
+	VLAN_PROTO_NUM,
+};
+
 struct vlan_group {
 	unsigned int		nr_vlan_devs;
 	struct hlist_node	hlist;	/* linked list */
-	struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS];
+	struct net_device **vlan_devices_arrays[VLAN_PROTO_NUM]
+					       [VLAN_GROUP_ARRAY_SPLIT_PARTS];
 };
 
 struct vlan_info {
@@ -103,37 +112,67 @@ struct vlan_info {
 	struct rcu_head		rcu;
 };
 
-static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
-						       u16 vlan_id)
+static inline unsigned int vlan_proto_idx(__be16 proto)
+{
+	switch (proto) {
+	case __constant_htons(ETH_P_8021Q):
+		return VLAN_PROTO_8021Q;
+	case __constant_htons(ETH_P_8021AD):
+		return VLAN_PROTO_8021AD;
+	default:
+		BUG();
+		return 0;
+	}
+}
+
+static inline struct net_device *__vlan_group_get_device(struct vlan_group *vg,
+							 unsigned int pidx,
+							 u16 vlan_id)
 {
 	struct net_device **array;
-	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+
+	array = vg->vlan_devices_arrays[pidx]
+				       [vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
 	return array ? array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL;
 }
 
+static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
+						       __be16 vlan_proto,
+						       u16 vlan_id)
+{
+	return __vlan_group_get_device(vg, vlan_proto_idx(vlan_proto), vlan_id);
+}
+
 static inline void vlan_group_set_device(struct vlan_group *vg,
-					 u16 vlan_id,
+					 __be16 vlan_proto, u16 vlan_id,
 					 struct net_device *dev)
 {
 	struct net_device **array;
 	if (!vg)
 		return;
-	array = vg->vlan_devices_arrays[vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+	array = vg->vlan_devices_arrays[vlan_proto_idx(vlan_proto)]
+				       [vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
 	array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] = dev;
 }
 
 /* Must be invoked with rcu_read_lock or with RTNL. */
 static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
-					       u16 vlan_id)
+					       __be16 vlan_proto, u16 vlan_id)
 {
 	struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info);
 
 	if (vlan_info)
-		return vlan_group_get_device(&vlan_info->grp, vlan_id);
+		return vlan_group_get_device(&vlan_info->grp,
+					     vlan_proto, vlan_id);
 
 	return NULL;
 }
 
+#define vlan_group_for_each_dev(grp, i, dev) \
+	for ((i) = 0; i < VLAN_PROTO_NUM * VLAN_N_VID; i++) \
+		if (((dev) = __vlan_group_get_device((grp), (i) / VLAN_N_VID, \
+							    (i) % VLAN_N_VID)))
+
 /* found in vlan_dev.c */
 void vlan_dev_set_ingress_priority(const struct net_device *dev,
 				   u32 skb_prio, u16 vlan_prio);
@@ -142,7 +181,8 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask);
 void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
 
-int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id);
+int vlan_check_real_dev(struct net_device *real_dev,
+			__be16 protocol, u16 vlan_id);
 void vlan_setup(struct net_device *dev);
 int register_vlan_dev(struct net_device *dev);
 void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
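The lookup above is a two-step indexing scheme: vlan_proto_idx() maps the encapsulation protocol to the first dimension of vlan_devices_arrays, and the VID then selects a 512-entry chunk plus a slot within it. A minimal standalone sketch of the arithmetic, with constants mirroring the kernel's values (plain C, illustration only, not part of the patch):

	#include <stdio.h>

	#define VLAN_N_VID                   4096
	#define VLAN_GROUP_ARRAY_SPLIT_PARTS 8
	#define VLAN_GROUP_ARRAY_PART_LEN    (VLAN_N_VID / VLAN_GROUP_ARRAY_SPLIT_PARTS)

	enum { PROTO_8021Q, PROTO_8021AD };	/* mirrors enum vlan_protos */

	int main(void)
	{
		unsigned int proto = PROTO_8021AD, vid = 100;

		/* resolves to vlan_devices_arrays[proto][chunk][slot] */
		printf("arrays[%u][%u][%u]\n", proto,
		       vid / VLAN_GROUP_ARRAY_PART_LEN,
		       vid % VLAN_GROUP_ARRAY_PART_LEN);
		return 0;
	}

Note how vlan_group_for_each_dev() flattens the same two dimensions into a single index i in [0, VLAN_PROTO_NUM * VLAN_N_VID): the protocol index is i / VLAN_N_VID and the VID is i % VLAN_N_VID.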
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index f3b6f515eba..8a15eaadc4b 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -8,11 +8,12 @@
 bool vlan_do_receive(struct sk_buff **skbp)
 {
 	struct sk_buff *skb = *skbp;
+	__be16 vlan_proto = skb->vlan_proto;
 	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
 	struct net_device *vlan_dev;
 	struct vlan_pcpu_stats *rx_stats;
 
-	vlan_dev = vlan_find_dev(skb->dev, vlan_id);
+	vlan_dev = vlan_find_dev(skb->dev, vlan_proto, vlan_id);
 	if (!vlan_dev)
 		return false;
@@ -38,7 +39,8 @@ bool vlan_do_receive(struct sk_buff **skbp)
 		 * original position later
 		 */
 		skb_push(skb, offset);
-		skb = *skbp = vlan_insert_tag(skb, skb->vlan_tci);
+		skb = *skbp = vlan_insert_tag(skb, skb->vlan_proto,
+					      skb->vlan_tci);
 		if (!skb)
 			return false;
 		skb_pull(skb, offset + VLAN_HLEN);
@@ -62,12 +64,13 @@ bool vlan_do_receive(struct sk_buff **skbp)
 
 /* Must be invoked with rcu_read_lock. */
 struct net_device *__vlan_find_dev_deep(struct net_device *dev,
-					u16 vlan_id)
+					__be16 vlan_proto, u16 vlan_id)
 {
 	struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info);
 
 	if (vlan_info) {
-		return vlan_group_get_device(&vlan_info->grp, vlan_id);
+		return vlan_group_get_device(&vlan_info->grp,
+					     vlan_proto, vlan_id);
 	} else {
 		/*
 		 * Lower devices of master uppers (bonding, team) do not have
@@ -78,7 +81,8 @@ struct net_device *__vlan_find_dev_deep(struct net_device *dev,
 
 		upper_dev = netdev_master_upper_dev_get_rcu(dev);
 		if (upper_dev)
-			return __vlan_find_dev_deep(upper_dev, vlan_id);
+			return __vlan_find_dev_deep(upper_dev,
+						    vlan_proto, vlan_id);
 	}
 
 	return NULL;
@@ -125,7 +129,7 @@ struct sk_buff *vlan_untag(struct sk_buff *skb)
 	vhdr = (struct vlan_hdr *) skb->data;
 	vlan_tci = ntohs(vhdr->h_vlan_TCI);
-	__vlan_hwaccel_put_tag(skb, vlan_tci);
+	__vlan_hwaccel_put_tag(skb, skb->protocol, vlan_tci);
 
 	skb_pull_rcsum(skb, VLAN_HLEN);
 	vlan_set_encap_proto(skb, vhdr);
@@ -153,10 +157,11 @@ EXPORT_SYMBOL(vlan_untag);
 
 static void vlan_group_free(struct vlan_group *grp)
 {
-	int i;
+	int i, j;
 
-	for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
-		kfree(grp->vlan_devices_arrays[i]);
+	for (i = 0; i < VLAN_PROTO_NUM; i++)
+		for (j = 0; j < VLAN_GROUP_ARRAY_SPLIT_PARTS; j++)
+			kfree(grp->vlan_devices_arrays[i][j]);
 }
 
 static void vlan_info_free(struct vlan_info *vlan_info)
@@ -185,35 +190,49 @@ static struct vlan_info *vlan_info_alloc(struct net_device *dev)
 
 struct vlan_vid_info {
 	struct list_head list;
-	unsigned short vid;
+	__be16 proto;
+	u16 vid;
 	int refcount;
 };
 
+static bool vlan_hw_filter_capable(const struct net_device *dev,
+				     const struct vlan_vid_info *vid_info)
+{
+	if (vid_info->proto == htons(ETH_P_8021Q) &&
+	    dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
+		return true;
+	if (vid_info->proto == htons(ETH_P_8021AD) &&
+	    dev->features & NETIF_F_HW_VLAN_STAG_FILTER)
+		return true;
+	return false;
+}
+
 static struct vlan_vid_info *vlan_vid_info_get(struct vlan_info *vlan_info,
-					       unsigned short vid)
+					       __be16 proto, u16 vid)
 {
 	struct vlan_vid_info *vid_info;
 
 	list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
-		if (vid_info->vid == vid)
+		if (vid_info->proto == proto && vid_info->vid == vid)
 			return vid_info;
 	}
 	return NULL;
 }
 
-static struct vlan_vid_info *vlan_vid_info_alloc(unsigned short vid)
+static struct vlan_vid_info *vlan_vid_info_alloc(__be16 proto, u16 vid)
 {
 	struct vlan_vid_info *vid_info;
 
 	vid_info = kzalloc(sizeof(struct vlan_vid_info), GFP_KERNEL);
 	if (!vid_info)
 		return NULL;
+	vid_info->proto = proto;
 	vid_info->vid = vid;
 
 	return vid_info;
 }
 
-static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid,
+static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
 			  struct vlan_vid_info **pvid_info)
 {
 	struct net_device *dev = vlan_info->real_dev;
@@ -221,12 +240,12 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid,
 	struct vlan_vid_info *vid_info;
 	int err;
 
-	vid_info = vlan_vid_info_alloc(vid);
+	vid_info = vlan_vid_info_alloc(proto, vid);
 	if (!vid_info)
 		return -ENOMEM;
 
-	if (dev->features & NETIF_F_HW_VLAN_FILTER) {
-		err =  ops->ndo_vlan_rx_add_vid(dev, vid);
+	if (vlan_hw_filter_capable(dev, vid_info)) {
+		err =  ops->ndo_vlan_rx_add_vid(dev, proto, vid);
 		if (err) {
 			kfree(vid_info);
 			return err;
@@ -238,7 +257,7 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid,
 	return 0;
 }
 
-int vlan_vid_add(struct net_device *dev, unsigned short vid)
+int vlan_vid_add(struct net_device *dev, __be16 proto, u16 vid)
 {
 	struct vlan_info *vlan_info;
 	struct vlan_vid_info *vid_info;
@@ -254,9 +273,9 @@ int vlan_vid_add(struct net_device *dev, unsigned short vid)
 			return -ENOMEM;
 		vlan_info_created = true;
 	}
-	vid_info = vlan_vid_info_get(vlan_info, vid);
+	vid_info = vlan_vid_info_get(vlan_info, proto, vid);
 	if (!vid_info) {
-		err = __vlan_vid_add(vlan_info, vid, &vid_info);
+		err = __vlan_vid_add(vlan_info, proto, vid, &vid_info);
 		if (err)
 			goto out_free_vlan_info;
 	}
@@ -279,14 +298,15 @@ static void __vlan_vid_del(struct vlan_info *vlan_info,
 {
 	struct net_device *dev = vlan_info->real_dev;
 	const struct net_device_ops *ops = dev->netdev_ops;
-	unsigned short vid = vid_info->vid;
+	__be16 proto = vid_info->proto;
+	u16 vid = vid_info->vid;
 	int err;
 
-	if (dev->features & NETIF_F_HW_VLAN_FILTER) {
-		err = ops->ndo_vlan_rx_kill_vid(dev, vid);
+	if (vlan_hw_filter_capable(dev, vid_info)) {
+		err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid);
 		if (err) {
-			pr_warn("failed to kill vid %d for device %s\n",
-				vid, dev->name);
+			pr_warn("failed to kill vid %04x/%d for device %s\n",
+				proto, vid, dev->name);
 		}
 	}
 	list_del(&vid_info->list);
@@ -294,7 +314,7 @@ static void __vlan_vid_del(struct vlan_info *vlan_info,
 	vlan_info->nr_vids--;
 }
 
-void vlan_vid_del(struct net_device *dev, unsigned short vid)
+void vlan_vid_del(struct net_device *dev, __be16 proto, u16 vid)
 {
 	struct vlan_info *vlan_info;
 	struct vlan_vid_info *vid_info;
@@ -305,7 +325,7 @@ void vlan_vid_del(struct net_device *dev, unsigned short vid)
 	if (!vlan_info)
 		return;
 
-	vid_info = vlan_vid_info_get(vlan_info, vid);
+	vid_info = vlan_vid_info_get(vlan_info, proto, vid);
 	if (!vid_info)
 		return;
 	vid_info->refcount--;
@@ -333,7 +353,7 @@ int vlan_vids_add_by_dev(struct net_device *dev,
 		return 0;
 
 	list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
-		err = vlan_vid_add(dev, vid_info->vid);
+		err = vlan_vid_add(dev, vid_info->proto, vid_info->vid);
 		if (err)
 			goto unwind;
 	}
@@ -343,7 +363,7 @@ unwind:
 	list_for_each_entry_continue_reverse(vid_info,
 					     &vlan_info->vid_list,
 					     list) {
-		vlan_vid_del(dev, vid_info->vid);
+		vlan_vid_del(dev, vid_info->proto, vid_info->vid);
 	}
 
 	return err;
@@ -363,7 +383,7 @@ void vlan_vids_del_by_dev(struct net_device *dev,
 		return;
 
 	list_for_each_entry(vid_info, &vlan_info->vid_list, list)
-		vlan_vid_del(dev, vid_info->vid);
+		vlan_vid_del(dev, vid_info->proto, vid_info->vid);
 }
 EXPORT_SYMBOL(vlan_vids_del_by_dev);
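Note that __vlan_vid_add() and __vlan_vid_del() now gate the hardware-filter callbacks on vlan_hw_filter_capable(), so a driver is only asked to program VIDs for a tag protocol whose filter feature it advertises, and the callbacks receive the protocol. A hedged sketch of what a driver-side callback could look like under the new signature (the device and helper names are hypothetical, not from this patch):

	/* Hypothetical driver callback; the VLAN core only invokes it when the
	 * matching NETIF_F_HW_VLAN_{CTAG,STAG}_FILTER feature is advertised.
	 */
	static int example_vlan_rx_add_vid(struct net_device *dev,
					   __be16 proto, u16 vid)
	{
		struct example_priv *priv = netdev_priv(dev);

		/* example_hw_set_vid_filter() is an assumed hardware helper */
		return example_hw_set_vid_filter(priv, ntohs(proto), vid, true);
	}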
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 19cf81bf9f6..8af508536d3 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -99,6 +99,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 				const void *daddr, const void *saddr,
 				unsigned int len)
 {
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct vlan_hdr *vhdr;
 	unsigned int vhdrlen = 0;
 	u16 vlan_tci = 0;
@@ -120,8 +121,8 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		else
 			vhdr->h_vlan_encapsulated_proto = htons(len);
 
-		skb->protocol = htons(ETH_P_8021Q);
-		type = ETH_P_8021Q;
+		skb->protocol = vlan->vlan_proto;
+		type = ntohs(vlan->vlan_proto);
 		vhdrlen = VLAN_HLEN;
 	}
@@ -161,12 +162,12 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	 * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
 	 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
 	 */
-	if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
+	if (veth->h_vlan_proto != vlan->vlan_proto ||
 	    vlan->flags & VLAN_FLAG_REORDER_HDR) {
 		u16 vlan_tci;
 		vlan_tci = vlan->vlan_id;
 		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
-		skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
+		skb = __vlan_hwaccel_put_tag(skb, vlan->vlan_proto, vlan_tci);
 	}
 
 	skb->dev = vlan->real_dev;
@@ -583,7 +584,7 @@ static int vlan_dev_init(struct net_device *dev)
 #endif
 
 	dev->needed_headroom = real_dev->needed_headroom;
-	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
+	if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) {
 		dev->header_ops      = real_dev->header_ops;
 		dev->hard_header_len = real_dev->hard_header_len;
 	} else {
diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c
index 6f975535276..66a80320b03 100644
--- a/net/8021q/vlan_gvrp.c
+++ b/net/8021q/vlan_gvrp.c
@@ -32,6 +32,8 @@ int vlan_gvrp_request_join(const struct net_device *dev)
 	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
+	if (vlan->vlan_proto != htons(ETH_P_8021Q))
+		return 0;
 	return garp_request_join(vlan->real_dev, &vlan_gvrp_app,
 				 &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID);
 }
@@ -41,6 +43,8 @@ void vlan_gvrp_request_leave(const struct net_device *dev)
 	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
+	if (vlan->vlan_proto != htons(ETH_P_8021Q))
+		return;
 	garp_request_leave(vlan->real_dev, &vlan_gvrp_app,
 			   &vlan_id, sizeof(vlan_id), GVRP_ATTR_VID);
 }
diff --git a/net/8021q/vlan_mvrp.c b/net/8021q/vlan_mvrp.c
index d9ec1d5964a..e0fe091801b 100644
--- a/net/8021q/vlan_mvrp.c
+++ b/net/8021q/vlan_mvrp.c
@@ -38,6 +38,8 @@ int vlan_mvrp_request_join(const struct net_device *dev)
 	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
+	if (vlan->vlan_proto != htons(ETH_P_8021Q))
+		return 0;
 	return mrp_request_join(vlan->real_dev, &vlan_mrp_app,
 				&vlan_id, sizeof(vlan_id), MVRP_ATTR_VID);
 }
@@ -47,6 +49,8 @@ void vlan_mvrp_request_leave(const struct net_device *dev)
 	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
+	if (vlan->vlan_proto != htons(ETH_P_8021Q))
+		return;
 	mrp_request_leave(vlan->real_dev, &vlan_mrp_app,
 			  &vlan_id, sizeof(vlan_id), MVRP_ATTR_VID);
 }
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 1789658b7cd..30912973228 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -23,6 +23,7 @@ static const struct nla_policy vlan_policy[IFLA_VLAN_MAX + 1] = {
 	[IFLA_VLAN_FLAGS]	= { .len = sizeof(struct ifla_vlan_flags) },
 	[IFLA_VLAN_EGRESS_QOS]	= { .type = NLA_NESTED },
 	[IFLA_VLAN_INGRESS_QOS] = { .type = NLA_NESTED },
+	[IFLA_VLAN_PROTOCOL]	= { .type = NLA_U16 },
 };
 
 static const struct nla_policy vlan_map_policy[IFLA_VLAN_QOS_MAX + 1] = {
@@ -53,6 +54,16 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
 	if (!data)
 		return -EINVAL;
 
+	if (data[IFLA_VLAN_PROTOCOL]) {
+		switch (nla_get_be16(data[IFLA_VLAN_PROTOCOL])) {
+		case __constant_htons(ETH_P_8021Q):
+		case __constant_htons(ETH_P_8021AD):
+			break;
+		default:
+			return -EPROTONOSUPPORT;
+		}
+	}
+
 	if (data[IFLA_VLAN_ID]) {
 		id = nla_get_u16(data[IFLA_VLAN_ID]);
 		if (id >= VLAN_VID_MASK)
@@ -107,6 +118,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev;
+	__be16 proto;
 	int err;
 
 	if (!data[IFLA_VLAN_ID])
@@ -118,11 +130,17 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
 	if (!real_dev)
 		return -ENODEV;
 
-	vlan->vlan_id  = nla_get_u16(data[IFLA_VLAN_ID]);
-	vlan->real_dev = real_dev;
-	vlan->flags    = VLAN_FLAG_REORDER_HDR;
+	if (data[IFLA_VLAN_PROTOCOL])
+		proto = nla_get_be16(data[IFLA_VLAN_PROTOCOL]);
+	else
+		proto = htons(ETH_P_8021Q);
+
+	vlan->vlan_proto = proto;
+	vlan->vlan_id	 = nla_get_u16(data[IFLA_VLAN_ID]);
+	vlan->real_dev	 = real_dev;
+	vlan->flags	 = VLAN_FLAG_REORDER_HDR;
 
-	err = vlan_check_real_dev(real_dev, vlan->vlan_id);
+	err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id);
 	if (err < 0)
 		return err;
@@ -151,7 +169,8 @@ static size_t vlan_get_size(const struct net_device *dev)
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 
-	return nla_total_size(2) +	/* IFLA_VLAN_ID */
+	return nla_total_size(2) +	/* IFLA_VLAN_PROTOCOL */
+	       nla_total_size(2) +	/* IFLA_VLAN_ID */
 	       sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */
 	       vlan_qos_map_size(vlan->nr_ingress_mappings) +
 	       vlan_qos_map_size(vlan->nr_egress_mappings);
@@ -166,7 +185,8 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	struct nlattr *nest;
 	unsigned int i;
 
-	if (nla_put_u16(skb, IFLA_VLAN_ID, vlan_dev_priv(dev)->vlan_id))
+	if (nla_put_be16(skb, IFLA_VLAN_PROTOCOL, vlan->vlan_proto) ||
+	    nla_put_u16(skb, IFLA_VLAN_ID, vlan->vlan_id))
 		goto nla_put_failure;
 	if (vlan->flags) {
 		f.flags = vlan->flags;
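With IFLA_VLAN_PROTOCOL accepted by vlan_validate() and defaulting to ETH_P_8021Q in vlan_newlink(), userspace can request an 802.1ad (Q-in-Q outer tag) device over the regular RTNETLINK path. Assuming an iproute2 new enough to know the proto keyword, this should look roughly like "ip link add link eth0 name eth0.100ad type vlan proto 802.1ad id 100" (illustrative command, not taken from the patch; older iproute2 versions predate the keyword and will always create plain 802.1Q devices).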
diff --git a/net/Kconfig b/net/Kconfig
index 6f676ab885b..2ddc9046868 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -217,6 +217,7 @@ source "net/dns_resolver/Kconfig"
 source "net/batman-adv/Kconfig"
 source "net/openvswitch/Kconfig"
 source "net/vmw_vsock/Kconfig"
+source "net/netlink/Kconfig"
 
 config RPS
 	boolean
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 4a141e3cf07..ef12839a7cf 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1253,7 +1253,7 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr,
 			goto out;
 
 	*uaddr_len = sizeof(struct sockaddr_at);
-	memset(&sat.sat_zero, 0, sizeof(sat.sat_zero));
+	memset(&sat, 0, sizeof(sat));
 
 	if (peer) {
 		err = -ENOTCONN;
diff --git a/net/atm/lec.h b/net/atm/lec.h
index a86aff9a3c0..4149db1b788 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -58,7 +58,7 @@ struct lane2_ops {
  *    field in h_type field. Data follows immediately after header.
  * 2. LLC Data frames whose total length, including LLC field and data,
  *    but not padding required to meet the minimum data frame length,
- *    is less than 1536(0x0600) MUST be encoded by placing that length
+ *    is less than ETH_P_802_3_MIN MUST be encoded by placing that length
 *    in the h_type field. The LLC field follows header immediately.
 * 3. LLC data frames longer than this maximum MUST be encoded by placing
 *    the value 0 in the h_type field.
diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig
index 8d8afb134b3..fa780b76630 100644
--- a/net/batman-adv/Kconfig
+++ b/net/batman-adv/Kconfig
@@ -36,6 +36,20 @@ config BATMAN_ADV_DAT
 	  mesh networks. If you think that your network does not need
 	  this option you can safely remove it and save some space.
 
+config BATMAN_ADV_NC
+	bool "Network Coding"
+	depends on BATMAN_ADV
+	default n
+	help
+	  This option enables network coding, a mechanism that aims to
+	  increase the overall network throughput by fusing multiple
+	  packets in one transmission.
+	  Note that interfaces controlled by batman-adv must be manually
+	  configured to have promiscuous mode enabled in order to make
+	  network coding work.
+	  If you think that your network does not need this feature you
+	  can safely disable it and save some space.
+
 config BATMAN_ADV_DEBUG
 	bool "B.A.T.M.A.N. debugging"
 	depends on BATMAN_ADV
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index e45e3b4e32e..acbac2a9c62 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2007-2012 B.A.T.M.A.N. contributors:
+# Copyright (C) 2007-2013 B.A.T.M.A.N. contributors:
 #
 # Marek Lindner, Simon Wunderlich
 #
@@ -30,6 +30,7 @@ batman-adv-y += hard-interface.o
 batman-adv-y += hash.o
 batman-adv-y += icmp_socket.o
 batman-adv-y += main.o
+batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o
 batman-adv-y += originator.o
 batman-adv-y += ring_buffer.o
 batman-adv-y += routing.o
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index a5bb0a769eb..071f288b77a 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -27,6 +27,7 @@
 #include "hard-interface.h"
 #include "send.h"
 #include "bat_algo.h"
+#include "network-coding.h"
 
 static struct batadv_neigh_node *
 batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
@@ -1185,6 +1186,10 @@ static void batadv_iv_ogm_process(const struct ethhdr *ethhdr,
 	if (!orig_neigh_node)
 		goto out;
 
+	/* Update nc_nodes of the originator */
+	batadv_nc_update_nc_node(bat_priv, orig_node, orig_neigh_node,
+				 batadv_ogm_packet, is_single_hop_neigh);
+
 	orig_neigh_router = batadv_orig_node_get_router(orig_neigh_node);
 
 	/* drop packet if sender is not a direct neighbor and if we
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 6a4f728680a..379061c7254 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -341,7 +341,7 @@ static void batadv_bla_send_claim(struct batadv_priv *bat_priv, uint8_t *mac,
 	}
 
 	if (vid != -1)
-		skb = vlan_insert_tag(skb, vid);
+		skb = vlan_insert_tag(skb, htons(ETH_P_8021Q), vid);
 
 	skb_reset_mac_header(skb);
 	skb->protocol = eth_type_trans(skb, soft_iface);
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 6ae86516db4..f186a55b23c 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -32,6 +32,7 @@
 #include "icmp_socket.h"
 #include "bridge_loop_avoidance.h"
 #include "distributed-arp-table.h"
+#include "network-coding.h"
 
 static struct dentry *batadv_debugfs;
@@ -310,6 +311,14 @@ struct batadv_debuginfo {
 	const struct file_operations fops;
 };
 
+#ifdef CONFIG_BATMAN_ADV_NC
+static int batadv_nc_nodes_open(struct inode *inode, struct file *file)
+{
+	struct net_device *net_dev = (struct net_device *)inode->i_private;
+	return single_open(file, batadv_nc_nodes_seq_print_text, net_dev);
+}
+#endif
+
 #define BATADV_DEBUGINFO(_name, _mode, _open)		\
 struct batadv_debuginfo batadv_debuginfo_##_name = {	\
 	.attr = { .name = __stringify(_name),		\
@@ -348,6 +357,9 @@ static BATADV_DEBUGINFO(dat_cache, S_IRUGO, batadv_dat_cache_open);
 static BATADV_DEBUGINFO(transtable_local, S_IRUGO,
 			batadv_transtable_local_open);
 static BATADV_DEBUGINFO(vis_data, S_IRUGO, batadv_vis_data_open);
+#ifdef CONFIG_BATMAN_ADV_NC
+static BATADV_DEBUGINFO(nc_nodes, S_IRUGO, batadv_nc_nodes_open);
+#endif
 
 static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
 	&batadv_debuginfo_originators,
@@ -362,6 +374,9 @@ static struct batadv_debuginfo *batadv_mesh_debuginfos[] = {
 #endif
 	&batadv_debuginfo_transtable_local,
 	&batadv_debuginfo_vis_data,
+#ifdef CONFIG_BATMAN_ADV_NC
+	&batadv_debuginfo_nc_nodes,
+#endif
 	NULL,
 };
@@ -431,6 +446,9 @@ int batadv_debugfs_add_meshif(struct net_device *dev)
 		}
 	}
 
+	if (batadv_nc_init_debugfs(bat_priv) < 0)
+		goto rem_attr;
+
 	return 0;
 rem_attr:
 	debugfs_remove_recursive(bat_priv->debug_dir);
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index d54188a112e..8e15d966d9b 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -816,7 +816,6 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
 	bool ret = false;
 	struct batadv_dat_entry *dat_entry = NULL;
 	struct sk_buff *skb_new;
-	struct batadv_hard_iface *primary_if = NULL;
 
 	if (!atomic_read(&bat_priv->distributed_arp_table))
 		goto out;
@@ -838,22 +837,18 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
 	dat_entry = batadv_dat_entry_hash_find(bat_priv, ip_dst);
 	if (dat_entry) {
-		primary_if = batadv_primary_if_get_selected(bat_priv);
-		if (!primary_if)
-			goto out;
-
 		skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src,
-				     primary_if->soft_iface, ip_dst, hw_src,
+				     bat_priv->soft_iface, ip_dst, hw_src,
 				     dat_entry->mac_addr, hw_src);
 		if (!skb_new)
 			goto out;
 
 		skb_reset_mac_header(skb_new);
 		skb_new->protocol = eth_type_trans(skb_new,
-						   primary_if->soft_iface);
+						   bat_priv->soft_iface);
 		bat_priv->stats.rx_packets++;
 		bat_priv->stats.rx_bytes += skb->len + ETH_HLEN;
-		primary_if->soft_iface->last_rx = jiffies;
+		bat_priv->soft_iface->last_rx = jiffies;
 
 		netif_rx(skb_new);
 		batadv_dbg(BATADV_DBG_DAT, bat_priv, "ARP request replied locally\n");
@@ -866,8 +861,6 @@ bool batadv_dat_snoop_outgoing_arp_request(struct batadv_priv *bat_priv,
 out:
 	if (dat_entry)
 		batadv_dat_entry_free_ref(dat_entry);
-	if (primary_if)
-		batadv_hardif_free_ref(primary_if);
 	return ret;
 }
@@ -887,7 +880,6 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
 	__be32 ip_src, ip_dst;
 	uint8_t *hw_src;
 	struct sk_buff *skb_new;
-	struct batadv_hard_iface *primary_if = NULL;
 	struct batadv_dat_entry *dat_entry = NULL;
 	bool ret = false;
 	int err;
@@ -912,12 +904,8 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
 	if (!dat_entry)
 		goto out;
 
-	primary_if = batadv_primary_if_get_selected(bat_priv);
-	if (!primary_if)
-		goto out;
-
 	skb_new = arp_create(ARPOP_REPLY, ETH_P_ARP, ip_src,
-			     primary_if->soft_iface, ip_dst, hw_src,
+			     bat_priv->soft_iface, ip_dst, hw_src,
 			     dat_entry->mac_addr, hw_src);
 
 	if (!skb_new)
@@ -941,8 +929,6 @@ bool batadv_dat_snoop_incoming_arp_request(struct batadv_priv *bat_priv,
 out:
 	if (dat_entry)
 		batadv_dat_entry_free_ref(dat_entry);
-	if (primary_if)
-		batadv_hardif_free_ref(primary_if);
 	if (ret)
 		kfree_skb(skb);
 	return ret;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index 34f99a46ec1..f105219f4a4 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -500,7 +500,7 @@ int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset)
 	rcu_read_unlock();
 
 	if (gw_count == 0)
-		seq_printf(seq, "No gateways in range ...\n");
+		seq_puts(seq, "No gateways in range ...\n");
 
 out:
 	if (primary_if)
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 368219e026a..522243aff2f 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -307,11 +307,35 @@ batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
 	batadv_update_min_mtu(hard_iface->soft_iface);
 }
 
+/**
+ * batadv_master_del_slave - remove hard_iface from the current master interface
+ * @slave: the interface enslaved in another master
+ * @master: the master from which slave has to be removed
+ *
+ * Invoke ndo_del_slave on master passing slave as argument. In this way slave
+ * is freed and master can correctly change its internal state.
+ * Return 0 on success, a negative value representing the error otherwise
+ */
+static int batadv_master_del_slave(struct batadv_hard_iface *slave,
+				   struct net_device *master)
+{
+	int ret;
+
+	if (!master)
+		return 0;
+
+	ret = -EBUSY;
+	if (master->netdev_ops->ndo_del_slave)
+		ret = master->netdev_ops->ndo_del_slave(master, slave->net_dev);
+
+	return ret;
+}
+
 int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 				   const char *iface_name)
 {
 	struct batadv_priv *bat_priv;
-	struct net_device *soft_iface;
+	struct net_device *soft_iface, *master;
 	__be16 ethertype = __constant_htons(ETH_P_BATMAN);
 	int ret;
@@ -321,11 +345,6 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 	if (!atomic_inc_not_zero(&hard_iface->refcount))
 		goto out;
 
-	/* hard-interface is part of a bridge */
-	if (hard_iface->net_dev->priv_flags & IFF_BRIDGE_PORT)
-		pr_err("You are about to enable batman-adv on '%s' which already is part of a bridge. Unless you know exactly what you are doing this is probably wrong and won't work the way you think it would.\n",
-		       hard_iface->net_dev->name);
-
 	soft_iface = dev_get_by_name(&init_net, iface_name);
 
 	if (!soft_iface) {
@@ -347,12 +366,24 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 		goto err_dev;
 	}
 
+	/* check if the interface is enslaved in another virtual one and
+	 * in that case unlink it first
+	 */
+	master = netdev_master_upper_dev_get(hard_iface->net_dev);
+	ret = batadv_master_del_slave(hard_iface, master);
+	if (ret)
+		goto err_dev;
+
 	hard_iface->soft_iface = soft_iface;
 	bat_priv = netdev_priv(hard_iface->soft_iface);
 
+	ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface);
+	if (ret)
+		goto err_dev;
+
 	ret = bat_priv->bat_algo_ops->bat_iface_enable(hard_iface);
 	if (ret < 0)
-		goto err_dev;
+		goto err_upper;
 
 	hard_iface->if_num = bat_priv->num_ifaces;
 	bat_priv->num_ifaces++;
@@ -362,7 +393,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 		bat_priv->bat_algo_ops->bat_iface_disable(hard_iface);
 		bat_priv->num_ifaces--;
 		hard_iface->if_status = BATADV_IF_NOT_IN_USE;
-		goto err_dev;
+		goto err_upper;
 	}
 
 	hard_iface->batman_adv_ptype.type = ethertype;
@@ -401,14 +432,18 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 out:
 	return 0;
 
+err_upper:
+	netdev_upper_dev_unlink(hard_iface->net_dev, soft_iface);
 err_dev:
+	hard_iface->soft_iface = NULL;
 	dev_put(soft_iface);
 err:
 	batadv_hardif_free_ref(hard_iface);
 	return ret;
 }
 
-void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface)
+void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
+				     enum batadv_hard_if_cleanup autodel)
 {
 	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
 	struct batadv_hard_iface *primary_if = NULL;
@@ -446,9 +481,10 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface)
 	dev_put(hard_iface->soft_iface);
 
 	/* nobody uses this interface anymore */
-	if (!bat_priv->num_ifaces)
-		batadv_softif_destroy(hard_iface->soft_iface);
+	if (!bat_priv->num_ifaces && autodel == BATADV_IF_CLEANUP_AUTO)
+		batadv_softif_destroy_sysfs(hard_iface->soft_iface);
 
+	netdev_upper_dev_unlink(hard_iface->net_dev, hard_iface->soft_iface);
 	hard_iface->soft_iface = NULL;
 	batadv_hardif_free_ref(hard_iface);
@@ -533,7 +569,8 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface)
 	/* first deactivate interface */
 	if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
-		batadv_hardif_disable_interface(hard_iface);
+		batadv_hardif_disable_interface(hard_iface,
+						BATADV_IF_CLEANUP_AUTO);
 
 	if (hard_iface->if_status != BATADV_IF_NOT_IN_USE)
 		return;
@@ -563,6 +600,11 @@ static int batadv_hard_if_event(struct notifier_block *this,
 	struct batadv_hard_iface *primary_if = NULL;
 	struct batadv_priv *bat_priv;
 
+	if (batadv_softif_is_valid(net_dev) && event == NETDEV_REGISTER) {
+		batadv_sysfs_add_meshif(net_dev);
+		return NOTIFY_DONE;
+	}
+
 	hard_iface = batadv_hardif_get_by_netdev(net_dev);
 	if (!hard_iface && event == NETDEV_REGISTER)
 		hard_iface = batadv_hardif_add_interface(net_dev);
diff --git a/net/batman-adv/hard-interface.h b/net/batman-adv/hard-interface.h
index 308437d52e2..49892881a7c 100644
--- a/net/batman-adv/hard-interface.h
+++ b/net/batman-adv/hard-interface.h
@@ -29,13 +29,24 @@ enum batadv_hard_if_state {
 	BATADV_IF_I_WANT_YOU,
 };
 
+/**
+ * enum batadv_hard_if_cleanup - Cleanup modes for soft_iface after slave removal
+ * @BATADV_IF_CLEANUP_KEEP: Don't automatically delete soft-interface
+ * @BATADV_IF_CLEANUP_AUTO: Delete soft-interface after last slave was removed
+ */
+enum batadv_hard_if_cleanup {
+	BATADV_IF_CLEANUP_KEEP,
+	BATADV_IF_CLEANUP_AUTO,
+};
+
 extern struct notifier_block batadv_hard_if_notifier;
 
 struct batadv_hard_iface*
 batadv_hardif_get_by_netdev(const struct net_device *net_dev);
 int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
 				   const char *iface_name);
-void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface);
+void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface,
+				     enum batadv_hard_if_cleanup autodel);
 void batadv_hardif_remove_interfaces(void);
 int batadv_hardif_min_mtu(struct net_device *soft_iface);
 void batadv_update_min_mtu(struct net_device *soft_iface);
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index fa563e497c4..3e30a0f1b90 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -35,6 +35,7 @@
 #include "vis.h"
 #include "hash.h"
 #include "bat_algo.h"
+#include "network-coding.h"
 
 
 /* List manipulations on hardif_list have to be rtnl_lock()'ed,
@@ -70,6 +71,7 @@ static int __init batadv_init(void)
 	batadv_debugfs_init();
 
 	register_netdevice_notifier(&batadv_hard_if_notifier);
+	rtnl_link_register(&batadv_link_ops);
 
 	pr_info("B.A.T.M.A.N. advanced %s (compatibility version %i) loaded\n",
 		BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION);
@@ -80,6 +82,7 @@ static int __init batadv_init(void)
 static void __exit batadv_exit(void)
 {
 	batadv_debugfs_destroy();
+	rtnl_link_unregister(&batadv_link_ops);
 	unregister_netdevice_notifier(&batadv_hard_if_notifier);
 	batadv_hardif_remove_interfaces();
@@ -135,6 +138,10 @@ int batadv_mesh_init(struct net_device *soft_iface)
 	if (ret < 0)
 		goto err;
 
+	ret = batadv_nc_init(bat_priv);
+	if (ret < 0)
+		goto err;
+
 	atomic_set(&bat_priv->gw.reselect, 0);
 	atomic_set(&bat_priv->mesh_state, BATADV_MESH_ACTIVE);
@@ -157,6 +164,7 @@ void batadv_mesh_free(struct net_device *soft_iface)
 
 	batadv_gw_node_purge(bat_priv);
 	batadv_originator_free(bat_priv);
+	batadv_nc_free(bat_priv);
 
 	batadv_tt_free(bat_priv);
@@ -169,6 +177,12 @@ void batadv_mesh_free(struct net_device *soft_iface)
 	atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE);
 }
 
+/**
+ * batadv_is_my_mac - check if the given mac address belongs to any of the real
+ * interfaces in the current mesh
+ * @bat_priv: the bat priv with all the soft interface information
+ * @addr: the address to check
+ */
 int batadv_is_my_mac(struct batadv_priv *bat_priv, const uint8_t *addr)
 {
 	const struct batadv_hard_iface *hard_iface;
@@ -414,7 +428,7 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset)
 {
 	struct batadv_algo_ops *bat_algo_ops;
 
-	seq_printf(seq, "Available routing algorithms:\n");
+	seq_puts(seq, "Available routing algorithms:\n");
 
 	hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) {
 		seq_printf(seq, "%s\n", bat_algo_ops->name);
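Registering batadv_link_ops means the soft interface now participates in the standard rtnl_link machinery, and together with the netdev_master_upper_dev_link() calls in hard-interface.c the mesh interface appears as an ordinary master device. Assuming matching iproute2 support, management should reduce to something along the lines of "ip link add name bat0 type batadv" followed by "ip link set dev eth0 master bat0" (illustrative commands, not part of the patch).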
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index d40910dfc8e..59a0d6af15c 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -26,7 +26,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2013.1.0"
+#define BATADV_SOURCE_VERSION "2013.2.0"
 #endif
 
 /* B.A.T.M.A.N. parameters */
@@ -105,6 +105,8 @@
 #define BATADV_RESET_PROTECTION_MS 30000
 #define BATADV_EXPECTED_SEQNO_RANGE	65536
 
+#define BATADV_NC_NODE_TIMEOUT 10000 /* Milliseconds */
+
 enum batadv_mesh_state {
 	BATADV_MESH_INACTIVE,
 	BATADV_MESH_ACTIVE,
@@ -150,6 +152,7 @@ enum batadv_uev_type {
 #include <linux/percpu.h>
 #include <linux/slab.h>
 #include <net/sock.h>		/* struct sock */
+#include <net/rtnetlink.h>
 #include <linux/jiffies.h>
 #include <linux/seq_file.h>
 #include "types.h"
@@ -185,6 +188,7 @@ __be32 batadv_skb_crc32(struct sk_buff *skb, u8 *payload_ptr);
 * @BATADV_DBG_TT: translation table messages
 * @BATADV_DBG_BLA: bridge loop avoidance messages
 * @BATADV_DBG_DAT: ARP snooping and DAT related messages
+ * @BATADV_DBG_NC: network coding related messages
 * @BATADV_DBG_ALL: the union of all the above log levels
 */
 enum batadv_dbg_level {
@@ -193,7 +197,8 @@ enum batadv_dbg_level {
 	BATADV_DBG_TT	  = BIT(2),
 	BATADV_DBG_BLA    = BIT(3),
 	BATADV_DBG_DAT    = BIT(4),
-	BATADV_DBG_ALL    = 31,
+	BATADV_DBG_NC	  = BIT(5),
+	BATADV_DBG_ALL    = 63,
 };
 
 #ifdef CONFIG_BATMAN_ADV_DEBUG
@@ -298,4 +303,10 @@ static inline uint64_t batadv_sum_counter(struct batadv_priv *bat_priv,
 	return sum;
 }
 
+/* Define a macro to reach the control buffer of the skb. The members of the
+ * control buffer are defined in struct batadv_skb_cb in types.h.
+ * The macro is inspired by the similar macro TCP_SKB_CB() in tcp.h.
+ */
+#define BATADV_SKB_CB(__skb)       ((struct batadv_skb_cb *)&((__skb)->cb[0]))
+
 #endif /* _NET_BATMAN_ADV_MAIN_H_ */
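BATADV_SKB_CB() follows the TCP_SKB_CB() pattern: per-packet state is stashed in the 48-byte skb->cb scratch area while the packet is inside batman-adv. A minimal sketch of the accessor in use (the member shown is illustrative only; the authoritative layout is struct batadv_skb_cb in types.h):

	/* Illustration only: stash a flag in skb->cb via the new accessor. */
	struct batadv_skb_cb {
		bool decoded;	/* example member; see types.h for the real layout */
	};

	static void example_mark_decoded(struct sk_buff *skb)
	{
		BATADV_SKB_CB(skb)->decoded = true;
	}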
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
new file mode 100644
index 00000000000..f7c54305a91
--- /dev/null
+++ b/net/batman-adv/network-coding.c
@@ -0,0 +1,1822 @@
+/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors:
+ *
+ * Martin Hundebøll, Jeppe Ledet-Pedersen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/debugfs.h>
+
+#include "main.h"
+#include "hash.h"
+#include "network-coding.h"
+#include "send.h"
+#include "originator.h"
+#include "hard-interface.h"
+#include "routing.h"
+
+static struct lock_class_key batadv_nc_coding_hash_lock_class_key;
+static struct lock_class_key batadv_nc_decoding_hash_lock_class_key;
+
+static void batadv_nc_worker(struct work_struct *work);
+static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
+				       struct batadv_hard_iface *recv_if);
+
+/**
+ * batadv_nc_start_timer - initialise the nc periodic worker
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_nc_start_timer(struct batadv_priv *bat_priv)
+{
+	queue_delayed_work(batadv_event_workqueue, &bat_priv->nc.work,
+			   msecs_to_jiffies(10));
+}
+
+/**
+ * batadv_nc_init - initialise coding hash table and start house keeping
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+int batadv_nc_init(struct batadv_priv *bat_priv)
+{
+	bat_priv->nc.timestamp_fwd_flush = jiffies;
+	bat_priv->nc.timestamp_sniffed_purge = jiffies;
+
+	if (bat_priv->nc.coding_hash || bat_priv->nc.decoding_hash)
+		return 0;
+
+	bat_priv->nc.coding_hash = batadv_hash_new(128);
+	if (!bat_priv->nc.coding_hash)
+		goto err;
+
+	batadv_hash_set_lock_class(bat_priv->nc.coding_hash,
+				   &batadv_nc_coding_hash_lock_class_key);
+
+	bat_priv->nc.decoding_hash = batadv_hash_new(128);
+	if (!bat_priv->nc.decoding_hash)
+		goto err;
+
+	batadv_hash_set_lock_class(bat_priv->nc.coding_hash,
+				   &batadv_nc_decoding_hash_lock_class_key);
+
+	/* Register our packet type */
+	if (batadv_recv_handler_register(BATADV_CODED,
+					 batadv_nc_recv_coded_packet) < 0)
+		goto err;
+
+	INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker);
+	batadv_nc_start_timer(bat_priv);
+
+	return 0;
+
+err:
+	return -ENOMEM;
+}
+
+/**
+ * batadv_nc_init_bat_priv - initialise the nc specific bat_priv variables
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv)
+{
+	atomic_set(&bat_priv->network_coding, 1);
+	bat_priv->nc.min_tq = 200;
+	bat_priv->nc.max_fwd_delay = 10;
+	bat_priv->nc.max_buffer_time = 200;
+}
+
+/**
+ * batadv_nc_init_orig - initialise the nc fields of an orig_node
+ * @orig_node: the orig_node which is going to be initialised
+ */
+void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
+{
+	INIT_LIST_HEAD(&orig_node->in_coding_list);
+	INIT_LIST_HEAD(&orig_node->out_coding_list);
+	spin_lock_init(&orig_node->in_coding_list_lock);
+	spin_lock_init(&orig_node->out_coding_list_lock);
+}
+
+/**
+ * batadv_nc_node_free_rcu - rcu callback to free an nc node and remove
+ *  its refcount on the orig_node
+ * @rcu: rcu pointer of the nc node
+ */
+static void batadv_nc_node_free_rcu(struct rcu_head *rcu)
+{
+	struct batadv_nc_node *nc_node;
+
+	nc_node = container_of(rcu, struct batadv_nc_node, rcu);
+	batadv_orig_node_free_ref(nc_node->orig_node);
+	kfree(nc_node);
+}
+
+/**
+ * batadv_nc_node_free_ref - decrements the nc node refcounter and possibly
+ * frees it
+ * @nc_node: the nc node to free
+ */
+static void batadv_nc_node_free_ref(struct batadv_nc_node *nc_node)
+{
+	if (atomic_dec_and_test(&nc_node->refcount))
+		call_rcu(&nc_node->rcu, batadv_nc_node_free_rcu);
+}
+
+/**
+ * batadv_nc_path_free_ref - decrements the nc path refcounter and possibly
+ * frees it
+ * @nc_path: the nc path to free
+ */
+static void batadv_nc_path_free_ref(struct batadv_nc_path *nc_path)
+{
+	if (atomic_dec_and_test(&nc_path->refcount))
+		kfree_rcu(nc_path, rcu);
+}
+
+/**
+ * batadv_nc_packet_free - frees nc packet
+ * @nc_packet: the nc packet to free
+ */
+static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet)
+{
+	if (nc_packet->skb)
+		kfree_skb(nc_packet->skb);
+
+	batadv_nc_path_free_ref(nc_packet->nc_path);
+	kfree(nc_packet);
+}
+
+/**
+ * batadv_nc_to_purge_nc_node - checks whether an nc node has to be purged
+ * @bat_priv: the bat priv with all the soft interface information
+ * @nc_node: the nc node to check
+ *
+ * Returns true if the entry has to be purged now, false otherwise
+ */
+static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv,
+				       struct batadv_nc_node *nc_node)
+{
+	if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
+		return true;
+
+	return batadv_has_timed_out(nc_node->last_seen, BATADV_NC_NODE_TIMEOUT);
+}
+
+/**
+ * batadv_nc_to_purge_nc_path_coding - checks whether an nc path has timed out
+ * @bat_priv: the bat priv with all the soft interface information
+ * @nc_path: the nc path to check
+ *
+ * Returns true if the entry has to be purged now, false otherwise
+ */
+static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv,
+					      struct batadv_nc_path *nc_path)
+{
+	if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
+		return true;
+
+	/* purge the path when no packets have been added for 10 times the
+	 * max_fwd_delay time
+	 */
+	return batadv_has_timed_out(nc_path->last_valid,
+				    bat_priv->nc.max_fwd_delay * 10);
+}
+
+/**
+ * batadv_nc_to_purge_nc_path_decoding - checks whether an nc path has timed out
+ * @bat_priv: the bat priv with all the soft interface information
+ * @nc_path: the nc path to check
+ *
+ * Returns true if the entry has to be purged now, false otherwise
+ */
+static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv,
+						struct batadv_nc_path *nc_path)
+{
+	if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
+		return true;
+
+	/* purge the path when no packets have been added for 10 times the
+	 * max_buffer time
+	 */
+	return batadv_has_timed_out(nc_path->last_valid,
+				    bat_priv->nc.max_buffer_time * 10);
+}
+
+/**
+ * batadv_nc_purge_orig_nc_nodes - go through list of nc nodes and purge stale
+ *  entries
+ * @bat_priv: the bat priv with all the soft interface information
+ * @list: list of nc nodes
+ * @lock: nc node list lock
+ * @to_purge: function in charge to decide whether an entry has to be purged or
+ *	      not. This function takes the nc node as argument and has to return
+ *	      a boolean value: true if the entry has to be deleted, false
+ *	      otherwise
+ */
+static void
+batadv_nc_purge_orig_nc_nodes(struct batadv_priv *bat_priv,
+			      struct list_head *list,
+			      spinlock_t *lock,
+			      bool (*to_purge)(struct batadv_priv *,
+					       struct batadv_nc_node *))
+{
+	struct batadv_nc_node *nc_node, *nc_node_tmp;
+
+	/* For each nc_node in list */
+	spin_lock_bh(lock);
+	list_for_each_entry_safe(nc_node, nc_node_tmp, list, list) {
+		/* if a helper function has been passed as parameter,
+		 * ask it if the entry has to be purged or not
+		 */
+		if (to_purge && !to_purge(bat_priv, nc_node))
+			continue;
+
+		batadv_dbg(BATADV_DBG_NC, bat_priv,
+			   "Removing nc_node %pM -> %pM\n",
+			   nc_node->addr, nc_node->orig_node->orig);
+		list_del_rcu(&nc_node->list);
+		batadv_nc_node_free_ref(nc_node);
+	}
+	spin_unlock_bh(lock);
+}
+
+/**
+ * batadv_nc_purge_orig - purges all nc node data attached of the given
+ *  originator
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: orig_node with the nc node entries to be purged
+ * @to_purge: function in charge to decide whether an entry has to be purged or
+ *	      not. This function takes the nc node as argument and has to return
+ *	      a boolean value: true if the entry has to be deleted, false
+ *	      otherwise
+ */
+void batadv_nc_purge_orig(struct batadv_priv *bat_priv,
+			  struct batadv_orig_node *orig_node,
+			  bool (*to_purge)(struct batadv_priv *,
+					   struct batadv_nc_node *))
+{
+	/* Check ingoing nc_node's of this orig_node */
+	batadv_nc_purge_orig_nc_nodes(bat_priv, &orig_node->in_coding_list,
+				      &orig_node->in_coding_list_lock,
+				      to_purge);
+
+	/* Check outgoing nc_node's of this orig_node */
+	batadv_nc_purge_orig_nc_nodes(bat_priv, &orig_node->out_coding_list,
+				      &orig_node->out_coding_list_lock,
+				      to_purge);
+}
+
+/**
+ * batadv_nc_purge_orig_hash - traverse entire originator hash to check if they
+ *  have timed out nc nodes
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv)
+{
+	struct batadv_hashtable *hash = bat_priv->orig_hash;
+	struct hlist_head *head;
+	struct batadv_orig_node *orig_node;
+	uint32_t i;
+
+	if (!hash)
+		return;
+
+	/* For each orig_node */
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(orig_node, head, hash_entry)
+			batadv_nc_purge_orig(bat_priv, orig_node,
+					     batadv_nc_to_purge_nc_node);
+		rcu_read_unlock();
+	}
+}
+
+/**
+ * batadv_nc_purge_paths - traverse all nc paths part of the hash and remove
+ *  unused ones
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hash: hash table containing the nc paths to check
+ * @to_purge: function in charge to decide whether an entry has to be purged or
+ *	      not. This function takes the nc path as argument and has to return
+ *	      a boolean value: true if the entry has to be deleted, false
+ *	      otherwise
+ */
+static void batadv_nc_purge_paths(struct batadv_priv *bat_priv,
+				  struct batadv_hashtable *hash,
+				  bool (*to_purge)(struct batadv_priv *,
+						   struct batadv_nc_path *))
+{
+	struct hlist_head *head;
+	struct hlist_node *node_tmp;
+	struct batadv_nc_path *nc_path;
+	spinlock_t *lock; /* Protects lists in hash */
+	uint32_t i;
+
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+		lock = &hash->list_locks[i];
+
+		/* For each nc_path in this bin */
+		spin_lock_bh(lock);
+		hlist_for_each_entry_safe(nc_path, node_tmp, head, hash_entry) {
+			/* if a helper function has been passed as parameter,
+			 * ask it if the entry has to be purged or not
+			 */
+			if (to_purge && !to_purge(bat_priv, nc_path))
+				continue;
+
+			/* purging a non-empty nc_path should never happen, but
+			 * is observed under high CPU load. Delay the purging
+			 * until next iteration to allow the packet_list to be
+			 * emptied first.
+			 */
+			if (!unlikely(list_empty(&nc_path->packet_list))) {
+				net_ratelimited_function(printk,
+							 KERN_WARNING
+							 "Skipping free of non-empty nc_path (%pM -> %pM)!\n",
+							 nc_path->prev_hop,
+							 nc_path->next_hop);
+				continue;
+			}
+
+			/* nc_path is unused, so remove it */
+			batadv_dbg(BATADV_DBG_NC, bat_priv,
+				   "Remove nc_path %pM -> %pM\n",
+				   nc_path->prev_hop, nc_path->next_hop);
+			hlist_del_rcu(&nc_path->hash_entry);
+			batadv_nc_path_free_ref(nc_path);
+		}
+		spin_unlock_bh(lock);
+	}
+}
+
+/**
+ * batadv_nc_hash_key_gen - computes the nc_path hash key
+ * @key: buffer to hold the final hash key
+ * @src: source ethernet mac address going into the hash key
+ * @dst: destination ethernet mac address going into the hash key
+ */
+static void batadv_nc_hash_key_gen(struct batadv_nc_path *key, const char *src,
+				   const char *dst)
+{
+	memcpy(key->prev_hop, src, sizeof(key->prev_hop));
+	memcpy(key->next_hop, dst, sizeof(key->next_hop));
+}
+
+/**
+ * batadv_nc_hash_choose - compute the hash value for an nc path
+ * @data: data to hash
+ * @size: size of the hash table
+ *
+ * Returns the selected index in the hash table for the given data.
+
+/**
+ * batadv_nc_hash_choose - compute the hash value for an nc path
+ * @data: data to hash
+ * @size: size of the hash table
+ *
+ * Returns the selected index in the hash table for the given data.
+ */
+static uint32_t batadv_nc_hash_choose(const void *data, uint32_t size)
+{
+	const struct batadv_nc_path *nc_path = data;
+	uint32_t hash = 0;
+
+	hash = batadv_hash_bytes(hash, &nc_path->prev_hop,
+				 sizeof(nc_path->prev_hop));
+	hash = batadv_hash_bytes(hash, &nc_path->next_hop,
+				 sizeof(nc_path->next_hop));
+
+	hash += (hash << 3);
+	hash ^= (hash >> 11);
+	hash += (hash << 15);
+
+	return hash % size;
+}
+
+/**
+ * batadv_nc_hash_compare - comparing function used in the network coding hash
+ *  tables
+ * @node: node in the local table
+ * @data2: second object to compare the node to
+ *
+ * Returns 1 if the two entries are the same, 0 otherwise
+ */
+static int batadv_nc_hash_compare(const struct hlist_node *node,
+				  const void *data2)
+{
+	const struct batadv_nc_path *nc_path1, *nc_path2;
+
+	nc_path1 = container_of(node, struct batadv_nc_path, hash_entry);
+	nc_path2 = data2;
+
+	/* Return 1 if the two keys are identical */
+	if (memcmp(nc_path1->prev_hop, nc_path2->prev_hop,
+		   sizeof(nc_path1->prev_hop)) != 0)
+		return 0;
+
+	if (memcmp(nc_path1->next_hop, nc_path2->next_hop,
+		   sizeof(nc_path1->next_hop)) != 0)
+		return 0;
+
+	return 1;
+}
+
+/**
+ * batadv_nc_hash_find - search for an existing nc path and return it
+ * @hash: hash table containing the nc path
+ * @data: search key
+ *
+ * Returns the nc_path if found, NULL otherwise.
+ */
+static struct batadv_nc_path *
+batadv_nc_hash_find(struct batadv_hashtable *hash,
+		    void *data)
+{
+	struct hlist_head *head;
+	struct batadv_nc_path *nc_path, *nc_path_tmp = NULL;
+	int index;
+
+	if (!hash)
+		return NULL;
+
+	index = batadv_nc_hash_choose(data, hash->size);
+	head = &hash->table[index];
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(nc_path, head, hash_entry) {
+		if (!batadv_nc_hash_compare(&nc_path->hash_entry, data))
+			continue;
+
+		if (!atomic_inc_not_zero(&nc_path->refcount))
+			continue;
+
+		nc_path_tmp = nc_path;
+		break;
+	}
+	rcu_read_unlock();
+
+	return nc_path_tmp;
+}
+
+/**
+ * batadv_nc_send_packet - send non-coded packet and free nc_packet struct
+ * @nc_packet: the nc packet to send
+ */
+static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet)
+{
+	batadv_send_skb_packet(nc_packet->skb,
+			       nc_packet->neigh_node->if_incoming,
+			       nc_packet->nc_path->next_hop);
+	nc_packet->skb = NULL;
+	batadv_nc_packet_free(nc_packet);
+}
+
+/**
+ * batadv_nc_sniffed_purge - Checks timestamp of given sniffed nc_packet.
+ * @bat_priv: the bat priv with all the soft interface information
+ * @nc_path: the nc path the packet belongs to
+ * @nc_packet: the nc packet to be checked
+ *
+ * Checks whether the given sniffed (overheard) nc_packet has hit its buffering
+ * timeout. If so, the packet is no longer kept and the entry deleted from the
+ * queue. Has to be called with the appropriate locks.
+ *
+ * Returns false if the entry in the FIFO queue has not yet timed out,
+ * true otherwise.
+ */
+static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv,
+				    struct batadv_nc_path *nc_path,
+				    struct batadv_nc_packet *nc_packet)
+{
+	unsigned long timeout = bat_priv->nc.max_buffer_time;
+	bool res = false;
+
+	/* Packets are added to tail, so the remaining packets did not time
+	 * out and we can stop processing the current queue
+	 */
+	if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE &&
+	    !batadv_has_timed_out(nc_packet->timestamp, timeout))
+		goto out;
+
+	/* purge nc packet */
+	list_del(&nc_packet->list);
+	batadv_nc_packet_free(nc_packet);
+
+	res = true;
+
+out:
+	return res;
+}
+
+/**
+ * batadv_nc_fwd_flush - Checks the timestamp of the given nc packet.
+ * @bat_priv: the bat priv with all the soft interface information
+ * @nc_path: the nc path the packet belongs to
+ * @nc_packet: the nc packet to be checked
+ *
+ * Checks whether the given nc packet has hit its forward timeout. If so, the
+ * packet is no longer delayed, immediately sent and the entry deleted from the
+ * queue. Has to be called with the appropriate locks.
+ *
+ * Returns false if the entry in the FIFO queue has not yet timed out,
+ * true otherwise.
+ */
+static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv,
+				struct batadv_nc_path *nc_path,
+				struct batadv_nc_packet *nc_packet)
+{
+	unsigned long timeout = bat_priv->nc.max_fwd_delay;
+
+	/* Packets are added to tail, so the remaining packets did not time
+	 * out and we can stop processing the current queue
+	 */
+	if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE &&
+	    !batadv_has_timed_out(nc_packet->timestamp, timeout))
+		return false;
+
+	/* Send packet */
+	batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD);
+	batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES,
+			   nc_packet->skb->len + ETH_HLEN);
+	list_del(&nc_packet->list);
+	batadv_nc_send_packet(nc_packet);
+
+	return true;
+}
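
Both flush helpers above exploit the same invariant: packets are appended at the tail in arrival order, so a scan from the head may stop at the first entry that has not yet expired. A minimal user-space sketch of that early-stop scan, not from this patch, using wall-clock seconds in place of jiffies (all ex_ names are local to the example):

#include <stddef.h>
#include <stdlib.h>
#include <time.h>

struct ex_pkt {
	time_t timestamp;
	struct ex_pkt *next;
};

/* Pop expired entries from the head; stop at the first fresh one. */
static size_t ex_flush_expired(struct ex_pkt **head, double timeout_sec)
{
	time_t now = time(NULL);
	size_t flushed = 0;

	while (*head) {
		struct ex_pkt *pkt = *head;

		if (difftime(now, pkt->timestamp) < timeout_sec)
			break;	/* tail-ordered queue: the rest is fresh too */

		*head = pkt->next;
		free(pkt);	/* a real implementation would send or drop it */
		flushed++;
	}
	return flushed;
}
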
+
+/**
+ * batadv_nc_process_nc_paths - traverse the given nc packet pool and free
+ *  timed out nc packets
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hash: to be processed hash table
+ * @process_fn: function called to process the given nc packet. Should return
+ *	        true to let this function proceed to the next packet;
+ *	        otherwise the rest of the current queue is skipped.
+ */
+static void
+batadv_nc_process_nc_paths(struct batadv_priv *bat_priv,
+			   struct batadv_hashtable *hash,
+			   bool (*process_fn)(struct batadv_priv *,
+					      struct batadv_nc_path *,
+					      struct batadv_nc_packet *))
+{
+	struct hlist_head *head;
+	struct batadv_nc_packet *nc_packet, *nc_packet_tmp;
+	struct batadv_nc_path *nc_path;
+	bool ret;
+	int i;
+
+	if (!hash)
+		return;
+
+	/* Loop hash table bins */
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		/* Loop coding paths */
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(nc_path, head, hash_entry) {
+			/* Loop packets */
+			spin_lock_bh(&nc_path->packet_list_lock);
+			list_for_each_entry_safe(nc_packet, nc_packet_tmp,
+						 &nc_path->packet_list, list) {
+				ret = process_fn(bat_priv, nc_path, nc_packet);
+				if (!ret)
+					break;
+			}
+			spin_unlock_bh(&nc_path->packet_list_lock);
+		}
+		rcu_read_unlock();
+	}
+}
+
+/**
+ * batadv_nc_worker - periodic task for housekeeping related to network coding
+ * @work: kernel work struct
+ */
+static void batadv_nc_worker(struct work_struct *work)
+{
+	struct delayed_work *delayed_work;
+	struct batadv_priv_nc *priv_nc;
+	struct batadv_priv *bat_priv;
+	unsigned long timeout;
+
+	delayed_work = container_of(work, struct delayed_work, work);
+	priv_nc = container_of(delayed_work, struct batadv_priv_nc, work);
+	bat_priv = container_of(priv_nc, struct batadv_priv, nc);
+
+	batadv_nc_purge_orig_hash(bat_priv);
+	batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash,
+			      batadv_nc_to_purge_nc_path_coding);
+	batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash,
+			      batadv_nc_to_purge_nc_path_decoding);
+
+	timeout = bat_priv->nc.max_fwd_delay;
+
+	if (batadv_has_timed_out(bat_priv->nc.timestamp_fwd_flush, timeout)) {
+		batadv_nc_process_nc_paths(bat_priv, bat_priv->nc.coding_hash,
+					   batadv_nc_fwd_flush);
+		bat_priv->nc.timestamp_fwd_flush = jiffies;
+	}
+
+	if (batadv_has_timed_out(bat_priv->nc.timestamp_sniffed_purge,
+				 bat_priv->nc.max_buffer_time)) {
+		batadv_nc_process_nc_paths(bat_priv, bat_priv->nc.decoding_hash,
+					   batadv_nc_sniffed_purge);
+		bat_priv->nc.timestamp_sniffed_purge = jiffies;
+	}
+
+	/* Schedule a new check */
+	batadv_nc_start_timer(bat_priv);
+}
+
+/**
+ * batadv_can_nc_with_orig - checks whether the given orig node is suitable for
+ *  coding or not
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: neighboring orig node which may be used as nc candidate
+ * @ogm_packet: incoming ogm packet also used for the checks
+ *
+ * Returns true if all of the following conditions are met:
+ *  1) the OGM has the most recent sequence number,
+ *  2) the TTL has been decremented by exactly one,
+ *  3) the OGM was received from the first hop from orig_node,
+ *  4) the TQ value of the OGM is above bat_priv->nc.min_tq.
+ */ +static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv, +				    struct batadv_orig_node *orig_node, +				    struct batadv_ogm_packet *ogm_packet) +{ +	if (orig_node->last_real_seqno != ntohl(ogm_packet->seqno)) +		return false; +	if (orig_node->last_ttl != ogm_packet->header.ttl + 1) +		return false; +	if (!batadv_compare_eth(ogm_packet->orig, ogm_packet->prev_sender)) +		return false; +	if (ogm_packet->tq < bat_priv->nc.min_tq) +		return false; + +	return true; +} + +/** + * batadv_nc_find_nc_node - search for an existing nc node and return it + * @orig_node: orig node originating the ogm packet + * @orig_neigh_node: neighboring orig node from which we received the ogm packet + *  (can be equal to orig_node) + * @in_coding: traverse incoming or outgoing network coding list + * + * Returns the nc_node if found, NULL otherwise. + */ +static struct batadv_nc_node +*batadv_nc_find_nc_node(struct batadv_orig_node *orig_node, +			struct batadv_orig_node *orig_neigh_node, +			bool in_coding) +{ +	struct batadv_nc_node *nc_node, *nc_node_out = NULL; +	struct list_head *list; + +	if (in_coding) +		list = &orig_neigh_node->in_coding_list; +	else +		list = &orig_neigh_node->out_coding_list; + +	/* Traverse list of nc_nodes to orig_node */ +	rcu_read_lock(); +	list_for_each_entry_rcu(nc_node, list, list) { +		if (!batadv_compare_eth(nc_node->addr, orig_node->orig)) +			continue; + +		if (!atomic_inc_not_zero(&nc_node->refcount)) +			continue; + +		/* Found a match */ +		nc_node_out = nc_node; +		break; +	} +	rcu_read_unlock(); + +	return nc_node_out; +} + +/** + * batadv_nc_get_nc_node - retrieves an nc node or creates the entry if it was + *  not found + * @bat_priv: the bat priv with all the soft interface information + * @orig_node: orig node originating the ogm packet + * @orig_neigh_node: neighboring orig node from which we received the ogm packet + *  (can be equal to orig_node) + * @in_coding: traverse incoming or outgoing network coding list + * + * Returns the nc_node if found or created, NULL in case of an error. 
+ */
+static struct batadv_nc_node
+*batadv_nc_get_nc_node(struct batadv_priv *bat_priv,
+		       struct batadv_orig_node *orig_node,
+		       struct batadv_orig_node *orig_neigh_node,
+		       bool in_coding)
+{
+	struct batadv_nc_node *nc_node;
+	spinlock_t *lock; /* Used to lock list selected by "bool in_coding" */
+	struct list_head *list;
+
+	/* Check if nc_node is already added */
+	nc_node = batadv_nc_find_nc_node(orig_node, orig_neigh_node, in_coding);
+
+	/* Node found */
+	if (nc_node)
+		return nc_node;
+
+	nc_node = kzalloc(sizeof(*nc_node), GFP_ATOMIC);
+	if (!nc_node)
+		return NULL;
+
+	if (!atomic_inc_not_zero(&orig_neigh_node->refcount))
+		goto free;
+
+	/* Initialize nc_node */
+	INIT_LIST_HEAD(&nc_node->list);
+	memcpy(nc_node->addr, orig_node->orig, ETH_ALEN);
+	nc_node->orig_node = orig_neigh_node;
+	atomic_set(&nc_node->refcount, 2);
+
+	/* Select ingoing or outgoing coding node */
+	if (in_coding) {
+		lock = &orig_neigh_node->in_coding_list_lock;
+		list = &orig_neigh_node->in_coding_list;
+	} else {
+		lock = &orig_neigh_node->out_coding_list_lock;
+		list = &orig_neigh_node->out_coding_list;
+	}
+
+	batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_node %pM -> %pM\n",
+		   nc_node->addr, nc_node->orig_node->orig);
+
+	/* Add nc_node to orig_node */
+	spin_lock_bh(lock);
+	list_add_tail_rcu(&nc_node->list, list);
+	spin_unlock_bh(lock);
+
+	return nc_node;
+
+free:
+	kfree(nc_node);
+	return NULL;
+}
+
+/**
+ * batadv_nc_update_nc_node - updates stored incoming and outgoing nc node
+ *  structs (best called on incoming OGMs)
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_node: orig node originating the ogm packet
+ * @orig_neigh_node: neighboring orig node from which we received the ogm packet
+ *  (can be equal to orig_node)
+ * @ogm_packet: incoming ogm packet
+ * @is_single_hop_neigh: orig_node is a single hop neighbor
+ */
+void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
+			      struct batadv_orig_node *orig_node,
+			      struct batadv_orig_node *orig_neigh_node,
+			      struct batadv_ogm_packet *ogm_packet,
+			      int is_single_hop_neigh)
+{
+	struct batadv_nc_node *in_nc_node = NULL, *out_nc_node = NULL;
+
+	/* Check if network coding is enabled */
+	if (!atomic_read(&bat_priv->network_coding))
+		goto out;
+
+	/* accept ogms from 'good' neighbors and single hop neighbors */
+	if (!batadv_can_nc_with_orig(bat_priv, orig_node, ogm_packet) &&
+	    !is_single_hop_neigh)
+		goto out;
+
+	/* Add orig_node as in_nc_node on hop */
+	in_nc_node = batadv_nc_get_nc_node(bat_priv, orig_node,
+					   orig_neigh_node, true);
+	if (!in_nc_node)
+		goto out;
+
+	in_nc_node->last_seen = jiffies;
+
+	/* Add hop as out_nc_node on orig_node */
+	out_nc_node = batadv_nc_get_nc_node(bat_priv, orig_neigh_node,
+					    orig_node, false);
+	if (!out_nc_node)
+		goto out;
+
+	out_nc_node->last_seen = jiffies;
+
+out:
+	if (in_nc_node)
+		batadv_nc_node_free_ref(in_nc_node);
+	if (out_nc_node)
+		batadv_nc_node_free_ref(out_nc_node);
+}
+
+/**
+ * batadv_nc_get_path - get existing nc_path or allocate a new one
+ * @bat_priv: the bat priv with all the soft interface information
+ * @hash: hash table containing the nc path
+ * @src: ethernet source address - first half of the nc path search key
+ * @dst: ethernet destination address - second half of the nc path search key
+ *
+ * Returns pointer to nc_path if the path was found or created, returns NULL
+ * on error.
+ */
+static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv,
+						 struct batadv_hashtable *hash,
+						 uint8_t *src,
+						 uint8_t *dst)
+{
+	int hash_added;
+	struct batadv_nc_path *nc_path, nc_path_key;
+
+	batadv_nc_hash_key_gen(&nc_path_key, src, dst);
+
+	/* Search for existing nc_path */
+	nc_path = batadv_nc_hash_find(hash, (void *)&nc_path_key);
+
+	if (nc_path) {
+		/* Set timestamp to delay removal of nc_path */
+		nc_path->last_valid = jiffies;
+		return nc_path;
+	}
+
+	/* No existing nc_path was found; create a new one */
+	nc_path = kzalloc(sizeof(*nc_path), GFP_ATOMIC);
+
+	if (!nc_path)
+		return NULL;
+
+	/* Initialize nc_path */
+	INIT_LIST_HEAD(&nc_path->packet_list);
+	spin_lock_init(&nc_path->packet_list_lock);
+	atomic_set(&nc_path->refcount, 2);
+	nc_path->last_valid = jiffies;
+	memcpy(nc_path->next_hop, dst, ETH_ALEN);
+	memcpy(nc_path->prev_hop, src, ETH_ALEN);
+
+	batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_path %pM -> %pM\n",
+		   nc_path->prev_hop,
+		   nc_path->next_hop);
+
+	/* Add nc_path to hash table */
+	hash_added = batadv_hash_add(hash, batadv_nc_hash_compare,
+				     batadv_nc_hash_choose, &nc_path_key,
+				     &nc_path->hash_entry);
+
+	if (hash_added < 0) {
+		kfree(nc_path);
+		return NULL;
+	}
+
+	return nc_path;
+}
+
+/**
+ * batadv_nc_random_weight_tq - scale the receiver's TQ-value to avoid unfair
+ *  selection of a receiver with slightly lower TQ than the other
+ * @tq: to be weighted tq value
+ */
+static uint8_t batadv_nc_random_weight_tq(uint8_t tq)
+{
+	uint8_t rand_val;
+	uint32_t rand_tq;
+
+	get_random_bytes(&rand_val, sizeof(rand_val));
+
+	/* randomize the estimated packet loss (max TQ - estimated TQ);
+	 * rand_tq is 32 bit wide so the product cannot overflow
+	 */
+	rand_tq = rand_val * (BATADV_TQ_MAX_VALUE - tq);
+
+	/* normalize the randomized packet loss */
+	rand_tq /= BATADV_TQ_MAX_VALUE;
+
+	/* convert to (randomized) estimated tq again */
+	return BATADV_TQ_MAX_VALUE - rand_tq;
+}
+
+/**
+ * batadv_nc_memxor - XOR destination with source
+ * @dst: byte array to XOR into
+ * @src: byte array to XOR from
+ * @len: length of destination array
+ */
+static void batadv_nc_memxor(char *dst, const char *src, unsigned int len)
+{
+	unsigned int i;
+
+	for (i = 0; i < len; ++i)
+		dst[i] ^= src[i];
+}
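
The whole scheme rests on XOR being its own inverse: applying the same XOR pass twice with the same source restores the destination, which is exactly what lets the relay code two packets together and each receiver decode with its own buffered copy. A minimal user-space sketch, not from this patch, with ex_ names local to the example:

#include <assert.h>
#include <string.h>

/* same operation as batadv_nc_memxor() above */
static void ex_memxor(char *dst, const char *src, unsigned int len)
{
	unsigned int i;

	for (i = 0; i < len; i++)
		dst[i] ^= src[i];
}

int main(void)
{
	char a[4] = "abc", b[4] = "xyz", saved[4];

	memcpy(saved, a, sizeof(saved));
	ex_memxor(a, b, 3);	/* code:   a ^= b       */
	ex_memxor(a, b, 3);	/* decode: (a ^ b) ^ b == a */
	assert(memcmp(a, saved, sizeof(saved)) == 0);
	return 0;
}
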
+
+/**
+ * batadv_nc_code_packets - code a received unicast_packet with an nc packet
+ *  into a coded_packet and send it
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: data skb to forward
+ * @ethhdr: pointer to the ethernet header inside the skb
+ * @nc_packet: structure containing the packet the skb can be coded with
+ * @neigh_node: next hop to forward packet to
+ *
+ * Returns true if both packets are consumed, false otherwise.
+ */
+static bool batadv_nc_code_packets(struct batadv_priv *bat_priv,
+				   struct sk_buff *skb,
+				   struct ethhdr *ethhdr,
+				   struct batadv_nc_packet *nc_packet,
+				   struct batadv_neigh_node *neigh_node)
+{
+	uint8_t tq_weighted_neigh, tq_weighted_coding;
+	struct sk_buff *skb_dest, *skb_src;
+	struct batadv_unicast_packet *packet1;
+	struct batadv_unicast_packet *packet2;
+	struct batadv_coded_packet *coded_packet;
+	struct batadv_neigh_node *neigh_tmp, *router_neigh;
+	struct batadv_neigh_node *router_coding = NULL;
+	uint8_t *first_source, *first_dest, *second_source, *second_dest;
+	__be32 packet_id1, packet_id2;
+	size_t count;
+	bool res = false;
+	int coding_len;
+	int unicast_size = sizeof(*packet1);
+	int coded_size = sizeof(*coded_packet);
+	int header_add = coded_size - unicast_size;
+
+	router_neigh = batadv_orig_node_get_router(neigh_node->orig_node);
+	if (!router_neigh)
+		goto out;
+
+	neigh_tmp = nc_packet->neigh_node;
+	router_coding = batadv_orig_node_get_router(neigh_tmp->orig_node);
+	if (!router_coding)
+		goto out;
+
+	tq_weighted_neigh = batadv_nc_random_weight_tq(router_neigh->tq_avg);
+	tq_weighted_coding = batadv_nc_random_weight_tq(router_coding->tq_avg);
+
+	/* Select one destination for the MAC-header dst-field based on
+	 * weighted TQ-values.
+	 */
+	if (tq_weighted_neigh >= tq_weighted_coding) {
+		/* Destination from nc_packet is selected for MAC-header */
+		first_dest = nc_packet->nc_path->next_hop;
+		first_source = nc_packet->nc_path->prev_hop;
+		second_dest = neigh_node->addr;
+		second_source = ethhdr->h_source;
+		packet1 = (struct batadv_unicast_packet *)nc_packet->skb->data;
+		packet2 = (struct batadv_unicast_packet *)skb->data;
+		packet_id1 = nc_packet->packet_id;
+		packet_id2 = batadv_skb_crc32(skb,
+					      skb->data + sizeof(*packet2));
+	} else {
+		/* Destination for skb is selected for MAC-header */
+		first_dest = neigh_node->addr;
+		first_source = ethhdr->h_source;
+		second_dest = nc_packet->nc_path->next_hop;
+		second_source = nc_packet->nc_path->prev_hop;
+		packet1 = (struct batadv_unicast_packet *)skb->data;
+		packet2 = (struct batadv_unicast_packet *)nc_packet->skb->data;
+		packet_id1 = batadv_skb_crc32(skb,
+					      skb->data + sizeof(*packet1));
+		packet_id2 = nc_packet->packet_id;
+	}
+
+	/* Instead of zero padding the smallest data buffer, we
+	 * code into the largest.
+	 */
+	if (skb->len <= nc_packet->skb->len) {
+		skb_dest = nc_packet->skb;
+		skb_src = skb;
+	} else {
+		skb_dest = skb;
+		skb_src = nc_packet->skb;
+	}
+
+	/* coding_len is used when decoding the shorter packet */
+	coding_len = skb_src->len - unicast_size;
+
+	if (skb_linearize(skb_dest) < 0 || skb_linearize(skb_src) < 0)
+		goto out;
+
+	skb_push(skb_dest, header_add);
+
+	coded_packet = (struct batadv_coded_packet *)skb_dest->data;
+	skb_reset_mac_header(skb_dest);
+
+	coded_packet->header.packet_type = BATADV_CODED;
+	coded_packet->header.version = BATADV_COMPAT_VERSION;
+	coded_packet->header.ttl = packet1->header.ttl;
+
+	/* Info about first unicast packet */
+	memcpy(coded_packet->first_source, first_source, ETH_ALEN);
+	memcpy(coded_packet->first_orig_dest, packet1->dest, ETH_ALEN);
+	coded_packet->first_crc = packet_id1;
+	coded_packet->first_ttvn = packet1->ttvn;
+
+	/* Info about second unicast packet */
+	memcpy(coded_packet->second_dest, second_dest, ETH_ALEN);
+	memcpy(coded_packet->second_source, second_source, ETH_ALEN);
+	memcpy(coded_packet->second_orig_dest, packet2->dest, ETH_ALEN);
+	coded_packet->second_crc = packet_id2;
+	coded_packet->second_ttl = packet2->header.ttl;
+	coded_packet->second_ttvn = packet2->ttvn;
+	coded_packet->coded_len = htons(coding_len);
+
+	/* This is where the magic happens: Code skb_src into skb_dest */
+	batadv_nc_memxor(skb_dest->data + coded_size,
+			 skb_src->data + unicast_size, coding_len);
+
+	/* Update counters accordingly */
+	if (BATADV_SKB_CB(skb_src)->decoded &&
+	    BATADV_SKB_CB(skb_dest)->decoded) {
+		/* Both packets are recoded */
+		count = skb_src->len + ETH_HLEN;
+		count += skb_dest->len + ETH_HLEN;
+		batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE, 2);
+		batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES, count);
+	} else if (!BATADV_SKB_CB(skb_src)->decoded &&
+		   !BATADV_SKB_CB(skb_dest)->decoded) {
+		/* Both packets are newly coded */
+		count = skb_src->len + ETH_HLEN;
+		count += skb_dest->len + ETH_HLEN;
+		batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE, 2);
+		batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES, count);
+	} else if (BATADV_SKB_CB(skb_src)->decoded &&
+		   !BATADV_SKB_CB(skb_dest)->decoded) {
+		/* skb_src recoded and skb_dest is newly coded */
+		batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE);
+		batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES,
+				   skb_src->len + ETH_HLEN);
+		batadv_inc_counter(bat_priv, BATADV_CNT_NC_CODE);
+		batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES,
+				   skb_dest->len + ETH_HLEN);
+	} else if (!BATADV_SKB_CB(skb_src)->decoded &&
+		   BATADV_SKB_CB(skb_dest)->decoded) {
+		/* skb_src is newly coded and skb_dest is recoded */
+		batadv_inc_counter(bat_priv, BATADV_CNT_NC_CODE);
+		batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES,
+				   skb_src->len + ETH_HLEN);
+		batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE);
+		batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES,
+				   skb_dest->len + ETH_HLEN);
+	}
+
+	/* skb_src is now coded into skb_dest, so free it */
+	kfree_skb(skb_src);
+
+	/* avoid duplicate free of skb from nc_packet */
+	nc_packet->skb = NULL;
+	batadv_nc_packet_free(nc_packet);
+
+	/* Send the coded packet and return true */
+	batadv_send_skb_packet(skb_dest, neigh_node->if_incoming, first_dest);
+	res = true;
+out:
+	if (router_neigh)
+		batadv_neigh_node_free_ref(router_neigh);
+	if (router_coding)
+		batadv_neigh_node_free_ref(router_coding);
+	return res;
+}
+
+/**
+ * batadv_nc_skb_coding_possible - check whether the skb may be used for
+ *  (re-)coding toward dst
+ * @skb: data skb to forward
+ * @dst: destination mac address of the other skb to code with
+ * @src: source mac address of skb
+ *
+ * Whenever we network code a packet we have to check whether we received it in
+ * a network coded form. If so, we may not be able to use it for coding because
+ * some neighbors may also have received (overheard) the packet in the network
+ * coded form without being able to decode it. It is hard to know which of the
+ * neighboring nodes was able to decode the packet, therefore we can only
+ * re-code the packet if the source of the previous encoded packet is involved.
+ * Since the source encoded the packet we can be certain it has all necessary
+ * decode information.
+ *
+ * Returns true if coding of a decoded packet is allowed.
+ */
+static bool batadv_nc_skb_coding_possible(struct sk_buff *skb,
+					  uint8_t *dst, uint8_t *src)
+{
+	if (BATADV_SKB_CB(skb)->decoded && !batadv_compare_eth(dst, src))
+		return false;
+
+	return true;
+}
+
+/**
+ * batadv_nc_path_search - Find the coding path matching in_nc_node and
+ *  out_nc_node to retrieve a buffered packet that can be used for coding.
+ * @bat_priv: the bat priv with all the soft interface information
+ * @in_nc_node: pointer to skb next hop's neighbor nc node
+ * @out_nc_node: pointer to skb source's neighbor nc node
+ * @skb: data skb to forward
+ * @eth_dst: next hop mac address of skb
+ *
+ * Returns an nc packet from the path's buffer if a coding opportunity was
+ * found, NULL otherwise.
+ */
+static struct batadv_nc_packet *
+batadv_nc_path_search(struct batadv_priv *bat_priv,
+		      struct batadv_nc_node *in_nc_node,
+		      struct batadv_nc_node *out_nc_node,
+		      struct sk_buff *skb,
+		      uint8_t *eth_dst)
+{
+	struct batadv_nc_path *nc_path, nc_path_key;
+	struct batadv_nc_packet *nc_packet_out = NULL;
+	struct batadv_nc_packet *nc_packet, *nc_packet_tmp;
+	struct batadv_hashtable *hash = bat_priv->nc.coding_hash;
+	int idx;
+
+	if (!hash)
+		return NULL;
+
+	/* Create the search key from the two hop addresses */
+	batadv_nc_hash_key_gen(&nc_path_key, in_nc_node->addr,
+			       out_nc_node->addr);
+	idx = batadv_nc_hash_choose(&nc_path_key, hash->size);
+
+	/* Check for coding opportunities in this nc_path */
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(nc_path, &hash->table[idx], hash_entry) {
+		if (!batadv_compare_eth(nc_path->prev_hop, in_nc_node->addr))
+			continue;
+
+		if (!batadv_compare_eth(nc_path->next_hop, out_nc_node->addr))
+			continue;
+
+		spin_lock_bh(&nc_path->packet_list_lock);
+		if (list_empty(&nc_path->packet_list)) {
+			spin_unlock_bh(&nc_path->packet_list_lock);
+			continue;
+		}
+
+		list_for_each_entry_safe(nc_packet, nc_packet_tmp,
+					 &nc_path->packet_list, list) {
+			if (!batadv_nc_skb_coding_possible(nc_packet->skb,
+							   eth_dst,
+							   in_nc_node->addr))
+				continue;
+
+			/* Coding opportunity is found! */
+			list_del(&nc_packet->list);
+			nc_packet_out = nc_packet;
+			break;
+		}
+
+		spin_unlock_bh(&nc_path->packet_list_lock);
+		break;
+	}
+	rcu_read_unlock();
+
+	return nc_packet_out;
+}
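
The re-coding rule enforced above is small but central: a packet that we ourselves had to decode may only be coded again if the next hop is the node that originally sent it in coded form, since only that node is guaranteed to hold the decode information. A minimal user-space restatement of the predicate, not from this patch, with ex_ names local to the example:

#include <stdbool.h>
#include <string.h>

#define EX_ETH_ALEN 6

struct ex_skb_cb {
	bool decoded;	/* was this packet received in coded form? */
};

static bool ex_coding_possible(const struct ex_skb_cb *cb,
			       const unsigned char *dst,
			       const unsigned char *src)
{
	/* never-coded packets can always be coded */
	if (!cb->decoded)
		return true;

	/* decoded packets may only be re-coded toward the original coder */
	return memcmp(dst, src, EX_ETH_ALEN) == 0;
}
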
+
+/**
+ * batadv_nc_skb_src_search - Loops through the list of neighboring nodes of
+ *  the skb's sender (may be equal to the originator).
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: data skb to forward
+ * @eth_dst: next hop mac address of skb
+ * @eth_src: source mac address of skb
+ * @in_nc_node: pointer to skb next hop's neighbor nc node
+ *
+ * Returns an nc packet if a suitable coding packet was found, NULL otherwise.
+ */
+static struct batadv_nc_packet *
+batadv_nc_skb_src_search(struct batadv_priv *bat_priv,
+			 struct sk_buff *skb,
+			 uint8_t *eth_dst,
+			 uint8_t *eth_src,
+			 struct batadv_nc_node *in_nc_node)
+{
+	struct batadv_orig_node *orig_node;
+	struct batadv_nc_node *out_nc_node;
+	struct batadv_nc_packet *nc_packet = NULL;
+
+	orig_node = batadv_orig_hash_find(bat_priv, eth_src);
+	if (!orig_node)
+		return NULL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(out_nc_node,
+				&orig_node->out_coding_list, list) {
+		/* Check if the skb is decoded and if recoding is possible */
+		if (!batadv_nc_skb_coding_possible(skb,
+						   out_nc_node->addr, eth_src))
+			continue;
+
+		/* Search for an opportunity in this nc_path */
+		nc_packet = batadv_nc_path_search(bat_priv, in_nc_node,
+						  out_nc_node, skb, eth_dst);
+		if (nc_packet)
+			break;
+	}
+	rcu_read_unlock();
+
+	batadv_orig_node_free_ref(orig_node);
+	return nc_packet;
+}
+
+/**
+ * batadv_nc_skb_store_before_coding - set the ethernet src and dst of the
+ *  unicast skb before it is stored for use in later decoding
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: data skb to store
+ * @eth_dst_new: new destination mac address of skb
+ */
+static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
+					      struct sk_buff *skb,
+					      uint8_t *eth_dst_new)
+{
+	struct ethhdr *ethhdr;
+
+	/* Copy skb header to change the mac header */
+	skb = pskb_copy(skb, GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	/* Set the mac header as if we actually sent the packet uncoded */
+	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+	memcpy(ethhdr->h_source, ethhdr->h_dest, ETH_ALEN);
+	memcpy(ethhdr->h_dest, eth_dst_new, ETH_ALEN);
+
+	/* Set data pointer to MAC header to mimic packets from our tx path */
+	skb_push(skb, ETH_HLEN);
+
+	/* Add the packet to the decoding packet pool */
+	batadv_nc_skb_store_for_decoding(bat_priv, skb);
+
+	/* batadv_nc_skb_store_for_decoding() clones the skb, so we must free
+	 * our ref
+	 */
+	kfree_skb(skb);
+}
+
+/**
+ * batadv_nc_skb_dst_search - Loops through the list of neighboring nodes to
+ *  dst.
+ * @skb: data skb to forward
+ * @neigh_node: next hop to forward packet to
+ * @ethhdr: pointer to the ethernet header inside the skb
+ *
+ * Loops through the list of neighboring nodes to which the next hop has a good
+ * connection (i.e. from which it receives OGMs with sufficient quality). We
+ * need to find a neighbor of our next hop that potentially sent a packet which
+ * our next hop also received (overheard) and has stored for later decoding.
+ *
+ * Returns true if the skb was consumed (encoded packet sent), false otherwise.
+ */
+static bool batadv_nc_skb_dst_search(struct sk_buff *skb,
+				     struct batadv_neigh_node *neigh_node,
+				     struct ethhdr *ethhdr)
+{
+	struct net_device *netdev = neigh_node->if_incoming->soft_iface;
+	struct batadv_priv *bat_priv = netdev_priv(netdev);
+	struct batadv_orig_node *orig_node = neigh_node->orig_node;
+	struct batadv_nc_node *nc_node;
+	struct batadv_nc_packet *nc_packet = NULL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(nc_node, &orig_node->in_coding_list, list) {
+		/* Search for coding opportunity with this in_nc_node */
+		nc_packet = batadv_nc_skb_src_search(bat_priv, skb,
+						     neigh_node->addr,
+						     ethhdr->h_source, nc_node);
+
+		/* Opportunity was found, so stop searching */
+		if (nc_packet)
+			break;
+	}
+	rcu_read_unlock();
+
+	if (!nc_packet)
+		return false;
+
+	/* Save packets for later decoding */
+	batadv_nc_skb_store_before_coding(bat_priv, skb,
+					  neigh_node->addr);
+	batadv_nc_skb_store_before_coding(bat_priv, nc_packet->skb,
+					  nc_packet->neigh_node->addr);
+
+	/* Code and send packets */
+	if (batadv_nc_code_packets(bat_priv, skb, ethhdr, nc_packet,
+				   neigh_node))
+		return true;
+
+	/* Coding failed (e.g. out of memory), so send the buffered packet
+	 * on its own to avoid leaking it. The skb passed as argument will
+	 * be dealt with by the calling function.
+	 */
+	batadv_nc_send_packet(nc_packet);
+	return false;
+}
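
The buffering step that follows (batadv_nc_skb_add_to_path() below) is plain tail-insertion under the path's packet-list lock, which is what lets the timeout scans earlier in this file trust FIFO order. A minimal user-space sketch with a pthread mutex standing in for the kernel spinlock, not from this patch, with ex_ names local to the example:

#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>
#include <time.h>

struct ex_buffered {
	time_t timestamp;
	struct ex_buffered *next;
};

struct ex_path {
	pthread_mutex_t lock;
	struct ex_buffered *head, **tail;
};

static void ex_path_init(struct ex_path *path)
{
	pthread_mutex_init(&path->lock, NULL);
	path->head = NULL;
	path->tail = &path->head;	/* tail pointer enables O(1) append */
}

static bool ex_buffer_packet(struct ex_path *path)
{
	struct ex_buffered *pkt = calloc(1, sizeof(*pkt));

	if (!pkt)
		return false;
	pkt->timestamp = time(NULL);

	pthread_mutex_lock(&path->lock);
	*path->tail = pkt;		/* append at tail: preserves FIFO order */
	path->tail = &pkt->next;
	pthread_mutex_unlock(&path->lock);
	return true;
}
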
+
+/**
+ * batadv_nc_skb_add_to_path - buffer skb for later encoding / decoding
+ * @skb: skb to add to path
+ * @nc_path: path to add skb to
+ * @neigh_node: next hop to forward packet to
+ * @packet_id: checksum to identify packet
+ *
+ * Returns true if the packet was buffered or false in case of an error.
+ */
+static bool batadv_nc_skb_add_to_path(struct sk_buff *skb,
+				      struct batadv_nc_path *nc_path,
+				      struct batadv_neigh_node *neigh_node,
+				      __be32 packet_id)
+{
+	struct batadv_nc_packet *nc_packet;
+
+	nc_packet = kzalloc(sizeof(*nc_packet), GFP_ATOMIC);
+	if (!nc_packet)
+		return false;
+
+	/* Initialize nc_packet */
+	nc_packet->timestamp = jiffies;
+	nc_packet->packet_id = packet_id;
+	nc_packet->skb = skb;
+	nc_packet->neigh_node = neigh_node;
+	nc_packet->nc_path = nc_path;
+
+	/* Add coding packet to list */
+	spin_lock_bh(&nc_path->packet_list_lock);
+	list_add_tail(&nc_packet->list, &nc_path->packet_list);
+	spin_unlock_bh(&nc_path->packet_list_lock);
+
+	return true;
+}
+
+/**
+ * batadv_nc_skb_forward - try to code a packet or add it to the coding packet
+ *  buffer
+ * @skb: data skb to forward
+ * @neigh_node: next hop to forward packet to
+ * @ethhdr: pointer to the ethernet header inside the skb
+ *
+ * Returns true if the skb was consumed (encoded packet sent), false otherwise.
+ */
+bool batadv_nc_skb_forward(struct sk_buff *skb,
+			   struct batadv_neigh_node *neigh_node,
+			   struct ethhdr *ethhdr)
+{
+	const struct net_device *netdev = neigh_node->if_incoming->soft_iface;
+	struct batadv_priv *bat_priv = netdev_priv(netdev);
+	struct batadv_unicast_packet *packet;
+	struct batadv_nc_path *nc_path;
+	__be32 packet_id;
+	u8 *payload;
+
+	/* Check if network coding is enabled */
+	if (!atomic_read(&bat_priv->network_coding))
+		goto out;
+
+	/* We only handle unicast packets */
+	payload = skb_network_header(skb);
+	packet = (struct batadv_unicast_packet *)payload;
+	if (packet->header.packet_type != BATADV_UNICAST)
+		goto out;
+
+	/* Try to find a coding opportunity and send the skb if one is found */
+	if (batadv_nc_skb_dst_search(skb, neigh_node, ethhdr))
+		return true;
+
+	/* Find or create an nc_path for this src-dst pair */
+	nc_path = batadv_nc_get_path(bat_priv,
+				     bat_priv->nc.coding_hash,
+				     ethhdr->h_source,
+				     neigh_node->addr);
+
+	if (!nc_path)
+		goto out;
+
+	/* Add skb to nc_path */
+	packet_id = batadv_skb_crc32(skb, payload + sizeof(*packet));
+	if (!batadv_nc_skb_add_to_path(skb, nc_path, neigh_node, packet_id))
+		goto free_nc_path;
+
+	/* Packet is consumed */
+	return true;
+
+free_nc_path:
+	batadv_nc_path_free_ref(nc_path);
+out:
+	/* Packet is not consumed */
+	return false;
+}
+
+/**
+ * batadv_nc_skb_store_for_decoding - save a clone of the skb which can be used
+ *  when decoding coded packets
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: data skb to store
+ */
+void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
+				      struct sk_buff *skb)
+{
+	struct batadv_unicast_packet *packet;
+	struct batadv_nc_path *nc_path;
+	struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
+	__be32 packet_id;
+	u8 *payload;
+
+	/* Check if network coding is enabled */
+	if (!atomic_read(&bat_priv->network_coding))
+		goto out;
+
+	/* Check for supported packet type */
+	payload = skb_network_header(skb);
+	packet = (struct batadv_unicast_packet *)payload;
+	if (packet->header.packet_type != BATADV_UNICAST)
+		goto out;
+
+	/* Find an existing nc_path or create a new one */
+	nc_path = batadv_nc_get_path(bat_priv,
+				     bat_priv->nc.decoding_hash,
+				     ethhdr->h_source,
+				     ethhdr->h_dest);
+
+	if (!nc_path)
+		goto out;
+
+	/* Clone skb and adjust skb->data to point at batman header */
+	skb = skb_clone(skb, GFP_ATOMIC);
+	if (unlikely(!skb))
+		goto free_nc_path;
+
+	if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
+		goto free_skb;
+
+	if (unlikely(!skb_pull_rcsum(skb, ETH_HLEN)))
+		goto free_skb;
+
+	/* Add skb to nc_path */
+	packet_id = batadv_skb_crc32(skb, payload + sizeof(*packet));
+	if (!batadv_nc_skb_add_to_path(skb, nc_path, NULL, packet_id))
+		goto free_skb;
+
+	batadv_inc_counter(bat_priv, BATADV_CNT_NC_BUFFER);
+	return;
+
+free_skb:
+	kfree_skb(skb);
+free_nc_path:
+	batadv_nc_path_free_ref(nc_path);
+out:
+	return;
+}
+
+/**
+ * batadv_nc_skb_store_sniffed_unicast - check if a received unicast packet
+ *  should be saved in the decoding buffer and, if so, store it there
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: unicast skb to store
+ */
+void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
+					 struct sk_buff *skb)
+{
+	struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
+
+	if (batadv_is_my_mac(bat_priv, ethhdr->h_dest))
+		return;
+
+	/* Set data pointer to MAC header to mimic packets from our tx path */
+	skb_push(skb, ETH_HLEN);
+
+	batadv_nc_skb_store_for_decoding(bat_priv, skb);
+}
+
+/**
+ * batadv_nc_skb_decode_packet - decode given skb using the decode data stored
+ *  in nc_packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: unicast skb to decode
+ * @nc_packet: decode data needed to decode the skb
+ *
+ * Returns pointer to decoded unicast packet if the packet was decoded or NULL
+ * in case of an error.
+ */
+static struct batadv_unicast_packet *
+batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb,
+			    struct batadv_nc_packet *nc_packet)
+{
+	const int h_size = sizeof(struct batadv_unicast_packet);
+	const int h_diff = sizeof(struct batadv_coded_packet) - h_size;
+	struct batadv_unicast_packet *unicast_packet;
+	struct batadv_coded_packet coded_packet_tmp;
+	struct ethhdr *ethhdr, ethhdr_tmp;
+	uint8_t *orig_dest, ttl, ttvn;
+	unsigned int coding_len;
+
+	/* Save headers temporarily */
+	memcpy(&coded_packet_tmp, skb->data, sizeof(coded_packet_tmp));
+	memcpy(&ethhdr_tmp, skb_mac_header(skb), sizeof(ethhdr_tmp));
+
+	if (skb_cow(skb, 0) < 0)
+		return NULL;
+
+	if (unlikely(!skb_pull_rcsum(skb, h_diff)))
+		return NULL;
+
+	/* Data points to the batman header, so set the mac header 14 bytes
+	 * before it and the network header to data
+	 */
+	skb_set_mac_header(skb, -ETH_HLEN);
+	skb_reset_network_header(skb);
+
+	/* Reconstruct original mac header */
+	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+	memcpy(ethhdr, &ethhdr_tmp, sizeof(*ethhdr));
+
+	/* Select the correct unicast header information based on the location
+	 * of our mac address in the coded_packet header
+	 */
+	if (batadv_is_my_mac(bat_priv, coded_packet_tmp.second_dest)) {
+		/* If we are the second destination the packet was overheard,
+		 * so the Ethernet address must be copied to h_dest and
+		 * pkt_type changed from PACKET_OTHERHOST to PACKET_HOST
+		 */
+		memcpy(ethhdr->h_dest, coded_packet_tmp.second_dest, ETH_ALEN);
+		skb->pkt_type = PACKET_HOST;
+
+		orig_dest = coded_packet_tmp.second_orig_dest;
+		ttl = coded_packet_tmp.second_ttl;
+		ttvn = coded_packet_tmp.second_ttvn;
+	} else {
+		orig_dest = coded_packet_tmp.first_orig_dest;
+		ttl = coded_packet_tmp.header.ttl;
+		ttvn = coded_packet_tmp.first_ttvn;
+	}
+
+	coding_len = ntohs(coded_packet_tmp.coded_len);
+
+	if (coding_len > skb->len)
+		return NULL;
+
+	/* Here the magic is reversed:
+	 *   extract the missing packet from the received coded packet
+	 */
+	batadv_nc_memxor(skb->data + h_size,
+			 nc_packet->skb->data + h_size,
+			 coding_len);
+
+	/* Resize decoded skb if decoded with larger packet */
+	if (nc_packet->skb->len > coding_len + h_size)
+		pskb_trim_rcsum(skb, coding_len + h_size);
+
+	/* Create decoded unicast packet */
+	unicast_packet = (struct batadv_unicast_packet *)skb->data;
+	unicast_packet->header.packet_type = BATADV_UNICAST;
+	unicast_packet->header.version = BATADV_COMPAT_VERSION;
+	unicast_packet->header.ttl = ttl;
+	memcpy(unicast_packet->dest, orig_dest, ETH_ALEN);
+	unicast_packet->ttvn = ttvn;
+
+	batadv_nc_packet_free(nc_packet);
+	return unicast_packet;
+}
+
+/**
+ * batadv_nc_find_decoding_packet - search through buffered decoding data to
+ *  find the data needed to decode the coded packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @ethhdr: pointer to the ethernet header inside the coded packet
+ * @coded: coded packet we try to find decode data for
+ *
+ * Returns pointer to nc packet if the needed data was found or NULL otherwise.
+ */
+static struct batadv_nc_packet *
+batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv,
+			       struct ethhdr *ethhdr,
+			       struct batadv_coded_packet *coded)
+{
+	struct batadv_hashtable *hash = bat_priv->nc.decoding_hash;
+	struct batadv_nc_packet *tmp_nc_packet, *nc_packet = NULL;
+	struct batadv_nc_path *nc_path, nc_path_key;
+	uint8_t *dest, *source;
+	__be32 packet_id;
+	int index;
+
+	if (!hash)
+		return NULL;
+
+	/* Select the correct packet id based on the location of our mac-addr */
+	dest = ethhdr->h_source;
+	if (!batadv_is_my_mac(bat_priv, coded->second_dest)) {
+		source = coded->second_source;
+		packet_id = coded->second_crc;
+	} else {
+		source = coded->first_source;
+		packet_id = coded->first_crc;
+	}
+
+	batadv_nc_hash_key_gen(&nc_path_key, source, dest);
+	index = batadv_nc_hash_choose(&nc_path_key, hash->size);
+
+	/* Search for matching coding path */
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(nc_path, &hash->table[index], hash_entry) {
+		/* Find matching nc_packet */
+		spin_lock_bh(&nc_path->packet_list_lock);
+		list_for_each_entry(tmp_nc_packet,
+				    &nc_path->packet_list, list) {
+			if (packet_id == tmp_nc_packet->packet_id) {
+				list_del(&tmp_nc_packet->list);
+
+				nc_packet = tmp_nc_packet;
+				break;
+			}
+		}
+		spin_unlock_bh(&nc_path->packet_list_lock);
+
+		if (nc_packet)
+			break;
+	}
+	rcu_read_unlock();
+
+	if (!nc_packet)
+		batadv_dbg(BATADV_DBG_NC, bat_priv,
+			   "No decoding packet found for %u\n",
+			   ntohl(packet_id));
+
+	return nc_packet;
+}
+
+/**
+ * batadv_nc_recv_coded_packet - try to decode coded packet and enqueue the
+ *  resulting unicast packet
+ * @skb: incoming coded packet
+ * @recv_if: pointer to interface this packet was received on
+ */
+static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
+				       struct batadv_hard_iface *recv_if)
+{
+	struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
+	struct batadv_unicast_packet *unicast_packet;
+	struct batadv_coded_packet *coded_packet;
+	struct batadv_nc_packet *nc_packet;
+	struct ethhdr *ethhdr;
+	int hdr_size = sizeof(*coded_packet);
+
+	/* Check if network coding is enabled */
+	if (!atomic_read(&bat_priv->network_coding))
+		return NET_RX_DROP;
+
+	/* Make sure we can access (and remove) header */
+	if (unlikely(!pskb_may_pull(skb, hdr_size)))
+		return NET_RX_DROP;
+
+	coded_packet = (struct batadv_coded_packet *)skb->data;
+	ethhdr = (struct ethhdr *)skb_mac_header(skb);
+
+	/* Verify frame is destined for us */
+	if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) &&
+	    !batadv_is_my_mac(bat_priv, coded_packet->second_dest))
+		return NET_RX_DROP;
+
+	/* Update stat counter */
+	if (batadv_is_my_mac(bat_priv, coded_packet->second_dest))
+		batadv_inc_counter(bat_priv, BATADV_CNT_NC_SNIFFED);
+
+	nc_packet = batadv_nc_find_decoding_packet(bat_priv, ethhdr,
+						   coded_packet);
+	if (!nc_packet) {
+		batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED);
+		return NET_RX_DROP;
+	}
+
+	/* Make both skbs linear, because decoding accesses the entire buffer */
+	if (skb_linearize(skb) < 0)
+		goto free_nc_packet;
+
+	if (skb_linearize(nc_packet->skb) < 0)
+		goto free_nc_packet;
+
+	/* Decode the packet */
+	unicast_packet = batadv_nc_skb_decode_packet(bat_priv, skb, nc_packet);
+	if (!unicast_packet) {
+		batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED);
+		goto free_nc_packet;
+	}
+
+	/* Mark packet as decoded to do correct recoding when forwarding */
+	BATADV_SKB_CB(skb)->decoded = true;
+	batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE);
+	batadv_add_counter(bat_priv, BATADV_CNT_NC_DECODE_BYTES,
+			   skb->len + ETH_HLEN);
+	return batadv_recv_unicast_packet(skb, recv_if);
+
+free_nc_packet:
+	batadv_nc_packet_free(nc_packet);
+	return NET_RX_DROP;
+}
+
+/**
+ * batadv_nc_free - clean up network coding memory
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+void batadv_nc_free(struct batadv_priv *bat_priv)
+{
+	batadv_recv_handler_unregister(BATADV_CODED);
+	cancel_delayed_work_sync(&bat_priv->nc.work);
+
+	batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL);
+	batadv_hash_destroy(bat_priv->nc.coding_hash);
+	batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash, NULL);
+	batadv_hash_destroy(bat_priv->nc.decoding_hash);
+}
+
+/**
+ * batadv_nc_nodes_seq_print_text - print the nc node information
+ * @seq: seq file to print on
+ * @offset: not used
+ */
+int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset)
+{
+	struct net_device *net_dev = (struct net_device *)seq->private;
+	struct batadv_priv *bat_priv = netdev_priv(net_dev);
+	struct batadv_hashtable *hash = bat_priv->orig_hash;
+	struct batadv_hard_iface *primary_if;
+	struct hlist_head *head;
+	struct batadv_orig_node *orig_node;
+	struct batadv_nc_node *nc_node;
+	int i;
+
+	primary_if = batadv_seq_print_text_primary_if_get(seq);
+	if (!primary_if)
+		goto out;
+
+	/* Traverse list of originators */
+	for (i = 0; i < hash->size; i++) {
+		head = &hash->table[i];
+
+		/* For each orig_node in this bin */
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(orig_node, head, hash_entry) {
+			seq_printf(seq, "Node:      %pM\n", orig_node->orig);
+
+			seq_puts(seq, " Ingoing:  ");
+			/* For each in_nc_node to this orig_node */
+			list_for_each_entry_rcu(nc_node,
+						&orig_node->in_coding_list,
+						list)
+				seq_printf(seq, "%pM ",
+					   nc_node->addr);
+			seq_puts(seq, "\n");
+
+			seq_puts(seq, " Outgoing: ");
+			/* For each out_nc_node to this orig_node */
+			list_for_each_entry_rcu(nc_node,
+						&orig_node->out_coding_list,
+						list)
+				seq_printf(seq, "%pM ",
+					   nc_node->addr);
+			seq_puts(seq, "\n\n");
+		}
+		rcu_read_unlock();
+	}
+
+out:
+	if (primary_if)
+		batadv_hardif_free_ref(primary_if);
+	return 0;
+}
+
+/**
+ * batadv_nc_init_debugfs - create nc folder and related files in debugfs
+ * @bat_priv: the bat priv with all the soft interface information
+ */
+int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
+{ +	struct dentry *nc_dir, *file; + +	nc_dir = debugfs_create_dir("nc", bat_priv->debug_dir); +	if (!nc_dir) +		goto out; + +	file = debugfs_create_u8("min_tq", S_IRUGO | S_IWUSR, nc_dir, +				 &bat_priv->nc.min_tq); +	if (!file) +		goto out; + +	file = debugfs_create_u32("max_fwd_delay", S_IRUGO | S_IWUSR, nc_dir, +				  &bat_priv->nc.max_fwd_delay); +	if (!file) +		goto out; + +	file = debugfs_create_u32("max_buffer_time", S_IRUGO | S_IWUSR, nc_dir, +				  &bat_priv->nc.max_buffer_time); +	if (!file) +		goto out; + +	return 0; + +out: +	return -ENOMEM; +} diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h new file mode 100644 index 00000000000..4fa6d0caddb --- /dev/null +++ b/net/batman-adv/network-coding.h @@ -0,0 +1,123 @@ +/* Copyright (C) 2012-2013 B.A.T.M.A.N. contributors: + * + * Martin Hundebøll, Jeppe Ledet-Pedersen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#ifndef _NET_BATMAN_ADV_NETWORK_CODING_H_ +#define _NET_BATMAN_ADV_NETWORK_CODING_H_ + +#ifdef CONFIG_BATMAN_ADV_NC + +int batadv_nc_init(struct batadv_priv *bat_priv); +void batadv_nc_free(struct batadv_priv *bat_priv); +void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, +			      struct batadv_orig_node *orig_node, +			      struct batadv_orig_node *orig_neigh_node, +			      struct batadv_ogm_packet *ogm_packet, +			      int is_single_hop_neigh); +void batadv_nc_purge_orig(struct batadv_priv *bat_priv, +			  struct batadv_orig_node *orig_node, +			  bool (*to_purge)(struct batadv_priv *, +					   struct batadv_nc_node *)); +void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv); +void batadv_nc_init_orig(struct batadv_orig_node *orig_node); +bool batadv_nc_skb_forward(struct sk_buff *skb, +			   struct batadv_neigh_node *neigh_node, +			   struct ethhdr *ethhdr); +void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, +				      struct sk_buff *skb); +void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, +					 struct sk_buff *skb); +int batadv_nc_nodes_seq_print_text(struct seq_file *seq, void *offset); +int batadv_nc_init_debugfs(struct batadv_priv *bat_priv); + +#else /* ifdef CONFIG_BATMAN_ADV_NC */ + +static inline int batadv_nc_init(struct batadv_priv *bat_priv) +{ +	return 0; +} + +static inline void batadv_nc_free(struct batadv_priv *bat_priv) +{ +	return; +} + +static inline void +batadv_nc_update_nc_node(struct batadv_priv *bat_priv, +			 struct batadv_orig_node *orig_node, +			 struct batadv_orig_node *orig_neigh_node, +			 struct batadv_ogm_packet *ogm_packet, +			 int is_single_hop_neigh) +{ +	return; +} + +static inline void +batadv_nc_purge_orig(struct batadv_priv *bat_priv, +		     struct batadv_orig_node *orig_node, +		     bool (*to_purge)(struct batadv_priv *, +				      struct batadv_nc_node *)) +{ +	return; +} + +static inline void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv) +{ +	return; +} + 
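
The static inline stubs in this #else branch follow the usual kernel pattern for compile-time optional features: callers invoke the API unconditionally, and with the feature disabled every call collapses to a no-op or a constant that the compiler removes. A minimal sketch of the same pattern with a hypothetical CONFIG_EXAMPLE_FEATURE (not part of this patch):

#ifdef CONFIG_EXAMPLE_FEATURE
int example_feature_init(void);
void example_feature_free(void);
#else
static inline int example_feature_init(void)
{
	return 0;	/* report success: feature compiled out, nothing to do */
}

static inline void example_feature_free(void)
{
}
#endif
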
+static inline void batadv_nc_init_orig(struct batadv_orig_node *orig_node)
+{
+	return;
+}
+
+static inline bool batadv_nc_skb_forward(struct sk_buff *skb,
+					 struct batadv_neigh_node *neigh_node,
+					 struct ethhdr *ethhdr)
+{
+	return false;
+}
+
+static inline void
+batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv,
+				 struct sk_buff *skb)
+{
+	return;
+}
+
+static inline void
+batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv,
+				    struct sk_buff *skb)
+{
+	return;
+}
+
+static inline int batadv_nc_nodes_seq_print_text(struct seq_file *seq,
+						 void *offset)
+{
+	return 0;
+}
+
+static inline int batadv_nc_init_debugfs(struct batadv_priv *bat_priv)
+{
+	return 0;
+}
+
+#endif /* ifdef CONFIG_BATMAN_ADV_NC */
+
+#endif /* _NET_BATMAN_ADV_NETWORK_CODING_H_ */
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 96fb80b724d..2f345254663 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -28,6 +28,7 @@
 #include "unicast.h"
 #include "soft-interface.h"
 #include "bridge_loop_avoidance.h"
+#include "network-coding.h"
 
 /* hash class keys */
 static struct lock_class_key batadv_orig_hash_lock_class_key;
@@ -142,6 +143,9 @@ static void batadv_orig_node_free_rcu(struct rcu_head *rcu)
 
 	spin_unlock_bh(&orig_node->neigh_list_lock);
 
+	/* Free nc_nodes */
+	batadv_nc_purge_orig(orig_node->bat_priv, orig_node, NULL);
+
 	batadv_frag_list_free(&orig_node->frag_list);
 	batadv_tt_global_del_orig(orig_node->bat_priv, orig_node,
 				  "originator timed out");
@@ -219,6 +223,8 @@ struct batadv_orig_node *batadv_get_orig_node(struct batadv_priv *bat_priv,
 	spin_lock_init(&orig_node->neigh_list_lock);
 	spin_lock_init(&orig_node->tt_buff_lock);
 
+	batadv_nc_init_orig(orig_node);
+
 	/* extra reference for return */
 	atomic_set(&orig_node->refcount, 2);
 
@@ -459,7 +465,7 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset)
 					   neigh_node_tmp->tq_avg);
 			}
 
-			seq_printf(seq, "\n");
+			seq_puts(seq, "\n");
 			batman_count++;
 
 next:
@@ -469,7 +475,7 @@ next:
 	}
 
 	if (batman_count == 0)
-		seq_printf(seq, "No batman nodes in range ...\n");
+		seq_puts(seq, "No batman nodes in range ...\n");
 
 out:
 	if (primary_if)
diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h
index ed0aa89bbf8..a51ccfc39da 100644
--- a/net/batman-adv/packet.h
+++ b/net/batman-adv/packet.h
@@ -30,6 +30,7 @@ enum batadv_packettype {
 	BATADV_TT_QUERY		= 0x07,
 	BATADV_ROAM_ADV		= 0x08,
 	BATADV_UNICAST_4ADDR	= 0x09,
+	BATADV_CODED		= 0x0a,
 };
 
 /**
@@ -278,4 +279,35 @@ struct batadv_tt_change {
 	uint8_t addr[ETH_ALEN];
 } __packed;
 
+/**
+ * struct batadv_coded_packet - network coded packet
+ * @header: common batman packet header and ttl of first included packet
+ * @first_ttvn: tt-version number of first included packet
+ * @first_source: original source of first included packet
+ * @first_orig_dest: original destination of first included packet
+ * @first_crc: checksum of first included packet
+ * @second_ttl: ttl of second packet
+ * @second_ttvn: tt version number of second included packet
+ * @second_dest: second receiver of this coded packet
+ * @second_source: original source of second included packet
+ * @second_orig_dest: original destination of second included packet
+ * @second_crc: checksum of second included packet
+ * @coded_len: length of network coded part of the payload
+ */
+struct batadv_coded_packet {
+	struct batadv_header header;
+	uint8_t  first_ttvn; +	/* uint8_t  first_dest[ETH_ALEN]; - saved in mac header destination */ +	uint8_t  first_source[ETH_ALEN]; +	uint8_t  first_orig_dest[ETH_ALEN]; +	__be32   first_crc; +	uint8_t  second_ttl; +	uint8_t  second_ttvn; +	uint8_t  second_dest[ETH_ALEN]; +	uint8_t  second_source[ETH_ALEN]; +	uint8_t  second_orig_dest[ETH_ALEN]; +	__be32   second_crc; +	__be16   coded_len; +}; +  #endif /* _NET_BATMAN_ADV_PACKET_H_ */ diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 319f2906c71..b27a4d792d1 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -29,6 +29,7 @@  #include "unicast.h"  #include "bridge_loop_avoidance.h"  #include "distributed-arp-table.h" +#include "network-coding.h"  static int batadv_route_unicast_packet(struct sk_buff *skb,  				       struct batadv_hard_iface *recv_if); @@ -548,6 +549,17 @@ batadv_find_ifalter_router(struct batadv_orig_node *primary_orig,  	return router;  } +/** + * batadv_check_unicast_packet - Check for malformed unicast packets + * @bat_priv: the bat priv with all the soft interface information + * @skb: packet to check + * @hdr_size: size of header to pull + * + * Check for short header and bad addresses in given packet. Returns negative + * value when check fails and 0 otherwise. The negative value depends on the + * reason: -ENODATA for bad header, -EBADR for broadcast destination or source, + * and -EREMOTE for non-local (other host) destination. + */  static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,  				       struct sk_buff *skb, int hdr_size)  { @@ -555,21 +567,21 @@ static int batadv_check_unicast_packet(struct batadv_priv *bat_priv,  	/* drop packet if it has not necessary minimum size */  	if (unlikely(!pskb_may_pull(skb, hdr_size))) -		return -1; +		return -ENODATA;  	ethhdr = (struct ethhdr *)skb_mac_header(skb);  	/* packet with unicast indication but broadcast recipient */  	if (is_broadcast_ether_addr(ethhdr->h_dest)) -		return -1; +		return -EBADR;  	/* packet with broadcast sender address */  	if (is_broadcast_ether_addr(ethhdr->h_source)) -		return -1; +		return -EBADR;  	/* not for me */  	if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest)) -		return -1; +		return -EREMOTE;  	return 0;  } @@ -852,15 +864,18 @@ static int batadv_route_unicast_packet(struct sk_buff *skb,  	/* decrement ttl */  	unicast_packet->header.ttl--; -	/* Update stats counter */ -	batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); -	batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, -			   skb->len + ETH_HLEN); - -	/* route it */ -	if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) +	/* network code packet if possible */ +	if (batadv_nc_skb_forward(skb, neigh_node, ethhdr)) { +		ret = NET_RX_SUCCESS; +	} else if (batadv_send_skb_to_orig(skb, orig_node, recv_if)) {  		ret = NET_RX_SUCCESS; +		/* Update stats counter */ +		batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); +		batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, +				   skb->len + ETH_HLEN); +	} +  out:  	if (neigh_node)  		batadv_neigh_node_free_ref(neigh_node); @@ -924,7 +939,7 @@ out:  }  static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv, -				     struct sk_buff *skb) { +				     struct sk_buff *skb, int hdr_len) {  	uint8_t curr_ttvn, old_ttvn;  	struct batadv_orig_node *orig_node;  	struct ethhdr *ethhdr; @@ -933,7 +948,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,  	int is_old_ttvn;  	/* check if there is enough data before accessing it */ -	if (pskb_may_pull(skb, 
sizeof(*unicast_packet) + ETH_HLEN) < 0)
+	if (!pskb_may_pull(skb, hdr_len + ETH_HLEN))
 		return 0;
 
 	/* create a copy of the skb (in case of for re-routing) to modify it. */
@@ -941,7 +956,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
 		return 0;
 
 	unicast_packet = (struct batadv_unicast_packet *)skb->data;
-	ethhdr = (struct ethhdr *)(skb->data + sizeof(*unicast_packet));
+	ethhdr = (struct ethhdr *)(skb->data + hdr_len);
 
 	/* check if the destination client was served by this node and it is now
 	 * roaming. In this case, it means that the node has got a ROAM_ADV
@@ -1035,7 +1050,7 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
 	struct batadv_unicast_4addr_packet *unicast_4addr_packet;
 	uint8_t *orig_addr;
 	struct batadv_orig_node *orig_node = NULL;
-	int hdr_size = sizeof(*unicast_packet);
+	int check, hdr_size = sizeof(*unicast_packet);
 	bool is4addr;
 
 	unicast_packet = (struct batadv_unicast_packet *)skb->data;
@@ -1046,10 +1061,18 @@ int batadv_recv_unicast_packet(struct sk_buff *skb,
 	if (is4addr)
 		hdr_size = sizeof(*unicast_4addr_packet);
 
-	if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0)
-		return NET_RX_DROP;
+	/* function returns -EREMOTE for promiscuous packets */
+	check = batadv_check_unicast_packet(bat_priv, skb, hdr_size);
+
+	/* Even though the packet is not for us, we might save it to use for
+	 * decoding a later received coded packet
+	 */
+	if (check == -EREMOTE)
+		batadv_nc_skb_store_sniffed_unicast(bat_priv, skb);
 
-	if (!batadv_check_unicast_ttvn(bat_priv, skb))
+	if (check < 0)
+		return NET_RX_DROP;
+	if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size))
 		return NET_RX_DROP;
 
 	/* packet for me */
@@ -1093,7 +1116,7 @@ int batadv_recv_ucast_frag_packet(struct sk_buff *skb,
 	if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0)
 		return NET_RX_DROP;
 
-	if (!batadv_check_unicast_ttvn(bat_priv, skb))
+	if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size))
 		return NET_RX_DROP;
 
 	unicast_packet = (struct batadv_unicast_frag_packet *)skb->data;
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index a67cffde37a..263cfd1ccee 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -27,6 +27,7 @@
 #include "vis.h"
 #include "gateway_common.h"
 #include "originator.h"
+#include "network-coding.h"
 
 #include <linux/if_ether.h>
 
@@ -39,6 +40,7 @@ int batadv_send_skb_packet(struct sk_buff *skb,
 			   struct batadv_hard_iface *hard_iface,
 			   const uint8_t *dst_addr)
 {
+	struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
 	struct ethhdr *ethhdr;
 
 	if (hard_iface->if_status != BATADV_IF_ACTIVE)
@@ -70,6 +72,9 @@ int batadv_send_skb_packet(struct sk_buff *skb,
 
 	skb->dev = hard_iface->net_dev;
 
+	/* Save a clone of the skb to use when decoding coded packets */
+	batadv_nc_skb_store_for_decoding(bat_priv, skb);
+
 	/* dev_queue_xmit() returns a negative result on error. However on
 	 * congestion and traffic shaping, it drops and returns NET_XMIT_DROP
 	 * (which is > 0). This will not be treated as an error.
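
The send-path hook added above keeps a copy of every transmitted unicast frame so that a coded packet received later can be XORed back into plaintext. A minimal user-space sketch of that bookkeeping, not from this patch; the ex_ names and the crc parameter are illustrative, not batman-adv API:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct ex_frame {
	uint32_t crc;		/* identifies the packet inside a coded one */
	size_t len;
	unsigned char data[];	/* flexible array member holding the copy */
};

/* Store a private copy; the caller keeps ownership of buf. */
static struct ex_frame *ex_store_for_decoding(const unsigned char *buf,
					      size_t len, uint32_t crc)
{
	struct ex_frame *copy = malloc(sizeof(*copy) + len);

	if (!copy)
		return NULL;
	copy->crc = crc;
	copy->len = len;
	memcpy(copy->data, buf, len);
	return copy;	/* a real buffer would also hash this by (src, dst) */
}
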
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 2711e870f55..6f20d339e33 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -37,6 +37,7 @@  #include <linux/if_ether.h>  #include "unicast.h"  #include "bridge_loop_avoidance.h" +#include "network-coding.h"  static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); @@ -401,55 +402,6 @@ static void batadv_set_lockdep_class(struct net_device *dev)  }  /** - * batadv_softif_init - Late stage initialization of soft interface - * @dev: registered network device to modify - * - * Returns error code on failures - */ -static int batadv_softif_init(struct net_device *dev) -{ -	batadv_set_lockdep_class(dev); - -	return 0; -} - -static const struct net_device_ops batadv_netdev_ops = { -	.ndo_init = batadv_softif_init, -	.ndo_open = batadv_interface_open, -	.ndo_stop = batadv_interface_release, -	.ndo_get_stats = batadv_interface_stats, -	.ndo_set_mac_address = batadv_interface_set_mac_addr, -	.ndo_change_mtu = batadv_interface_change_mtu, -	.ndo_start_xmit = batadv_interface_tx, -	.ndo_validate_addr = eth_validate_addr -}; - -static void batadv_interface_setup(struct net_device *dev) -{ -	struct batadv_priv *priv = netdev_priv(dev); - -	ether_setup(dev); - -	dev->netdev_ops = &batadv_netdev_ops; -	dev->destructor = free_netdev; -	dev->tx_queue_len = 0; - -	/* can't call min_mtu, because the needed variables -	 * have not been initialized yet -	 */ -	dev->mtu = ETH_DATA_LEN; -	/* reserve more space in the skbuff for our header */ -	dev->hard_header_len = BATADV_HEADER_LEN; - -	/* generate random address */ -	eth_hw_addr_random(dev); - -	SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops); - -	memset(priv, 0, sizeof(*priv)); -} - -/**   * batadv_softif_destroy_finish - cleans up the remains of a softif   * @work: work queue item   * @@ -465,7 +417,6 @@ static void batadv_softif_destroy_finish(struct work_struct *work)  				cleanup_work);  	soft_iface = bat_priv->soft_iface; -	batadv_debugfs_del_meshif(soft_iface);  	batadv_sysfs_del_meshif(soft_iface);  	rtnl_lock(); @@ -473,21 +424,22 @@ static void batadv_softif_destroy_finish(struct work_struct *work)  	rtnl_unlock();  } -struct net_device *batadv_softif_create(const char *name) +/** + * batadv_softif_init_late - late stage initialization of soft interface + * @dev: registered network device to modify + * + * Returns error code on failures + */ +static int batadv_softif_init_late(struct net_device *dev)  { -	struct net_device *soft_iface;  	struct batadv_priv *bat_priv;  	int ret;  	size_t cnt_len = sizeof(uint64_t) * BATADV_CNT_NUM; -	soft_iface = alloc_netdev(sizeof(*bat_priv), name, -				  batadv_interface_setup); - -	if (!soft_iface) -		goto out; +	batadv_set_lockdep_class(dev); -	bat_priv = netdev_priv(soft_iface); -	bat_priv->soft_iface = soft_iface; +	bat_priv = netdev_priv(dev); +	bat_priv->soft_iface = dev;  	INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish);  	/* batadv_interface_stats() needs to be available as soon as @@ -495,14 +447,7 @@ struct net_device *batadv_softif_create(const char *name)  	 */  	bat_priv->bat_counters = __alloc_percpu(cnt_len, __alignof__(uint64_t));  	if (!bat_priv->bat_counters) -		goto free_soft_iface; - -	ret = register_netdevice(soft_iface); -	if (ret < 0) { -		pr_err("Unable to register the batman interface '%s': %i\n", -		       name, ret); -		goto free_bat_counters; -	} +		return -ENOMEM;  	atomic_set(&bat_priv->aggregated_ogms, 1);  	
atomic_set(&bat_priv->bonding, 0); @@ -540,49 +485,196 @@ struct net_device *batadv_softif_create(const char *name)  	bat_priv->primary_if = NULL;  	bat_priv->num_ifaces = 0; -	ret = batadv_algo_select(bat_priv, batadv_routing_algo); -	if (ret < 0) -		goto unreg_soft_iface; +	batadv_nc_init_bat_priv(bat_priv); -	ret = batadv_sysfs_add_meshif(soft_iface); +	ret = batadv_algo_select(bat_priv, batadv_routing_algo);  	if (ret < 0) -		goto unreg_soft_iface; +		goto free_bat_counters; -	ret = batadv_debugfs_add_meshif(soft_iface); +	ret = batadv_debugfs_add_meshif(dev);  	if (ret < 0) -		goto unreg_sysfs; +		goto free_bat_counters; -	ret = batadv_mesh_init(soft_iface); +	ret = batadv_mesh_init(dev);  	if (ret < 0)  		goto unreg_debugfs; -	return soft_iface; +	return 0;  unreg_debugfs: -	batadv_debugfs_del_meshif(soft_iface); -unreg_sysfs: -	batadv_sysfs_del_meshif(soft_iface); -unreg_soft_iface: -	free_percpu(bat_priv->bat_counters); -	unregister_netdevice(soft_iface); -	return NULL; - +	batadv_debugfs_del_meshif(dev);  free_bat_counters:  	free_percpu(bat_priv->bat_counters); -free_soft_iface: -	free_netdev(soft_iface); + +	return ret; +} + +/** + * batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface + * @dev: batadv_soft_interface used as master interface + * @slave_dev: net_device which should become the slave interface + * + * Return 0 if successful or error otherwise. + */ +static int batadv_softif_slave_add(struct net_device *dev, +				   struct net_device *slave_dev) +{ +	struct batadv_hard_iface *hard_iface; +	int ret = -EINVAL; + +	hard_iface = batadv_hardif_get_by_netdev(slave_dev); +	if (!hard_iface || hard_iface->soft_iface != NULL) +		goto out; + +	ret = batadv_hardif_enable_interface(hard_iface, dev->name); +  out: -	return NULL; +	if (hard_iface) +		batadv_hardif_free_ref(hard_iface); +	return ret;  } -void batadv_softif_destroy(struct net_device *soft_iface) +/** + * batadv_softif_slave_del - Delete a slave iface from a batadv_soft_interface + * @dev: batadv_soft_interface used as master interface + * @slave_dev: net_device which should be removed from the master interface + * + * Return 0 if successful or error otherwise. + */ +static int batadv_softif_slave_del(struct net_device *dev, +				   struct net_device *slave_dev) +{ +	struct batadv_hard_iface *hard_iface; +	int ret = -EINVAL; + +	hard_iface = batadv_hardif_get_by_netdev(slave_dev); + +	if (!hard_iface || hard_iface->soft_iface != dev) +		goto out; + +	batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_KEEP); +	ret = 0; + +out: +	if (hard_iface) +		batadv_hardif_free_ref(hard_iface); +	return ret; +} + +static const struct net_device_ops batadv_netdev_ops = { +	.ndo_init = batadv_softif_init_late, +	.ndo_open = batadv_interface_open, +	.ndo_stop = batadv_interface_release, +	.ndo_get_stats = batadv_interface_stats, +	.ndo_set_mac_address = batadv_interface_set_mac_addr, +	.ndo_change_mtu = batadv_interface_change_mtu, +	.ndo_start_xmit = batadv_interface_tx, +	.ndo_validate_addr = eth_validate_addr, +	.ndo_add_slave = batadv_softif_slave_add, +	.ndo_del_slave = batadv_softif_slave_del, +}; + +/** + * batadv_softif_free - Deconstructor of batadv_soft_interface + * @dev: Device to cleanup and remove + */ +static void batadv_softif_free(struct net_device *dev) +{ +	batadv_debugfs_del_meshif(dev); +	batadv_mesh_free(dev); + +	/* some scheduled RCU callbacks need the bat_priv struct to accomplish +	 * their tasks. 
Wait for them all to be finished before freeing the +	 * netdev and its private data (bat_priv) +	 */ +	rcu_barrier(); + +	free_netdev(dev); +} + +/** + * batadv_softif_init_early - early stage initialization of soft interface + * @dev: registered network device to modify + */ +static void batadv_softif_init_early(struct net_device *dev) +{ +	struct batadv_priv *priv = netdev_priv(dev); + +	ether_setup(dev); + +	dev->netdev_ops = &batadv_netdev_ops; +	dev->destructor = batadv_softif_free; +	dev->tx_queue_len = 0; + +	/* can't call min_mtu, because the needed variables +	 * have not been initialized yet +	 */ +	dev->mtu = ETH_DATA_LEN; +	/* reserve more space in the skbuff for our header */ +	dev->hard_header_len = BATADV_HEADER_LEN; + +	/* generate random address */ +	eth_hw_addr_random(dev); + +	SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops); + +	memset(priv, 0, sizeof(*priv)); +} + +struct net_device *batadv_softif_create(const char *name) +{ +	struct net_device *soft_iface; +	int ret; + +	soft_iface = alloc_netdev(sizeof(struct batadv_priv), name, +				  batadv_softif_init_early); +	if (!soft_iface) +		return NULL; + +	soft_iface->rtnl_link_ops = &batadv_link_ops; + +	ret = register_netdevice(soft_iface); +	if (ret < 0) { +		pr_err("Unable to register the batman interface '%s': %i\n", +		       name, ret); +		free_netdev(soft_iface); +		return NULL; +	} + +	return soft_iface; +} + +/** + * batadv_softif_destroy_sysfs - deletion of batadv_soft_interface via sysfs + * @soft_iface: the to-be-removed batman-adv interface + */ +void batadv_softif_destroy_sysfs(struct net_device *soft_iface)  {  	struct batadv_priv *bat_priv = netdev_priv(soft_iface); -	batadv_mesh_free(soft_iface);  	queue_work(batadv_event_workqueue, &bat_priv->cleanup_work);  } +/** + * batadv_softif_destroy_netlink - deletion of batadv_soft_interface via netlink + * @soft_iface: the to-be-removed batman-adv interface + * @head: list pointer + */ +static void batadv_softif_destroy_netlink(struct net_device *soft_iface, +					  struct list_head *head) +{ +	struct batadv_hard_iface *hard_iface; + +	list_for_each_entry(hard_iface, &batadv_hardif_list, list) { +		if (hard_iface->soft_iface == soft_iface) +			batadv_hardif_disable_interface(hard_iface, +							BATADV_IF_CLEANUP_KEEP); +	} + +	batadv_sysfs_del_meshif(soft_iface); +	unregister_netdevice_queue(soft_iface, head); +} +  int batadv_softif_is_valid(const struct net_device *net_dev)  {  	if (net_dev->netdev_ops->ndo_start_xmit == batadv_interface_tx) @@ -591,6 +683,13 @@ int batadv_softif_is_valid(const struct net_device *net_dev)  	return 0;  } +struct rtnl_link_ops batadv_link_ops __read_mostly = { +	.kind		= "batadv", +	.priv_size	= sizeof(struct batadv_priv), +	.setup		= batadv_softif_init_early, +	.dellink	= batadv_softif_destroy_netlink, +}; +  /* ethtool */  static int batadv_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)  { @@ -662,6 +761,17 @@ static const struct {  	{ "dat_put_rx" },  	{ "dat_cached_reply_tx" },  #endif +#ifdef CONFIG_BATMAN_ADV_NC +	{ "nc_code" }, +	{ "nc_code_bytes" }, +	{ "nc_recode" }, +	{ "nc_recode_bytes" }, +	{ "nc_buffer" }, +	{ "nc_decode" }, +	{ "nc_decode_bytes" }, +	{ "nc_decode_failed" }, +	{ "nc_sniffed" }, +#endif  };  static void batadv_get_strings(struct net_device *dev, uint32_t stringset, diff --git a/net/batman-adv/soft-interface.h b/net/batman-adv/soft-interface.h index 43182e5e603..2f2472c2ea0 100644 --- a/net/batman-adv/soft-interface.h +++ b/net/batman-adv/soft-interface.h @@ -25,7 +25,8 @@ void 
batadv_interface_rx(struct net_device *soft_iface,  			 struct sk_buff *skb, struct batadv_hard_iface *recv_if,  			 int hdr_size, struct batadv_orig_node *orig_node);  struct net_device *batadv_softif_create(const char *name); -void batadv_softif_destroy(struct net_device *soft_iface); +void batadv_softif_destroy_sysfs(struct net_device *soft_iface);  int batadv_softif_is_valid(const struct net_device *net_dev); +extern struct rtnl_link_ops batadv_link_ops;  #endif /* _NET_BATMAN_ADV_SOFT_INTERFACE_H_ */ diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index afbba319d73..15a22efa9a6 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -442,6 +442,9 @@ static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth,  #ifdef CONFIG_BATMAN_ADV_DEBUG  BATADV_ATTR_SIF_UINT(log_level, S_IRUGO | S_IWUSR, 0, BATADV_DBG_ALL, NULL);  #endif +#ifdef CONFIG_BATMAN_ADV_NC +BATADV_ATTR_SIF_BOOL(network_coding, S_IRUGO | S_IWUSR, NULL); +#endif  static struct batadv_attribute *batadv_mesh_attrs[] = {  	&batadv_attr_aggregated_ogms, @@ -464,6 +467,9 @@ static struct batadv_attribute *batadv_mesh_attrs[] = {  #ifdef CONFIG_BATMAN_ADV_DEBUG  	&batadv_attr_log_level,  #endif +#ifdef CONFIG_BATMAN_ADV_NC +	&batadv_attr_network_coding, +#endif  	NULL,  }; @@ -582,13 +588,15 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj,  	}  	if (status_tmp == BATADV_IF_NOT_IN_USE) { -		batadv_hardif_disable_interface(hard_iface); +		batadv_hardif_disable_interface(hard_iface, +						BATADV_IF_CLEANUP_AUTO);  		goto unlock;  	}  	/* if the interface already is in use */  	if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) -		batadv_hardif_disable_interface(hard_iface); +		batadv_hardif_disable_interface(hard_iface, +						BATADV_IF_CLEANUP_AUTO);  	ret = batadv_hardif_enable_interface(hard_iface, buff); @@ -688,15 +696,10 @@ int batadv_throw_uevent(struct batadv_priv *bat_priv, enum batadv_uev_type type,  			enum batadv_uev_action action, const char *data)  {  	int ret = -ENOMEM; -	struct batadv_hard_iface *primary_if;  	struct kobject *bat_kobj;  	char *uevent_env[4] = { NULL, NULL, NULL, NULL }; -	primary_if = batadv_primary_if_get_selected(bat_priv); -	if (!primary_if) -		goto out; - -	bat_kobj = &primary_if->soft_iface->dev.kobj; +	bat_kobj = &bat_priv->soft_iface->dev.kobj;  	uevent_env[0] = kmalloc(strlen(BATADV_UEV_TYPE_VAR) +  				strlen(batadv_uev_type_str[type]) + 1, @@ -732,9 +735,6 @@ out:  	kfree(uevent_env[1]);  	kfree(uevent_env[2]); -	if (primary_if) -		batadv_hardif_free_ref(primary_if); -  	if (ret)  		batadv_dbg(BATADV_DBG_BATMAN, bat_priv,  			   "Impossible to send uevent for (%s,%s,%s) event (err: %d)\n", diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 7abee19567e..5e89deeb954 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -385,25 +385,19 @@ static void batadv_tt_prepare_packet_buff(struct batadv_priv *bat_priv,  					  int *packet_buff_len,  					  int min_packet_len)  { -	struct batadv_hard_iface *primary_if;  	int req_len; -	primary_if = batadv_primary_if_get_selected(bat_priv); -  	req_len = min_packet_len;  	req_len += batadv_tt_len(atomic_read(&bat_priv->tt.local_changes));  	/* if we have too many changes for one packet don't send any  	 * and wait for the tt table request which will be fragmented  	 */ -	if ((!primary_if) || (req_len > primary_if->soft_iface->mtu)) +	if (req_len > bat_priv->soft_iface->mtu)  		req_len = min_packet_len;  	
batadv_tt_realloc_packet_buff(packet_buff, packet_buff_len,  				      min_packet_len, req_len); - -	if (primary_if) -		batadv_hardif_free_ref(primary_if);  }  static int batadv_tt_changes_fill_buff(struct batadv_priv *bat_priv, @@ -908,7 +902,7 @@ out_remove:  	/* remove address from local hash if present */  	local_flags = batadv_tt_local_remove(bat_priv, tt_addr,  					     "global tt received", -					     !!(flags & BATADV_TT_CLIENT_ROAM)); +					     flags & BATADV_TT_CLIENT_ROAM);  	tt_global_entry->common.flags |= local_flags & BATADV_TT_CLIENT_WIFI;  	if (!(flags & BATADV_TT_CLIENT_ROAM)) @@ -1580,7 +1574,7 @@ static int batadv_tt_global_valid(const void *entry_ptr,  static struct sk_buff *  batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,  			      struct batadv_hashtable *hash, -			      struct batadv_hard_iface *primary_if, +			      struct batadv_priv *bat_priv,  			      int (*valid_cb)(const void *, const void *),  			      void *cb_data)  { @@ -1594,8 +1588,8 @@ batadv_tt_response_fill_table(uint16_t tt_len, uint8_t ttvn,  	uint32_t i;  	size_t len; -	if (tt_query_size + tt_len > primary_if->soft_iface->mtu) { -		tt_len = primary_if->soft_iface->mtu - tt_query_size; +	if (tt_query_size + tt_len > bat_priv->soft_iface->mtu) { +		tt_len = bat_priv->soft_iface->mtu - tt_query_size;  		tt_len -= tt_len % sizeof(struct batadv_tt_change);  	}  	tt_tot = tt_len / sizeof(struct batadv_tt_change); @@ -1715,7 +1709,6 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,  {  	struct batadv_orig_node *req_dst_orig_node;  	struct batadv_orig_node *res_dst_orig_node = NULL; -	struct batadv_hard_iface *primary_if = NULL;  	uint8_t orig_ttvn, req_ttvn, ttvn;  	int ret = false;  	unsigned char *tt_buff; @@ -1740,10 +1733,6 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,  	if (!res_dst_orig_node)  		goto out; -	primary_if = batadv_primary_if_get_selected(bat_priv); -	if (!primary_if) -		goto out; -  	orig_ttvn = (uint8_t)atomic_read(&req_dst_orig_node->last_ttvn);  	req_ttvn = tt_request->ttvn; @@ -1791,7 +1780,7 @@ batadv_send_other_tt_response(struct batadv_priv *bat_priv,  		skb = batadv_tt_response_fill_table(tt_len, ttvn,  						    bat_priv->tt.global_hash, -						    primary_if, +						    bat_priv,  						    batadv_tt_global_valid,  						    req_dst_orig_node);  		if (!skb) @@ -1828,8 +1817,6 @@ out:  		batadv_orig_node_free_ref(res_dst_orig_node);  	if (req_dst_orig_node)  		batadv_orig_node_free_ref(req_dst_orig_node); -	if (primary_if) -		batadv_hardif_free_ref(primary_if);  	if (!ret)  		kfree_skb(skb);  	return ret; @@ -1907,7 +1894,7 @@ batadv_send_my_tt_response(struct batadv_priv *bat_priv,  		skb = batadv_tt_response_fill_table(tt_len, ttvn,  						    bat_priv->tt.local_hash, -						    primary_if, +						    bat_priv,  						    batadv_tt_local_valid_entry,  						    NULL);  		if (!skb) @@ -2528,7 +2515,7 @@ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv,  	if (!tt_global_entry)  		goto out; -	ret = !!(tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM); +	ret = tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM;  	batadv_tt_global_entry_free_ref(tt_global_entry);  out:  	return ret; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 4cd87a0b5b8..aba8364c368 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -128,6 +128,10 @@ struct batadv_hard_iface {   * @bond_list: list of bonding candidates   * @refcount: number of contexts the object is used   * @rcu: struct used for 
freeing in an RCU-safe manner + * @in_coding_list: list of nodes this orig can hear + * @out_coding_list: list of nodes that can hear this orig + * @in_coding_list_lock: protects in_coding_list + * @out_coding_list_lock: protects out_coding_list   */  struct batadv_orig_node {  	uint8_t orig[ETH_ALEN]; @@ -171,6 +175,12 @@ struct batadv_orig_node {  	struct list_head bond_list;  	atomic_t refcount;  	struct rcu_head rcu; +#ifdef CONFIG_BATMAN_ADV_NC +	struct list_head in_coding_list; +	struct list_head out_coding_list; +	spinlock_t in_coding_list_lock; /* Protects in_coding_list */ +	spinlock_t out_coding_list_lock; /* Protects out_coding_list */ +#endif  };  /** @@ -265,6 +275,17 @@ struct batadv_bcast_duplist_entry {   * @BATADV_CNT_DAT_PUT_RX: received dht PUT traffic packet counter   * @BATADV_CNT_DAT_CACHED_REPLY_TX: transmitted dat cache reply traffic packet   *  counter + * @BATADV_CNT_NC_CODE: transmitted nc-combined traffic packet counter + * @BATADV_CNT_NC_CODE_BYTES: transmitted nc-combined traffic bytes counter + * @BATADV_CNT_NC_RECODE: transmitted nc-recombined traffic packet counter + * @BATADV_CNT_NC_RECODE_BYTES: transmitted nc-recombined traffic bytes counter + * @BATADV_CNT_NC_BUFFER: counter for packets buffered for later nc decoding + * @BATADV_CNT_NC_DECODE: received and nc-decoded traffic packet counter + * @BATADV_CNT_NC_DECODE_BYTES: received and nc-decoded traffic bytes counter + * @BATADV_CNT_NC_DECODE_FAILED: received and decode-failed traffic packet + *  counter + * @BATADV_CNT_NC_SNIFFED: counter for nc-decoded packets received in promisc + *  mode.   * @BATADV_CNT_NUM: number of traffic counters   */  enum batadv_counters { @@ -292,6 +313,17 @@ enum batadv_counters {  	BATADV_CNT_DAT_PUT_RX,  	BATADV_CNT_DAT_CACHED_REPLY_TX,  #endif +#ifdef CONFIG_BATMAN_ADV_NC +	BATADV_CNT_NC_CODE, +	BATADV_CNT_NC_CODE_BYTES, +	BATADV_CNT_NC_RECODE, +	BATADV_CNT_NC_RECODE_BYTES, +	BATADV_CNT_NC_BUFFER, +	BATADV_CNT_NC_DECODE, +	BATADV_CNT_NC_DECODE_BYTES, +	BATADV_CNT_NC_DECODE_FAILED, +	BATADV_CNT_NC_SNIFFED, +#endif  	BATADV_CNT_NUM,  }; @@ -428,6 +460,35 @@ struct batadv_priv_dat {  #endif  /** + * struct batadv_priv_nc - per mesh interface network coding private data + * @work: work queue callback item for cleanup + * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs + * @min_tq: only consider neighbors for encoding if neigh_tq > min_tq + * @max_fwd_delay: maximum packet forward delay to allow coding of packets + * @max_buffer_time: buffer time for sniffed packets used for decoding + * @timestamp_fwd_flush: timestamp of last forward packet queue flush + * @timestamp_sniffed_purge: timestamp of last sniffed packet queue purge + * @coding_hash: Hash table used to buffer skbs while waiting for another + *  incoming skb to code it with. Skbs are added to the buffer just before being + *  forwarded in routing.c + * @decoding_hash: Hash table used to buffer skbs that might be needed to decode + *  a received coded skb. The buffer is used for 1) skbs arriving on the + *  soft-interface; 2) skbs overheard on the hard-interface; and 3) skbs + *  forwarded by batman-adv. 
+ */ +struct batadv_priv_nc { +	struct delayed_work work; +	struct dentry *debug_dir; +	u8 min_tq; +	u32 max_fwd_delay; +	u32 max_buffer_time; +	unsigned long timestamp_fwd_flush; +	unsigned long timestamp_sniffed_purge; +	struct batadv_hashtable *coding_hash; +	struct batadv_hashtable *decoding_hash; +}; + +/**   * struct batadv_priv - per mesh interface data   * @mesh_state: current status of the mesh (inactive/active/deactivating)   * @soft_iface: net device which holds this struct as private data @@ -470,6 +531,8 @@ struct batadv_priv_dat {   * @tt: translation table data   * @vis: vis data   * @dat: distributed arp table data + * @network_coding: bool indicating whether network coding is enabled + * @nc: network coding data   */  struct batadv_priv {  	atomic_t mesh_state; @@ -522,6 +585,10 @@ struct batadv_priv {  #ifdef CONFIG_BATMAN_ADV_DAT  	struct batadv_priv_dat dat;  #endif +#ifdef CONFIG_BATMAN_ADV_NC +	atomic_t network_coding; +	struct batadv_priv_nc nc; +#endif /* CONFIG_BATMAN_ADV_NC */  };  /** @@ -702,6 +769,75 @@ struct batadv_tt_roam_node {  };  /** + * struct batadv_nc_node - network coding node + * @list: next and prev pointer for the list handling + * @addr: the node's mac address + * @refcount: number of contexts the object is used by + * @rcu: struct used for freeing in an RCU-safe manner + * @orig_node: pointer to corresponding orig node struct + * @last_seen: timestamp of last ogm received from this node + */ +struct batadv_nc_node { +	struct list_head list; +	uint8_t addr[ETH_ALEN]; +	atomic_t refcount; +	struct rcu_head rcu; +	struct batadv_orig_node *orig_node; +	unsigned long last_seen; +}; + +/** + * struct batadv_nc_path - network coding path + * @hash_entry: next and prev pointer for the list handling + * @rcu: struct used for freeing in an RCU-safe manner + * @refcount: number of contexts the object is used by + * @packet_list: list of buffered packets for this path + * @packet_list_lock: access lock for packet list + * @next_hop: next hop (destination) of path + * @prev_hop: previous hop (source) of path + * @last_valid: timestamp for last validation of path + */ +struct batadv_nc_path { +	struct hlist_node hash_entry; +	struct rcu_head rcu; +	atomic_t refcount; +	struct list_head packet_list; +	spinlock_t packet_list_lock; /* Protects packet_list */ +	uint8_t next_hop[ETH_ALEN]; +	uint8_t prev_hop[ETH_ALEN]; +	unsigned long last_valid; +}; + +/** + * struct batadv_nc_packet - network coding packet used when coding and + *  decoding packets + * @list: next and prev pointer for the list handling + * @packet_id: crc32 checksum of skb data + * @timestamp: field containing the info when the packet was added to path + * @neigh_node: pointer to original next hop neighbor of skb + * @skb: skb which can be encoded or used for decoding + * @nc_path: pointer to path this nc packet is attached to + */ +struct batadv_nc_packet { +	struct list_head list; +	__be32 packet_id; +	unsigned long timestamp; +	struct batadv_neigh_node *neigh_node; +	struct sk_buff *skb; +	struct batadv_nc_path *nc_path; +}; + +/** + * batadv_skb_cb - control buffer structure used to store private data relevant + *  to batman-adv in the skb->cb buffer in skbs. 
+ * @decoded: Marks a skb as decoded, which is checked when searching for coding + *  opportunities in network-coding.c + */ +struct batadv_skb_cb { +	bool decoded; +}; + +/**   * struct batadv_forw_packet - structure for bcast packets to be sent/forwarded   * @list: list node for batadv_socket_client::queue_list   * @send_time: execution time for delayed_work (packet sending) diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c index 50e079f00be..0bb3b5982f9 100644 --- a/net/batman-adv/unicast.c +++ b/net/batman-adv/unicast.c @@ -122,7 +122,7 @@ batadv_frag_search_packet(struct list_head *head,  {  	struct batadv_frag_packet_list_entry *tfp;  	struct batadv_unicast_frag_packet *tmp_up = NULL; -	int is_head_tmp, is_head; +	bool is_head_tmp, is_head;  	uint16_t search_seqno;  	if (up->flags & BATADV_UNI_FRAG_HEAD) @@ -130,7 +130,7 @@ batadv_frag_search_packet(struct list_head *head,  	else  		search_seqno = ntohs(up->seqno)-1; -	is_head = !!(up->flags & BATADV_UNI_FRAG_HEAD); +	is_head = up->flags & BATADV_UNI_FRAG_HEAD;  	list_for_each_entry(tfp, head, list) {  		if (!tfp->skb) @@ -142,7 +142,7 @@ batadv_frag_search_packet(struct list_head *head,  		tmp_up = (struct batadv_unicast_frag_packet *)tfp->skb->data;  		if (tfp->seqno == search_seqno) { -			is_head_tmp = !!(tmp_up->flags & BATADV_UNI_FRAG_HEAD); +			is_head_tmp = tmp_up->flags & BATADV_UNI_FRAG_HEAD;  			if (is_head_tmp != is_head)  				return tfp;  			else diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index 6a1e646be96..1625e5793a8 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -149,7 +149,7 @@ static void batadv_vis_data_read_prim_sec(struct seq_file *seq,  	hlist_for_each_entry(entry, if_list, list) {  		if (entry->primary) -			seq_printf(seq, "PRIMARY, "); +			seq_puts(seq, "PRIMARY, ");  		else  			seq_printf(seq,  "SEC %pM, ", entry->addr);  	} @@ -207,7 +207,7 @@ static void batadv_vis_data_read_entries(struct seq_file *seq,  		if (batadv_compare_eth(entry->addr, packet->vis_orig))  			batadv_vis_data_read_prim_sec(seq, list); -		seq_printf(seq, "\n"); +		seq_puts(seq, "\n");  	}  } diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index eb0f4b16ff0..17f33a62f6d 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -397,13 +397,12 @@ static int a2mp_getampassoc_rsp(struct amp_mgr *mgr, struct sk_buff *skb,  	if (ctrl) {  		u8 *assoc; -		assoc = kzalloc(assoc_len, GFP_KERNEL); +		assoc = kmemdup(rsp->amp_assoc, assoc_len, GFP_KERNEL);  		if (!assoc) {  			amp_ctrl_put(ctrl);  			return -ENOMEM;  		} -		memcpy(assoc, rsp->amp_assoc, assoc_len);  		ctrl->assoc = assoc;  		ctrl->assoc_len = assoc_len;  		ctrl->assoc_rem_len = assoc_len; @@ -472,13 +471,12 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,  		size_t assoc_len = le16_to_cpu(hdr->len) - sizeof(*req);  		u8 *assoc; -		assoc = kzalloc(assoc_len, GFP_KERNEL); +		assoc = kmemdup(req->amp_assoc, assoc_len, GFP_KERNEL);  		if (!assoc) {  			amp_ctrl_put(ctrl);  			return -ENOMEM;  		} -		memcpy(assoc, req->amp_assoc, assoc_len);  		ctrl->assoc = assoc;  		ctrl->assoc_len = assoc_len;  		ctrl->assoc_rem_len = assoc_len; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 0d1b08cc76e..e5338f787d6 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -92,23 +92,14 @@ int bt_sock_register(int proto, const struct net_proto_family *ops)  }  EXPORT_SYMBOL(bt_sock_register); -int bt_sock_unregister(int proto) +void bt_sock_unregister(int 
proto)  { -	int err = 0; -  	if (proto < 0 || proto >= BT_MAX_PROTO) -		return -EINVAL; +		return;  	write_lock(&bt_proto_lock); - -	if (!bt_proto[proto]) -		err = -ENOENT; -	else -		bt_proto[proto] = NULL; - +	bt_proto[proto] = NULL;  	write_unlock(&bt_proto_lock); - -	return err;  }  EXPORT_SYMBOL(bt_sock_unregister); @@ -422,7 +413,8 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock,  		return bt_accept_poll(sk);  	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) -		mask |= POLLERR; +		mask |= POLLERR | +			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);  	if (sk->sk_shutdown & RCV_SHUTDOWN)  		mask |= POLLRDHUP | POLLIN | POLLRDNORM; diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index e58c8b32589..4b488ec2610 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -136,7 +136,7 @@ static u16 bnep_net_eth_proto(struct sk_buff *skb)  	struct ethhdr *eh = (void *) skb->data;  	u16 proto = ntohs(eh->h_proto); -	if (proto >= 1536) +	if (proto >= ETH_P_802_3_MIN)  		return proto;  	if (get_unaligned((__be16 *) skb->data) == htons(0xFFFF)) diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c index e7154a58465..5b1c04e2882 100644 --- a/net/bluetooth/bnep/sock.c +++ b/net/bluetooth/bnep/sock.c @@ -253,8 +253,6 @@ error:  void __exit bnep_sock_cleanup(void)  {  	bt_procfs_cleanup(&init_net, "bnep"); -	if (bt_sock_unregister(BTPROTO_BNEP) < 0) -		BT_ERR("Can't unregister BNEP socket"); - +	bt_sock_unregister(BTPROTO_BNEP);  	proto_unregister(&bnep_proto);  } diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c index 1c57482112b..58d9edebab4 100644 --- a/net/bluetooth/cmtp/sock.c +++ b/net/bluetooth/cmtp/sock.c @@ -264,8 +264,6 @@ error:  void cmtp_cleanup_sockets(void)  {  	bt_procfs_cleanup(&init_net, "cmtp"); -	if (bt_sock_unregister(BTPROTO_CMTP) < 0) -		BT_ERR("Can't unregister CMTP socket"); - +	bt_sock_unregister(BTPROTO_CMTP);  	proto_unregister(&cmtp_proto);  } diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 4925a02ae7e..6c7f3637972 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -117,7 +117,17 @@ static void hci_acl_create_connection_cancel(struct hci_conn *conn)  	hci_send_cmd(conn->hdev, HCI_OP_CREATE_CONN_CANCEL, sizeof(cp), &cp);  } -void hci_acl_disconn(struct hci_conn *conn, __u8 reason) +static void hci_reject_sco(struct hci_conn *conn) +{ +	struct hci_cp_reject_sync_conn_req cp; + +	cp.reason = HCI_ERROR_REMOTE_USER_TERM; +	bacpy(&cp.bdaddr, &conn->dst); + +	hci_send_cmd(conn->hdev, HCI_OP_REJECT_SYNC_CONN_REQ, sizeof(cp), &cp); +} + +void hci_disconnect(struct hci_conn *conn, __u8 reason)  {  	struct hci_cp_disconnect cp; @@ -253,7 +263,7 @@ static void hci_conn_disconnect(struct hci_conn *conn)  		hci_amp_disconn(conn, reason);  		break;  	default: -		hci_acl_disconn(conn, reason); +		hci_disconnect(conn, reason);  		break;  	}  } @@ -276,6 +286,8 @@ static void hci_conn_timeout(struct work_struct *work)  				hci_acl_create_connection_cancel(conn);  			else if (conn->type == LE_LINK)  				hci_le_create_connection_cancel(conn); +		} else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) { +			hci_reject_sco(conn);  		}  		break;  	case BT_CONFIG: @@ -398,8 +410,6 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)  	if (hdev->notify)  		hdev->notify(hdev, HCI_NOTIFY_CONN_ADD); -	atomic_set(&conn->devref, 0); -  	hci_conn_init_sysfs(conn);  	return conn; @@ -433,7 +443,7 @@ int 
hci_conn_del(struct hci_conn *conn)  		struct hci_conn *acl = conn->link;  		if (acl) {  			acl->link = NULL; -			hci_conn_put(acl); +			hci_conn_drop(acl);  		}  	} @@ -448,12 +458,11 @@ int hci_conn_del(struct hci_conn *conn)  	skb_queue_purge(&conn->data_q); -	hci_conn_put_device(conn); +	hci_conn_del_sysfs(conn);  	hci_dev_put(hdev); -	if (conn->handle == 0) -		kfree(conn); +	hci_conn_put(conn);  	return 0;  } @@ -565,7 +574,7 @@ static struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type,  	if (!sco) {  		sco = hci_conn_add(hdev, type, dst);  		if (!sco) { -			hci_conn_put(acl); +			hci_conn_drop(acl);  			return ERR_PTR(-ENOMEM);  		}  	} @@ -835,19 +844,6 @@ void hci_conn_check_pending(struct hci_dev *hdev)  	hci_dev_unlock(hdev);  } -void hci_conn_hold_device(struct hci_conn *conn) -{ -	atomic_inc(&conn->devref); -} -EXPORT_SYMBOL(hci_conn_hold_device); - -void hci_conn_put_device(struct hci_conn *conn) -{ -	if (atomic_dec_and_test(&conn->devref)) -		hci_conn_del_sysfs(conn); -} -EXPORT_SYMBOL(hci_conn_put_device); -  int hci_get_conn_list(void __user *arg)  {  	struct hci_conn *c; @@ -980,7 +976,7 @@ void hci_chan_del(struct hci_chan *chan)  	synchronize_rcu(); -	hci_conn_put(conn); +	hci_conn_drop(conn);  	skb_queue_purge(&chan->data_q);  	kfree(chan); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 60793e7b768..33843c5c493 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -57,36 +57,9 @@ static void hci_notify(struct hci_dev *hdev, int event)  /* ---- HCI requests ---- */ -void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result) +static void hci_req_sync_complete(struct hci_dev *hdev, u8 result)  { -	BT_DBG("%s command 0x%4.4x result 0x%2.2x", hdev->name, cmd, result); - -	/* If this is the init phase check if the completed command matches -	 * the last init command, and if not just return. -	 */ -	if (test_bit(HCI_INIT, &hdev->flags) && hdev->init_last_cmd != cmd) { -		struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data; -		u16 opcode = __le16_to_cpu(sent->opcode); -		struct sk_buff *skb; - -		/* Some CSR based controllers generate a spontaneous -		 * reset complete event during init and any pending -		 * command will never be completed. In such a case we -		 * need to resend whatever was the last sent -		 * command. 
-		 */ - -		if (cmd != HCI_OP_RESET || opcode == HCI_OP_RESET) -			return; - -		skb = skb_clone(hdev->sent_cmd, GFP_ATOMIC); -		if (skb) { -			skb_queue_head(&hdev->cmd_q, skb); -			queue_work(hdev->workqueue, &hdev->cmd_work); -		} - -		return; -	} +	BT_DBG("%s result 0x%2.2x", hdev->name, result);  	if (hdev->req_status == HCI_REQ_PEND) {  		hdev->req_result = result; @@ -106,22 +79,158 @@ static void hci_req_cancel(struct hci_dev *hdev, int err)  	}  } +static struct sk_buff *hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, +					    u8 event) +{ +	struct hci_ev_cmd_complete *ev; +	struct hci_event_hdr *hdr; +	struct sk_buff *skb; + +	hci_dev_lock(hdev); + +	skb = hdev->recv_evt; +	hdev->recv_evt = NULL; + +	hci_dev_unlock(hdev); + +	if (!skb) +		return ERR_PTR(-ENODATA); + +	if (skb->len < sizeof(*hdr)) { +		BT_ERR("Too short HCI event"); +		goto failed; +	} + +	hdr = (void *) skb->data; +	skb_pull(skb, HCI_EVENT_HDR_SIZE); + +	if (event) { +		if (hdr->evt != event) +			goto failed; +		return skb; +	} + +	if (hdr->evt != HCI_EV_CMD_COMPLETE) { +		BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt); +		goto failed; +	} + +	if (skb->len < sizeof(*ev)) { +		BT_ERR("Too short cmd_complete event"); +		goto failed; +	} + +	ev = (void *) skb->data; +	skb_pull(skb, sizeof(*ev)); + +	if (opcode == __le16_to_cpu(ev->opcode)) +		return skb; + +	BT_DBG("opcode doesn't match (0x%2.2x != 0x%2.2x)", opcode, +	       __le16_to_cpu(ev->opcode)); + +failed: +	kfree_skb(skb); +	return ERR_PTR(-ENODATA); +} + +struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, +				  const void *param, u8 event, u32 timeout) +{ +	DECLARE_WAITQUEUE(wait, current); +	struct hci_request req; +	int err = 0; + +	BT_DBG("%s", hdev->name); + +	hci_req_init(&req, hdev); + +	hci_req_add_ev(&req, opcode, plen, param, event); + +	hdev->req_status = HCI_REQ_PEND; + +	err = hci_req_run(&req, hci_req_sync_complete); +	if (err < 0) +		return ERR_PTR(err); + +	add_wait_queue(&hdev->req_wait_q, &wait); +	set_current_state(TASK_INTERRUPTIBLE); + +	schedule_timeout(timeout); + +	remove_wait_queue(&hdev->req_wait_q, &wait); + +	if (signal_pending(current)) +		return ERR_PTR(-EINTR); + +	switch (hdev->req_status) { +	case HCI_REQ_DONE: +		err = -bt_to_errno(hdev->req_result); +		break; + +	case HCI_REQ_CANCELED: +		err = -hdev->req_result; +		break; + +	default: +		err = -ETIMEDOUT; +		break; +	} + +	hdev->req_status = hdev->req_result = 0; + +	BT_DBG("%s end: err %d", hdev->name, err); + +	if (err < 0) +		return ERR_PTR(err); + +	return hci_get_cmd_complete(hdev, opcode, event); +} +EXPORT_SYMBOL(__hci_cmd_sync_ev); + +struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, +			       const void *param, u32 timeout) +{ +	return __hci_cmd_sync_ev(hdev, opcode, plen, param, 0, timeout); +} +EXPORT_SYMBOL(__hci_cmd_sync); +  /* Execute request and wait for completion. 
*/ -static int __hci_request(struct hci_dev *hdev, -			 void (*req)(struct hci_dev *hdev, unsigned long opt), -			 unsigned long opt, __u32 timeout) +static int __hci_req_sync(struct hci_dev *hdev, +			  void (*func)(struct hci_request *req, +				      unsigned long opt), +			  unsigned long opt, __u32 timeout)  { +	struct hci_request req;  	DECLARE_WAITQUEUE(wait, current);  	int err = 0;  	BT_DBG("%s start", hdev->name); +	hci_req_init(&req, hdev); +  	hdev->req_status = HCI_REQ_PEND; +	func(&req, opt); + +	err = hci_req_run(&req, hci_req_sync_complete); +	if (err < 0) { +		hdev->req_status = 0; + +		/* ENODATA means the HCI request command queue is empty. +		 * This can happen when a request with conditionals doesn't +		 * trigger any commands to be sent. This is normal behavior +		 * and should not trigger an error return. +		 */ +		if (err == -ENODATA) +			return 0; + +		return err; +	} +  	add_wait_queue(&hdev->req_wait_q, &wait);  	set_current_state(TASK_INTERRUPTIBLE); -	req(hdev, opt);  	schedule_timeout(timeout);  	remove_wait_queue(&hdev->req_wait_q, &wait); @@ -150,9 +259,10 @@ static int __hci_request(struct hci_dev *hdev,  	return err;  } -static int hci_request(struct hci_dev *hdev, -		       void (*req)(struct hci_dev *hdev, unsigned long opt), -		       unsigned long opt, __u32 timeout) +static int hci_req_sync(struct hci_dev *hdev, +			void (*req)(struct hci_request *req, +				    unsigned long opt), +			unsigned long opt, __u32 timeout)  {  	int ret; @@ -161,75 +271,66 @@ static int hci_request(struct hci_dev *hdev,  	/* Serialize all requests */  	hci_req_lock(hdev); -	ret = __hci_request(hdev, req, opt, timeout); +	ret = __hci_req_sync(hdev, req, opt, timeout);  	hci_req_unlock(hdev);  	return ret;  } -static void hci_reset_req(struct hci_dev *hdev, unsigned long opt) +static void hci_reset_req(struct hci_request *req, unsigned long opt)  { -	BT_DBG("%s %ld", hdev->name, opt); +	BT_DBG("%s %ld", req->hdev->name, opt);  	/* Reset device */ -	set_bit(HCI_RESET, &hdev->flags); -	hci_send_cmd(hdev, HCI_OP_RESET, 0, NULL); +	set_bit(HCI_RESET, &req->hdev->flags); +	hci_req_add(req, HCI_OP_RESET, 0, NULL);  } -static void bredr_init(struct hci_dev *hdev) +static void bredr_init(struct hci_request *req)  { -	hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED; +	req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_PACKET_BASED;  	/* Read Local Supported Features */ -	hci_send_cmd(hdev, HCI_OP_READ_LOCAL_FEATURES, 0, NULL); +	hci_req_add(req, HCI_OP_READ_LOCAL_FEATURES, 0, NULL);  	/* Read Local Version */ -	hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL); +	hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL); + +	/* Read BD Address */ +	hci_req_add(req, HCI_OP_READ_BD_ADDR, 0, NULL);  } -static void amp_init(struct hci_dev *hdev) +static void amp_init(struct hci_request *req)  { -	hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED; +	req->hdev->flow_ctl_mode = HCI_FLOW_CTL_MODE_BLOCK_BASED;  	/* Read Local Version */ -	hci_send_cmd(hdev, HCI_OP_READ_LOCAL_VERSION, 0, NULL); +	hci_req_add(req, HCI_OP_READ_LOCAL_VERSION, 0, NULL);  	/* Read Local AMP Info */ -	hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL); +	hci_req_add(req, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL);  	/* Read Data Blk size */ -	hci_send_cmd(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL); +	hci_req_add(req, HCI_OP_READ_DATA_BLOCK_SIZE, 0, NULL);  } -static void hci_init_req(struct hci_dev *hdev, unsigned long opt) +static void hci_init1_req(struct hci_request *req, unsigned long opt)  { -	struct sk_buff 
*skb; +	struct hci_dev *hdev = req->hdev;  	BT_DBG("%s %ld", hdev->name, opt); -	/* Driver initialization */ - -	/* Special commands */ -	while ((skb = skb_dequeue(&hdev->driver_init))) { -		bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; -		skb->dev = (void *) hdev; - -		skb_queue_tail(&hdev->cmd_q, skb); -		queue_work(hdev->workqueue, &hdev->cmd_work); -	} -	skb_queue_purge(&hdev->driver_init); -  	/* Reset */  	if (!test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks))  -		hci_reset_req(hdev, 0); +		hci_reset_req(req, 0);  	switch (hdev->dev_type) {  	case HCI_BREDR: -		bredr_init(hdev); +		bredr_init(req);  		break;  	case HCI_AMP: -		amp_init(hdev); +		amp_init(req);  		break;  	default: @@ -238,44 +339,347 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt)  	}  } -static void hci_scan_req(struct hci_dev *hdev, unsigned long opt) +static void bredr_setup(struct hci_request *req) +{ +	struct hci_cp_delete_stored_link_key cp; +	__le16 param; +	__u8 flt_type; + +	/* Read Buffer Size (ACL mtu, max pkt, etc.) */ +	hci_req_add(req, HCI_OP_READ_BUFFER_SIZE, 0, NULL); + +	/* Read Class of Device */ +	hci_req_add(req, HCI_OP_READ_CLASS_OF_DEV, 0, NULL); + +	/* Read Local Name */ +	hci_req_add(req, HCI_OP_READ_LOCAL_NAME, 0, NULL); + +	/* Read Voice Setting */ +	hci_req_add(req, HCI_OP_READ_VOICE_SETTING, 0, NULL); + +	/* Clear Event Filters */ +	flt_type = HCI_FLT_CLEAR_ALL; +	hci_req_add(req, HCI_OP_SET_EVENT_FLT, 1, &flt_type); + +	/* Connection accept timeout ~20 secs */ +	param = __constant_cpu_to_le16(0x7d00); +	hci_req_add(req, HCI_OP_WRITE_CA_TIMEOUT, 2, &param); + +	bacpy(&cp.bdaddr, BDADDR_ANY); +	cp.delete_all = 0x01; +	hci_req_add(req, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp); + +	/* Read page scan parameters */ +	if (req->hdev->hci_ver > BLUETOOTH_VER_1_1) { +		hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL); +		hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL); +	} +} + +static void le_setup(struct hci_request *req) +{ +	struct hci_dev *hdev = req->hdev; + +	/* Read LE Buffer Size */ +	hci_req_add(req, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL); + +	/* Read LE Local Supported Features */ +	hci_req_add(req, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL); + +	/* Read LE Advertising Channel TX Power */ +	hci_req_add(req, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); + +	/* Read LE White List Size */ +	hci_req_add(req, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL); + +	/* Read LE Supported States */ +	hci_req_add(req, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL); + +	/* LE-only controllers have LE implicitly enabled */ +	if (!lmp_bredr_capable(hdev)) +		set_bit(HCI_LE_ENABLED, &hdev->dev_flags); +} + +static u8 hci_get_inquiry_mode(struct hci_dev *hdev) +{ +	if (lmp_ext_inq_capable(hdev)) +		return 0x02; + +	if (lmp_inq_rssi_capable(hdev)) +		return 0x01; + +	if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 && +	    hdev->lmp_subver == 0x0757) +		return 0x01; + +	if (hdev->manufacturer == 15) { +		if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963) +			return 0x01; +		if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963) +			return 0x01; +		if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965) +			return 0x01; +	} + +	if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 && +	    hdev->lmp_subver == 0x1805) +		return 0x01; + +	return 0x00; +} + +static void hci_setup_inquiry_mode(struct hci_request *req) +{ +	u8 mode; + +	mode = hci_get_inquiry_mode(req->hdev); + +	hci_req_add(req, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); +} + +static void hci_setup_event_mask(struct 
hci_request *req) +{ +	struct hci_dev *hdev = req->hdev; + +	/* The second byte is 0xff instead of 0x9f (two reserved bits +	 * disabled) since a Broadcom 1.2 dongle doesn't respond to the +	 * command otherwise. +	 */ +	u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 }; + +	/* CSR 1.1 dongles do not accept any bitfield so don't try to set +	 * any event mask for pre 1.2 devices. +	 */ +	if (hdev->hci_ver < BLUETOOTH_VER_1_2) +		return; + +	if (lmp_bredr_capable(hdev)) { +		events[4] |= 0x01; /* Flow Specification Complete */ +		events[4] |= 0x02; /* Inquiry Result with RSSI */ +		events[4] |= 0x04; /* Read Remote Extended Features Complete */ +		events[5] |= 0x08; /* Synchronous Connection Complete */ +		events[5] |= 0x10; /* Synchronous Connection Changed */ +	} + +	if (lmp_inq_rssi_capable(hdev)) +		events[4] |= 0x02; /* Inquiry Result with RSSI */ + +	if (lmp_sniffsubr_capable(hdev)) +		events[5] |= 0x20; /* Sniff Subrating */ + +	if (lmp_pause_enc_capable(hdev)) +		events[5] |= 0x80; /* Encryption Key Refresh Complete */ + +	if (lmp_ext_inq_capable(hdev)) +		events[5] |= 0x40; /* Extended Inquiry Result */ + +	if (lmp_no_flush_capable(hdev)) +		events[7] |= 0x01; /* Enhanced Flush Complete */ + +	if (lmp_lsto_capable(hdev)) +		events[6] |= 0x80; /* Link Supervision Timeout Changed */ + +	if (lmp_ssp_capable(hdev)) { +		events[6] |= 0x01;	/* IO Capability Request */ +		events[6] |= 0x02;	/* IO Capability Response */ +		events[6] |= 0x04;	/* User Confirmation Request */ +		events[6] |= 0x08;	/* User Passkey Request */ +		events[6] |= 0x10;	/* Remote OOB Data Request */ +		events[6] |= 0x20;	/* Simple Pairing Complete */ +		events[7] |= 0x04;	/* User Passkey Notification */ +		events[7] |= 0x08;	/* Keypress Notification */ +		events[7] |= 0x10;	/* Remote Host Supported +					 * Features Notification +					 */ +	} + +	if (lmp_le_capable(hdev)) +		events[7] |= 0x20;	/* LE Meta-Event */ + +	hci_req_add(req, HCI_OP_SET_EVENT_MASK, sizeof(events), events); + +	if (lmp_le_capable(hdev)) { +		memset(events, 0, sizeof(events)); +		events[0] = 0x1f; +		hci_req_add(req, HCI_OP_LE_SET_EVENT_MASK, +			    sizeof(events), events); +	} +} + +static void hci_init2_req(struct hci_request *req, unsigned long opt) +{ +	struct hci_dev *hdev = req->hdev; + +	if (lmp_bredr_capable(hdev)) +		bredr_setup(req); + +	if (lmp_le_capable(hdev)) +		le_setup(req); + +	hci_setup_event_mask(req); + +	if (hdev->hci_ver > BLUETOOTH_VER_1_1) +		hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); + +	if (lmp_ssp_capable(hdev)) { +		if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { +			u8 mode = 0x01; +			hci_req_add(req, HCI_OP_WRITE_SSP_MODE, +				    sizeof(mode), &mode); +		} else { +			struct hci_cp_write_eir cp; + +			memset(hdev->eir, 0, sizeof(hdev->eir)); +			memset(&cp, 0, sizeof(cp)); + +			hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); +		} +	} + +	if (lmp_inq_rssi_capable(hdev)) +		hci_setup_inquiry_mode(req); + +	if (lmp_inq_tx_pwr_capable(hdev)) +		hci_req_add(req, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL); + +	if (lmp_ext_feat_capable(hdev)) { +		struct hci_cp_read_local_ext_features cp; + +		cp.page = 0x01; +		hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES, +			    sizeof(cp), &cp); +	} + +	if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) { +		u8 enable = 1; +		hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable), +			    &enable); +	} +} + +static void hci_setup_link_policy(struct hci_request *req) +{ +	struct hci_dev *hdev = req->hdev; +	struct hci_cp_write_def_link_policy 
cp; +	u16 link_policy = 0; + +	if (lmp_rswitch_capable(hdev)) +		link_policy |= HCI_LP_RSWITCH; +	if (lmp_hold_capable(hdev)) +		link_policy |= HCI_LP_HOLD; +	if (lmp_sniff_capable(hdev)) +		link_policy |= HCI_LP_SNIFF; +	if (lmp_park_capable(hdev)) +		link_policy |= HCI_LP_PARK; + +	cp.policy = cpu_to_le16(link_policy); +	hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp); +} + +static void hci_set_le_support(struct hci_request *req) +{ +	struct hci_dev *hdev = req->hdev; +	struct hci_cp_write_le_host_supported cp; + +	/* LE-only devices do not support explicit enablement */ +	if (!lmp_bredr_capable(hdev)) +		return; + +	memset(&cp, 0, sizeof(cp)); + +	if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { +		cp.le = 0x01; +		cp.simul = lmp_le_br_capable(hdev); +	} + +	if (cp.le != lmp_host_le_capable(hdev)) +		hci_req_add(req, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp), +			    &cp); +} + +static void hci_init3_req(struct hci_request *req, unsigned long opt) +{ +	struct hci_dev *hdev = req->hdev; +	u8 p; + +	if (hdev->commands[5] & 0x10) +		hci_setup_link_policy(req); + +	if (lmp_le_capable(hdev)) { +		hci_set_le_support(req); +		hci_update_ad(req); +	} + +	/* Read features beyond page 1 if available */ +	for (p = 2; p < HCI_MAX_PAGES && p <= hdev->max_page; p++) { +		struct hci_cp_read_local_ext_features cp; + +		cp.page = p; +		hci_req_add(req, HCI_OP_READ_LOCAL_EXT_FEATURES, +			    sizeof(cp), &cp); +	} +} + +static int __hci_init(struct hci_dev *hdev) +{ +	int err; + +	err = __hci_req_sync(hdev, hci_init1_req, 0, HCI_INIT_TIMEOUT); +	if (err < 0) +		return err; + +	/* HCI_BREDR covers both single-mode LE, BR/EDR and dual-mode +	 * BR/EDR/LE type controllers. AMP controllers only need the +	 * first stage init. +	 */ +	if (hdev->dev_type != HCI_BREDR) +		return 0; + +	err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT); +	if (err < 0) +		return err; + +	return __hci_req_sync(hdev, hci_init3_req, 0, HCI_INIT_TIMEOUT); +} + +static void hci_scan_req(struct hci_request *req, unsigned long opt)  {  	__u8 scan = opt; -	BT_DBG("%s %x", hdev->name, scan); +	BT_DBG("%s %x", req->hdev->name, scan);  	/* Inquiry and Page scans */ -	hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); +	hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);  } -static void hci_auth_req(struct hci_dev *hdev, unsigned long opt) +static void hci_auth_req(struct hci_request *req, unsigned long opt)  {  	__u8 auth = opt; -	BT_DBG("%s %x", hdev->name, auth); +	BT_DBG("%s %x", req->hdev->name, auth);  	/* Authentication */ -	hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth); +	hci_req_add(req, HCI_OP_WRITE_AUTH_ENABLE, 1, &auth);  } -static void hci_encrypt_req(struct hci_dev *hdev, unsigned long opt) +static void hci_encrypt_req(struct hci_request *req, unsigned long opt)  {  	__u8 encrypt = opt; -	BT_DBG("%s %x", hdev->name, encrypt); +	BT_DBG("%s %x", req->hdev->name, encrypt);  	/* Encryption */ -	hci_send_cmd(hdev, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt); +	hci_req_add(req, HCI_OP_WRITE_ENCRYPT_MODE, 1, &encrypt);  } -static void hci_linkpol_req(struct hci_dev *hdev, unsigned long opt) +static void hci_linkpol_req(struct hci_request *req, unsigned long opt)  {  	__le16 policy = cpu_to_le16(opt); -	BT_DBG("%s %x", hdev->name, policy); +	BT_DBG("%s %x", req->hdev->name, policy);  	/* Default link policy */ -	hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy); +	hci_req_add(req, HCI_OP_WRITE_DEF_LINK_POLICY, 2, &policy);  }  /* Get HCI device by index. 
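The hci_core.c rework above replaces one-shot hci_send_cmd() calls in the init and ioctl paths with batched requests: commands accumulate on a struct hci_request via hci_req_add() and are submitted together by hci_req_run() (shown further down in this diff), which attaches the completion callback to the last command of the batch. A minimal usage sketch assembled from that API; the two function names are illustrative stand-ins, and SCAN_PAGE is the standard constant from hci.h:

	/* request builders only queue commands; nothing is sent here */
	static void scan_enable_sketch(struct hci_request *req, unsigned long opt)
	{
		__u8 scan = opt;

		hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);
	}

	static int run_request_sketch(struct hci_dev *hdev,
				      hci_req_complete_t complete)
	{
		struct hci_request req;

		hci_req_init(&req, hdev);
		scan_enable_sketch(&req, SCAN_PAGE);

		/* splices the queued commands onto hdev->cmd_q and kicks
		 * cmd_work; returns -ENODATA if nothing was queued
		 */
		return hci_req_run(&req, complete);
	}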
@@ -512,9 +916,10 @@ static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf)  	return copied;  } -static void hci_inq_req(struct hci_dev *hdev, unsigned long opt) +static void hci_inq_req(struct hci_request *req, unsigned long opt)  {  	struct hci_inquiry_req *ir = (struct hci_inquiry_req *) opt; +	struct hci_dev *hdev = req->hdev;  	struct hci_cp_inquiry cp;  	BT_DBG("%s", hdev->name); @@ -526,7 +931,13 @@ static void hci_inq_req(struct hci_dev *hdev, unsigned long opt)  	memcpy(&cp.lap, &ir->lap, 3);  	cp.length  = ir->length;  	cp.num_rsp = ir->num_rsp; -	hci_send_cmd(hdev, HCI_OP_INQUIRY, sizeof(cp), &cp); +	hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); +} + +static int wait_inquiry(void *word) +{ +	schedule(); +	return signal_pending(current);  }  int hci_inquiry(void __user *arg) @@ -556,9 +967,17 @@ int hci_inquiry(void __user *arg)  	timeo = ir.length * msecs_to_jiffies(2000);  	if (do_inquiry) { -		err = hci_request(hdev, hci_inq_req, (unsigned long)&ir, timeo); +		err = hci_req_sync(hdev, hci_inq_req, (unsigned long) &ir, +				   timeo);  		if (err < 0)  			goto done; + +		/* Wait until Inquiry procedure finishes (HCI_INQUIRY flag is +		 * cleared). If it is interrupted by a signal, return -EINTR. +		 */ +		if (wait_on_bit(&hdev->flags, HCI_INQUIRY, wait_inquiry, +				TASK_INTERRUPTIBLE)) +			return -EINTR;  	}  	/* for unlimited number of responses we will use buffer with @@ -654,39 +1073,29 @@ static u8 create_ad(struct hci_dev *hdev, u8 *ptr)  	return ad_len;  } -int hci_update_ad(struct hci_dev *hdev) +void hci_update_ad(struct hci_request *req)  { +	struct hci_dev *hdev = req->hdev;  	struct hci_cp_le_set_adv_data cp;  	u8 len; -	int err; - -	hci_dev_lock(hdev); -	if (!lmp_le_capable(hdev)) { -		err = -EINVAL; -		goto unlock; -	} +	if (!lmp_le_capable(hdev)) +		return;  	memset(&cp, 0, sizeof(cp));  	len = create_ad(hdev, cp.data);  	if (hdev->adv_data_len == len && -	    memcmp(cp.data, hdev->adv_data, len) == 0) { -		err = 0; -		goto unlock; -	} +	    memcmp(cp.data, hdev->adv_data, len) == 0) +		return;  	memcpy(hdev->adv_data, cp.data, sizeof(cp.data));  	hdev->adv_data_len = len;  	cp.length = len; -	err = hci_send_cmd(hdev, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); -unlock: -	hci_dev_unlock(hdev); - -	return err; +	hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp);  }  /* ---- HCI ioctl helpers ---- */ @@ -719,34 +1128,37 @@ int hci_dev_open(__u16 dev)  		goto done;  	} -	if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) -		set_bit(HCI_RAW, &hdev->flags); - -	/* Treat all non BR/EDR controllers as raw devices if -	   enable_hs is not set */ -	if (hdev->dev_type != HCI_BREDR && !enable_hs) -		set_bit(HCI_RAW, &hdev->flags); -  	if (hdev->open(hdev)) {  		ret = -EIO;  		goto done;  	} -	if (!test_bit(HCI_RAW, &hdev->flags)) { -		atomic_set(&hdev->cmd_cnt, 1); -		set_bit(HCI_INIT, &hdev->flags); -		hdev->init_last_cmd = 0; +	atomic_set(&hdev->cmd_cnt, 1); +	set_bit(HCI_INIT, &hdev->flags); -		ret = __hci_request(hdev, hci_init_req, 0, HCI_INIT_TIMEOUT); +	if (hdev->setup && test_bit(HCI_SETUP, &hdev->dev_flags)) +		ret = hdev->setup(hdev); -		clear_bit(HCI_INIT, &hdev->flags); +	if (!ret) { +		/* Treat all non BR/EDR controllers as raw devices if +		 * enable_hs is not set. 
+		 */ +		if (hdev->dev_type != HCI_BREDR && !enable_hs) +			set_bit(HCI_RAW, &hdev->flags); + +		if (test_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks)) +			set_bit(HCI_RAW, &hdev->flags); + +		if (!test_bit(HCI_RAW, &hdev->flags)) +			ret = __hci_init(hdev);  	} +	clear_bit(HCI_INIT, &hdev->flags); +  	if (!ret) {  		hci_dev_hold(hdev);  		set_bit(HCI_UP, &hdev->flags);  		hci_notify(hdev, HCI_DEV_UP); -		hci_update_ad(hdev);  		if (!test_bit(HCI_SETUP, &hdev->dev_flags) &&  		    mgmt_valid_hdev(hdev)) {  			hci_dev_lock(hdev); @@ -828,7 +1240,7 @@ static int hci_dev_do_close(struct hci_dev *hdev)  	if (!test_bit(HCI_RAW, &hdev->flags) &&  	    test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) {  		set_bit(HCI_INIT, &hdev->flags); -		__hci_request(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT); +		__hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT);  		clear_bit(HCI_INIT, &hdev->flags);  	} @@ -847,10 +1259,17 @@ static int hci_dev_do_close(struct hci_dev *hdev)  		hdev->sent_cmd = NULL;  	} +	kfree_skb(hdev->recv_evt); +	hdev->recv_evt = NULL; +  	/* After this point our queues are empty  	 * and no tasks are scheduled. */  	hdev->close(hdev); +	/* Clear flags */ +	hdev->flags = 0; +	hdev->dev_flags &= ~HCI_PERSISTENT_MASK; +  	if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags) &&  	    mgmt_valid_hdev(hdev)) {  		hci_dev_lock(hdev); @@ -858,9 +1277,6 @@ static int hci_dev_do_close(struct hci_dev *hdev)  		hci_dev_unlock(hdev);  	} -	/* Clear flags */ -	hdev->flags = 0; -  	/* Controller radio is available but is currently powered down */  	hdev->amp_status = 0; @@ -921,7 +1337,7 @@ int hci_dev_reset(__u16 dev)  	hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0;  	if (!test_bit(HCI_RAW, &hdev->flags)) -		ret = __hci_request(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT); +		ret = __hci_req_sync(hdev, hci_reset_req, 0, HCI_INIT_TIMEOUT);  done:  	hci_req_unlock(hdev); @@ -960,8 +1376,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)  	switch (cmd) {  	case HCISETAUTH: -		err = hci_request(hdev, hci_auth_req, dr.dev_opt, -				  HCI_INIT_TIMEOUT); +		err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, +				   HCI_INIT_TIMEOUT);  		break;  	case HCISETENCRYPT: @@ -972,24 +1388,24 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg)  		if (!test_bit(HCI_AUTH, &hdev->flags)) {  			/* Auth must be enabled first */ -			err = hci_request(hdev, hci_auth_req, dr.dev_opt, -					  HCI_INIT_TIMEOUT); +			err = hci_req_sync(hdev, hci_auth_req, dr.dev_opt, +					   HCI_INIT_TIMEOUT);  			if (err)  				break;  		} -		err = hci_request(hdev, hci_encrypt_req, dr.dev_opt, -				  HCI_INIT_TIMEOUT); +		err = hci_req_sync(hdev, hci_encrypt_req, dr.dev_opt, +				   HCI_INIT_TIMEOUT);  		break;  	case HCISETSCAN: -		err = hci_request(hdev, hci_scan_req, dr.dev_opt, -				  HCI_INIT_TIMEOUT); +		err = hci_req_sync(hdev, hci_scan_req, dr.dev_opt, +				   HCI_INIT_TIMEOUT);  		break;  	case HCISETLINKPOL: -		err = hci_request(hdev, hci_linkpol_req, dr.dev_opt, -				  HCI_INIT_TIMEOUT); +		err = hci_req_sync(hdev, hci_linkpol_req, dr.dev_opt, +				   HCI_INIT_TIMEOUT);  		break;  	case HCISETLINKMODE: @@ -1566,7 +1982,7 @@ int hci_blacklist_del(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)  	return mgmt_device_unblocked(hdev, bdaddr, type);  } -static void le_scan_param_req(struct hci_dev *hdev, unsigned long opt) +static void le_scan_param_req(struct hci_request *req, unsigned long opt)  {  	struct le_scan_params *param =  (struct le_scan_params *) opt;  	struct hci_cp_le_set_scan_param cp; @@ -1576,18 
+1992,18 @@ static void le_scan_param_req(struct hci_dev *hdev, unsigned long opt)  	cp.interval = cpu_to_le16(param->interval);  	cp.window = cpu_to_le16(param->window); -	hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp); +	hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(cp), &cp);  } -static void le_scan_enable_req(struct hci_dev *hdev, unsigned long opt) +static void le_scan_enable_req(struct hci_request *req, unsigned long opt)  {  	struct hci_cp_le_set_scan_enable cp;  	memset(&cp, 0, sizeof(cp)); -	cp.enable = 1; -	cp.filter_dup = 1; +	cp.enable = LE_SCAN_ENABLE; +	cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; -	hci_send_cmd(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); +	hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp);  }  static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval, @@ -1608,10 +2024,10 @@ static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval,  	hci_req_lock(hdev); -	err = __hci_request(hdev, le_scan_param_req, (unsigned long) &param, -			    timeo); +	err = __hci_req_sync(hdev, le_scan_param_req, (unsigned long) &param, +			     timeo);  	if (!err) -		err = __hci_request(hdev, le_scan_enable_req, 0, timeo); +		err = __hci_req_sync(hdev, le_scan_enable_req, 0, timeo);  	hci_req_unlock(hdev); @@ -1619,7 +2035,7 @@ static int hci_do_le_scan(struct hci_dev *hdev, u8 type, u16 interval,  		return err;  	queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, -			   msecs_to_jiffies(timeout)); +			   timeout);  	return 0;  } @@ -1729,7 +2145,6 @@ struct hci_dev *hci_alloc_dev(void)  	INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off);  	INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); -	skb_queue_head_init(&hdev->driver_init);  	skb_queue_head_init(&hdev->rx_q);  	skb_queue_head_init(&hdev->cmd_q);  	skb_queue_head_init(&hdev->raw_q); @@ -1748,8 +2163,6 @@ EXPORT_SYMBOL(hci_alloc_dev);  /* Free HCI device */  void hci_free_dev(struct hci_dev *hdev)  { -	skb_queue_purge(&hdev->driver_init); -  	/* will free via device release */  	put_device(&hdev->dev);  } @@ -2160,20 +2573,55 @@ static int hci_send_frame(struct sk_buff *skb)  	return hdev->send(skb);  } -/* Send HCI command */ -int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param) +void hci_req_init(struct hci_request *req, struct hci_dev *hdev) +{ +	skb_queue_head_init(&req->cmd_q); +	req->hdev = hdev; +	req->err = 0; +} + +int hci_req_run(struct hci_request *req, hci_req_complete_t complete) +{ +	struct hci_dev *hdev = req->hdev; +	struct sk_buff *skb; +	unsigned long flags; + +	BT_DBG("length %u", skb_queue_len(&req->cmd_q)); +
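
/* Editor's sketch, not part of this patch: typical caller-side use of the
 * request API shown here. The function and callback names are illustrative;
 * hci_req_init(), hci_req_add() and hci_req_run() are the APIs added above.
 */
static void example_req_complete(struct hci_dev *hdev, u8 status)
{
	BT_DBG("%s status 0x%2.2x", hdev->name, status);
}

static int example_send_request(struct hci_dev *hdev)
{
	struct hci_request req;
	__u8 mode = 0x01;

	hci_req_init(&req, hdev);
	hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, sizeof(mode), &mode);
	hci_req_add(&req, HCI_OP_READ_LOCAL_NAME, 0, NULL);

	/* Both commands are spliced onto hdev->cmd_q as one unit; the
	 * callback runs once the last command in the request completes.
	 */
	return hci_req_run(&req, example_req_complete);
}
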
+	/* If an error occurred during request building, remove all HCI +	 * commands queued on the HCI request queue. +	 */ +	if (req->err) { +		skb_queue_purge(&req->cmd_q); +		return req->err; +	} + +	/* Do not allow empty requests */ +	if (skb_queue_empty(&req->cmd_q)) +		return -ENODATA; + +	skb = skb_peek_tail(&req->cmd_q); +	bt_cb(skb)->req.complete = complete; + +	spin_lock_irqsave(&hdev->cmd_q.lock, flags); +	skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); +	spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); + +	queue_work(hdev->workqueue, &hdev->cmd_work); + +	return 0; +} + +static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, +				       u32 plen, const void *param)  {  	int len = HCI_COMMAND_HDR_SIZE + plen;  	struct hci_command_hdr *hdr;  	struct sk_buff *skb; -	BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); -  	skb = bt_skb_alloc(len, GFP_ATOMIC); -	if (!skb) { -		BT_ERR("%s no memory for command", hdev->name); -		return -ENOMEM; -	} +	if (!skb) +		return NULL;  	hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE);  	hdr->opcode = cpu_to_le16(opcode); @@ -2187,8 +2635,27 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param)  	bt_cb(skb)->pkt_type = HCI_COMMAND_PKT;  	skb->dev = (void *) hdev; -	if (test_bit(HCI_INIT, &hdev->flags)) -		hdev->init_last_cmd = opcode; +	return skb; +} + +/* Send HCI command */ +int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, +		 const void *param) +{ +	struct sk_buff *skb; + +	BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); + +	skb = hci_prepare_cmd(hdev, opcode, plen, param); +	if (!skb) { +		BT_ERR("%s no memory for command", hdev->name); +		return -ENOMEM; +	} + +	/* Stand-alone HCI commands must be flagged as +	 * single-command requests. +	 */ +	bt_cb(skb)->req.start = true;  skb_queue_tail(&hdev->cmd_q, skb);  	queue_work(hdev->workqueue, &hdev->cmd_work); @@ -2196,6 +2663,43 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param)  	return 0;  } +/* Queue a command to an asynchronous HCI request */ +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, +		    const void *param, u8 event) +{ +	struct hci_dev *hdev = req->hdev; +	struct sk_buff *skb; + +	BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); + +	/* If an error occurred during request building, there is no point in +	 * queueing the HCI command. We can simply return. 
+	 */ +	if (req->err) +		return; + +	skb = hci_prepare_cmd(hdev, opcode, plen, param); +	if (!skb) { +		BT_ERR("%s no memory for command (opcode 0x%4.4x)", +		       hdev->name, opcode); +		req->err = -ENOMEM; +		return; +	} + +	if (skb_queue_empty(&req->cmd_q)) +		bt_cb(skb)->req.start = true; + +	bt_cb(skb)->req.event = event; + +	skb_queue_tail(&req->cmd_q, skb); +} + +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, +		 const void *param) +{ +	hci_req_add_ev(req, opcode, plen, param, 0); +} +  /* Get data from the previously sent command */  void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode)  { @@ -2398,7 +2902,7 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type)  		if (c->type == type && c->sent) {  			BT_ERR("%s killing stalled connection %pMR",  			       hdev->name, &c->dst); -			hci_acl_disconn(c, HCI_ERROR_REMOTE_USER_TERM); +			hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM);  		}  	} @@ -2860,6 +3364,97 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)  	kfree_skb(skb);  } +static bool hci_req_is_complete(struct hci_dev *hdev) +{ +	struct sk_buff *skb; + +	skb = skb_peek(&hdev->cmd_q); +	if (!skb) +		return true; + +	return bt_cb(skb)->req.start; +} + +static void hci_resend_last(struct hci_dev *hdev) +{ +	struct hci_command_hdr *sent; +	struct sk_buff *skb; +	u16 opcode; + +	if (!hdev->sent_cmd) +		return; + +	sent = (void *) hdev->sent_cmd->data; +	opcode = __le16_to_cpu(sent->opcode); +	if (opcode == HCI_OP_RESET) +		return; + +	skb = skb_clone(hdev->sent_cmd, GFP_KERNEL); +	if (!skb) +		return; + +	skb_queue_head(&hdev->cmd_q, skb); +	queue_work(hdev->workqueue, &hdev->cmd_work); +} + +void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) +{ +	hci_req_complete_t req_complete = NULL; +	struct sk_buff *skb; +	unsigned long flags; + +	BT_DBG("opcode 0x%04x status 0x%02x", opcode, status); + +	/* If the completed command doesn't match the last one that was +	 * sent we need to do special handling of it. +	 */ +	if (!hci_sent_cmd_data(hdev, opcode)) { +		/* Some CSR based controllers generate a spontaneous +		 * reset complete event during init and any pending +		 * command will never be completed. In such a case we +		 * need to resend whatever was the last sent +		 * command. +		 */ +		if (test_bit(HCI_INIT, &hdev->flags) && opcode == HCI_OP_RESET) +			hci_resend_last(hdev); + +		return; +	} + +	/* If the command succeeded and there's still more commands in +	 * this request the request is not yet complete. +	 */ +	if (!status && !hci_req_is_complete(hdev)) +		return; + +	/* If this was the last command in a request the complete +	 * callback would be found in hdev->sent_cmd instead of the +	 * command queue (hdev->cmd_q). 
+	 */ +	if (hdev->sent_cmd) { +		req_complete = bt_cb(hdev->sent_cmd)->req.complete; +		if (req_complete) +			goto call_complete; +	} + +	/* Remove all pending commands belonging to this request */ +	spin_lock_irqsave(&hdev->cmd_q.lock, flags); +	while ((skb = __skb_dequeue(&hdev->cmd_q))) { +		if (bt_cb(skb)->req.start) { +			__skb_queue_head(&hdev->cmd_q, skb); +			break; +		} + +		req_complete = bt_cb(skb)->req.complete; +		kfree_skb(skb); +	} +	spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); + +call_complete: +	if (req_complete) +		req_complete(hdev, status); +} +  static void hci_rx_work(struct work_struct *work)  {  	struct hci_dev *hdev = container_of(work, struct hci_dev, rx_work); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 477726a6351..b93cd2eb5d5 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -48,13 +48,13 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb)  	}  	clear_bit(HCI_INQUIRY, &hdev->flags); +	smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */ +	wake_up_bit(&hdev->flags, HCI_INQUIRY);  	hci_dev_lock(hdev);  	hci_discovery_set_state(hdev, DISCOVERY_STOPPED);  	hci_dev_unlock(hdev); -	hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status); -  	hci_conn_check_pending(hdev);  } @@ -183,8 +183,6 @@ static void hci_cc_write_def_link_policy(struct hci_dev *hdev,  	if (!status)  		hdev->link_policy = get_unaligned_le16(sent); - -	hci_req_complete(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, status);  }  static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) @@ -195,11 +193,8 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb)  	clear_bit(HCI_RESET, &hdev->flags); -	hci_req_complete(hdev, HCI_OP_RESET, status); -  	/* Reset all non-persistent flags */ -	hdev->dev_flags &= ~(BIT(HCI_LE_SCAN) | BIT(HCI_PENDING_CLASS) | -			     BIT(HCI_PERIODIC_INQ)); +	hdev->dev_flags &= ~HCI_PERSISTENT_MASK;  	hdev->discovery.state = DISCOVERY_STOPPED;  	hdev->inq_tx_power = HCI_TX_POWER_INVALID; @@ -228,11 +223,6 @@ static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb)  		memcpy(hdev->dev_name, sent, HCI_MAX_NAME_LENGTH);  	hci_dev_unlock(hdev); - -	if (!status && !test_bit(HCI_INIT, &hdev->flags)) -		hci_update_ad(hdev); - -	hci_req_complete(hdev, HCI_OP_WRITE_LOCAL_NAME, status);  }  static void hci_cc_read_local_name(struct hci_dev *hdev, struct sk_buff *skb) @@ -270,8 +260,6 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb)  	if (test_bit(HCI_MGMT, &hdev->dev_flags))  		mgmt_auth_enable_complete(hdev, status); - -	hci_req_complete(hdev, HCI_OP_WRITE_AUTH_ENABLE, status);  }  static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) @@ -293,8 +281,6 @@ static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb)  		else  			clear_bit(HCI_ENCRYPT, &hdev->flags);  	} - -	hci_req_complete(hdev, HCI_OP_WRITE_ENCRYPT_MODE, status);  }  static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) @@ -343,7 +329,6 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb)  done:  	hci_dev_unlock(hdev); -	hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status);  }  static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) @@ -435,15 +420,6 @@ static void hci_cc_write_voice_setting(struct hci_dev *hdev,  		hdev->notify(hdev, HCI_NOTIFY_VOICE_SETTING);  } -static void hci_cc_host_buffer_size(struct hci_dev 
*hdev, struct sk_buff *skb) -{ -	__u8 status = *((__u8 *) skb->data); - -	BT_DBG("%s status 0x%2.2x", hdev->name, status); - -	hci_req_complete(hdev, HCI_OP_HOST_BUFFER_SIZE, status); -} -  static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)  {  	__u8 status = *((__u8 *) skb->data); @@ -457,9 +433,9 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)  	if (!status) {  		if (sent->mode) -			hdev->host_features[0] |= LMP_HOST_SSP; +			hdev->features[1][0] |= LMP_HOST_SSP;  		else -			hdev->host_features[0] &= ~LMP_HOST_SSP; +			hdev->features[1][0] &= ~LMP_HOST_SSP;  	}  	if (test_bit(HCI_MGMT, &hdev->dev_flags)) @@ -472,211 +448,6 @@ static void hci_cc_write_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb)  	}  } -static u8 hci_get_inquiry_mode(struct hci_dev *hdev) -{ -	if (lmp_ext_inq_capable(hdev)) -		return 2; - -	if (lmp_inq_rssi_capable(hdev)) -		return 1; - -	if (hdev->manufacturer == 11 && hdev->hci_rev == 0x00 && -	    hdev->lmp_subver == 0x0757) -		return 1; - -	if (hdev->manufacturer == 15) { -		if (hdev->hci_rev == 0x03 && hdev->lmp_subver == 0x6963) -			return 1; -		if (hdev->hci_rev == 0x09 && hdev->lmp_subver == 0x6963) -			return 1; -		if (hdev->hci_rev == 0x00 && hdev->lmp_subver == 0x6965) -			return 1; -	} - -	if (hdev->manufacturer == 31 && hdev->hci_rev == 0x2005 && -	    hdev->lmp_subver == 0x1805) -		return 1; - -	return 0; -} - -static void hci_setup_inquiry_mode(struct hci_dev *hdev) -{ -	u8 mode; - -	mode = hci_get_inquiry_mode(hdev); - -	hci_send_cmd(hdev, HCI_OP_WRITE_INQUIRY_MODE, 1, &mode); -} - -static void hci_setup_event_mask(struct hci_dev *hdev) -{ -	/* The second byte is 0xff instead of 0x9f (two reserved bits -	 * disabled) since a Broadcom 1.2 dongle doesn't respond to the -	 * command otherwise */ -	u8 events[8] = { 0xff, 0xff, 0xfb, 0xff, 0x00, 0x00, 0x00, 0x00 }; - -	/* CSR 1.1 dongles does not accept any bitfield so don't try to set -	 * any event mask for pre 1.2 devices */ -	if (hdev->hci_ver < BLUETOOTH_VER_1_2) -		return; - -	if (lmp_bredr_capable(hdev)) { -		events[4] |= 0x01; /* Flow Specification Complete */ -		events[4] |= 0x02; /* Inquiry Result with RSSI */ -		events[4] |= 0x04; /* Read Remote Extended Features Complete */ -		events[5] |= 0x08; /* Synchronous Connection Complete */ -		events[5] |= 0x10; /* Synchronous Connection Changed */ -	} - -	if (lmp_inq_rssi_capable(hdev)) -		events[4] |= 0x02; /* Inquiry Result with RSSI */ - -	if (lmp_sniffsubr_capable(hdev)) -		events[5] |= 0x20; /* Sniff Subrating */ - -	if (lmp_pause_enc_capable(hdev)) -		events[5] |= 0x80; /* Encryption Key Refresh Complete */ - -	if (lmp_ext_inq_capable(hdev)) -		events[5] |= 0x40; /* Extended Inquiry Result */ - -	if (lmp_no_flush_capable(hdev)) -		events[7] |= 0x01; /* Enhanced Flush Complete */ - -	if (lmp_lsto_capable(hdev)) -		events[6] |= 0x80; /* Link Supervision Timeout Changed */ - -	if (lmp_ssp_capable(hdev)) { -		events[6] |= 0x01;	/* IO Capability Request */ -		events[6] |= 0x02;	/* IO Capability Response */ -		events[6] |= 0x04;	/* User Confirmation Request */ -		events[6] |= 0x08;	/* User Passkey Request */ -		events[6] |= 0x10;	/* Remote OOB Data Request */ -		events[6] |= 0x20;	/* Simple Pairing Complete */ -		events[7] |= 0x04;	/* User Passkey Notification */ -		events[7] |= 0x08;	/* Keypress Notification */ -		events[7] |= 0x10;	/* Remote Host Supported -					 * Features Notification */ -	} - -	if (lmp_le_capable(hdev)) -		events[7] |= 0x20;	/* LE Meta-Event */ - -	hci_send_cmd(hdev, 
HCI_OP_SET_EVENT_MASK, sizeof(events), events); - -	if (lmp_le_capable(hdev)) { -		memset(events, 0, sizeof(events)); -		events[0] = 0x1f; -		hci_send_cmd(hdev, HCI_OP_LE_SET_EVENT_MASK, -			     sizeof(events), events); -	} -} - -static void bredr_setup(struct hci_dev *hdev) -{ -	struct hci_cp_delete_stored_link_key cp; -	__le16 param; -	__u8 flt_type; - -	/* Read Buffer Size (ACL mtu, max pkt, etc.) */ -	hci_send_cmd(hdev, HCI_OP_READ_BUFFER_SIZE, 0, NULL); - -	/* Read Class of Device */ -	hci_send_cmd(hdev, HCI_OP_READ_CLASS_OF_DEV, 0, NULL); - -	/* Read Local Name */ -	hci_send_cmd(hdev, HCI_OP_READ_LOCAL_NAME, 0, NULL); - -	/* Read Voice Setting */ -	hci_send_cmd(hdev, HCI_OP_READ_VOICE_SETTING, 0, NULL); - -	/* Clear Event Filters */ -	flt_type = HCI_FLT_CLEAR_ALL; -	hci_send_cmd(hdev, HCI_OP_SET_EVENT_FLT, 1, &flt_type); - -	/* Connection accept timeout ~20 secs */ -	param = __constant_cpu_to_le16(0x7d00); -	hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, &param); - -	bacpy(&cp.bdaddr, BDADDR_ANY); -	cp.delete_all = 1; -	hci_send_cmd(hdev, HCI_OP_DELETE_STORED_LINK_KEY, sizeof(cp), &cp); -} - -static void le_setup(struct hci_dev *hdev) -{ -	/* Read LE Buffer Size */ -	hci_send_cmd(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL); - -	/* Read LE Local Supported Features */ -	hci_send_cmd(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, 0, NULL); - -	/* Read LE Advertising Channel TX Power */ -	hci_send_cmd(hdev, HCI_OP_LE_READ_ADV_TX_POWER, 0, NULL); - -	/* Read LE White List Size */ -	hci_send_cmd(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, 0, NULL); - -	/* Read LE Supported States */ -	hci_send_cmd(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, 0, NULL); -} - -static void hci_setup(struct hci_dev *hdev) -{ -	if (hdev->dev_type != HCI_BREDR) -		return; - -	/* Read BD Address */ -	hci_send_cmd(hdev, HCI_OP_READ_BD_ADDR, 0, NULL); - -	if (lmp_bredr_capable(hdev)) -		bredr_setup(hdev); - -	if (lmp_le_capable(hdev)) -		le_setup(hdev); - -	hci_setup_event_mask(hdev); - -	if (hdev->hci_ver > BLUETOOTH_VER_1_1) -		hci_send_cmd(hdev, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL); - -	if (lmp_ssp_capable(hdev)) { -		if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) { -			u8 mode = 0x01; -			hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, -				     sizeof(mode), &mode); -		} else { -			struct hci_cp_write_eir cp; - -			memset(hdev->eir, 0, sizeof(hdev->eir)); -			memset(&cp, 0, sizeof(cp)); - -			hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp); -		} -	} - -	if (lmp_inq_rssi_capable(hdev)) -		hci_setup_inquiry_mode(hdev); - -	if (lmp_inq_tx_pwr_capable(hdev)) -		hci_send_cmd(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, 0, NULL); - -	if (lmp_ext_feat_capable(hdev)) { -		struct hci_cp_read_local_ext_features cp; - -		cp.page = 0x01; -		hci_send_cmd(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, sizeof(cp), -			     &cp); -	} - -	if (test_bit(HCI_LINK_SECURITY, &hdev->dev_flags)) { -		u8 enable = 1; -		hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, sizeof(enable), -			     &enable); -	} -} -  static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)  {  	struct hci_rp_read_local_version *rp = (void *) skb->data; @@ -684,7 +455,7 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)  	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);  	if (rp->status) -		goto done; +		return;  	hdev->hci_ver = rp->hci_ver;  	hdev->hci_rev = __le16_to_cpu(rp->hci_rev); @@ -694,30 +465,6 @@ static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)  	BT_DBG("%s manufacturer 0x%4.4x hci ver %d:%d", 
hdev->name,  	       hdev->manufacturer, hdev->hci_ver, hdev->hci_rev); - -	if (test_bit(HCI_INIT, &hdev->flags)) -		hci_setup(hdev); - -done: -	hci_req_complete(hdev, HCI_OP_READ_LOCAL_VERSION, rp->status); -} - -static void hci_setup_link_policy(struct hci_dev *hdev) -{ -	struct hci_cp_write_def_link_policy cp; -	u16 link_policy = 0; - -	if (lmp_rswitch_capable(hdev)) -		link_policy |= HCI_LP_RSWITCH; -	if (lmp_hold_capable(hdev)) -		link_policy |= HCI_LP_HOLD; -	if (lmp_sniff_capable(hdev)) -		link_policy |= HCI_LP_SNIFF; -	if (lmp_park_capable(hdev)) -		link_policy |= HCI_LP_PARK; - -	cp.policy = cpu_to_le16(link_policy); -	hci_send_cmd(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, sizeof(cp), &cp);  }  static void hci_cc_read_local_commands(struct hci_dev *hdev, @@ -727,16 +474,8 @@ static void hci_cc_read_local_commands(struct hci_dev *hdev,  	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); -	if (rp->status) -		goto done; - -	memcpy(hdev->commands, rp->commands, sizeof(hdev->commands)); - -	if (test_bit(HCI_INIT, &hdev->flags) && (hdev->commands[5] & 0x10)) -		hci_setup_link_policy(hdev); - -done: -	hci_req_complete(hdev, HCI_OP_READ_LOCAL_COMMANDS, rp->status); +	if (!rp->status) +		memcpy(hdev->commands, rp->commands, sizeof(hdev->commands));  }  static void hci_cc_read_local_features(struct hci_dev *hdev, @@ -754,18 +493,18 @@ static void hci_cc_read_local_features(struct hci_dev *hdev,  	/* Adjust default settings according to features  	 * supported by device. */ -	if (hdev->features[0] & LMP_3SLOT) +	if (hdev->features[0][0] & LMP_3SLOT)  		hdev->pkt_type |= (HCI_DM3 | HCI_DH3); -	if (hdev->features[0] & LMP_5SLOT) +	if (hdev->features[0][0] & LMP_5SLOT)  		hdev->pkt_type |= (HCI_DM5 | HCI_DH5); -	if (hdev->features[1] & LMP_HV2) { +	if (hdev->features[0][1] & LMP_HV2) {  		hdev->pkt_type  |= (HCI_HV2);  		hdev->esco_type |= (ESCO_HV2);  	} -	if (hdev->features[1] & LMP_HV3) { +	if (hdev->features[0][1] & LMP_HV3) {  		hdev->pkt_type  |= (HCI_HV3);  		hdev->esco_type |= (ESCO_HV3);  	} @@ -773,42 +512,26 @@ static void hci_cc_read_local_features(struct hci_dev *hdev,  	if (lmp_esco_capable(hdev))  		hdev->esco_type |= (ESCO_EV3); -	if (hdev->features[4] & LMP_EV4) +	if (hdev->features[0][4] & LMP_EV4)  		hdev->esco_type |= (ESCO_EV4); -	if (hdev->features[4] & LMP_EV5) +	if (hdev->features[0][4] & LMP_EV5)  		hdev->esco_type |= (ESCO_EV5); -	if (hdev->features[5] & LMP_EDR_ESCO_2M) +	if (hdev->features[0][5] & LMP_EDR_ESCO_2M)  		hdev->esco_type |= (ESCO_2EV3); -	if (hdev->features[5] & LMP_EDR_ESCO_3M) +	if (hdev->features[0][5] & LMP_EDR_ESCO_3M)  		hdev->esco_type |= (ESCO_3EV3); -	if (hdev->features[5] & LMP_EDR_3S_ESCO) +	if (hdev->features[0][5] & LMP_EDR_3S_ESCO)  		hdev->esco_type |= (ESCO_2EV5 | ESCO_3EV5);  	BT_DBG("%s features 0x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x", hdev->name, -	       hdev->features[0], hdev->features[1], -	       hdev->features[2], hdev->features[3], -	       hdev->features[4], hdev->features[5], -	       hdev->features[6], hdev->features[7]); -} - -static void hci_set_le_support(struct hci_dev *hdev) -{ -	struct hci_cp_write_le_host_supported cp; - -	memset(&cp, 0, sizeof(cp)); - -	if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { -		cp.le = 1; -		cp.simul = lmp_le_br_capable(hdev); -	} - -	if (cp.le != lmp_host_le_capable(hdev)) -		hci_send_cmd(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, sizeof(cp), -			     &cp); +	       hdev->features[0][0], hdev->features[0][1], +	       hdev->features[0][2], hdev->features[0][3], +	       hdev->features[0][4], 
hdev->features[0][5], +	       hdev->features[0][6], hdev->features[0][7]);  }  static void hci_cc_read_local_ext_features(struct hci_dev *hdev, @@ -819,22 +542,12 @@ static void hci_cc_read_local_ext_features(struct hci_dev *hdev,  	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);  	if (rp->status) -		goto done; - -	switch (rp->page) { -	case 0: -		memcpy(hdev->features, rp->features, 8); -		break; -	case 1: -		memcpy(hdev->host_features, rp->features, 8); -		break; -	} +		return; -	if (test_bit(HCI_INIT, &hdev->flags) && lmp_le_capable(hdev)) -		hci_set_le_support(hdev); +	hdev->max_page = rp->max_page; -done: -	hci_req_complete(hdev, HCI_OP_READ_LOCAL_EXT_FEATURES, rp->status); +	if (rp->page < HCI_MAX_PAGES) +		memcpy(hdev->features[rp->page], rp->features, 8);  }  static void hci_cc_read_flow_control_mode(struct hci_dev *hdev, @@ -844,12 +557,8 @@ static void hci_cc_read_flow_control_mode(struct hci_dev *hdev,  	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); -	if (rp->status) -		return; - -	hdev->flow_ctl_mode = rp->mode; - -	hci_req_complete(hdev, HCI_OP_READ_FLOW_CONTROL_MODE, rp->status); +	if (!rp->status) +		hdev->flow_ctl_mode = rp->mode;  }  static void hci_cc_read_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) @@ -886,8 +595,65 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb)  	if (!rp->status)  		bacpy(&hdev->bdaddr, &rp->bdaddr); +} -	hci_req_complete(hdev, HCI_OP_READ_BD_ADDR, rp->status); +static void hci_cc_read_page_scan_activity(struct hci_dev *hdev, +					   struct sk_buff *skb) +{ +	struct hci_rp_read_page_scan_activity *rp = (void *) skb->data; + +	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + +	if (test_bit(HCI_INIT, &hdev->flags) && !rp->status) { +		hdev->page_scan_interval = __le16_to_cpu(rp->interval); +		hdev->page_scan_window = __le16_to_cpu(rp->window); +	} +} + +static void hci_cc_write_page_scan_activity(struct hci_dev *hdev, +					    struct sk_buff *skb) +{ +	u8 status = *((u8 *) skb->data); +	struct hci_cp_write_page_scan_activity *sent; + +	BT_DBG("%s status 0x%2.2x", hdev->name, status); + +	if (status) +		return; + +	sent = hci_sent_cmd_data(hdev, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY); +	if (!sent) +		return; + +	hdev->page_scan_interval = __le16_to_cpu(sent->interval); +	hdev->page_scan_window = __le16_to_cpu(sent->window); +} + +static void hci_cc_read_page_scan_type(struct hci_dev *hdev, +					   struct sk_buff *skb) +{ +	struct hci_rp_read_page_scan_type *rp = (void *) skb->data; + +	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + +	if (test_bit(HCI_INIT, &hdev->flags) && !rp->status) +		hdev->page_scan_type = rp->type; +} + +static void hci_cc_write_page_scan_type(struct hci_dev *hdev, +					struct sk_buff *skb) +{ +	u8 status = *((u8 *) skb->data); +	u8 *type; + +	BT_DBG("%s status 0x%2.2x", hdev->name, status); + +	if (status) +		return; + +	type = hci_sent_cmd_data(hdev, HCI_OP_WRITE_PAGE_SCAN_TYPE); +	if (type) +		hdev->page_scan_type = *type;  }  static void hci_cc_read_data_block_size(struct hci_dev *hdev, @@ -908,17 +674,6 @@ static void hci_cc_read_data_block_size(struct hci_dev *hdev,  	BT_DBG("%s blk mtu %d cnt %d len %d", hdev->name, hdev->block_mtu,  	       hdev->block_cnt, hdev->block_len); - -	hci_req_complete(hdev, HCI_OP_READ_DATA_BLOCK_SIZE, rp->status); -} - -static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb) -{ -	__u8 status = *((__u8 *) skb->data); - -	BT_DBG("%s status 0x%2.2x", hdev->name, status); - -	hci_req_complete(hdev, 
HCI_OP_WRITE_CA_TIMEOUT, status);  }  static void hci_cc_read_local_amp_info(struct hci_dev *hdev, @@ -942,8 +697,6 @@ static void hci_cc_read_local_amp_info(struct hci_dev *hdev,  	hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to);  	hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to); -	hci_req_complete(hdev, HCI_OP_READ_LOCAL_AMP_INFO, rp->status); -  a2mp_rsp:  	a2mp_send_getinfo_rsp(hdev);  } @@ -985,35 +738,6 @@ a2mp_rsp:  	a2mp_send_create_phy_link_req(hdev, rp->status);  } -static void hci_cc_delete_stored_link_key(struct hci_dev *hdev, -					  struct sk_buff *skb) -{ -	__u8 status = *((__u8 *) skb->data); - -	BT_DBG("%s status 0x%2.2x", hdev->name, status); - -	hci_req_complete(hdev, HCI_OP_DELETE_STORED_LINK_KEY, status); -} - -static void hci_cc_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb) -{ -	__u8 status = *((__u8 *) skb->data); - -	BT_DBG("%s status 0x%2.2x", hdev->name, status); - -	hci_req_complete(hdev, HCI_OP_SET_EVENT_MASK, status); -} - -static void hci_cc_write_inquiry_mode(struct hci_dev *hdev, -				      struct sk_buff *skb) -{ -	__u8 status = *((__u8 *) skb->data); - -	BT_DBG("%s status 0x%2.2x", hdev->name, status); - -	hci_req_complete(hdev, HCI_OP_WRITE_INQUIRY_MODE, status); -} -  static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev,  					 struct sk_buff *skb)  { @@ -1023,17 +747,6 @@ static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev,  	if (!rp->status)  		hdev->inq_tx_power = rp->tx_power; - -	hci_req_complete(hdev, HCI_OP_READ_INQ_RSP_TX_POWER, rp->status); -} - -static void hci_cc_set_event_flt(struct hci_dev *hdev, struct sk_buff *skb) -{ -	__u8 status = *((__u8 *) skb->data); - -	BT_DBG("%s status 0x%2.2x", hdev->name, status); - -	hci_req_complete(hdev, HCI_OP_SET_EVENT_FLT, status);  }  static void hci_cc_pin_code_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -1095,8 +808,6 @@ static void hci_cc_le_read_buffer_size(struct hci_dev *hdev,  	hdev->le_cnt = hdev->le_pkts;  	BT_DBG("%s le mtu %d:%d", hdev->name, hdev->le_mtu, hdev->le_pkts); - -	hci_req_complete(hdev, HCI_OP_LE_READ_BUFFER_SIZE, rp->status);  }  static void hci_cc_le_read_local_features(struct hci_dev *hdev, @@ -1108,8 +819,6 @@ static void hci_cc_le_read_local_features(struct hci_dev *hdev,  	if (!rp->status)  		memcpy(hdev->le_features, rp->features, 8); - -	hci_req_complete(hdev, HCI_OP_LE_READ_LOCAL_FEATURES, rp->status);  }  static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev, @@ -1119,22 +828,8 @@ static void hci_cc_le_read_adv_tx_power(struct hci_dev *hdev,  	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); -	if (!rp->status) { +	if (!rp->status)  		hdev->adv_tx_power = rp->tx_power; -		if (!test_bit(HCI_INIT, &hdev->flags)) -			hci_update_ad(hdev); -	} - -	hci_req_complete(hdev, HCI_OP_LE_READ_ADV_TX_POWER, rp->status); -} - -static void hci_cc_le_set_event_mask(struct hci_dev *hdev, struct sk_buff *skb) -{ -	__u8 status = *((__u8 *) skb->data); - -	BT_DBG("%s status 0x%2.2x", hdev->name, status); - -	hci_req_complete(hdev, HCI_OP_LE_SET_EVENT_MASK, status);  }  static void hci_cc_user_confirm_reply(struct hci_dev *hdev, struct sk_buff *skb) @@ -1231,12 +926,15 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb)  			clear_bit(HCI_LE_PERIPHERAL, &hdev->dev_flags);  	} -	hci_dev_unlock(hdev); +	if (!test_bit(HCI_INIT, &hdev->flags)) { +		struct hci_request req; -	if (!test_bit(HCI_INIT, &hdev->flags)) -		hci_update_ad(hdev); +		hci_req_init(&req, hdev); +		hci_update_ad(&req); +		
hci_req_run(&req, NULL); +	} -	hci_req_complete(hdev, HCI_OP_LE_SET_ADV_ENABLE, status); +	hci_dev_unlock(hdev);  }  static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb) @@ -1245,8 +943,6 @@ static void hci_cc_le_set_scan_param(struct hci_dev *hdev, struct sk_buff *skb)  	BT_DBG("%s status 0x%2.2x", hdev->name, status); -	hci_req_complete(hdev, HCI_OP_LE_SET_SCAN_PARAM, status); -  	if (status) {  		hci_dev_lock(hdev);  		mgmt_start_discovery_failed(hdev, status); @@ -1268,9 +964,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,  		return;  	switch (cp->enable) { -	case LE_SCANNING_ENABLED: -		hci_req_complete(hdev, HCI_OP_LE_SET_SCAN_ENABLE, status); - +	case LE_SCAN_ENABLE:  		if (status) {  			hci_dev_lock(hdev);  			mgmt_start_discovery_failed(hdev, status); @@ -1285,7 +979,7 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev,  		hci_dev_unlock(hdev);  		break; -	case LE_SCANNING_DISABLED: +	case LE_SCAN_DISABLE:  		if (status) {  			hci_dev_lock(hdev);  			mgmt_stop_discovery_failed(hdev, status); @@ -1321,32 +1015,6 @@ static void hci_cc_le_read_white_list_size(struct hci_dev *hdev,  	if (!rp->status)  		hdev->le_white_list_size = rp->size; - -	hci_req_complete(hdev, HCI_OP_LE_READ_WHITE_LIST_SIZE, rp->status); -} - -static void hci_cc_le_ltk_reply(struct hci_dev *hdev, struct sk_buff *skb) -{ -	struct hci_rp_le_ltk_reply *rp = (void *) skb->data; - -	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - -	if (rp->status) -		return; - -	hci_req_complete(hdev, HCI_OP_LE_LTK_REPLY, rp->status); -} - -static void hci_cc_le_ltk_neg_reply(struct hci_dev *hdev, struct sk_buff *skb) -{ -	struct hci_rp_le_ltk_neg_reply *rp = (void *) skb->data; - -	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); - -	if (rp->status) -		return; - -	hci_req_complete(hdev, HCI_OP_LE_LTK_NEG_REPLY, rp->status);  }  static void hci_cc_le_read_supported_states(struct hci_dev *hdev, @@ -1358,8 +1026,6 @@ static void hci_cc_le_read_supported_states(struct hci_dev *hdev,  	if (!rp->status)  		memcpy(hdev->le_states, rp->le_states, 8); - -	hci_req_complete(hdev, HCI_OP_LE_READ_SUPPORTED_STATES, rp->status);  }  static void hci_cc_write_le_host_supported(struct hci_dev *hdev, @@ -1376,21 +1042,19 @@ static void hci_cc_write_le_host_supported(struct hci_dev *hdev,  	if (!status) {  		if (sent->le) -			hdev->host_features[0] |= LMP_HOST_LE; +			hdev->features[1][0] |= LMP_HOST_LE;  		else -			hdev->host_features[0] &= ~LMP_HOST_LE; +			hdev->features[1][0] &= ~LMP_HOST_LE;  		if (sent->simul) -			hdev->host_features[0] |= LMP_HOST_LE_BREDR; +			hdev->features[1][0] |= LMP_HOST_LE_BREDR;  		else -			hdev->host_features[0] &= ~LMP_HOST_LE_BREDR; +			hdev->features[1][0] &= ~LMP_HOST_LE_BREDR;  	}  	if (test_bit(HCI_MGMT, &hdev->dev_flags) &&  	    !test_bit(HCI_INIT, &hdev->flags))  		mgmt_le_enable_complete(hdev, sent->le, status); - -	hci_req_complete(hdev, HCI_OP_WRITE_LE_HOST_SUPPORTED, status);  }  static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev, @@ -1412,7 +1076,6 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)  	BT_DBG("%s status 0x%2.2x", hdev->name, status);  	if (status) { -		hci_req_complete(hdev, HCI_OP_INQUIRY, status);  		hci_conn_check_pending(hdev);  		hci_dev_lock(hdev);  		if (test_bit(HCI_MGMT, &hdev->dev_flags)) @@ -1523,7 +1186,7 @@ static void hci_cs_auth_requested(struct hci_dev *hdev, __u8 status)  	if (conn) {  		if (conn->state == BT_CONFIG) {  			hci_proto_connect_cfm(conn, status); -			hci_conn_put(conn); +	
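
/* Editor's note, not part of this patch: the feature bitmasks handled above
 * are now two-dimensional, indexed by page first (page 0: LMP features,
 * page 1: host features). An illustrative helper reading a host-feature bit
 * that hci_cc_write_le_host_supported() sets on page 1:
 */
static bool example_host_le_enabled(struct hci_dev *hdev)
{
	return (hdev->features[1][0] & LMP_HOST_LE) != 0;
}
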
		hci_conn_drop(conn);  		}  	} @@ -1550,7 +1213,7 @@ static void hci_cs_set_conn_encrypt(struct hci_dev *hdev, __u8 status)  	if (conn) {  		if (conn->state == BT_CONFIG) {  			hci_proto_connect_cfm(conn, status); -			hci_conn_put(conn); +			hci_conn_drop(conn);  		}  	} @@ -1712,7 +1375,7 @@ static void hci_cs_read_remote_features(struct hci_dev *hdev, __u8 status)  	if (conn) {  		if (conn->state == BT_CONFIG) {  			hci_proto_connect_cfm(conn, status); -			hci_conn_put(conn); +			hci_conn_drop(conn);  		}  	} @@ -1739,7 +1402,7 @@ static void hci_cs_read_remote_ext_features(struct hci_dev *hdev, __u8 status)  	if (conn) {  		if (conn->state == BT_CONFIG) {  			hci_proto_connect_cfm(conn, status); -			hci_conn_put(conn); +			hci_conn_drop(conn);  		}  	} @@ -1884,11 +1547,6 @@ static void hci_cs_le_create_conn(struct hci_dev *hdev, __u8 status)  	}  } -static void hci_cs_le_start_enc(struct hci_dev *hdev, u8 status) -{ -	BT_DBG("%s status 0x%2.2x", hdev->name, status); -} -  static void hci_cs_create_phylink(struct hci_dev *hdev, u8 status)  {  	struct hci_cp_create_phy_link *cp; @@ -1930,11 +1588,6 @@ static void hci_cs_accept_phylink(struct hci_dev *hdev, u8 status)  	amp_write_remote_assoc(hdev, cp->phy_handle);  } -static void hci_cs_create_logical_link(struct hci_dev *hdev, u8 status) -{ -	BT_DBG("%s status 0x%2.2x", hdev->name, status); -} -  static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)  {  	__u8 status = *((__u8 *) skb->data); @@ -1943,13 +1596,14 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)  	BT_DBG("%s status 0x%2.2x", hdev->name, status); -	hci_req_complete(hdev, HCI_OP_INQUIRY, status); -  	hci_conn_check_pending(hdev);  	if (!test_and_clear_bit(HCI_INQUIRY, &hdev->flags))  		return; +	smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */ +	wake_up_bit(&hdev->flags, HCI_INQUIRY); +  	if (!test_bit(HCI_MGMT, &hdev->dev_flags))  		return; @@ -2048,7 +1702,6 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)  		} else  			conn->state = BT_CONNECTED; -		hci_conn_hold_device(conn);  		hci_conn_add_sysfs(conn);  		if (test_bit(HCI_AUTH, &hdev->flags)) @@ -2095,42 +1748,6 @@ unlock:  	hci_conn_check_pending(hdev);  } -void hci_conn_accept(struct hci_conn *conn, int mask) -{ -	struct hci_dev *hdev = conn->hdev; - -	BT_DBG("conn %p", conn); - -	conn->state = BT_CONFIG; - -	if (!lmp_esco_capable(hdev)) { -		struct hci_cp_accept_conn_req cp; - -		bacpy(&cp.bdaddr, &conn->dst); - -		if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER)) -			cp.role = 0x00; /* Become master */ -		else -			cp.role = 0x01; /* Remain slave */ - -		hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp); -	} else /* lmp_esco_capable(hdev)) */ { -		struct hci_cp_accept_sync_conn_req cp; - -		bacpy(&cp.bdaddr, &conn->dst); -		cp.pkt_type = cpu_to_le16(conn->pkt_type); - -		cp.tx_bandwidth   = __constant_cpu_to_le32(0x00001f40); -		cp.rx_bandwidth   = __constant_cpu_to_le32(0x00001f40); -		cp.max_latency    = __constant_cpu_to_le16(0xffff); -		cp.content_format = cpu_to_le16(hdev->voice_setting); -		cp.retrans_effort = 0xff; - -		hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ, -			     sizeof(cp), &cp); -	} -} -  static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)  {  	struct hci_ev_conn_request *ev = (void *) skb->data; @@ -2202,7 +1819,6 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)  		} else {  			conn->state = BT_CONNECT2;  			
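
/* Editor's sketch, not part of this patch: hci_conn_put() is renamed to
 * hci_conn_drop() throughout these hunks. The usual pairing, shown with an
 * illustrative function name:
 */
static void example_use_conn(struct hci_conn *conn)
{
	hci_conn_hold(conn);	/* pin the connection */

	/* ... work with the connection ... */

	hci_conn_drop(conn);	/* release; may arm the disconnect timeout */
}
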
hci_proto_connect_cfm(conn, 0); -			hci_conn_put(conn);  		}  	} else {  		/* Connection rejected */ @@ -2309,14 +1925,14 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)  		} else {  			conn->state = BT_CONNECTED;  			hci_proto_connect_cfm(conn, ev->status); -			hci_conn_put(conn); +			hci_conn_drop(conn);  		}  	} else {  		hci_auth_cfm(conn, ev->status);  		hci_conn_hold(conn);  		conn->disc_timeout = HCI_DISCONN_TIMEOUT; -		hci_conn_put(conn); +		hci_conn_drop(conn);  	}  	if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags)) { @@ -2399,8 +2015,8 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)  		clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);  		if (ev->status && conn->state == BT_CONNECTED) { -			hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE); -			hci_conn_put(conn); +			hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); +			hci_conn_drop(conn);  			goto unlock;  		} @@ -2409,7 +2025,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)  				conn->state = BT_CONNECTED;  			hci_proto_connect_cfm(conn, ev->status); -			hci_conn_put(conn); +			hci_conn_drop(conn);  		} else  			hci_encrypt_cfm(conn, ev->status, ev->encrypt);  	} @@ -2456,7 +2072,7 @@ static void hci_remote_features_evt(struct hci_dev *hdev,  		goto unlock;  	if (!ev->status) -		memcpy(conn->features, ev->features, 8); +		memcpy(conn->features[0], ev->features, 8);  	if (conn->state != BT_CONFIG)  		goto unlock; @@ -2484,27 +2100,17 @@ static void hci_remote_features_evt(struct hci_dev *hdev,  	if (!hci_outgoing_auth_needed(hdev, conn)) {  		conn->state = BT_CONNECTED;  		hci_proto_connect_cfm(conn, ev->status); -		hci_conn_put(conn); +		hci_conn_drop(conn);  	}  unlock:  	hci_dev_unlock(hdev);  } -static void hci_remote_version_evt(struct hci_dev *hdev, struct sk_buff *skb) -{ -	BT_DBG("%s", hdev->name); -} - -static void hci_qos_setup_complete_evt(struct hci_dev *hdev, -				       struct sk_buff *skb) -{ -	BT_DBG("%s", hdev->name); -} -  static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)  {  	struct hci_ev_cmd_complete *ev = (void *) skb->data; +	u8 status = skb->data[sizeof(*ev)];  	__u16 opcode;  	skb_pull(skb, sizeof(*ev)); @@ -2588,10 +2194,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)  		hci_cc_write_voice_setting(hdev, skb);  		break; -	case HCI_OP_HOST_BUFFER_SIZE: -		hci_cc_host_buffer_size(hdev, skb); -		break; -  	case HCI_OP_WRITE_SSP_MODE:  		hci_cc_write_ssp_mode(hdev, skb);  		break; @@ -2620,46 +2222,42 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)  		hci_cc_read_bd_addr(hdev, skb);  		break; -	case HCI_OP_READ_DATA_BLOCK_SIZE: -		hci_cc_read_data_block_size(hdev, skb); +	case HCI_OP_READ_PAGE_SCAN_ACTIVITY: +		hci_cc_read_page_scan_activity(hdev, skb);  		break; -	case HCI_OP_WRITE_CA_TIMEOUT: -		hci_cc_write_ca_timeout(hdev, skb); +	case HCI_OP_WRITE_PAGE_SCAN_ACTIVITY: +		hci_cc_write_page_scan_activity(hdev, skb);  		break; -	case HCI_OP_READ_FLOW_CONTROL_MODE: -		hci_cc_read_flow_control_mode(hdev, skb); +	case HCI_OP_READ_PAGE_SCAN_TYPE: +		hci_cc_read_page_scan_type(hdev, skb);  		break; -	case HCI_OP_READ_LOCAL_AMP_INFO: -		hci_cc_read_local_amp_info(hdev, skb); +	case HCI_OP_WRITE_PAGE_SCAN_TYPE: +		hci_cc_write_page_scan_type(hdev, skb);  		break; -	case HCI_OP_READ_LOCAL_AMP_ASSOC: -		hci_cc_read_local_amp_assoc(hdev, skb); +	case HCI_OP_READ_DATA_BLOCK_SIZE: +		hci_cc_read_data_block_size(hdev, skb);  		break; -	case 
HCI_OP_DELETE_STORED_LINK_KEY: -		hci_cc_delete_stored_link_key(hdev, skb); +	case HCI_OP_READ_FLOW_CONTROL_MODE: +		hci_cc_read_flow_control_mode(hdev, skb);  		break; -	case HCI_OP_SET_EVENT_MASK: -		hci_cc_set_event_mask(hdev, skb); +	case HCI_OP_READ_LOCAL_AMP_INFO: +		hci_cc_read_local_amp_info(hdev, skb);  		break; -	case HCI_OP_WRITE_INQUIRY_MODE: -		hci_cc_write_inquiry_mode(hdev, skb); +	case HCI_OP_READ_LOCAL_AMP_ASSOC: +		hci_cc_read_local_amp_assoc(hdev, skb);  		break;  	case HCI_OP_READ_INQ_RSP_TX_POWER:  		hci_cc_read_inq_rsp_tx_power(hdev, skb);  		break; -	case HCI_OP_SET_EVENT_FLT: -		hci_cc_set_event_flt(hdev, skb); -		break; -  	case HCI_OP_PIN_CODE_REPLY:  		hci_cc_pin_code_reply(hdev, skb);  		break; @@ -2684,10 +2282,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)  		hci_cc_le_read_adv_tx_power(hdev, skb);  		break; -	case HCI_OP_LE_SET_EVENT_MASK: -		hci_cc_le_set_event_mask(hdev, skb); -		break; -  	case HCI_OP_USER_CONFIRM_REPLY:  		hci_cc_user_confirm_reply(hdev, skb);  		break; @@ -2720,14 +2314,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)  		hci_cc_le_read_white_list_size(hdev, skb);  		break; -	case HCI_OP_LE_LTK_REPLY: -		hci_cc_le_ltk_reply(hdev, skb); -		break; - -	case HCI_OP_LE_LTK_NEG_REPLY: -		hci_cc_le_ltk_neg_reply(hdev, skb); -		break; -  	case HCI_OP_LE_READ_SUPPORTED_STATES:  		hci_cc_le_read_supported_states(hdev, skb);  		break; @@ -2745,9 +2331,11 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)  		break;  	} -	if (ev->opcode != HCI_OP_NOP) +	if (opcode != HCI_OP_NOP)  		del_timer(&hdev->cmd_timer); +	hci_req_cmd_complete(hdev, opcode, status); +  	if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) {  		atomic_set(&hdev->cmd_cnt, 1);  		if (!skb_queue_empty(&hdev->cmd_q)) @@ -2817,10 +2405,6 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)  		hci_cs_le_create_conn(hdev, ev->status);  		break; -	case HCI_OP_LE_START_ENC: -		hci_cs_le_start_enc(hdev, ev->status); -		break; -  	case HCI_OP_CREATE_PHY_LINK:  		hci_cs_create_phylink(hdev, ev->status);  		break; @@ -2829,18 +2413,18 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb)  		hci_cs_accept_phylink(hdev, ev->status);  		break; -	case HCI_OP_CREATE_LOGICAL_LINK: -		hci_cs_create_logical_link(hdev, ev->status); -		break; -  	default:  		BT_DBG("%s opcode 0x%4.4x", hdev->name, opcode);  		break;  	} -	if (ev->opcode != HCI_OP_NOP) +	if (opcode != HCI_OP_NOP)  		del_timer(&hdev->cmd_timer); +	if (ev->status || +	    (hdev->sent_cmd && !bt_cb(hdev->sent_cmd)->req.event)) +		hci_req_cmd_complete(hdev, opcode, ev->status); +  	if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags)) {  		atomic_set(&hdev->cmd_cnt, 1);  		if (!skb_queue_empty(&hdev->cmd_q)) @@ -3056,7 +2640,7 @@ static void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb)  	if (conn->state == BT_CONNECTED) {  		hci_conn_hold(conn);  		conn->disc_timeout = HCI_PAIRING_TIMEOUT; -		hci_conn_put(conn); +		hci_conn_drop(conn);  	}  	if (!test_bit(HCI_PAIRABLE, &hdev->dev_flags)) @@ -3159,7 +2743,7 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)  		if (ev->key_type != HCI_LK_CHANGED_COMBINATION)  			conn->key_type = ev->key_type; -		hci_conn_put(conn); +		hci_conn_drop(conn);  	}  	if (test_bit(HCI_LINK_KEYS, &hdev->dev_flags)) @@ -3300,6 +2884,9 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev,  	if (!conn)  		goto unlock; +	
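
/* Editor's sketch, not part of this patch: via bt_cb(skb)->req.event, a
 * request can queue a command that is terminated by a specific event rather
 * than Command Complete; the command-status handling above then defers
 * completion until that event arrives. The opcode/event pairing below is
 * illustrative:
 */
static void example_queue_inquiry(struct hci_request *req)
{
	struct hci_cp_inquiry cp;

	memset(&cp, 0, sizeof(cp));
	cp.length = 0x08;

	/* Inquiry finishes with an Inquiry Complete event */
	hci_req_add_ev(req, HCI_OP_INQUIRY, sizeof(cp), &cp,
		       HCI_EV_INQUIRY_COMPLETE);
}
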
if (ev->page < HCI_MAX_PAGES) +		memcpy(conn->features[ev->page], ev->features, 8); +  	if (!ev->status && ev->page == 0x01) {  		struct inquiry_entry *ie; @@ -3307,8 +2894,19 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev,  		if (ie)  			ie->data.ssp_mode = (ev->features[0] & LMP_HOST_SSP); -		if (ev->features[0] & LMP_HOST_SSP) +		if (ev->features[0] & LMP_HOST_SSP) {  			set_bit(HCI_CONN_SSP_ENABLED, &conn->flags); +		} else { +			/* It is mandatory by the Bluetooth specification that +			 * Extended Inquiry Results are only used when Secure +			 * Simple Pairing is enabled, but some devices violate +			 * this. +			 * +			 * To make these devices work, the internal SSP +			 * enabled flag needs to be cleared if the remote host +			 * features do not indicate SSP support */ +			clear_bit(HCI_CONN_SSP_ENABLED, &conn->flags); +		}  	}  	if (conn->state != BT_CONFIG) @@ -3328,7 +2926,7 @@ static void hci_remote_ext_features_evt(struct hci_dev *hdev,  	if (!hci_outgoing_auth_needed(hdev, conn)) {  		conn->state = BT_CONNECTED;  		hci_proto_connect_cfm(conn, ev->status); -		hci_conn_put(conn); +		hci_conn_drop(conn);  	}  unlock: @@ -3362,7 +2960,6 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,  		conn->handle = __le16_to_cpu(ev->handle);  		conn->state  = BT_CONNECTED; -		hci_conn_hold_device(conn);  		hci_conn_add_sysfs(conn);  		break; @@ -3391,18 +2988,6 @@ unlock:  	hci_dev_unlock(hdev);  } -static void hci_sync_conn_changed_evt(struct hci_dev *hdev, struct sk_buff *skb) -{ -	BT_DBG("%s", hdev->name); -} - -static void hci_sniff_subrate_evt(struct hci_dev *hdev, struct sk_buff *skb) -{ -	struct hci_ev_sniff_subrate *ev = (void *) skb->data; - -	BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); -} -  static void hci_extended_inquiry_result_evt(struct hci_dev *hdev,  					    struct sk_buff *skb)  { @@ -3472,8 +3057,8 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev,  	clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);  	if (ev->status && conn->state == BT_CONNECTED) { -		hci_acl_disconn(conn, HCI_ERROR_AUTH_FAILURE); -		hci_conn_put(conn); +		hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE); +		hci_conn_drop(conn);  		goto unlock;  	} @@ -3482,13 +3067,13 @@ static void hci_key_refresh_complete_evt(struct hci_dev *hdev,  			conn->state = BT_CONNECTED;  		hci_proto_connect_cfm(conn, ev->status); -		hci_conn_put(conn); +		hci_conn_drop(conn);  	} else {  		hci_auth_cfm(conn, ev->status);  		hci_conn_hold(conn);  		conn->disc_timeout = HCI_DISCONN_TIMEOUT; -		hci_conn_put(conn); +		hci_conn_drop(conn);  	}  unlock: @@ -3749,7 +3334,7 @@ static void hci_simple_pair_complete_evt(struct hci_dev *hdev,  		mgmt_auth_failed(hdev, &conn->dst, conn->type, conn->dst_type,  				 ev->status); -	hci_conn_put(conn); +	hci_conn_drop(conn);  unlock:  	hci_dev_unlock(hdev); @@ -3760,11 +3345,16 @@ static void hci_remote_host_features_evt(struct hci_dev *hdev,  {  	struct hci_ev_remote_host_features *ev = (void *) skb->data;  	struct inquiry_entry *ie; +	struct hci_conn *conn;  	BT_DBG("%s", hdev->name);  	hci_dev_lock(hdev); +	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); +	if (conn) +		memcpy(conn->features[1], ev->features, 8); +  	ie = hci_inquiry_cache_lookup(hdev, &ev->bdaddr);  	if (ie)  		ie->data.ssp_mode = (ev->features[0] & LMP_HOST_SSP); @@ -3837,9 +3427,8 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev,  	hci_conn_hold(hcon);  	hcon->disc_timeout = HCI_DISCONN_TIMEOUT; -	hci_conn_put(hcon); +	hci_conn_drop(hcon); -	
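
/* Editor's sketch, not part of this patch: remote features are likewise
 * stored per page now (page 1 is filled in from the Remote Host Supported
 * Features event above). An illustrative SSP check against page 1:
 */
static bool example_remote_host_ssp(struct hci_conn *conn)
{
	return (conn->features[1][0] & LMP_HOST_SSP) != 0;
}
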
hci_conn_hold_device(hcon);  	hci_conn_add_sysfs(hcon);  	amp_physical_cfm(bredr_hcon, hcon); @@ -3973,7 +3562,6 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)  	conn->handle = __le16_to_cpu(ev->handle);  	conn->state = BT_CONNECTED; -	hci_conn_hold_device(conn);  	hci_conn_add_sysfs(conn);  	hci_proto_connect_cfm(conn, ev->status); @@ -4087,8 +3675,27 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)  	struct hci_event_hdr *hdr = (void *) skb->data;  	__u8 event = hdr->evt; +	hci_dev_lock(hdev); + +	/* Received events are (currently) only needed when a request is +	 * ongoing so avoid unnecessary memory allocation. +	 */ +	if (hdev->req_status == HCI_REQ_PEND) { +		kfree_skb(hdev->recv_evt); +		hdev->recv_evt = skb_clone(skb, GFP_KERNEL); +	} + +	hci_dev_unlock(hdev); +  	skb_pull(skb, HCI_EVENT_HDR_SIZE); +	if (hdev->sent_cmd && bt_cb(hdev->sent_cmd)->req.event == event) { +		struct hci_command_hdr *hdr = (void *) hdev->sent_cmd->data; +		u16 opcode = __le16_to_cpu(hdr->opcode); + +		hci_req_cmd_complete(hdev, opcode, 0); +	} +  	switch (event) {  	case HCI_EV_INQUIRY_COMPLETE:  		hci_inquiry_complete_evt(hdev, skb);  		break; @@ -4130,14 +3737,6 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)  		hci_remote_features_evt(hdev, skb);  		break; -	case HCI_EV_REMOTE_VERSION: -		hci_remote_version_evt(hdev, skb); -		break; - -	case HCI_EV_QOS_SETUP_COMPLETE: -		hci_qos_setup_complete_evt(hdev, skb); -		break; -  	case HCI_EV_CMD_COMPLETE:  		hci_cmd_complete_evt(hdev, skb);  		break; @@ -4194,14 +3793,6 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)  		hci_sync_conn_complete_evt(hdev, skb);  		break; -	case HCI_EV_SYNC_CONN_CHANGED: -		hci_sync_conn_changed_evt(hdev, skb); -		break; - -	case HCI_EV_SNIFF_SUBRATE: -		hci_sniff_subrate_evt(hdev, skb); -		break; -  	case HCI_EV_EXTENDED_INQUIRY_RESULT:  		hci_extended_inquiry_result_evt(hdev, skb);  		break; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 6a93614f2c4..aa4354fca77 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -854,6 +854,11 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock,  			skb_queue_tail(&hdev->raw_q, skb);  			queue_work(hdev->workqueue, &hdev->tx_work);  		} else { +			/* Stand-alone HCI commands must be flagged as +			 * single-command requests. 
+			 */ +			bt_cb(skb)->req.start = true; +  			skb_queue_tail(&hdev->cmd_q, skb);  			queue_work(hdev->workqueue, &hdev->cmd_work);  		} @@ -1121,8 +1126,6 @@ error:  void hci_sock_cleanup(void)  {  	bt_procfs_cleanup(&init_net, "hci"); -	if (bt_sock_unregister(BTPROTO_HCI) < 0) -		BT_ERR("HCI socket unregistration failed"); - +	bt_sock_unregister(BTPROTO_HCI);  	proto_unregister(&hci_sk_proto);  } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 23b4e242a31..7ad6ecf36f2 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -48,10 +48,10 @@ static ssize_t show_link_features(struct device *dev,  	struct hci_conn *conn = to_hci_conn(dev);  	return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n", -		       conn->features[0], conn->features[1], -		       conn->features[2], conn->features[3], -		       conn->features[4], conn->features[5], -		       conn->features[6], conn->features[7]); +		       conn->features[0][0], conn->features[0][1], +		       conn->features[0][2], conn->features[0][3], +		       conn->features[0][4], conn->features[0][5], +		       conn->features[0][6], conn->features[0][7]);  }  #define LINK_ATTR(_name, _mode, _show, _store) \ @@ -146,7 +146,6 @@ void hci_conn_del_sysfs(struct hci_conn *conn)  	}  	device_del(&conn->dev); -	put_device(&conn->dev);  	hci_dev_put(hdev);  } @@ -234,10 +233,10 @@ static ssize_t show_features(struct device *dev,  	struct hci_dev *hdev = to_hci_dev(dev);  	return sprintf(buf, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n", -		       hdev->features[0], hdev->features[1], -		       hdev->features[2], hdev->features[3], -		       hdev->features[4], hdev->features[5], -		       hdev->features[6], hdev->features[7]); +		       hdev->features[0][0], hdev->features[0][1], +		       hdev->features[0][2], hdev->features[0][3], +		       hdev->features[0][4], hdev->features[0][5], +		       hdev->features[0][6], hdev->features[0][7]);  }  static ssize_t show_manufacturer(struct device *dev, @@ -590,10 +589,8 @@ int __init bt_sysfs_init(void)  	bt_debugfs = debugfs_create_dir("bluetooth", NULL);  	bt_class = class_create(THIS_MODULE, "bluetooth"); -	if (IS_ERR(bt_class)) -		return PTR_ERR(bt_class); -	return 0; +	return PTR_RET(bt_class);  }  void bt_sysfs_cleanup(void) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index a7352ff3fd1..940f5acb669 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1,6 +1,7 @@  /*     HIDP implementation for Linux Bluetooth stack (BlueZ).     Copyright (C) 2003-2004 Marcel Holtmann <marcel@holtmann.org> +   Copyright (C) 2013 David Herrmann <dh.herrmann@gmail.com>     This program is free software; you can redistribute it and/or modify     it under the terms of the GNU General Public License version 2 as @@ -20,6 +21,7 @@     SOFTWARE IS DISCLAIMED.  
*/ +#include <linux/kref.h>  #include <linux/module.h>  #include <linux/file.h>  #include <linux/kthread.h> @@ -59,39 +61,20 @@ static unsigned char hidp_keycode[256] = {  static unsigned char hidp_mkeyspat[] = { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 }; -static struct hidp_session *__hidp_get_session(bdaddr_t *bdaddr) -{ -	struct hidp_session *session; - -	BT_DBG(""); - -	list_for_each_entry(session, &hidp_session_list, list) { -		if (!bacmp(bdaddr, &session->bdaddr)) -			return session; -	} - -	return NULL; -} - -static void __hidp_link_session(struct hidp_session *session) -{ -	list_add(&session->list, &hidp_session_list); -} - -static void __hidp_unlink_session(struct hidp_session *session) -{ -	hci_conn_put_device(session->conn); - -	list_del(&session->list); -} +static int hidp_session_probe(struct l2cap_conn *conn, +			      struct l2cap_user *user); +static void hidp_session_remove(struct l2cap_conn *conn, +				struct l2cap_user *user); +static int hidp_session_thread(void *arg); +static void hidp_session_terminate(struct hidp_session *s); -static void __hidp_copy_session(struct hidp_session *session, struct hidp_conninfo *ci) +static void hidp_copy_session(struct hidp_session *session, struct hidp_conninfo *ci)  {  	memset(ci, 0, sizeof(*ci));  	bacpy(&ci->bdaddr, &session->bdaddr);  	ci->flags = session->flags; -	ci->state = session->state; +	ci->state = BT_CONNECTED;  	ci->vendor  = 0x0000;  	ci->product = 0x0000; @@ -115,58 +98,80 @@ static void __hidp_copy_session(struct hidp_session *session, struct hidp_connin  	}  } -static int hidp_queue_event(struct hidp_session *session, struct input_dev *dev, -				unsigned int type, unsigned int code, int value) +/* assemble skb, queue message on @transmit and wake up the session thread */ +static int hidp_send_message(struct hidp_session *session, struct socket *sock, +			     struct sk_buff_head *transmit, unsigned char hdr, +			     const unsigned char *data, int size)  { -	unsigned char newleds;  	struct sk_buff *skb; +	struct sock *sk = sock->sk; -	BT_DBG("session %p type %d code %d value %d", session, type, code, value); - -	if (type != EV_LED) -		return -1; - -	newleds = (!!test_bit(LED_KANA,    dev->led) << 3) | -		  (!!test_bit(LED_COMPOSE, dev->led) << 3) | -		  (!!test_bit(LED_SCROLLL, dev->led) << 2) | -		  (!!test_bit(LED_CAPSL,   dev->led) << 1) | -		  (!!test_bit(LED_NUML,    dev->led)); - -	if (session->leds == newleds) -		return 0; +	BT_DBG("session %p data %p size %d", session, data, size); -	session->leds = newleds; +	if (atomic_read(&session->terminate)) +		return -EIO; -	skb = alloc_skb(3, GFP_ATOMIC); +	skb = alloc_skb(size + 1, GFP_ATOMIC);  	if (!skb) {  		BT_ERR("Can't allocate memory for new frame");  		return -ENOMEM;  	} -	*skb_put(skb, 1) = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT; -	*skb_put(skb, 1) = 0x01; -	*skb_put(skb, 1) = newleds; - -	skb_queue_tail(&session->intr_transmit, skb); +	*skb_put(skb, 1) = hdr; +	if (data && size > 0) +		memcpy(skb_put(skb, size), data, size); -	hidp_schedule(session); +	skb_queue_tail(transmit, skb); +	wake_up_interruptible(sk_sleep(sk));  	return 0;  } -static int hidp_hidinput_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) +static int hidp_send_ctrl_message(struct hidp_session *session, +				  unsigned char hdr, const unsigned char *data, +				  int size)  { -	struct hid_device *hid = input_get_drvdata(dev); -	struct hidp_session *session = hid->driver_data; +	return hidp_send_message(session, session->ctrl_sock, +				 &session->ctrl_transmit, 
hdr, data, size); +} -	return hidp_queue_event(session, dev, type, code, value); +static int hidp_send_intr_message(struct hidp_session *session, +				  unsigned char hdr, const unsigned char *data, +				  int size) +{ +	return hidp_send_message(session, session->intr_sock, +				 &session->intr_transmit, hdr, data, size);  } -static int hidp_input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) +static int hidp_input_event(struct input_dev *dev, unsigned int type, +			    unsigned int code, int value)  {  	struct hidp_session *session = input_get_drvdata(dev); +	unsigned char newleds; +	unsigned char hdr, data[2]; + +	BT_DBG("session %p type %d code %d value %d", +	       session, type, code, value); + +	if (type != EV_LED) +		return -1; -	return hidp_queue_event(session, dev, type, code, value); +	newleds = (!!test_bit(LED_KANA,    dev->led) << 3) | +		  (!!test_bit(LED_COMPOSE, dev->led) << 3) | +		  (!!test_bit(LED_SCROLLL, dev->led) << 2) | +		  (!!test_bit(LED_CAPSL,   dev->led) << 1) | +		  (!!test_bit(LED_NUML,    dev->led)); + +	if (session->leds == newleds) +		return 0; + +	session->leds = newleds; + +	hdr = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT; +	data[0] = 0x01; +	data[1] = newleds; + +	return hidp_send_intr_message(session, hdr, data, 2);  }  static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb) @@ -224,71 +229,9 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb)  	input_sync(dev);  } -static int __hidp_send_ctrl_message(struct hidp_session *session, -				    unsigned char hdr, unsigned char *data, -				    int size) -{ -	struct sk_buff *skb; - -	BT_DBG("session %p data %p size %d", session, data, size); - -	if (atomic_read(&session->terminate)) -		return -EIO; - -	skb = alloc_skb(size + 1, GFP_ATOMIC); -	if (!skb) { -		BT_ERR("Can't allocate memory for new frame"); -		return -ENOMEM; -	} - -	*skb_put(skb, 1) = hdr; -	if (data && size > 0) -		memcpy(skb_put(skb, size), data, size); - -	skb_queue_tail(&session->ctrl_transmit, skb); - -	return 0; -} - -static int hidp_send_ctrl_message(struct hidp_session *session, -			unsigned char hdr, unsigned char *data, int size) -{ -	int err; - -	err = __hidp_send_ctrl_message(session, hdr, data, size); - -	hidp_schedule(session); - -	return err; -} - -static int hidp_queue_report(struct hidp_session *session, -				unsigned char *data, int size) -{ -	struct sk_buff *skb; - -	BT_DBG("session %p hid %p data %p size %d", session, session->hid, data, size); - -	skb = alloc_skb(size + 1, GFP_ATOMIC); -	if (!skb) { -		BT_ERR("Can't allocate memory for new frame"); -		return -ENOMEM; -	} - -	*skb_put(skb, 1) = 0xa2; -	if (size > 0) -		memcpy(skb_put(skb, size), data, size); - -	skb_queue_tail(&session->intr_transmit, skb); - -	hidp_schedule(session); - -	return 0; -} -  static int hidp_send_report(struct hidp_session *session, struct hid_report *report)  { -	unsigned char buf[32]; +	unsigned char buf[32], hdr;  	int rsize;  	rsize = ((report->size - 1) >> 3) + 1 + (report->id > 0); @@ -296,8 +239,9 @@ static int hidp_send_report(struct hidp_session *session, struct hid_report *rep  		return -EIO;  	hid_output_report(report, buf); +	hdr = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT; -	return hidp_queue_report(session, buf, rsize); +	return hidp_send_intr_message(session, hdr, buf, rsize);  }  static int hidp_get_raw_report(struct hid_device *hid, @@ -311,6 +255,9 @@ static int hidp_get_raw_report(struct hid_device *hid,  	int numbered_reports = 
hid->report_enum[report_type].numbered;  	int ret; +	if (atomic_read(&session->terminate)) +		return -EIO; +  	switch (report_type) {  	case HID_FEATURE_REPORT:  		report_type = HIDP_TRANS_GET_REPORT | HIDP_DATA_RTYPE_FEATURE; @@ -333,17 +280,19 @@ static int hidp_get_raw_report(struct hid_device *hid,  	session->waiting_report_number = numbered_reports ? report_number : -1;  	set_bit(HIDP_WAITING_FOR_RETURN, &session->flags);  	data[0] = report_number; -	ret = hidp_send_ctrl_message(hid->driver_data, report_type, data, 1); +	ret = hidp_send_ctrl_message(session, report_type, data, 1);  	if (ret)  		goto err;  	/* Wait for the return of the report. The returned report  	   gets put in session->report_return.  */ -	while (test_bit(HIDP_WAITING_FOR_RETURN, &session->flags)) { +	while (test_bit(HIDP_WAITING_FOR_RETURN, &session->flags) && +	       !atomic_read(&session->terminate)) {  		int res;  		res = wait_event_interruptible_timeout(session->report_queue, -			!test_bit(HIDP_WAITING_FOR_RETURN, &session->flags), +			!test_bit(HIDP_WAITING_FOR_RETURN, &session->flags) +				|| atomic_read(&session->terminate),  			5*HZ);  		if (res == 0) {  			/* timeout */ @@ -386,14 +335,11 @@ static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, s  	struct hidp_session *session = hid->driver_data;  	int ret; -	switch (report_type) { -	case HID_FEATURE_REPORT: -		report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE; -		break; -	case HID_OUTPUT_REPORT: -		report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_OUPUT; -		break; -	default: +	if (report_type == HID_OUTPUT_REPORT) { +		report_type = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT; +		return hidp_send_intr_message(session, report_type, +					      data, count); +	} else if (report_type != HID_FEATURE_REPORT) {  		return -EINVAL;  	} @@ -402,17 +348,19 @@ static int hidp_output_raw_report(struct hid_device *hid, unsigned char *data, s  	/* Set up our wait, and send the report request to the device. */  	set_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags); -	ret = hidp_send_ctrl_message(hid->driver_data, report_type, data, -									count); +	report_type = HIDP_TRANS_SET_REPORT | HIDP_DATA_RTYPE_FEATURE; +	ret = hidp_send_ctrl_message(session, report_type, data, count);  	if (ret)  		goto err;  	/* Wait for the ACK from the device. */ -	while (test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags)) { +	while (test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags) && +	       !atomic_read(&session->terminate)) {  		int res;  		res = wait_event_interruptible_timeout(session->report_queue, -			!test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags), +			!test_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags) +				|| atomic_read(&session->terminate),  			10*HZ);  		if (res == 0) {  			/* timeout */ @@ -443,8 +391,7 @@ static void hidp_idle_timeout(unsigned long arg)  {  	struct hidp_session *session = (struct hidp_session *) arg; -	atomic_inc(&session->terminate); -	wake_up_process(session->task); +	hidp_session_terminate(session);  }  static void hidp_set_timer(struct hidp_session *session) @@ -487,12 +434,12 @@ static void hidp_process_handshake(struct hidp_session *session,  	case HIDP_HSHK_ERR_FATAL:  		/* Device requests a reboot, as this is the only way this error  		 * can be recovered. 
*/ -		__hidp_send_ctrl_message(session, +		hidp_send_ctrl_message(session,  			HIDP_TRANS_HID_CONTROL | HIDP_CTRL_SOFT_RESET, NULL, 0);  		break;  	default: -		__hidp_send_ctrl_message(session, +		hidp_send_ctrl_message(session,  			HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0);  		break;  	} @@ -512,8 +459,7 @@ static void hidp_process_hid_control(struct hidp_session *session,  		skb_queue_purge(&session->ctrl_transmit);  		skb_queue_purge(&session->intr_transmit); -		atomic_inc(&session->terminate); -		wake_up_process(current); +		hidp_session_terminate(session);  	}  } @@ -541,7 +487,7 @@ static int hidp_process_data(struct hidp_session *session, struct sk_buff *skb,  		break;  	default: -		__hidp_send_ctrl_message(session, +		hidp_send_ctrl_message(session,  			HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_INVALID_PARAMETER, NULL, 0);  	} @@ -588,7 +534,7 @@ static void hidp_recv_ctrl_frame(struct hidp_session *session,  		break;  	default: -		__hidp_send_ctrl_message(session, +		hidp_send_ctrl_message(session,  			HIDP_TRANS_HANDSHAKE | HIDP_HSHK_ERR_UNSUPPORTED_REQUEST, NULL, 0);  		break;  	} @@ -639,32 +585,24 @@ static int hidp_send_frame(struct socket *sock, unsigned char *data, int len)  	return kernel_sendmsg(sock, &msg, &iv, 1, len);  } -static void hidp_process_intr_transmit(struct hidp_session *session) +/* dequeue message from @transmit and send via @sock */ +static void hidp_process_transmit(struct hidp_session *session, +				  struct sk_buff_head *transmit, +				  struct socket *sock)  {  	struct sk_buff *skb; +	int ret;  	BT_DBG("session %p", session); -	while ((skb = skb_dequeue(&session->intr_transmit))) { -		if (hidp_send_frame(session->intr_sock, skb->data, skb->len) < 0) { -			skb_queue_head(&session->intr_transmit, skb); +	while ((skb = skb_dequeue(transmit))) { +		ret = hidp_send_frame(sock, skb->data, skb->len); +		if (ret == -EAGAIN) { +			skb_queue_head(transmit, skb);  			break; -		} - -		hidp_set_timer(session); -		kfree_skb(skb); -	} -} - -static void hidp_process_ctrl_transmit(struct hidp_session *session) -{ -	struct sk_buff *skb; - -	BT_DBG("session %p", session); - -	while ((skb = skb_dequeue(&session->ctrl_transmit))) { -		if (hidp_send_frame(session->ctrl_sock, skb->data, skb->len) < 0) { -			skb_queue_head(&session->ctrl_transmit, skb); +		} else if (ret < 0) { +			hidp_session_terminate(session); +			kfree_skb(skb);  			break;  		} @@ -673,121 +611,6 @@ static void hidp_process_ctrl_transmit(struct hidp_session *session)  	}  } -static int hidp_session(void *arg) -{ -	struct hidp_session *session = arg; -	struct sock *ctrl_sk = session->ctrl_sock->sk; -	struct sock *intr_sk = session->intr_sock->sk; -	struct sk_buff *skb; -	wait_queue_t ctrl_wait, intr_wait; - -	BT_DBG("session %p", session); - -	__module_get(THIS_MODULE); -	set_user_nice(current, -15); - -	init_waitqueue_entry(&ctrl_wait, current); -	init_waitqueue_entry(&intr_wait, current); -	add_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait); -	add_wait_queue(sk_sleep(intr_sk), &intr_wait); -	session->waiting_for_startup = 0; -	wake_up_interruptible(&session->startup_queue); -	set_current_state(TASK_INTERRUPTIBLE); -	while (!atomic_read(&session->terminate)) { -		if (ctrl_sk->sk_state != BT_CONNECTED || -				intr_sk->sk_state != BT_CONNECTED) -			break; - -		while ((skb = skb_dequeue(&intr_sk->sk_receive_queue))) { -			skb_orphan(skb); -			if (!skb_linearize(skb)) -				hidp_recv_intr_frame(session, skb); -			else -				kfree_skb(skb); -		} - -		hidp_process_intr_transmit(session); - -		while 
((skb = skb_dequeue(&ctrl_sk->sk_receive_queue))) { -			skb_orphan(skb); -			if (!skb_linearize(skb)) -				hidp_recv_ctrl_frame(session, skb); -			else -				kfree_skb(skb); -		} - -		hidp_process_ctrl_transmit(session); - -		schedule(); -		set_current_state(TASK_INTERRUPTIBLE); -	} -	set_current_state(TASK_RUNNING); -	remove_wait_queue(sk_sleep(intr_sk), &intr_wait); -	remove_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait); - -	clear_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags); -	clear_bit(HIDP_WAITING_FOR_RETURN, &session->flags); -	wake_up_interruptible(&session->report_queue); - -	down_write(&hidp_session_sem); - -	hidp_del_timer(session); - -	if (session->input) { -		input_unregister_device(session->input); -		session->input = NULL; -	} - -	if (session->hid) { -		hid_destroy_device(session->hid); -		session->hid = NULL; -	} - -	/* Wakeup user-space polling for socket errors */ -	session->intr_sock->sk->sk_err = EUNATCH; -	session->ctrl_sock->sk->sk_err = EUNATCH; - -	hidp_schedule(session); - -	fput(session->intr_sock->file); - -	wait_event_timeout(*(sk_sleep(ctrl_sk)), -		(ctrl_sk->sk_state == BT_CLOSED), msecs_to_jiffies(500)); - -	fput(session->ctrl_sock->file); - -	__hidp_unlink_session(session); - -	up_write(&hidp_session_sem); - -	kfree(session->rd_data); -	kfree(session); -	module_put_and_exit(0); -	return 0; -} - -static struct hci_conn *hidp_get_connection(struct hidp_session *session) -{ -	bdaddr_t *src = &bt_sk(session->ctrl_sock->sk)->src; -	bdaddr_t *dst = &bt_sk(session->ctrl_sock->sk)->dst; -	struct hci_conn *conn; -	struct hci_dev *hdev; - -	hdev = hci_get_route(dst, src); -	if (!hdev) -		return NULL; - -	hci_dev_lock(hdev); -	conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); -	if (conn) -		hci_conn_hold_device(conn); -	hci_dev_unlock(hdev); - -	hci_dev_put(hdev); - -	return conn; -} -  static int hidp_setup_input(struct hidp_session *session,  				struct hidp_connadd_req *req)  { @@ -835,7 +658,7 @@ static int hidp_setup_input(struct hidp_session *session,  		input->relbit[0] |= BIT_MASK(REL_WHEEL);  	} -	input->dev.parent = &session->conn->dev; +	input->dev.parent = &session->conn->hcon->dev;  	input->event = hidp_input_event; @@ -894,7 +717,6 @@ static struct hid_ll_driver hidp_hid_driver = {  	.stop = hidp_stop,  	.open  = hidp_open,  	.close = hidp_close, -	.hidinput_input_event = hidp_hidinput_event,  };  /* This function sets up the hid device. 
It does not add it @@ -939,7 +761,7 @@ static int hidp_setup_hid(struct hidp_session *session,  	snprintf(hid->uniq, sizeof(hid->uniq), "%pMR",  		 &bt_sk(session->ctrl_sock->sk)->dst); -	hid->dev.parent = &session->conn->dev; +	hid->dev.parent = &session->conn->hcon->dev;  	hid->ll_driver = &hidp_hid_driver;  	hid->hid_get_raw_report = hidp_get_raw_report; @@ -961,80 +783,217 @@ fault:  	return err;  } -int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock) +/* initialize session devices */ +static int hidp_session_dev_init(struct hidp_session *session, +				 struct hidp_connadd_req *req)  { -	struct hidp_session *session, *s; -	int vendor, product; -	int err; +	int ret; -	BT_DBG(""); +	if (req->rd_size > 0) { +		ret = hidp_setup_hid(session, req); +		if (ret && ret != -ENODEV) +			return ret; +	} -	if (bacmp(&bt_sk(ctrl_sock->sk)->src, &bt_sk(intr_sock->sk)->src) || -			bacmp(&bt_sk(ctrl_sock->sk)->dst, &bt_sk(intr_sock->sk)->dst)) -		return -ENOTUNIQ; +	if (!session->hid) { +		ret = hidp_setup_input(session, req); +		if (ret < 0) +			return ret; +	} -	BT_DBG("rd_data %p rd_size %d", req->rd_data, req->rd_size); +	return 0; +} -	down_write(&hidp_session_sem); +/* destroy session devices */ +static void hidp_session_dev_destroy(struct hidp_session *session) +{ +	if (session->hid) +		put_device(&session->hid->dev); +	else if (session->input) +		input_put_device(session->input); -	s = __hidp_get_session(&bt_sk(ctrl_sock->sk)->dst); -	if (s && s->state == BT_CONNECTED) { -		up_write(&hidp_session_sem); -		return -EEXIST; -	} +	kfree(session->rd_data); +	session->rd_data = NULL; +} -	session = kzalloc(sizeof(struct hidp_session), GFP_KERNEL); -	if (!session) { -		up_write(&hidp_session_sem); -		return -ENOMEM; +/* add HID/input devices to their underlying bus systems */ +static int hidp_session_dev_add(struct hidp_session *session) +{ +	int ret; + +	/* Both HID and input systems drop a ref-count when unregistering the +	 * device but they don't take a ref-count when registering them. Work +	 * around this by explicitly taking a refcount during registration +	 * which is dropped automatically by unregistering the devices. */ + +	if (session->hid) { +		ret = hid_add_device(session->hid); +		if (ret) +			return ret; +		get_device(&session->hid->dev); +	} else if (session->input) { +		ret = input_register_device(session->input); +		if (ret) +			return ret; +		input_get_device(session->input);  	} -	bacpy(&session->bdaddr, &bt_sk(ctrl_sock->sk)->dst); +	return 0; +} -	session->ctrl_mtu = min_t(uint, l2cap_pi(ctrl_sock->sk)->chan->omtu, -					l2cap_pi(ctrl_sock->sk)->chan->imtu); -	session->intr_mtu = min_t(uint, l2cap_pi(intr_sock->sk)->chan->omtu, -					l2cap_pi(intr_sock->sk)->chan->imtu); +/* remove HID/input devices from their bus systems */ +static void hidp_session_dev_del(struct hidp_session *session) +{ +	if (session->hid) +		hid_destroy_device(session->hid); +	else if (session->input) +		input_unregister_device(session->input); +} -	BT_DBG("ctrl mtu %d intr mtu %d", session->ctrl_mtu, session->intr_mtu); +/* + * Create new session object + * Allocate session object, initialize static fields, copy input data into the + * object and take a reference to all sub-objects. + * This returns 0 on success and puts a pointer to the new session object in + * \out. Otherwise, an error code is returned. + * The new session object has an initial ref-count of 1. 
+ */ +static int hidp_session_new(struct hidp_session **out, const bdaddr_t *bdaddr, +			    struct socket *ctrl_sock, +			    struct socket *intr_sock, +			    struct hidp_connadd_req *req, +			    struct l2cap_conn *conn) +{ +	struct hidp_session *session; +	int ret; +	struct bt_sock *ctrl, *intr; -	session->ctrl_sock = ctrl_sock; -	session->intr_sock = intr_sock; -	session->state     = BT_CONNECTED; +	ctrl = bt_sk(ctrl_sock->sk); +	intr = bt_sk(intr_sock->sk); -	session->conn = hidp_get_connection(session); -	if (!session->conn) { -		err = -ENOTCONN; -		goto failed; -	} +	session = kzalloc(sizeof(*session), GFP_KERNEL); +	if (!session) +		return -ENOMEM; -	setup_timer(&session->timer, hidp_idle_timeout, (unsigned long)session); +	/* object and runtime management */ +	kref_init(&session->ref); +	atomic_set(&session->state, HIDP_SESSION_IDLING); +	init_waitqueue_head(&session->state_queue); +	session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID); +	/* connection management */ +	bacpy(&session->bdaddr, bdaddr); +	session->conn = conn; +	session->user.probe = hidp_session_probe; +	session->user.remove = hidp_session_remove; +	session->ctrl_sock = ctrl_sock; +	session->intr_sock = intr_sock;  	skb_queue_head_init(&session->ctrl_transmit);  	skb_queue_head_init(&session->intr_transmit); +	session->ctrl_mtu = min_t(uint, l2cap_pi(ctrl)->chan->omtu, +					l2cap_pi(ctrl)->chan->imtu); +	session->intr_mtu = min_t(uint, l2cap_pi(intr)->chan->omtu, +					l2cap_pi(intr)->chan->imtu); +	session->idle_to = req->idle_to; + +	/* device management */ +	setup_timer(&session->timer, hidp_idle_timeout, +		    (unsigned long)session); +	/* session data */  	mutex_init(&session->report_mutex);  	init_waitqueue_head(&session->report_queue); -	init_waitqueue_head(&session->startup_queue); -	session->waiting_for_startup = 1; -	session->flags   = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID); -	session->idle_to = req->idle_to; -	__hidp_link_session(session); +	ret = hidp_session_dev_init(session, req); +	if (ret) +		goto err_free; -	if (req->rd_size > 0) { -		err = hidp_setup_hid(session, req); -		if (err && err != -ENODEV) -			goto purge; -	} +	l2cap_conn_get(session->conn); +	get_file(session->intr_sock->file); +	get_file(session->ctrl_sock->file); +	*out = session; +	return 0; -	if (!session->hid) { -		err = hidp_setup_input(session, req); -		if (err < 0) -			goto purge; +err_free: +	kfree(session); +	return ret; +} + +/* increase ref-count of the given session by one */ +static void hidp_session_get(struct hidp_session *session) +{ +	kref_get(&session->ref); +} + +/* release callback */ +static void session_free(struct kref *ref) +{ +	struct hidp_session *session = container_of(ref, struct hidp_session, +						    ref); + +	hidp_session_dev_destroy(session); +	skb_queue_purge(&session->ctrl_transmit); +	skb_queue_purge(&session->intr_transmit); +	fput(session->intr_sock->file); +	fput(session->ctrl_sock->file); +	l2cap_conn_put(session->conn); +	kfree(session); +} + +/* decrease ref-count of the given session by one */ +static void hidp_session_put(struct hidp_session *session) +{ +	kref_put(&session->ref, session_free); +} + +/* + * Search the list of active sessions for a session with target address + * \bdaddr. You must hold at least a read-lock on \hidp_session_sem. As long as + * you do not release this lock, the session objects cannot vanish and you can + * safely take a reference to the session yourself. 
+ */
+static struct hidp_session *__hidp_session_find(const bdaddr_t *bdaddr)
+{
+	struct hidp_session *session;
+
+	list_for_each_entry(session, &hidp_session_list, list) {
+		if (!bacmp(bdaddr, &session->bdaddr))
+			return session;
 	}
-	hidp_set_timer(session);
+	return NULL;
+}
+
+/*
+ * Same as __hidp_session_find() but no locks must be held. This also takes a
+ * reference of the returned session (if non-NULL) so you must drop this
+ * reference if you no longer use the object.
+ */
+static struct hidp_session *hidp_session_find(const bdaddr_t *bdaddr)
+{
+	struct hidp_session *session;
+
+	down_read(&hidp_session_sem);
+
+	session = __hidp_session_find(bdaddr);
+	if (session)
+		hidp_session_get(session);
+
+	up_read(&hidp_session_sem);
+
+	return session;
+}
+
+/*
+ * Start session synchronously
+ * This starts a session thread and waits until initialization
+ * is done or returns an error if it couldn't be started.
+ * If this returns 0 the session thread is up and running. You must call
+ * hidp_session_stop_sync() before deleting any runtime resources.
+ */
+static int hidp_session_start_sync(struct hidp_session *session)
+{
+	unsigned int vendor, product;
 	if (session->hid) {
 		vendor  = session->hid->vendor;
@@ -1047,98 +1006,320 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock,
 		product = 0x0000;
 	}
-	session->task = kthread_run(hidp_session, session, "khidpd_%04x%04x",
-							vendor, product);
-	if (IS_ERR(session->task)) {
-		err = PTR_ERR(session->task);
-		goto unlink;
-	}
+	session->task = kthread_run(hidp_session_thread, session,
+				    "khidpd_%04x%04x", vendor, product);
+	if (IS_ERR(session->task))
+		return PTR_ERR(session->task);
-	while (session->waiting_for_startup) {
-		wait_event_interruptible(session->startup_queue,
-			!session->waiting_for_startup);
-	}
+	while (atomic_read(&session->state) <= HIDP_SESSION_IDLING)
+		wait_event(session->state_queue,
+			   atomic_read(&session->state) > HIDP_SESSION_IDLING);
-	if (session->hid)
-		err = hid_add_device(session->hid);
-	else
-		err = input_register_device(session->input);
+	return 0;
+}
-	if (err < 0) {
-		atomic_inc(&session->terminate);
-		wake_up_process(session->task);
-		up_write(&hidp_session_sem);
-		return err;
-	}
+/*
+ * Terminate session thread
+ * Wake up session thread and notify it to stop. This is asynchronous and
+ * returns immediately. Call this whenever a runtime error occurs and you want
+ * the session to stop.
+ * Note: wake_up_process() performs any necessary memory-barriers for us.
+ */
+static void hidp_session_terminate(struct hidp_session *session)
+{
+	atomic_inc(&session->terminate);
+	wake_up_process(session->task);
+}
-	if (session->input) {
-		hidp_send_ctrl_message(session,
-			HIDP_TRANS_SET_PROTOCOL | HIDP_PROTO_BOOT, NULL, 0);
-		session->flags |= (1 << HIDP_BOOT_PROTOCOL_MODE);
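
To make the lookup discipline described above concrete, here is a minimal caller-side sketch (illustration only, not part of the patch; it mirrors what hidp_get_conninfo() does later in this diff):

	struct hidp_session *session;

	session = hidp_session_find(bdaddr);	/* lookup takes a reference */
	if (session) {
		/* ... safely use the session object here ... */
		hidp_session_put(session);	/* drop the lookup reference */
	}
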
+/*
+ * Probe HIDP session
+ * This is called from the l2cap_conn core when our l2cap_user object is bound
+ * to the hci-connection. We get the session via the \user object and can now
+ * start the session thread, register the HID/input devices and link it into
+ * the global session list.
+ * The global session-list owns its own reference to the session object so you
+ * can drop your own reference after registering the l2cap_user object.
+ */
+static int hidp_session_probe(struct l2cap_conn *conn,
+			      struct l2cap_user *user)
+{
+	struct hidp_session *session = container_of(user,
+						    struct hidp_session,
+						    user);
+	struct hidp_session *s;
+	int ret;
-		session->leds = 0xff;
-		hidp_input_event(session->input, EV_LED, 0, 0);
+	down_write(&hidp_session_sem);
+
+	/* check that no other session for this device exists */
+	s = __hidp_session_find(&session->bdaddr);
+	if (s) {
+		ret = -EEXIST;
+		goto out_unlock;
 	}
+	ret = hidp_session_start_sync(session);
+	if (ret)
+		goto out_unlock;
+
+	ret = hidp_session_dev_add(session);
+	if (ret)
+		goto out_stop;
+
+	hidp_session_get(session);
+	list_add(&session->list, &hidp_session_list);
+	ret = 0;
+	goto out_unlock;
+
+out_stop:
+	hidp_session_terminate(session);
+out_unlock:
 	up_write(&hidp_session_sem);
-	return 0;
+	return ret;
+}
-unlink:
-	hidp_del_timer(session);
+/*
+ * Remove HIDP session
+ * Called from the l2cap_conn core when either we explicitly unregistered
+ * the l2cap_user object or if the underlying connection is shut down.
+ * We signal the hidp-session thread to shut down, unregister the HID/input
+ * devices and unlink the session from the global list.
+ * This drops the reference to the session that is owned by the global
+ * session-list.
+ * Note: We _must_ not synchronously wait for the session-thread to shut down.
+ * This is because the session-thread might be waiting for an HCI lock that is
+ * held while we are called. Therefore, we only unregister the devices and
+ * notify the session-thread to terminate. The thread itself owns a reference
+ * to the session object so it can safely shut down.
+ */
+static void hidp_session_remove(struct l2cap_conn *conn,
+				struct l2cap_user *user)
+{
+	struct hidp_session *session = container_of(user,
+						    struct hidp_session,
+						    user);
-	if (session->input) {
-		input_unregister_device(session->input);
-		session->input = NULL;
-	}
+	down_write(&hidp_session_sem);
-	if (session->hid) {
-		hid_destroy_device(session->hid);
-		session->hid = NULL;
+	hidp_session_terminate(session);
+	hidp_session_dev_del(session);
+	list_del(&session->list);
+
+	up_write(&hidp_session_sem);
+
+	hidp_session_put(session);
+}
+
+/*
+ * Session Worker
+ * This performs the actual main-loop of the HIDP worker. We first check
+ * whether the underlying connection is still alive, then parse all pending
+ * messages and finally send all outstanding messages.
+ */
+static void hidp_session_run(struct hidp_session *session)
+{
+	struct sock *ctrl_sk = session->ctrl_sock->sk;
+	struct sock *intr_sk = session->intr_sock->sk;
+	struct sk_buff *skb;
+
+	for (;;) {
+		/*
+		 * This thread can be woken up two ways:
+		 *  - You call hidp_session_terminate() which sets the
+		 *    session->terminate flag and wakes this thread up.
+		 *  - Via modifying the socket state of ctrl/intr_sock. This
+		 *    thread is woken up by ->sk_state_changed().
+		 *
+		 * Note: set_current_state() performs any necessary
+		 * memory-barriers for us.
+		 */
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (atomic_read(&session->terminate))
+			break;
+
+		if (ctrl_sk->sk_state != BT_CONNECTED ||
+		    intr_sk->sk_state != BT_CONNECTED)
+			break;
+
+		/* parse incoming intr-skbs */
+		while ((skb = skb_dequeue(&intr_sk->sk_receive_queue))) {
+			skb_orphan(skb);
+			if (!skb_linearize(skb))
+				hidp_recv_intr_frame(session, skb);
+			else
+				kfree_skb(skb);
+		}
+
+		/* send pending intr-skbs */
+		hidp_process_transmit(session, &session->intr_transmit,
+				      session->intr_sock);
+
+		/* parse incoming ctrl-skbs */
+		while ((skb = skb_dequeue(&ctrl_sk->sk_receive_queue))) {
+			skb_orphan(skb);
+			if (!skb_linearize(skb))
+				hidp_recv_ctrl_frame(session, skb);
+			else
+				kfree_skb(skb);
+		}
+
+		/* send pending ctrl-skbs */
+		hidp_process_transmit(session, &session->ctrl_transmit,
+				      session->ctrl_sock);
+
+		schedule();
 	}
-	kfree(session->rd_data);
-	session->rd_data = NULL;
+	atomic_inc(&session->terminate);
+	set_current_state(TASK_RUNNING);
+}
-purge:
-	__hidp_unlink_session(session);
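
The loop above follows the canonical kernel sleep/wake pattern. As a hedged, self-contained illustration of why set_current_state() comes first (should_stop() and do_pending_work() are invented placeholders, not part of the patch):

	for (;;) {
		/* Mark ourselves sleeping *before* checking the wake-up
		 * conditions; a waker firing in between merely leaves us
		 * runnable, so the following schedule() returns immediately
		 * instead of losing the wake-up. */
		set_current_state(TASK_INTERRUPTIBLE);

		if (should_stop())
			break;

		do_pending_work();

		schedule();
	}
	set_current_state(TASK_RUNNING);
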
+/*
+ * HIDP session thread
+ * This thread runs the I/O for a single HIDP session. Startup is synchronous
+ * which allows us to take references to ourselves here instead of doing that
+ * in the caller.
+ * When we are ready to run we notify the caller and call hidp_session_run().
+ */
+static int hidp_session_thread(void *arg)
+{
+	struct hidp_session *session = arg;
+	wait_queue_t ctrl_wait, intr_wait;
-	skb_queue_purge(&session->ctrl_transmit);
-	skb_queue_purge(&session->intr_transmit);
+	BT_DBG("session %p", session);
-failed:
-	up_write(&hidp_session_sem);
+	/* initialize runtime environment */
+	hidp_session_get(session);
+	__module_get(THIS_MODULE);
+	set_user_nice(current, -15);
+	hidp_set_timer(session);
-	kfree(session);
-	return err;
+	init_waitqueue_entry(&ctrl_wait, current);
+	init_waitqueue_entry(&intr_wait, current);
+	add_wait_queue(sk_sleep(session->ctrl_sock->sk), &ctrl_wait);
+	add_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait);
+	/* This memory barrier is paired with wq_has_sleeper(). See
+	 * sock_poll_wait() for more information why this is needed. */
+	smp_mb();
+
+	/* notify synchronous startup that we're ready */
+	atomic_inc(&session->state);
+	wake_up(&session->state_queue);
+
+	/* run session */
+	hidp_session_run(session);
+
+	/* cleanup runtime environment */
+	remove_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait);
+	remove_wait_queue(sk_sleep(session->ctrl_sock->sk), &ctrl_wait);
+	wake_up_interruptible(&session->report_queue);
+	hidp_del_timer(session);
+
+	/*
+	 * If we stopped ourselves due to any internal signal, we should try to
+	 * unregister our own session here to avoid having it linger until the
+	 * parent l2cap_conn dies or user-space cleans it up.
+	 * This does not deadlock as we don't do any synchronous shutdown.
+	 * Instead, this call has the same semantics as if user-space tried to
+	 * delete the session.
+	 */
+	l2cap_unregister_user(session->conn, &session->user);
+	hidp_session_put(session);
+
+	module_put_and_exit(0);
+	return 0;
 }
-int hidp_del_connection(struct hidp_conndel_req *req)
+static int hidp_verify_sockets(struct socket *ctrl_sock,
+			       struct socket *intr_sock)
 {
+	struct bt_sock *ctrl, *intr;
 	struct hidp_session *session;
-	int err = 0;
-	BT_DBG("");
+	if (!l2cap_is_socket(ctrl_sock) || !l2cap_is_socket(intr_sock))
+		return -EINVAL;
-	down_read(&hidp_session_sem);
+	ctrl = bt_sk(ctrl_sock->sk);
+	intr = bt_sk(intr_sock->sk);
-	session = __hidp_get_session(&req->bdaddr);
+	if (bacmp(&ctrl->src, &intr->src) || bacmp(&ctrl->dst, &intr->dst))
+		return -ENOTUNIQ;
+	if (ctrl->sk.sk_state != BT_CONNECTED ||
+	    intr->sk.sk_state != BT_CONNECTED)
+		return -EBADFD;
+
+	/* early session check, we check again during session registration */
+	session = hidp_session_find(&ctrl->dst);
 	if (session) {
-		if (req->flags & (1 << HIDP_VIRTUAL_CABLE_UNPLUG)) {
-			hidp_send_ctrl_message(session,
-				HIDP_TRANS_HID_CONTROL | HIDP_CTRL_VIRTUAL_CABLE_UNPLUG, NULL, 0);
-		} else {
-			/* Flush the transmit queues */
-			skb_queue_purge(&session->ctrl_transmit);
-			skb_queue_purge(&session->intr_transmit);
+		hidp_session_put(session);
+		return -EEXIST;
+	}
-			atomic_inc(&session->terminate);
-			wake_up_process(session->task);
-		}
-	} else
-		err = -ENOENT;
+	return 0;
+}
-	up_read(&hidp_session_sem);
-	return err;
+int hidp_connection_add(struct hidp_connadd_req *req,
+			struct socket *ctrl_sock,
+			struct socket *intr_sock)
+{
+	struct hidp_session *session;
+	struct l2cap_conn *conn;
+	struct l2cap_chan *chan = l2cap_pi(ctrl_sock->sk)->chan;
+	int ret;
+
+	ret = hidp_verify_sockets(ctrl_sock, intr_sock);
+	if (ret)
+		return ret;
+
+	conn = NULL;
+	l2cap_chan_lock(chan);
+	if (chan->conn) {
+		l2cap_conn_get(chan->conn);
+		conn = chan->conn;
+	}
+	l2cap_chan_unlock(chan);
+
+	if (!conn)
+		return -EBADFD;
+
+	ret = hidp_session_new(&session, &bt_sk(ctrl_sock->sk)->dst, ctrl_sock,
+			       intr_sock, req, conn);
+	if (ret)
+		goto out_conn;
+
+	ret = l2cap_register_user(conn, &session->user);
+	if (ret)
+		goto out_session;
+
+	ret = 0;
+
+out_session:
+	hidp_session_put(session);
+out_conn:
+	l2cap_conn_put(conn);
+	return ret;
+}
+
+int hidp_connection_del(struct hidp_conndel_req *req)
+{
+	struct hidp_session *session;
+
+	session = hidp_session_find(&req->bdaddr);
+	if (!session)
+		return -ENOENT;
+
+	if (req->flags & (1 << HIDP_VIRTUAL_CABLE_UNPLUG))
+		hidp_send_ctrl_message(session,
+				       HIDP_TRANS_HID_CONTROL |
+				         HIDP_CTRL_VIRTUAL_CABLE_UNPLUG,
+				       NULL, 0);
+	else
+		l2cap_unregister_user(session->conn, &session->user);
+
+	hidp_session_put(session);
+
+	return 0;
 }
 int hidp_get_connlist(struct hidp_connlist_req *req)
@@ -1153,7 +1334,7 @@ int hidp_get_connlist(struct hidp_connlist_req *req)
 	list_for_each_entry(session, &hidp_session_list, list) {
 		struct hidp_conninfo ci;
-		__hidp_copy_session(session, &ci);
+		hidp_copy_session(session, &ci);
 		if (copy_to_user(req->ci, &ci, sizeof(ci))) {
 			err = -EFAULT;
@@ -1174,18 +1355,14 @@ int hidp_get_connlist(struct hidp_connlist_req *req)
 int hidp_get_conninfo(struct hidp_conninfo *ci)
 {
 	struct hidp_session *session;
-	int err = 0;
-
-	down_read(&hidp_session_sem);
-	session = __hidp_get_session(&ci->bdaddr);
-	if (session)
-		__hidp_copy_session(session, ci);
-	else
-		err = -ENOENT;
+	session = hidp_session_find(&ci->bdaddr);
+	if (session) {
+		hidp_copy_session(session,
ci); +		hidp_session_put(session); +	} -	up_read(&hidp_session_sem); -	return err; +	return session ? 0 : -ENOENT;  }  static int __init hidp_init(void) @@ -1204,6 +1381,7 @@ module_init(hidp_init);  module_exit(hidp_exit);  MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); +MODULE_AUTHOR("David Herrmann <dh.herrmann@gmail.com>");  MODULE_DESCRIPTION("Bluetooth HIDP ver " VERSION);  MODULE_VERSION(VERSION);  MODULE_LICENSE("GPL"); diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h index af1bcc823f2..6162ce8606a 100644 --- a/net/bluetooth/hidp/hidp.h +++ b/net/bluetooth/hidp/hidp.h @@ -24,7 +24,9 @@  #define __HIDP_H  #include <linux/types.h> +#include <linux/kref.h>  #include <net/bluetooth/bluetooth.h> +#include <net/bluetooth/l2cap.h>  /* HIDP header masks */  #define HIDP_HEADER_TRANS_MASK			0xf0 @@ -119,43 +121,52 @@ struct hidp_connlist_req {  	struct hidp_conninfo __user *ci;  }; -int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock); -int hidp_del_connection(struct hidp_conndel_req *req); +int hidp_connection_add(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock); +int hidp_connection_del(struct hidp_conndel_req *req);  int hidp_get_connlist(struct hidp_connlist_req *req);  int hidp_get_conninfo(struct hidp_conninfo *ci); +enum hidp_session_state { +	HIDP_SESSION_IDLING, +	HIDP_SESSION_RUNNING, +}; +  /* HIDP session defines */  struct hidp_session {  	struct list_head list; +	struct kref ref; -	struct hci_conn *conn; +	/* runtime management */ +	atomic_t state; +	wait_queue_head_t state_queue; +	atomic_t terminate; +	struct task_struct *task; +	unsigned long flags; +	/* connection management */ +	bdaddr_t bdaddr; +	struct l2cap_conn *conn; +	struct l2cap_user user;  	struct socket *ctrl_sock;  	struct socket *intr_sock; - -	bdaddr_t bdaddr; - -	unsigned long state; -	unsigned long flags; -	unsigned long idle_to; - +	struct sk_buff_head ctrl_transmit; +	struct sk_buff_head intr_transmit;  	uint ctrl_mtu;  	uint intr_mtu; +	unsigned long idle_to; -	atomic_t terminate; -	struct task_struct *task; - -	unsigned char keys[8]; -	unsigned char leds; - +	/* device management */  	struct input_dev *input; -  	struct hid_device *hid; -  	struct timer_list timer; -	struct sk_buff_head ctrl_transmit; -	struct sk_buff_head intr_transmit; +	/* Report descriptor */ +	__u8 *rd_data; +	uint rd_size; + +	/* session data */ +	unsigned char keys[8]; +	unsigned char leds;  	/* Used in hidp_get_raw_report() */  	int waiting_report_type; /* HIDP_DATA_RTYPE_* */ @@ -166,24 +177,8 @@ struct hidp_session {  	/* Used in hidp_output_raw_report() */  	int output_report_success; /* boolean */ - -	/* Report descriptor */ -	__u8 *rd_data; -	uint rd_size; - -	wait_queue_head_t startup_queue; -	int waiting_for_startup;  }; -static inline void hidp_schedule(struct hidp_session *session) -{ -	struct sock *ctrl_sk = session->ctrl_sock->sk; -	struct sock *intr_sk = session->intr_sock->sk; - -	wake_up_interruptible(sk_sleep(ctrl_sk)); -	wake_up_interruptible(sk_sleep(intr_sk)); -} -  /* HIDP init defines */  extern int __init hidp_init_sockets(void);  extern void __exit hidp_cleanup_sockets(void); diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c index 82a829d90b0..2f4cbb0865c 100644 --- a/net/bluetooth/hidp/sock.c +++ b/net/bluetooth/hidp/sock.c @@ -77,21 +77,12 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long  			return err;  		} -		if (csock->sk->sk_state != 
BT_CONNECTED ||
-				isock->sk->sk_state != BT_CONNECTED) {
-			sockfd_put(csock);
-			sockfd_put(isock);
-			return -EBADFD;
-		}
+		err = hidp_connection_add(&ca, csock, isock);
+		if (!err && copy_to_user(argp, &ca, sizeof(ca)))
+			err = -EFAULT;
-		err = hidp_add_connection(&ca, csock, isock);
-		if (!err) {
-			if (copy_to_user(argp, &ca, sizeof(ca)))
-				err = -EFAULT;
-		} else {
-			sockfd_put(csock);
-			sockfd_put(isock);
-		}
+		sockfd_put(csock);
+		sockfd_put(isock);
 		return err;
@@ -102,7 +93,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 		if (copy_from_user(&cd, argp, sizeof(cd)))
 			return -EFAULT;
-		return hidp_del_connection(&cd);
+		return hidp_connection_del(&cd);
 	case HIDPGETCONNLIST:
 		if (copy_from_user(&cl, argp, sizeof(cl)))
@@ -296,7 +287,6 @@ int __init hidp_init_sockets(void)
 	return 0;
 error:
-	BT_ERR("Can't register HIDP socket");
 	proto_unregister(&hidp_proto);
 	return err;
 }
@@ -304,8 +294,6 @@ error:
 void __exit hidp_cleanup_sockets(void)
 {
 	bt_procfs_cleanup(&init_net, "hidp");
-	if (bt_sock_unregister(BTPROTO_HIDP) < 0)
-		BT_ERR("Can't unregister HIDP socket");
-
+	bt_sock_unregister(BTPROTO_HIDP);
 	proto_unregister(&hidp_proto);
 }
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 7c7e9321f1e..a76d1ac0321 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c
@@ -571,7 +571,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err)
 		chan->conn = NULL;
 		if (chan->chan_type != L2CAP_CHAN_CONN_FIX_A2MP)
-			hci_conn_put(conn->hcon);
+			hci_conn_drop(conn->hcon);
 		if (mgr && mgr->bredr_chan == chan)
 			mgr->bredr_chan = NULL;
@@ -1446,6 +1446,89 @@ static void l2cap_info_timeout(struct work_struct *work)
 	l2cap_conn_start(conn);
 }
+/*
+ * l2cap_user
+ * External modules can register l2cap_user objects on l2cap_conn. The ->probe
+ * callback is called during registration. The ->remove callback is called
+ * during unregistration.
+ * An l2cap_user object is unregistered either explicitly or when the
+ * underlying l2cap_conn object is deleted. This guarantees that l2cap->hcon,
+ * l2cap->hchan, etc. are valid as long as the remove callback hasn't been
+ * called.
+ * External modules must own a reference to the l2cap_conn object if they
+ * intend to call l2cap_unregister_user(). The l2cap_conn object might get
+ * destroyed at any time if they don't.
+ */
+
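
As a usage illustration of the API documented above (my_probe, my_remove, my_user and my_attach are invented names; the hidp changes earlier in this patch are the real in-tree user):

	static int my_probe(struct l2cap_conn *conn, struct l2cap_user *user)
	{
		/* conn->hcon and conn->hchan stay valid until ->remove() */
		return 0;	/* a non-zero return aborts the registration */
	}

	static void my_remove(struct l2cap_conn *conn, struct l2cap_user *user)
	{
		/* tear down whatever my_probe() set up */
	}

	static struct l2cap_user my_user = {
		.probe  = my_probe,
		.remove = my_remove,
	};

	static int my_attach(struct l2cap_conn *conn)
	{
		int err;

		l2cap_conn_get(conn);	/* hold conn while registered */
		err = l2cap_register_user(conn, &my_user);
		if (err)
			l2cap_conn_put(conn);
		return err;
	}
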
+int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user)
+{
+	struct hci_dev *hdev = conn->hcon->hdev;
+	int ret;
+
+	/* We need to check whether l2cap_conn is registered. If it is not, we
+	 * must not register the l2cap_user. l2cap_conn_del() unregisters
+	 * l2cap_conn objects, but doesn't provide its own locking. Instead, it
+	 * relies on the parent hci_conn object to be locked. This itself relies
+	 * on the hci_dev object to be locked. So we must lock the hci device
+	 * here, too. */
+
+	hci_dev_lock(hdev);
+
+	if (user->list.next || user->list.prev) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	/* conn->hchan is NULL after l2cap_conn_del() was called */
+	if (!conn->hchan) {
+		ret = -ENODEV;
+		goto out_unlock;
+	}
+
+	ret = user->probe(conn, user);
+	if (ret)
+		goto out_unlock;
+
+	list_add(&user->list, &conn->users);
+	ret = 0;
+
+out_unlock:
+	hci_dev_unlock(hdev);
+	return ret;
+}
+EXPORT_SYMBOL(l2cap_register_user);
+
+void l2cap_unregister_user(struct l2cap_conn *conn, struct l2cap_user *user)
+{
+	struct hci_dev *hdev = conn->hcon->hdev;
+
+	hci_dev_lock(hdev);
+
+	if (!user->list.next || !user->list.prev)
+		goto out_unlock;
+
+	list_del(&user->list);
+	user->list.next = NULL;
+	user->list.prev = NULL;
+	user->remove(conn, user);
+
+out_unlock:
+	hci_dev_unlock(hdev);
+}
+EXPORT_SYMBOL(l2cap_unregister_user);
+
+static void l2cap_unregister_all_users(struct l2cap_conn *conn)
+{
+	struct l2cap_user *user;
+
+	while (!list_empty(&conn->users)) {
+		user = list_first_entry(&conn->users, struct l2cap_user, list);
+		list_del(&user->list);
+		user->list.next = NULL;
+		user->list.prev = NULL;
+		user->remove(conn, user);
+	}
+}
+
 static void l2cap_conn_del(struct hci_conn *hcon, int err)
 {
 	struct l2cap_conn *conn = hcon->l2cap_data;
@@ -1458,6 +1541,8 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
 	kfree_skb(conn->rx_skb);
+	l2cap_unregister_all_users(conn);
+
 	mutex_lock(&conn->chan_lock);
 	/* Kill channels */
@@ -1486,7 +1571,8 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
 	}
 	hcon->l2cap_data = NULL;
-	kfree(conn);
+	conn->hchan = NULL;
+	l2cap_conn_put(conn);
 }
 static void security_timeout(struct work_struct *work)
@@ -1502,12 +1588,12 @@ static void security_timeout(struct work_struct *work)
 	}
 }
-static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
+static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon)
 {
 	struct l2cap_conn *conn = hcon->l2cap_data;
 	struct hci_chan *hchan;
-	if (conn || status)
+	if (conn)
 		return conn;
 	hchan = hci_chan_create(hcon);
@@ -1520,8 +1606,10 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
 		return NULL;
 	}
+	kref_init(&conn->ref);
 	hcon->l2cap_data = conn;
 	conn->hcon = hcon;
+	hci_conn_get(conn->hcon);
 	conn->hchan = hchan;
 	BT_DBG("hcon %p conn %p hchan %p", hcon, conn, hchan);
@@ -1547,6 +1635,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
 	mutex_init(&conn->chan_lock);
 	INIT_LIST_HEAD(&conn->chan_l);
+	INIT_LIST_HEAD(&conn->users);
 	if (hcon->type == LE_LINK)
 		INIT_DELAYED_WORK(&conn->security_timer, security_timeout);
@@ -1558,6 +1647,26 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
 	return conn;
 }
+static void l2cap_conn_free(struct kref *ref)
+{
+	struct l2cap_conn *conn = container_of(ref, struct l2cap_conn, ref);
+
+	hci_conn_put(conn->hcon);
+	kfree(conn);
+}
+
+void l2cap_conn_get(struct l2cap_conn *conn)
+{
+	kref_get(&conn->ref);
+}
+EXPORT_SYMBOL(l2cap_conn_get);
+
+void l2cap_conn_put(struct l2cap_conn *conn)
+{
+	kref_put(&conn->ref, l2cap_conn_free);
+}
+EXPORT_SYMBOL(l2cap_conn_put);
+
 /* ---- Socket interface ---- */
 /* Find socket with psm and source / destination bdaddr.
@@ -1695,9 +1804,9 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,  		goto done;  	} -	conn = l2cap_conn_add(hcon, 0); +	conn = l2cap_conn_add(hcon);  	if (!conn) { -		hci_conn_put(hcon); +		hci_conn_drop(hcon);  		err = -ENOMEM;  		goto done;  	} @@ -1707,7 +1816,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,  		if (!list_empty(&conn->chan_l)) {  			err = -EBUSY; -			hci_conn_put(hcon); +			hci_conn_drop(hcon);  		}  		if (err) @@ -6205,12 +6314,13 @@ drop:  	kfree_skb(skb);  } -static void l2cap_att_channel(struct l2cap_conn *conn, u16 cid, +static void l2cap_att_channel(struct l2cap_conn *conn,  			      struct sk_buff *skb)  {  	struct l2cap_chan *chan; -	chan = l2cap_global_chan_by_scid(0, cid, conn->src, conn->dst); +	chan = l2cap_global_chan_by_scid(0, L2CAP_CID_LE_DATA, +					 conn->src, conn->dst);  	if (!chan)  		goto drop; @@ -6259,7 +6369,7 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb)  		break;  	case L2CAP_CID_LE_DATA: -		l2cap_att_channel(conn, cid, skb); +		l2cap_att_channel(conn, skb);  		break;  	case L2CAP_CID_SMP: @@ -6313,7 +6423,7 @@ void l2cap_connect_cfm(struct hci_conn *hcon, u8 status)  	BT_DBG("hcon %p bdaddr %pMR status %d", hcon, &hcon->dst, status);  	if (!status) { -		conn = l2cap_conn_add(hcon, status); +		conn = l2cap_conn_add(hcon);  		if (conn)  			l2cap_conn_ready(conn);  	} else { @@ -6482,7 +6592,7 @@ int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags)  		goto drop;  	if (!conn) -		conn = l2cap_conn_add(hcon, 0); +		conn = l2cap_conn_add(hcon);  	if (!conn)  		goto drop; diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 1bcfb8422fd..141e7b058b7 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -43,6 +43,12 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent);  static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,  				     int proto, gfp_t prio); +bool l2cap_is_socket(struct socket *sock) +{ +	return sock && sock->ops == &l2cap_sock_ops; +} +EXPORT_SYMBOL(l2cap_is_socket); +  static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)  {  	struct sock *sk = sock->sk; @@ -1312,8 +1318,6 @@ error:  void l2cap_cleanup_sockets(void)  {  	bt_procfs_cleanup(&init_net, "l2cap"); -	if (bt_sock_unregister(BTPROTO_L2CAP) < 0) -		BT_ERR("L2CAP socket unregistration failed"); - +	bt_sock_unregister(BTPROTO_L2CAP);  	proto_unregister(&l2cap_proto);  } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 39395c7144a..35fef22703e 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -106,11 +106,10 @@ static const u16 mgmt_events[] = {   * These LE scan and inquiry parameters were chosen according to LE General   * Discovery Procedure specification.   
*/ -#define LE_SCAN_TYPE			0x01  #define LE_SCAN_WIN			0x12  #define LE_SCAN_INT			0x12 -#define LE_SCAN_TIMEOUT_LE_ONLY		10240	/* TGAP(gen_disc_scan_min) */ -#define LE_SCAN_TIMEOUT_BREDR_LE	5120	/* TGAP(100)/2 */ +#define LE_SCAN_TIMEOUT_LE_ONLY		msecs_to_jiffies(10240) +#define LE_SCAN_TIMEOUT_BREDR_LE	msecs_to_jiffies(5120)  #define INQUIRY_LEN_BREDR		0x08	/* TGAP(100) */  #define INQUIRY_LEN_BREDR_LE		0x04	/* TGAP(100)/2 */ @@ -384,7 +383,8 @@ static u32 get_supported_settings(struct hci_dev *hdev)  	if (lmp_bredr_capable(hdev)) {  		settings |= MGMT_SETTING_CONNECTABLE; -		settings |= MGMT_SETTING_FAST_CONNECTABLE; +		if (hdev->hci_ver >= BLUETOOTH_VER_1_2) +			settings |= MGMT_SETTING_FAST_CONNECTABLE;  		settings |= MGMT_SETTING_DISCOVERABLE;  		settings |= MGMT_SETTING_BREDR;  		settings |= MGMT_SETTING_LINK_SECURITY; @@ -409,6 +409,9 @@ static u32 get_current_settings(struct hci_dev *hdev)  	if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags))  		settings |= MGMT_SETTING_CONNECTABLE; +	if (test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) +		settings |= MGMT_SETTING_FAST_CONNECTABLE; +  	if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags))  		settings |= MGMT_SETTING_DISCOVERABLE; @@ -591,32 +594,33 @@ static void create_eir(struct hci_dev *hdev, u8 *data)  	ptr = create_uuid128_list(hdev, ptr, HCI_MAX_EIR_LENGTH - (ptr - data));  } -static int update_eir(struct hci_dev *hdev) +static void update_eir(struct hci_request *req)  { +	struct hci_dev *hdev = req->hdev;  	struct hci_cp_write_eir cp;  	if (!hdev_is_powered(hdev)) -		return 0; +		return;  	if (!lmp_ext_inq_capable(hdev)) -		return 0; +		return;  	if (!test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) -		return 0; +		return;  	if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) -		return 0; +		return;  	memset(&cp, 0, sizeof(cp));  	create_eir(hdev, cp.data);  	if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0) -		return 0; +		return;  	memcpy(hdev->eir, cp.data, sizeof(cp.data)); -	return hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp); +	hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp);  }  static u8 get_service_classes(struct hci_dev *hdev) @@ -630,47 +634,48 @@ static u8 get_service_classes(struct hci_dev *hdev)  	return val;  } -static int update_class(struct hci_dev *hdev) +static void update_class(struct hci_request *req)  { +	struct hci_dev *hdev = req->hdev;  	u8 cod[3]; -	int err;  	BT_DBG("%s", hdev->name);  	if (!hdev_is_powered(hdev)) -		return 0; +		return;  	if (test_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) -		return 0; +		return;  	cod[0] = hdev->minor_class;  	cod[1] = hdev->major_class;  	cod[2] = get_service_classes(hdev);  	if (memcmp(cod, hdev->dev_class, 3) == 0) -		return 0; - -	err = hci_send_cmd(hdev, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); -	if (err == 0) -		set_bit(HCI_PENDING_CLASS, &hdev->dev_flags); +		return; -	return err; +	hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod);  }  static void service_cache_off(struct work_struct *work)  {  	struct hci_dev *hdev = container_of(work, struct hci_dev,  					    service_cache.work); +	struct hci_request req;  	if (!test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags))  		return; +	hci_req_init(&req, hdev); +  	hci_dev_lock(hdev); -	update_eir(hdev); -	update_class(hdev); +	update_eir(&req); +	update_class(&req);  	hci_dev_unlock(hdev); + +	hci_req_run(&req, NULL);  }  static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev) @@ -994,11 +999,64 @@ failed:  	return err;  } +static void write_fast_connectable(struct 
hci_request *req, bool enable) +{ +	struct hci_dev *hdev = req->hdev; +	struct hci_cp_write_page_scan_activity acp; +	u8 type; + +	if (hdev->hci_ver < BLUETOOTH_VER_1_2) +		return; + +	if (enable) { +		type = PAGE_SCAN_TYPE_INTERLACED; + +		/* 160 msec page scan interval */ +		acp.interval = __constant_cpu_to_le16(0x0100); +	} else { +		type = PAGE_SCAN_TYPE_STANDARD;	/* default */ + +		/* default 1.28 sec page scan */ +		acp.interval = __constant_cpu_to_le16(0x0800); +	} + +	acp.window = __constant_cpu_to_le16(0x0012); + +	if (__cpu_to_le16(hdev->page_scan_interval) != acp.interval || +	    __cpu_to_le16(hdev->page_scan_window) != acp.window) +		hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, +			    sizeof(acp), &acp); + +	if (hdev->page_scan_type != type) +		hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); +} + +static void set_connectable_complete(struct hci_dev *hdev, u8 status) +{ +	struct pending_cmd *cmd; + +	BT_DBG("status 0x%02x", status); + +	hci_dev_lock(hdev); + +	cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev); +	if (!cmd) +		goto unlock; + +	send_settings_rsp(cmd->sk, MGMT_OP_SET_CONNECTABLE, hdev); + +	mgmt_pending_remove(cmd); + +unlock: +	hci_dev_unlock(hdev); +} +  static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,  			   u16 len)  {  	struct mgmt_mode *cp = data;  	struct pending_cmd *cmd; +	struct hci_request req;  	u8 scan;  	int err; @@ -1065,7 +1123,20 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,  			cancel_delayed_work(&hdev->discov_off);  	} -	err = hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); +	hci_req_init(&req, hdev); + +	hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + +	/* If we're going from non-connectable to connectable or +	 * vice-versa when fast connectable is enabled ensure that fast +	 * connectable gets disabled. write_fast_connectable won't do +	 * anything if the page scan parameters are already what they +	 * should be. +	 */ +	if (cp->val || test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) +		write_fast_connectable(&req, false); + +	err = hci_req_run(&req, set_connectable_complete);  	if (err < 0)  		mgmt_pending_remove(cmd); @@ -1280,6 +1351,11 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)  		return cmd_status(sk, hdev->id, MGMT_OP_SET_LE,  				  MGMT_STATUS_INVALID_PARAMS); +	/* LE-only devices do not allow toggling LE on/off */ +	if (!lmp_bredr_capable(hdev)) +		return cmd_status(sk, hdev->id, MGMT_OP_SET_LE, +				  MGMT_STATUS_REJECTED); +  	hci_dev_lock(hdev);  	val = !!cp->val; @@ -1332,6 +1408,29 @@ unlock:  	return err;  } +/* This is a helper function to test for pending mgmt commands that can + * cause CoD or EIR HCI commands. We can only allow one such pending + * mgmt command at a time since otherwise we cannot easily track what + * the current values are, will be, and based on that calculate if a new + * HCI command needs to be sent and if yes with what value. 
+ */ +static bool pending_eir_or_class(struct hci_dev *hdev) +{ +	struct pending_cmd *cmd; + +	list_for_each_entry(cmd, &hdev->mgmt_pending, list) { +		switch (cmd->opcode) { +		case MGMT_OP_ADD_UUID: +		case MGMT_OP_REMOVE_UUID: +		case MGMT_OP_SET_DEV_CLASS: +		case MGMT_OP_SET_POWERED: +			return true; +		} +	} + +	return false; +} +  static const u8 bluetooth_base_uuid[] = {  			0xfb, 0x34, 0x9b, 0x5f, 0x80, 0x00, 0x00, 0x80,  			0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -1351,10 +1450,37 @@ static u8 get_uuid_size(const u8 *uuid)  	return 16;  } +static void mgmt_class_complete(struct hci_dev *hdev, u16 mgmt_op, u8 status) +{ +	struct pending_cmd *cmd; + +	hci_dev_lock(hdev); + +	cmd = mgmt_pending_find(mgmt_op, hdev); +	if (!cmd) +		goto unlock; + +	cmd_complete(cmd->sk, cmd->index, cmd->opcode, mgmt_status(status), +		     hdev->dev_class, 3); + +	mgmt_pending_remove(cmd); + +unlock: +	hci_dev_unlock(hdev); +} + +static void add_uuid_complete(struct hci_dev *hdev, u8 status) +{ +	BT_DBG("status 0x%02x", status); + +	mgmt_class_complete(hdev, MGMT_OP_ADD_UUID, status); +} +  static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)  {  	struct mgmt_cp_add_uuid *cp = data;  	struct pending_cmd *cmd; +	struct hci_request req;  	struct bt_uuid *uuid;  	int err; @@ -1362,7 +1488,7 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)  	hci_dev_lock(hdev); -	if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) { +	if (pending_eir_or_class(hdev)) {  		err = cmd_status(sk, hdev->id, MGMT_OP_ADD_UUID,  				 MGMT_STATUS_BUSY);  		goto failed; @@ -1380,23 +1506,28 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)  	list_add_tail(&uuid->list, &hdev->uuids); -	err = update_class(hdev); -	if (err < 0) -		goto failed; +	hci_req_init(&req, hdev); -	err = update_eir(hdev); -	if (err < 0) -		goto failed; +	update_class(&req); +	update_eir(&req); + +	err = hci_req_run(&req, add_uuid_complete); +	if (err < 0) { +		if (err != -ENODATA) +			goto failed; -	if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {  		err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_UUID, 0,  				   hdev->dev_class, 3);  		goto failed;  	}  	cmd = mgmt_pending_add(sk, MGMT_OP_ADD_UUID, hdev, data, len); -	if (!cmd) +	if (!cmd) {  		err = -ENOMEM; +		goto failed; +	} + +	err = 0;  failed:  	hci_dev_unlock(hdev); @@ -1417,6 +1548,13 @@ static bool enable_service_cache(struct hci_dev *hdev)  	return false;  } +static void remove_uuid_complete(struct hci_dev *hdev, u8 status) +{ +	BT_DBG("status 0x%02x", status); + +	mgmt_class_complete(hdev, MGMT_OP_REMOVE_UUID, status); +} +  static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,  		       u16 len)  { @@ -1424,13 +1562,14 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,  	struct pending_cmd *cmd;  	struct bt_uuid *match, *tmp;  	u8 bt_uuid_any[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; +	struct hci_request req;  	int err, found;  	BT_DBG("request for %s", hdev->name);  	hci_dev_lock(hdev); -	if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) { +	if (pending_eir_or_class(hdev)) {  		err = cmd_status(sk, hdev->id, MGMT_OP_REMOVE_UUID,  				 MGMT_STATUS_BUSY);  		goto unlock; @@ -1466,34 +1605,47 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,  	}  update_class: -	err = update_class(hdev); -	if (err < 0) -		goto unlock; +	hci_req_init(&req, hdev); -	err = update_eir(hdev); -	if (err < 0) -		goto unlock; +	
update_class(&req); +	update_eir(&req); + +	err = hci_req_run(&req, remove_uuid_complete); +	if (err < 0) { +		if (err != -ENODATA) +			goto unlock; -	if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {  		err = cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_UUID, 0,  				   hdev->dev_class, 3);  		goto unlock;  	}  	cmd = mgmt_pending_add(sk, MGMT_OP_REMOVE_UUID, hdev, data, len); -	if (!cmd) +	if (!cmd) {  		err = -ENOMEM; +		goto unlock; +	} + +	err = 0;  unlock:  	hci_dev_unlock(hdev);  	return err;  } +static void set_class_complete(struct hci_dev *hdev, u8 status) +{ +	BT_DBG("status 0x%02x", status); + +	mgmt_class_complete(hdev, MGMT_OP_SET_DEV_CLASS, status); +} +  static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,  			 u16 len)  {  	struct mgmt_cp_set_dev_class *cp = data;  	struct pending_cmd *cmd; +	struct hci_request req;  	int err;  	BT_DBG("request for %s", hdev->name); @@ -1502,15 +1654,19 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,  		return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,  				  MGMT_STATUS_NOT_SUPPORTED); -	if (test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) -		return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, -				  MGMT_STATUS_BUSY); +	hci_dev_lock(hdev); -	if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0) -		return cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, -				  MGMT_STATUS_INVALID_PARAMS); +	if (pending_eir_or_class(hdev)) { +		err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, +				 MGMT_STATUS_BUSY); +		goto unlock; +	} -	hci_dev_lock(hdev); +	if ((cp->minor & 0x03) != 0 || (cp->major & 0xe0) != 0) { +		err = cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, +				 MGMT_STATUS_INVALID_PARAMS); +		goto unlock; +	}  	hdev->major_class = cp->major;  	hdev->minor_class = cp->minor; @@ -1521,26 +1677,34 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,  		goto unlock;  	} +	hci_req_init(&req, hdev); +  	if (test_and_clear_bit(HCI_SERVICE_CACHE, &hdev->dev_flags)) {  		hci_dev_unlock(hdev);  		cancel_delayed_work_sync(&hdev->service_cache);  		hci_dev_lock(hdev); -		update_eir(hdev); +		update_eir(&req);  	} -	err = update_class(hdev); -	if (err < 0) -		goto unlock; +	update_class(&req); + +	err = hci_req_run(&req, set_class_complete); +	if (err < 0) { +		if (err != -ENODATA) +			goto unlock; -	if (!test_bit(HCI_PENDING_CLASS, &hdev->dev_flags)) {  		err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEV_CLASS, 0,  				   hdev->dev_class, 3);  		goto unlock;  	}  	cmd = mgmt_pending_add(sk, MGMT_OP_SET_DEV_CLASS, hdev, data, len); -	if (!cmd) +	if (!cmd) {  		err = -ENOMEM; +		goto unlock; +	} + +	err = 0;  unlock:  	hci_dev_unlock(hdev); @@ -1971,7 +2135,7 @@ static void pairing_complete(struct pending_cmd *cmd, u8 status)  	conn->security_cfm_cb = NULL;  	conn->disconn_cfm_cb = NULL; -	hci_conn_put(conn); +	hci_conn_drop(conn);  	mgmt_pending_remove(cmd);  } @@ -2062,7 +2226,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,  	}  	if (conn->connect_cfm_cb) { -		hci_conn_put(conn); +		hci_conn_drop(conn);  		err = cmd_complete(sk, hdev->id, MGMT_OP_PAIR_DEVICE,  				   MGMT_STATUS_BUSY, &rp, sizeof(rp));  		goto unlock; @@ -2071,7 +2235,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,  	cmd = mgmt_pending_add(sk, MGMT_OP_PAIR_DEVICE, hdev, data, len);  	if (!cmd) {  		err = -ENOMEM; -		hci_conn_put(conn); +		hci_conn_drop(conn);  		goto unlock;  	} @@ -2140,7 +2304,7 @@ unlock:  }  static int 
user_pairing_resp(struct sock *sk, struct hci_dev *hdev, -			     bdaddr_t *bdaddr, u8 type, u16 mgmt_op, +			     struct mgmt_addr_info *addr, u16 mgmt_op,  			     u16 hci_op, __le32 passkey)  {  	struct pending_cmd *cmd; @@ -2150,37 +2314,41 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,  	hci_dev_lock(hdev);  	if (!hdev_is_powered(hdev)) { -		err = cmd_status(sk, hdev->id, mgmt_op, -				 MGMT_STATUS_NOT_POWERED); +		err = cmd_complete(sk, hdev->id, mgmt_op, +				   MGMT_STATUS_NOT_POWERED, addr, +				   sizeof(*addr));  		goto done;  	} -	if (type == BDADDR_BREDR) -		conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, bdaddr); +	if (addr->type == BDADDR_BREDR) +		conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &addr->bdaddr);  	else -		conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, bdaddr); +		conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &addr->bdaddr);  	if (!conn) { -		err = cmd_status(sk, hdev->id, mgmt_op, -				 MGMT_STATUS_NOT_CONNECTED); +		err = cmd_complete(sk, hdev->id, mgmt_op, +				   MGMT_STATUS_NOT_CONNECTED, addr, +				   sizeof(*addr));  		goto done;  	} -	if (type == BDADDR_LE_PUBLIC || type == BDADDR_LE_RANDOM) { +	if (addr->type == BDADDR_LE_PUBLIC || addr->type == BDADDR_LE_RANDOM) {  		/* Continue with pairing via SMP */  		err = smp_user_confirm_reply(conn, mgmt_op, passkey);  		if (!err) -			err = cmd_status(sk, hdev->id, mgmt_op, -					 MGMT_STATUS_SUCCESS); +			err = cmd_complete(sk, hdev->id, mgmt_op, +					   MGMT_STATUS_SUCCESS, addr, +					   sizeof(*addr));  		else -			err = cmd_status(sk, hdev->id, mgmt_op, -					 MGMT_STATUS_FAILED); +			err = cmd_complete(sk, hdev->id, mgmt_op, +					   MGMT_STATUS_FAILED, addr, +					   sizeof(*addr));  		goto done;  	} -	cmd = mgmt_pending_add(sk, mgmt_op, hdev, bdaddr, sizeof(*bdaddr)); +	cmd = mgmt_pending_add(sk, mgmt_op, hdev, addr, sizeof(*addr));  	if (!cmd) {  		err = -ENOMEM;  		goto done; @@ -2190,11 +2358,12 @@ static int user_pairing_resp(struct sock *sk, struct hci_dev *hdev,  	if (hci_op == HCI_OP_USER_PASSKEY_REPLY) {  		struct hci_cp_user_passkey_reply cp; -		bacpy(&cp.bdaddr, bdaddr); +		bacpy(&cp.bdaddr, &addr->bdaddr);  		cp.passkey = passkey;  		err = hci_send_cmd(hdev, hci_op, sizeof(cp), &cp);  	} else -		err = hci_send_cmd(hdev, hci_op, sizeof(*bdaddr), bdaddr); +		err = hci_send_cmd(hdev, hci_op, sizeof(addr->bdaddr), +				   &addr->bdaddr);  	if (err < 0)  		mgmt_pending_remove(cmd); @@ -2211,7 +2380,7 @@ static int pin_code_neg_reply(struct sock *sk, struct hci_dev *hdev,  	BT_DBG(""); -	return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, +	return user_pairing_resp(sk, hdev, &cp->addr,  				MGMT_OP_PIN_CODE_NEG_REPLY,  				HCI_OP_PIN_CODE_NEG_REPLY, 0);  } @@ -2227,7 +2396,7 @@ static int user_confirm_reply(struct sock *sk, struct hci_dev *hdev, void *data,  		return cmd_status(sk, hdev->id, MGMT_OP_USER_CONFIRM_REPLY,  				  MGMT_STATUS_INVALID_PARAMS); -	return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, +	return user_pairing_resp(sk, hdev, &cp->addr,  				 MGMT_OP_USER_CONFIRM_REPLY,  				 HCI_OP_USER_CONFIRM_REPLY, 0);  } @@ -2239,7 +2408,7 @@ static int user_confirm_neg_reply(struct sock *sk, struct hci_dev *hdev,  	BT_DBG(""); -	return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, +	return user_pairing_resp(sk, hdev, &cp->addr,  				 MGMT_OP_USER_CONFIRM_NEG_REPLY,  				 HCI_OP_USER_CONFIRM_NEG_REPLY, 0);  } @@ -2251,7 +2420,7 @@ static int user_passkey_reply(struct sock *sk, struct hci_dev *hdev, void *data,  	BT_DBG(""); 
-	return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, +	return user_pairing_resp(sk, hdev, &cp->addr,  				 MGMT_OP_USER_PASSKEY_REPLY,  				 HCI_OP_USER_PASSKEY_REPLY, cp->passkey);  } @@ -2263,18 +2432,47 @@ static int user_passkey_neg_reply(struct sock *sk, struct hci_dev *hdev,  	BT_DBG(""); -	return user_pairing_resp(sk, hdev, &cp->addr.bdaddr, cp->addr.type, +	return user_pairing_resp(sk, hdev, &cp->addr,  				 MGMT_OP_USER_PASSKEY_NEG_REPLY,  				 HCI_OP_USER_PASSKEY_NEG_REPLY, 0);  } -static int update_name(struct hci_dev *hdev, const char *name) +static void update_name(struct hci_request *req)  { +	struct hci_dev *hdev = req->hdev;  	struct hci_cp_write_local_name cp; -	memcpy(cp.name, name, sizeof(cp.name)); +	memcpy(cp.name, hdev->dev_name, sizeof(cp.name)); + +	hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); +} + +static void set_name_complete(struct hci_dev *hdev, u8 status) +{ +	struct mgmt_cp_set_local_name *cp; +	struct pending_cmd *cmd; + +	BT_DBG("status 0x%02x", status); + +	hci_dev_lock(hdev); -	return hci_send_cmd(hdev, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); +	cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev); +	if (!cmd) +		goto unlock; + +	cp = cmd->param; + +	if (status) +		cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, +			   mgmt_status(status)); +	else +		cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, +			     cp, sizeof(*cp)); + +	mgmt_pending_remove(cmd); + +unlock: +	hci_dev_unlock(hdev);  }  static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, @@ -2282,12 +2480,24 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,  {  	struct mgmt_cp_set_local_name *cp = data;  	struct pending_cmd *cmd; +	struct hci_request req;  	int err;  	BT_DBG("");  	hci_dev_lock(hdev); +	/* If the old values are the same as the new ones just return a +	 * direct command complete event. 
+	 */ +	if (!memcmp(hdev->dev_name, cp->name, sizeof(hdev->dev_name)) && +	    !memcmp(hdev->short_name, cp->short_name, +		    sizeof(hdev->short_name))) { +		err = cmd_complete(sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, +				   data, len); +		goto failed; +	} +  	memcpy(hdev->short_name, cp->short_name, sizeof(hdev->short_name));  	if (!hdev_is_powered(hdev)) { @@ -2310,7 +2520,19 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,  		goto failed;  	} -	err = update_name(hdev, cp->name); +	memcpy(hdev->dev_name, cp->name, sizeof(hdev->dev_name)); + +	hci_req_init(&req, hdev); + +	if (lmp_bredr_capable(hdev)) { +		update_name(&req); +		update_eir(&req); +	} + +	if (lmp_le_capable(hdev)) +		hci_update_ad(&req); + +	err = hci_req_run(&req, set_name_complete);  	if (err < 0)  		mgmt_pending_remove(cmd); @@ -2485,7 +2707,7 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev,  			goto failed;  		} -		err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, +		err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT,  				  LE_SCAN_WIN, LE_SCAN_TIMEOUT_LE_ONLY);  		break; @@ -2497,8 +2719,8 @@ static int start_discovery(struct sock *sk, struct hci_dev *hdev,  			goto failed;  		} -		err = hci_le_scan(hdev, LE_SCAN_TYPE, LE_SCAN_INT, LE_SCAN_WIN, -				  LE_SCAN_TIMEOUT_BREDR_LE); +		err = hci_le_scan(hdev, LE_SCAN_ACTIVE, LE_SCAN_INT, +				  LE_SCAN_WIN, LE_SCAN_TIMEOUT_BREDR_LE);  		break;  	default: @@ -2698,6 +2920,7 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data,  			 u16 len)  {  	struct mgmt_cp_set_device_id *cp = data; +	struct hci_request req;  	int err;  	__u16 source; @@ -2718,24 +2941,59 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data,  	err = cmd_complete(sk, hdev->id, MGMT_OP_SET_DEVICE_ID, 0, NULL, 0); -	update_eir(hdev); +	hci_req_init(&req, hdev); +	update_eir(&req); +	hci_req_run(&req, NULL);  	hci_dev_unlock(hdev);  	return err;  } +static void fast_connectable_complete(struct hci_dev *hdev, u8 status) +{ +	struct pending_cmd *cmd; + +	BT_DBG("status 0x%02x", status); + +	hci_dev_lock(hdev); + +	cmd = mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev); +	if (!cmd) +		goto unlock; + +	if (status) { +		cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, +			   mgmt_status(status)); +	} else { +		struct mgmt_mode *cp = cmd->param; + +		if (cp->val) +			set_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags); +		else +			clear_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags); + +		send_settings_rsp(cmd->sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev); +		new_settings(hdev, cmd->sk); +	} + +	mgmt_pending_remove(cmd); + +unlock: +	hci_dev_unlock(hdev); +} +  static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev,  				void *data, u16 len)  {  	struct mgmt_mode *cp = data; -	struct hci_cp_write_page_scan_activity acp; -	u8 type; +	struct pending_cmd *cmd; +	struct hci_request req;  	int err;  	BT_DBG("%s", hdev->name); -	if (!lmp_bredr_capable(hdev)) +	if (!lmp_bredr_capable(hdev) || hdev->hci_ver < BLUETOOTH_VER_1_2)  		return cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,  				  MGMT_STATUS_NOT_SUPPORTED); @@ -2753,40 +3011,39 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev,  	hci_dev_lock(hdev); -	if (cp->val) { -		type = PAGE_SCAN_TYPE_INTERLACED; +	if (mgmt_pending_find(MGMT_OP_SET_FAST_CONNECTABLE, hdev)) { +		err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, +				 MGMT_STATUS_BUSY); +		goto unlock; +	} -		/* 160 msec page scan interval */ 
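The set_local_name() conversion above is the template used throughout this series: instead of issuing one hci_send_cmd() per HCI command, the commands are queued on a struct hci_request and submitted as a batch with a single completion callback. A minimal sketch of the idiom, using only names visible in this diff and assuming hdev is locked as in the surrounding code:

	struct hci_request req;
	int err;

	hci_req_init(&req, hdev);	/* bind the request to this device */

	update_name(&req);		/* each helper queues one command */
	update_eir(&req);

	/* submit the batch; set_name_complete() runs once the last
	 * queued command has completed or one of them has failed */
	err = hci_req_run(&req, set_name_complete);
	if (err < 0)
		mgmt_pending_remove(cmd);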
-		acp.interval = __constant_cpu_to_le16(0x0100); -	} else { -		type = PAGE_SCAN_TYPE_STANDARD;	/* default */ +	if (!!cp->val == test_bit(HCI_FAST_CONNECTABLE, &hdev->dev_flags)) { +		err = send_settings_rsp(sk, MGMT_OP_SET_FAST_CONNECTABLE, +					hdev); +		goto unlock; +	} -		/* default 1.28 sec page scan */ -		acp.interval = __constant_cpu_to_le16(0x0800); +	cmd = mgmt_pending_add(sk, MGMT_OP_SET_FAST_CONNECTABLE, hdev, +			       data, len); +	if (!cmd) { +		err = -ENOMEM; +		goto unlock;  	} -	/* default 11.25 msec page scan window */ -	acp.window = __constant_cpu_to_le16(0x0012); +	hci_req_init(&req, hdev); -	err = hci_send_cmd(hdev, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, sizeof(acp), -			   &acp); -	if (err < 0) { -		err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, -				 MGMT_STATUS_FAILED); -		goto done; -	} +	write_fast_connectable(&req, cp->val); -	err = hci_send_cmd(hdev, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); +	err = hci_req_run(&req, fast_connectable_complete);  	if (err < 0) {  		err = cmd_status(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE,  				 MGMT_STATUS_FAILED); -		goto done; +		mgmt_pending_remove(cmd);  	} -	err = cmd_complete(sk, hdev->id, MGMT_OP_SET_FAST_CONNECTABLE, 0, -			   NULL, 0); -done: +unlock:  	hci_dev_unlock(hdev); +  	return err;  } @@ -3043,79 +3300,116 @@ static void settings_rsp(struct pending_cmd *cmd, void *data)  	mgmt_pending_free(cmd);  } -static int set_bredr_scan(struct hci_dev *hdev) +static void set_bredr_scan(struct hci_request *req)  { +	struct hci_dev *hdev = req->hdev;  	u8 scan = 0; +	/* Ensure that fast connectable is disabled. This function will +	 * not do anything if the page scan parameters are already what +	 * they should be. +	 */ +	write_fast_connectable(req, false); +  	if (test_bit(HCI_CONNECTABLE, &hdev->dev_flags))  		scan |= SCAN_PAGE;  	if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags))  		scan |= SCAN_INQUIRY; -	if (!scan) -		return 0; - -	return hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); +	if (scan) +		hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan);  } -int mgmt_powered(struct hci_dev *hdev, u8 powered) +static void powered_complete(struct hci_dev *hdev, u8 status)  {  	struct cmd_lookup match = { NULL, hdev }; -	int err; -	if (!test_bit(HCI_MGMT, &hdev->dev_flags)) -		return 0; +	BT_DBG("status 0x%02x", status); + +	hci_dev_lock(hdev);  	mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); -	if (powered) { -		u8 link_sec; +	new_settings(hdev, match.sk); -		if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && -		    !lmp_host_ssp_capable(hdev)) { -			u8 ssp = 1; +	hci_dev_unlock(hdev); -			hci_send_cmd(hdev, HCI_OP_WRITE_SSP_MODE, 1, &ssp); -		} +	if (match.sk) +		sock_put(match.sk); +} + +static int powered_update_hci(struct hci_dev *hdev) +{ +	struct hci_request req; +	u8 link_sec; -		if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) { -			struct hci_cp_write_le_host_supported cp; +	hci_req_init(&req, hdev); -			cp.le = 1; -			cp.simul = lmp_le_br_capable(hdev); +	if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags) && +	    !lmp_host_ssp_capable(hdev)) { +		u8 ssp = 1; -			/* Check first if we already have the right -			 * host state (host features set) -			 */ -			if (cp.le != lmp_host_le_capable(hdev) || -			    cp.simul != lmp_host_le_br_capable(hdev)) -				hci_send_cmd(hdev, -					     HCI_OP_WRITE_LE_HOST_SUPPORTED, -					     sizeof(cp), &cp); -		} +		hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, 1, &ssp); +	} -		link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags); -		if (link_sec 
!= test_bit(HCI_AUTH, &hdev->flags)) -			hci_send_cmd(hdev, HCI_OP_WRITE_AUTH_ENABLE, -				     sizeof(link_sec), &link_sec); +	if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags) && +	    lmp_bredr_capable(hdev)) { +		struct hci_cp_write_le_host_supported cp; -		if (lmp_bredr_capable(hdev)) { -			set_bredr_scan(hdev); -			update_class(hdev); -			update_name(hdev, hdev->dev_name); -			update_eir(hdev); -		} -	} else { -		u8 status = MGMT_STATUS_NOT_POWERED; -		u8 zero_cod[] = { 0, 0, 0 }; +		cp.le = 1; +		cp.simul = lmp_le_br_capable(hdev); -		mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status); +		/* Check first if we already have the right +		 * host state (host features set) +		 */ +		if (cp.le != lmp_host_le_capable(hdev) || +		    cp.simul != lmp_host_le_br_capable(hdev)) +			hci_req_add(&req, HCI_OP_WRITE_LE_HOST_SUPPORTED, +				    sizeof(cp), &cp); +	} + +	link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags); +	if (link_sec != test_bit(HCI_AUTH, &hdev->flags)) +		hci_req_add(&req, HCI_OP_WRITE_AUTH_ENABLE, +			    sizeof(link_sec), &link_sec); -		if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) -			mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, -				   zero_cod, sizeof(zero_cod), NULL); +	if (lmp_bredr_capable(hdev)) { +		set_bredr_scan(&req); +		update_class(&req); +		update_name(&req); +		update_eir(&req);  	} +	return hci_req_run(&req, powered_complete); +} + +int mgmt_powered(struct hci_dev *hdev, u8 powered) +{ +	struct cmd_lookup match = { NULL, hdev }; +	u8 status_not_powered = MGMT_STATUS_NOT_POWERED; +	u8 zero_cod[] = { 0, 0, 0 }; +	int err; + +	if (!test_bit(HCI_MGMT, &hdev->dev_flags)) +		return 0; + +	if (powered) { +		if (powered_update_hci(hdev) == 0) +			return 0; + +		mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, +				     &match); +		goto new_settings; +	} + +	mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); +	mgmt_pending_foreach(0, hdev, cmd_status_rsp, &status_not_powered); + +	if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) +		mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, +			   zero_cod, sizeof(zero_cod), NULL); + +new_settings:  	err = new_settings(hdev, match.sk);  	if (match.sk) @@ -3152,7 +3446,7 @@ int mgmt_discoverable(struct hci_dev *hdev, u8 discoverable)  int mgmt_connectable(struct hci_dev *hdev, u8 connectable)  { -	struct cmd_lookup match = { NULL, hdev }; +	struct pending_cmd *cmd;  	bool changed = false;  	int err = 0; @@ -3164,14 +3458,10 @@ int mgmt_connectable(struct hci_dev *hdev, u8 connectable)  			changed = true;  	} -	mgmt_pending_foreach(MGMT_OP_SET_CONNECTABLE, hdev, settings_rsp, -			     &match); +	cmd = mgmt_pending_find(MGMT_OP_SET_CONNECTABLE, hdev);  	if (changed) -		err = new_settings(hdev, match.sk); - -	if (match.sk) -		sock_put(match.sk); +		err = new_settings(hdev, cmd ? 
cmd->sk : NULL);  	return err;  } @@ -3555,23 +3845,25 @@ int mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)  	return err;  } -static int clear_eir(struct hci_dev *hdev) +static void clear_eir(struct hci_request *req)  { +	struct hci_dev *hdev = req->hdev;  	struct hci_cp_write_eir cp;  	if (!lmp_ext_inq_capable(hdev)) -		return 0; +		return;  	memset(hdev->eir, 0, sizeof(hdev->eir));  	memset(&cp, 0, sizeof(cp)); -	return hci_send_cmd(hdev, HCI_OP_WRITE_EIR, sizeof(cp), &cp); +	hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp);  }  int mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)  {  	struct cmd_lookup match = { NULL, hdev }; +	struct hci_request req;  	bool changed = false;  	int err = 0; @@ -3604,29 +3896,26 @@ int mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status)  	if (match.sk)  		sock_put(match.sk); +	hci_req_init(&req, hdev); +  	if (test_bit(HCI_SSP_ENABLED, &hdev->dev_flags)) -		update_eir(hdev); +		update_eir(&req);  	else -		clear_eir(hdev); +		clear_eir(&req); + +	hci_req_run(&req, NULL);  	return err;  } -static void class_rsp(struct pending_cmd *cmd, void *data) +static void sk_lookup(struct pending_cmd *cmd, void *data)  {  	struct cmd_lookup *match = data; -	cmd_complete(cmd->sk, cmd->index, cmd->opcode, match->mgmt_status, -		     match->hdev->dev_class, 3); - -	list_del(&cmd->list); -  	if (match->sk == NULL) {  		match->sk = cmd->sk;  		sock_hold(match->sk);  	} - -	mgmt_pending_free(cmd);  }  int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, @@ -3635,11 +3924,9 @@ int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,  	struct cmd_lookup match = { NULL, hdev, mgmt_status(status) };  	int err = 0; -	clear_bit(HCI_PENDING_CLASS, &hdev->dev_flags); - -	mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, class_rsp, &match); -	mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, class_rsp, &match); -	mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, class_rsp, &match); +	mgmt_pending_foreach(MGMT_OP_SET_DEV_CLASS, hdev, sk_lookup, &match); +	mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match); +	mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match);  	if (!status)  		err = mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, @@ -3653,55 +3940,29 @@ int mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class,  int mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status)  { -	struct pending_cmd *cmd;  	struct mgmt_cp_set_local_name ev; -	bool changed = false; -	int err = 0; +	struct pending_cmd *cmd; -	if (memcmp(name, hdev->dev_name, sizeof(hdev->dev_name)) != 0) { -		memcpy(hdev->dev_name, name, sizeof(hdev->dev_name)); -		changed = true; -	} +	if (status) +		return 0;  	memset(&ev, 0, sizeof(ev));  	memcpy(ev.name, name, HCI_MAX_NAME_LENGTH);  	memcpy(ev.short_name, hdev->short_name, HCI_MAX_SHORT_NAME_LENGTH);  	cmd = mgmt_pending_find(MGMT_OP_SET_LOCAL_NAME, hdev); -	if (!cmd) -		goto send_event; - -	/* Always assume that either the short or the complete name has -	 * changed if there was a pending mgmt command */ -	changed = true; +	if (!cmd) { +		memcpy(hdev->dev_name, name, sizeof(hdev->dev_name)); -	if (status) { -		err = cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, -				 mgmt_status(status)); -		goto failed; +		/* If this is a HCI command related to powering on the +		 * HCI dev don't send any mgmt signals. 
+		 */ +		if (mgmt_pending_find(MGMT_OP_SET_POWERED, hdev)) +			return 0;  	} -	err = cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, &ev, -			   sizeof(ev)); -	if (err < 0) -		goto failed; - -send_event: -	if (changed) -		err = mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, -				 sizeof(ev), cmd ? cmd->sk : NULL); - -	/* EIR is taken care of separately when powering on the -	 * adapter so only update them here if this is a name change -	 * unrelated to power on. -	 */ -	if (!test_bit(HCI_INIT, &hdev->flags)) -		update_eir(hdev); - -failed: -	if (cmd) -		mgmt_pending_remove(cmd); -	return err; +	return mgmt_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), +			  cmd ? cmd->sk : NULL);  }  int mgmt_read_local_oob_data_reply_complete(struct hci_dev *hdev, u8 *hash, diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index b23e2713fea..ca957d34b0c 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -69,7 +69,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,  							u8 sec_level,  							int *err);  static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst); -static void rfcomm_session_del(struct rfcomm_session *s); +static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s);  /* ---- RFCOMM frame parsing macros ---- */  #define __get_dlci(b)     ((b & 0xfc) >> 2) @@ -108,12 +108,6 @@ static void rfcomm_schedule(void)  	wake_up_process(rfcomm_thread);  } -static void rfcomm_session_put(struct rfcomm_session *s) -{ -	if (atomic_dec_and_test(&s->refcnt)) -		rfcomm_session_del(s); -} -  /* ---- RFCOMM FCS computation ---- */  /* reversed, 8-bit, poly=0x07 */ @@ -249,16 +243,14 @@ static void rfcomm_session_set_timer(struct rfcomm_session *s, long timeout)  {  	BT_DBG("session %p state %ld timeout %ld", s, s->state, timeout); -	if (!mod_timer(&s->timer, jiffies + timeout)) -		rfcomm_session_hold(s); +	mod_timer(&s->timer, jiffies + timeout);  }  static void rfcomm_session_clear_timer(struct rfcomm_session *s)  {  	BT_DBG("session %p state %ld", s, s->state); -	if (del_timer(&s->timer)) -		rfcomm_session_put(s); +	del_timer_sync(&s->timer);  }  /* ---- RFCOMM DLCs ---- */ @@ -336,8 +328,6 @@ static void rfcomm_dlc_link(struct rfcomm_session *s, struct rfcomm_dlc *d)  {  	BT_DBG("dlc %p session %p", d, s); -	rfcomm_session_hold(s); -  	rfcomm_session_clear_timer(s);  	rfcomm_dlc_hold(d);  	list_add(&d->list, &s->dlcs); @@ -356,8 +346,6 @@ static void rfcomm_dlc_unlink(struct rfcomm_dlc *d)  	if (list_empty(&s->dlcs))  		rfcomm_session_set_timer(s, RFCOMM_IDLE_TIMEOUT); - -	rfcomm_session_put(s);  }  static struct rfcomm_dlc *rfcomm_dlc_get(struct rfcomm_session *s, u8 dlci) @@ -493,12 +481,34 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)  int rfcomm_dlc_close(struct rfcomm_dlc *d, int err)  { -	int r; +	int r = 0; +	struct rfcomm_dlc *d_list; +	struct rfcomm_session *s, *s_list; + +	BT_DBG("dlc %p state %ld dlci %d err %d", d, d->state, d->dlci, err);  	rfcomm_lock(); -	r = __rfcomm_dlc_close(d, err); +	s = d->session; +	if (!s) +		goto no_session; + +	/* after waiting on the mutex check the session still exists +	 * then check the dlc still exists +	 */ +	list_for_each_entry(s_list, &session_list, list) { +		if (s_list == s) { +			list_for_each_entry(d_list, &s->dlcs, list) { +				if (d_list == d) { +					r = __rfcomm_dlc_close(d, err); +					break; +				} +			} +			break; +		} +	} +no_session:  	rfcomm_unlock();  	return r;  } @@ -609,7 +619,7 @@ static 
struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state)  	return s;  } -static void rfcomm_session_del(struct rfcomm_session *s) +static struct rfcomm_session *rfcomm_session_del(struct rfcomm_session *s)  {  	int state = s->state; @@ -617,15 +627,14 @@ static void rfcomm_session_del(struct rfcomm_session *s)  	list_del(&s->list); -	if (state == BT_CONNECTED) -		rfcomm_send_disc(s, 0); -  	rfcomm_session_clear_timer(s);  	sock_release(s->sock);  	kfree(s);  	if (state != BT_LISTEN)  		module_put(THIS_MODULE); + +	return NULL;  }  static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst) @@ -644,17 +653,16 @@ static struct rfcomm_session *rfcomm_session_get(bdaddr_t *src, bdaddr_t *dst)  	return NULL;  } -static void rfcomm_session_close(struct rfcomm_session *s, int err) +static struct rfcomm_session *rfcomm_session_close(struct rfcomm_session *s, +						   int err)  {  	struct rfcomm_dlc *d;  	struct list_head *p, *n; -	BT_DBG("session %p state %ld err %d", s, s->state, err); - -	rfcomm_session_hold(s); -  	s->state = BT_CLOSED; +	BT_DBG("session %p state %ld err %d", s, s->state, err); +  	/* Close all dlcs */  	list_for_each_safe(p, n, &s->dlcs) {  		d = list_entry(p, struct rfcomm_dlc, list); @@ -663,7 +671,7 @@ static void rfcomm_session_close(struct rfcomm_session *s, int err)  	}  	rfcomm_session_clear_timer(s); -	rfcomm_session_put(s); +	return rfcomm_session_del(s);  }  static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, @@ -715,8 +723,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src,  	if (*err == 0 || *err == -EINPROGRESS)  		return s; -	rfcomm_session_del(s); -	return NULL; +	return rfcomm_session_del(s);  failed:  	sock_release(sock); @@ -1105,7 +1112,7 @@ static void rfcomm_make_uih(struct sk_buff *skb, u8 addr)  }  /* ---- RFCOMM frame reception ---- */ -static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) +static struct rfcomm_session *rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)  {  	BT_DBG("session %p state %ld dlci %d", s, s->state, dlci); @@ -1114,7 +1121,7 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)  		struct rfcomm_dlc *d = rfcomm_dlc_get(s, dlci);  		if (!d) {  			rfcomm_send_dm(s, dlci); -			return 0; +			return s;  		}  		switch (d->state) { @@ -1150,25 +1157,14 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci)  			break;  		case BT_DISCONN: -			/* rfcomm_session_put is called later so don't do -			 * anything here otherwise we will mess up the session -			 * reference counter: -			 * -			 * (a) when we are the initiator dlc_unlink will drive -			 * the reference counter to 0 (there is no initial put -			 * after session_add) -			 * -			 * (b) when we are not the initiator rfcomm_rx_process -			 * will explicitly call put to balance the initial hold -			 * done after session add. 
-			 */ +			s = rfcomm_session_close(s, ECONNRESET);  			break;  		}  	} -	return 0; +	return s;  } -static int rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci) +static struct rfcomm_session *rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci)  {  	int err = 0; @@ -1192,13 +1188,13 @@ static int rfcomm_recv_dm(struct rfcomm_session *s, u8 dlci)  		else  			err = ECONNRESET; -		s->state = BT_CLOSED; -		rfcomm_session_close(s, err); +		s = rfcomm_session_close(s, err);  	} -	return 0; +	return s;  } -static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci) +static struct rfcomm_session *rfcomm_recv_disc(struct rfcomm_session *s, +					       u8 dlci)  {  	int err = 0; @@ -1227,11 +1223,9 @@ static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci)  		else  			err = ECONNRESET; -		s->state = BT_CLOSED; -		rfcomm_session_close(s, err); +		s = rfcomm_session_close(s, err);  	} - -	return 0; +	return s;  }  void rfcomm_dlc_accept(struct rfcomm_dlc *d) @@ -1652,11 +1646,18 @@ drop:  	return 0;  } -static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb) +static struct rfcomm_session *rfcomm_recv_frame(struct rfcomm_session *s, +						struct sk_buff *skb)  {  	struct rfcomm_hdr *hdr = (void *) skb->data;  	u8 type, dlci, fcs; +	if (!s) { +		/* no session, so free socket data */ +		kfree_skb(skb); +		return s; +	} +  	dlci = __get_dlci(hdr->addr);  	type = __get_type(hdr->ctrl); @@ -1667,7 +1668,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)  	if (__check_fcs(skb->data, type, fcs)) {  		BT_ERR("bad checksum in packet");  		kfree_skb(skb); -		return -EILSEQ; +		return s;  	}  	if (__test_ea(hdr->len)) @@ -1683,22 +1684,23 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)  	case RFCOMM_DISC:  		if (__test_pf(hdr->ctrl)) -			rfcomm_recv_disc(s, dlci); +			s = rfcomm_recv_disc(s, dlci);  		break;  	case RFCOMM_UA:  		if (__test_pf(hdr->ctrl)) -			rfcomm_recv_ua(s, dlci); +			s = rfcomm_recv_ua(s, dlci);  		break;  	case RFCOMM_DM: -		rfcomm_recv_dm(s, dlci); +		s = rfcomm_recv_dm(s, dlci);  		break;  	case RFCOMM_UIH: -		if (dlci) -			return rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb); - +		if (dlci) { +			rfcomm_recv_data(s, dlci, __test_pf(hdr->ctrl), skb); +			return s; +		}  		rfcomm_recv_mcc(s, skb);  		break; @@ -1707,7 +1709,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)  		break;  	}  	kfree_skb(skb); -	return 0; +	return s;  }  /* ---- Connection and data processing ---- */ @@ -1844,7 +1846,7 @@ static void rfcomm_process_dlcs(struct rfcomm_session *s)  	}  } -static void rfcomm_process_rx(struct rfcomm_session *s) +static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s)  {  	struct socket *sock = s->sock;  	struct sock *sk = sock->sk; @@ -1856,17 +1858,15 @@ static void rfcomm_process_rx(struct rfcomm_session *s)  	while ((skb = skb_dequeue(&sk->sk_receive_queue))) {  		skb_orphan(skb);  		if (!skb_linearize(skb)) -			rfcomm_recv_frame(s, skb); +			s = rfcomm_recv_frame(s, skb);  		else  			kfree_skb(skb);  	} -	if (sk->sk_state == BT_CLOSED) { -		if (!s->initiator) -			rfcomm_session_put(s); +	if (s && (sk->sk_state == BT_CLOSED)) +		s = rfcomm_session_close(s, sk->sk_err); -		rfcomm_session_close(s, sk->sk_err); -	} +	return s;  }  static void rfcomm_accept_connection(struct rfcomm_session *s) @@ -1891,8 +1891,6 @@ static void rfcomm_accept_connection(struct rfcomm_session *s)  	s = rfcomm_session_add(nsock, BT_OPEN);  	if (s) { -		
rfcomm_session_hold(s); -  		/* We should adjust MTU on incoming sessions.  		 * L2CAP MTU minus UIH header and FCS. */  		s->mtu = min(l2cap_pi(nsock->sk)->chan->omtu, @@ -1903,7 +1901,7 @@ static void rfcomm_accept_connection(struct rfcomm_session *s)  		sock_release(nsock);  } -static void rfcomm_check_connection(struct rfcomm_session *s) +static struct rfcomm_session *rfcomm_check_connection(struct rfcomm_session *s)  {  	struct sock *sk = s->sock->sk; @@ -1921,10 +1919,10 @@ static void rfcomm_check_connection(struct rfcomm_session *s)  		break;  	case BT_CLOSED: -		s->state = BT_CLOSED; -		rfcomm_session_close(s, sk->sk_err); +		s = rfcomm_session_close(s, sk->sk_err);  		break;  	} +	return s;  }  static void rfcomm_process_sessions(void) @@ -1940,7 +1938,6 @@ static void rfcomm_process_sessions(void)  		if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) {  			s->state = BT_DISCONN;  			rfcomm_send_disc(s, 0); -			rfcomm_session_put(s);  			continue;  		} @@ -1949,21 +1946,18 @@ static void rfcomm_process_sessions(void)  			continue;  		} -		rfcomm_session_hold(s); -  		switch (s->state) {  		case BT_BOUND: -			rfcomm_check_connection(s); +			s = rfcomm_check_connection(s);  			break;  		default: -			rfcomm_process_rx(s); +			s = rfcomm_process_rx(s);  			break;  		} -		rfcomm_process_dlcs(s); - -		rfcomm_session_put(s); +		if (s) +			rfcomm_process_dlcs(s);  	}  	rfcomm_unlock(); @@ -2010,10 +2004,11 @@ static int rfcomm_add_listener(bdaddr_t *ba)  	/* Add listening session */  	s = rfcomm_session_add(sock, BT_LISTEN); -	if (!s) +	if (!s) { +		err = -ENOMEM;  		goto failed; +	} -	rfcomm_session_hold(s);  	return 0;  failed:  	sock_release(sock); @@ -2071,8 +2066,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)  	if (!s)  		return; -	rfcomm_session_hold(s); -  	list_for_each_safe(p, n, &s->dlcs) {  		d = list_entry(p, struct rfcomm_dlc, list); @@ -2104,8 +2097,6 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)  			set_bit(RFCOMM_AUTH_REJECT, &d->flags);  	} -	rfcomm_session_put(s); -  	rfcomm_schedule();  } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 7c9224bcce1..a8638b58c4b 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1066,8 +1066,7 @@ void __exit rfcomm_cleanup_sockets(void)  	debugfs_remove(rfcomm_sock_debugfs); -	if (bt_sock_unregister(BTPROTO_RFCOMM) < 0) -		BT_ERR("RFCOMM socket layer unregistration failed"); +	bt_sock_unregister(BTPROTO_RFCOMM);  	proto_unregister(&rfcomm_proto);  } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index fb6192c9812..373d81e6e8f 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -83,7 +83,7 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon)  	if (conn)  		return conn; -	conn = kzalloc(sizeof(struct sco_conn), GFP_ATOMIC); +	conn = kzalloc(sizeof(struct sco_conn), GFP_KERNEL);  	if (!conn)  		return NULL; @@ -185,7 +185,7 @@ static int sco_connect(struct sock *sk)  	conn = sco_conn_add(hcon);  	if (!conn) { -		hci_conn_put(hcon); +		hci_conn_drop(hcon);  		err = -ENOMEM;  		goto done;  	} @@ -353,7 +353,7 @@ static void __sco_sock_close(struct sock *sk)  		if (sco_pi(sk)->conn->hcon) {  			sk->sk_state = BT_DISCONN;  			sco_sock_set_timer(sk, SCO_DISCONN_TIMEOUT); -			hci_conn_put(sco_pi(sk)->conn->hcon); +			hci_conn_drop(sco_pi(sk)->conn->hcon);  			sco_pi(sk)->conn->hcon = NULL;  		} else  			sco_chan_del(sk, ECONNRESET); @@ -481,8 +481,7 @@ static int sco_sock_connect(struct 
socket *sock, struct sockaddr *addr, int alen  {  	struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;  	struct sock *sk = sock->sk; -	int err = 0; - +	int err;  	BT_DBG("sk %p", sk); @@ -653,6 +652,42 @@ static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock,  	return err;  } +static void sco_conn_defer_accept(struct hci_conn *conn, int mask) +{ +	struct hci_dev *hdev = conn->hdev; + +	BT_DBG("conn %p", conn); + +	conn->state = BT_CONFIG; + +	if (!lmp_esco_capable(hdev)) { +		struct hci_cp_accept_conn_req cp; + +		bacpy(&cp.bdaddr, &conn->dst); + +		if (lmp_rswitch_capable(hdev) && (mask & HCI_LM_MASTER)) +			cp.role = 0x00; /* Become master */ +		else +			cp.role = 0x01; /* Remain slave */ + +		hci_send_cmd(hdev, HCI_OP_ACCEPT_CONN_REQ, sizeof(cp), &cp); +	} else { +		struct hci_cp_accept_sync_conn_req cp; + +		bacpy(&cp.bdaddr, &conn->dst); +		cp.pkt_type = cpu_to_le16(conn->pkt_type); + +		cp.tx_bandwidth   = __constant_cpu_to_le32(0x00001f40); +		cp.rx_bandwidth   = __constant_cpu_to_le32(0x00001f40); +		cp.max_latency    = __constant_cpu_to_le16(0xffff); +		cp.content_format = cpu_to_le16(hdev->voice_setting); +		cp.retrans_effort = 0xff; + +		hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ, +			     sizeof(cp), &cp); +	} +} +  static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,  			    struct msghdr *msg, size_t len, int flags)  { @@ -663,7 +698,7 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,  	if (sk->sk_state == BT_CONNECT2 &&  	    test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { -		hci_conn_accept(pi->conn->hcon, 0); +		sco_conn_defer_accept(pi->conn->hcon, 0);  		sk->sk_state = BT_CONFIG;  		msg->msg_namelen = 0; @@ -883,7 +918,7 @@ static void sco_chan_del(struct sock *sk, int err)  		sco_conn_unlock(conn);  		if (conn->hcon) -			hci_conn_put(conn->hcon); +			hci_conn_drop(conn->hcon);  	}  	sk->sk_state = BT_CLOSED; @@ -1113,8 +1148,7 @@ void __exit sco_exit(void)  	debugfs_remove(sco_debugfs); -	if (bt_sock_unregister(BTPROTO_SCO) < 0) -		BT_ERR("SCO socket unregistration failed"); +	bt_sock_unregister(BTPROTO_SCO);  	proto_unregister(&sco_proto);  } diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 5abefb12891..b2296d3857a 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -522,7 +522,7 @@ void smp_chan_destroy(struct l2cap_conn *conn)  	kfree(smp);  	conn->smp_chan = NULL;  	conn->hcon->smp_conn = NULL; -	hci_conn_put(conn->hcon); +	hci_conn_drop(conn->hcon);  }  int smp_user_confirm_reply(struct hci_conn *hcon, u16 mgmt_op, __le32 passkey) diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 314c73ed418..967312803e4 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -348,10 +348,10 @@ void br_dev_setup(struct net_device *dev)  	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |  			NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX | -			NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_TX; +			NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_CTAG_TX;  	dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |  			   NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | -			   NETIF_F_HW_VLAN_TX; +			   NETIF_F_HW_VLAN_CTAG_TX;  	br->dev = dev;  	spin_lock_init(&br->lock); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index bab338e6270..ebfa4443c69 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -161,9 +161,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)  	if (!pv)  		return; -	for (vid = find_next_bit(pv->vlan_bitmap, 
BR_VLAN_BITMAP_LEN, vid); -	     vid < BR_VLAN_BITMAP_LEN; -	     vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN, vid+1)) { +	for_each_set_bit_from(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {  		f = __br_fdb_get(br, br->dev->dev_addr, vid);  		if (f && f->is_local && !f->dst)  			fdb_delete(br, f); @@ -617,6 +615,7 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,  	struct net_bridge *br = source->br;  	struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];  	struct net_bridge_fdb_entry *fdb; +	bool modified = false;  	fdb = fdb_find(head, addr, vid);  	if (fdb == NULL) { @@ -626,10 +625,16 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,  		fdb = fdb_create(head, source, addr, vid);  		if (!fdb)  			return -ENOMEM; -		fdb_notify(br, fdb, RTM_NEWNEIGH); + +		modified = true;  	} else {  		if (flags & NLM_F_EXCL)  			return -EEXIST; + +		if (fdb->dst != source) { +			fdb->dst = source; +			modified = true; +		}  	}  	if (fdb_to_nud(fdb) != state) { @@ -641,7 +646,12 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,  		} else  			fdb->is_local = fdb->is_static = 0; -		fdb->updated = fdb->used = jiffies; +		modified = true; +	} + +	fdb->used = jiffies; +	if (modified) { +		fdb->updated = jiffies;  		fdb_notify(br, fdb, RTM_NEWNEIGH);  	} @@ -724,13 +734,10 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],  		 * specify a VLAN.  To be nice, add/update entry for every  		 * vlan on this port.  		 */ -		vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); -		while (vid < BR_VLAN_BITMAP_LEN) { +		for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {  			err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);  			if (err)  				goto out; -			vid = find_next_bit(pv->vlan_bitmap, -					    BR_VLAN_BITMAP_LEN, vid+1);  		}  	} @@ -815,11 +822,8 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],  		 * vlan on this port.  		 
*/  		err = -ENOENT; -		vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); -		while (vid < BR_VLAN_BITMAP_LEN) { +		for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {  			err &= __br_fdb_delete(p, addr, vid); -			vid = find_next_bit(pv->vlan_bitmap, -					    BR_VLAN_BITMAP_LEN, vid+1);  		}  	}  out: diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 459dab22b3f..4cdba60926f 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -149,7 +149,6 @@ static void del_nbp(struct net_bridge_port *p)  	dev->priv_flags &= ~IFF_BRIDGE_PORT;  	netdev_rx_handler_unregister(dev); -	synchronize_net();  	netdev_upper_dev_unlink(dev, br->dev); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index ee79f3f2038..19942e38fd2 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -382,7 +382,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,  	return ret;  } -static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct br_mdb_entry *entry; @@ -458,7 +458,7 @@ unlock:  	return err;  } -static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net_device *dev;  	struct br_mdb_entry *entry; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 923fbeaf7af..81f2389f78e 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1369,7 +1369,7 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,  		return -EINVAL;  	if (iph->protocol != IPPROTO_IGMP) { -		if ((iph->daddr & IGMP_LOCAL_GROUP_MASK) != IGMP_LOCAL_GROUP) +		if (!ipv4_is_local_multicast(iph->daddr))  			BR_INPUT_SKB_CB(skb)->mrouters_only = 1;  		return 0;  	} diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index fe43bc7b063..1ed75bfd8d1 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -535,7 +535,8 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct  	if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb))  		return br; -	vlan = __vlan_find_dev_deep(br, vlan_tx_tag_get(skb) & VLAN_VID_MASK); +	vlan = __vlan_find_dev_deep(br, skb->vlan_proto, +				    vlan_tx_tag_get(skb) & VLAN_VID_MASK);  	return vlan ? vlan : br;  } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 299fc5f40a2..8e3abf56479 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -136,10 +136,7 @@ static int br_fill_ifinfo(struct sk_buff *skb,  			goto nla_put_failure;  		pvid = br_get_pvid(pv); -		for (vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN); -		     vid < BR_VLAN_BITMAP_LEN; -		     vid = find_next_bit(pv->vlan_bitmap, -					 BR_VLAN_BITMAP_LEN, vid+1)) { +		for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {  			vinfo.vid = vid;  			vinfo.flags = 0;  			if (vid == pvid) @@ -355,17 +352,14 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])  /* Change state and parameters on port. 
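The bridge hunks above replace three open-coded find_first_bit()/find_next_bit() loops with the for_each_set_bit() and for_each_set_bit_from() helpers. The iteration is identical; the helpers only hide the cursor bookkeeping. A before/after sketch over the same VLAN bitmap, where use_vid() is a hypothetical stand-in for the per-VLAN work:

	/* open-coded form: the vid + 1 continuation is easy to get wrong */
	vid = find_first_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN);
	while (vid < BR_VLAN_BITMAP_LEN) {
		use_vid(vid);
		vid = find_next_bit(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN,
				    vid + 1);
	}

	/* helper form: same visit order, no manual bookkeeping */
	for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)
		use_vid(vid);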
 */
 int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
 {
-	struct ifinfomsg *ifm;
 	struct nlattr *protinfo;
 	struct nlattr *afspec;
 	struct net_bridge_port *p;
 	struct nlattr *tb[IFLA_BRPORT_MAX + 1];
-	int err;
-
-	ifm = nlmsg_data(nlh);
+	int err = 0;
 
-	protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO);
-	afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC);
+	protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO);
+	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
 
 	if (!protinfo && !afspec)
 		return 0;
@@ -373,7 +367,7 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh)
 	/* We want to accept dev as bridge itself if the AF_SPEC
 	 * is set to see if someone is setting vlan info on the bridge
 	 */
-	if (!p && ((dev->priv_flags & IFF_EBRIDGE) && !afspec))
+	if (!p && !afspec)
 		return -EINVAL;
 
 	if (p && protinfo) {
@@ -414,14 +408,11 @@ out:
 /* Delete port information */
 int br_dellink(struct net_device *dev, struct nlmsghdr *nlh)
 {
-	struct ifinfomsg *ifm;
 	struct nlattr *afspec;
 	struct net_bridge_port *p;
 	int err;
 
-	ifm = nlmsg_data(nlh);
-
-	afspec = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_AF_SPEC);
+	afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
 
 	if (!afspec)
 		return 0;
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index b01849a7431..1c0a50f1322 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -225,7 +225,14 @@ static void br_record_config_timeout_values(struct net_bridge *br,
 /* called under bridge lock */
 void br_transmit_tcn(struct net_bridge *br)
 {
-	br_send_tcn_bpdu(br_get_port(br, br->root_port));
+	struct net_bridge_port *p;
+
+	p = br_get_port(br, br->root_port);
+	if (p)
+		br_send_tcn_bpdu(p);
+	else
+		br_notice(br, "root port %u not found for topology notice\n",
+			  br->root_port);
 }
 
 /* called under bridge lock */
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 93dde75923f..bd58b45f5f9 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -34,6 +34,7 @@ static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags)
 
 static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
 {
+	const struct net_device_ops *ops;
 	struct net_bridge_port *p = NULL;
 	struct net_bridge *br;
 	struct net_device *dev;
@@ -53,15 +54,17 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
 			br = v->parent.br;
 			dev = br->dev;
 		}
+		ops = dev->netdev_ops;
 
-		if (p && (dev->features & NETIF_F_HW_VLAN_FILTER)) {
+		if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) {
 			/* Add VLAN to the device filter if it is supported.
 			 * Strictly speaking, this is not necessary now, since
 			 * devices are made promiscuous by the bridge, but if
 			 * that ever changes this code will allow tagged
 			 * traffic to enter the bridge.
*/ -			err = dev->netdev_ops->ndo_vlan_rx_add_vid(dev, vid); +			err = ops->ndo_vlan_rx_add_vid(dev, htons(ETH_P_8021Q), +						       vid);  			if (err)  				return err;  		} @@ -82,8 +85,8 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)  	return 0;  out_filt: -	if (p && (dev->features & NETIF_F_HW_VLAN_FILTER)) -		dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid); +	if (p && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) +		ops->ndo_vlan_rx_kill_vid(dev, htons(ETH_P_8021Q), vid);  	return err;  } @@ -97,9 +100,10 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid)  	if (v->port_idx && vid) {  		struct net_device *dev = v->parent.port->dev; +		const struct net_device_ops *ops = dev->netdev_ops; -		if (dev->features & NETIF_F_HW_VLAN_FILTER) -			dev->netdev_ops->ndo_vlan_rx_kill_vid(dev, vid); +		if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) +			ops->ndo_vlan_rx_kill_vid(dev, htons(ETH_P_8021Q), vid);  	}  	clear_bit(vid, v->vlan_bitmap); @@ -171,7 +175,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,  			 * mac header.  			 */  			skb_push(skb, ETH_HLEN); -			skb = __vlan_put_tag(skb, skb->vlan_tci); +			skb = __vlan_put_tag(skb, skb->vlan_proto, skb->vlan_tci);  			if (!skb)  				goto out;  			/* put skb->data back to where it was */ @@ -213,7 +217,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,  		/* PVID is set on this port.  Any untagged ingress  		 * frame is considered to belong to this vlan.  		 */ -		__vlan_hwaccel_put_tag(skb, pvid); +		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);  		return true;  	} diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 92de5e5f9db..9878eb8204c 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -78,6 +78,11 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum,     const char *prefix)  {  	unsigned int bitmask; +	struct net *net = dev_net(in ? in : out); + +	/* FIXME: Disabled from containers until syslog ns is supported */ +	if (!net_eq(net, &init_net)) +		return;  	spin_lock_bh(&ebt_log_lock);  	printk(KERN_SOH "%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x", @@ -176,17 +181,18 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)  {  	const struct ebt_log_info *info = par->targinfo;  	struct nf_loginfo li; +	struct net *net = dev_net(par->in ? 
par->in : par->out);  	li.type = NF_LOG_TYPE_LOG;  	li.u.log.level = info->loglevel;  	li.u.log.logflags = info->bitmask;  	if (info->bitmask & EBT_LOG_NFLOG) -		nf_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, -		              par->out, &li, "%s", info->prefix); +		nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb, +			      par->in, par->out, &li, "%s", info->prefix);  	else  		ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, -		               par->out, &li, info->prefix); +			       par->out, &li, info->prefix);  	return EBT_CONTINUE;  } @@ -206,19 +212,47 @@ static struct nf_logger ebt_log_logger __read_mostly = {  	.me		= THIS_MODULE,  }; +static int __net_init ebt_log_net_init(struct net *net) +{ +	nf_log_set(net, NFPROTO_BRIDGE, &ebt_log_logger); +	return 0; +} + +static void __net_exit ebt_log_net_fini(struct net *net) +{ +	nf_log_unset(net, &ebt_log_logger); +} + +static struct pernet_operations ebt_log_net_ops = { +	.init = ebt_log_net_init, +	.exit = ebt_log_net_fini, +}; +  static int __init ebt_log_init(void)  {  	int ret; +	ret = register_pernet_subsys(&ebt_log_net_ops); +	if (ret < 0) +		goto err_pernet; +  	ret = xt_register_target(&ebt_log_tg_reg);  	if (ret < 0) -		return ret; +		goto err_target; +  	nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger); -	return 0; + +	return ret; + +err_target: +	unregister_pernet_subsys(&ebt_log_net_ops); +err_pernet: +	return ret;  }  static void __exit ebt_log_fini(void)  { +	unregister_pernet_subsys(&ebt_log_net_ops);  	nf_log_unregister(&ebt_log_logger);  	xt_unregister_target(&ebt_log_tg_reg);  } diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 5be68bbcc34..59ac7952010 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -24,14 +24,15 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)  {  	const struct ebt_nflog_info *info = par->targinfo;  	struct nf_loginfo li; +	struct net *net = dev_net(par->in ? 
par->in : par->out);  	li.type = NF_LOG_TYPE_ULOG;  	li.u.ulog.copy_len = info->len;  	li.u.ulog.group = info->group;  	li.u.ulog.qthreshold = info->threshold; -	nf_log_packet(PF_BRIDGE, par->hooknum, skb, par->in, par->out, -	              &li, "%s", info->prefix); +	nf_log_packet(net, PF_BRIDGE, par->hooknum, skb, par->in, +		      par->out, &li, "%s", info->prefix);  	return EBT_CONTINUE;  } diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 3bf43f7bb9d..fc1905c5141 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -35,12 +35,13 @@  #include <linux/skbuff.h>  #include <linux/kernel.h>  #include <linux/timer.h> -#include <linux/netlink.h> +#include <net/netlink.h>  #include <linux/netdevice.h>  #include <linux/netfilter/x_tables.h>  #include <linux/netfilter_bridge/ebtables.h>  #include <linux/netfilter_bridge/ebt_ulog.h>  #include <net/netfilter/nf_log.h> +#include <net/netns/generic.h>  #include <net/sock.h>  #include "../br_private.h" @@ -62,13 +63,22 @@ typedef struct {  	spinlock_t lock;		/* the per-queue lock */  } ebt_ulog_buff_t; -static ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS]; -static struct sock *ebtulognl; +static int ebt_ulog_net_id __read_mostly; +struct ebt_ulog_net { +	unsigned int nlgroup[EBT_ULOG_MAXNLGROUPS]; +	ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS]; +	struct sock *ebtulognl; +}; + +static struct ebt_ulog_net *ebt_ulog_pernet(struct net *net) +{ +	return net_generic(net, ebt_ulog_net_id); +}  /* send one ulog_buff_t to userspace */ -static void ulog_send(unsigned int nlgroup) +static void ulog_send(struct ebt_ulog_net *ebt, unsigned int nlgroup)  { -	ebt_ulog_buff_t *ub = &ulog_buffers[nlgroup]; +	ebt_ulog_buff_t *ub = &ebt->ulog_buffers[nlgroup];  	del_timer(&ub->timer); @@ -80,7 +90,7 @@ static void ulog_send(unsigned int nlgroup)  		ub->lastnlh->nlmsg_type = NLMSG_DONE;  	NETLINK_CB(ub->skb).dst_group = nlgroup + 1; -	netlink_broadcast(ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC); +	netlink_broadcast(ebt->ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC);  	ub->qlen = 0;  	ub->skb = NULL; @@ -89,10 +99,15 @@ static void ulog_send(unsigned int nlgroup)  /* timer function to flush queue in flushtimeout time */  static void ulog_timer(unsigned long data)  { -	spin_lock_bh(&ulog_buffers[data].lock); -	if (ulog_buffers[data].skb) -		ulog_send(data); -	spin_unlock_bh(&ulog_buffers[data].lock); +	struct ebt_ulog_net *ebt = container_of((void *)data, +						struct ebt_ulog_net, +						nlgroup[*(unsigned int *)data]); + +	ebt_ulog_buff_t *ub = &ebt->ulog_buffers[*(unsigned int *)data]; +	spin_lock_bh(&ub->lock); +	if (ub->skb) +		ulog_send(ebt, *(unsigned int *)data); +	spin_unlock_bh(&ub->lock);  }  static struct sk_buff *ulog_alloc_skb(unsigned int size) @@ -123,8 +138,10 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,  	ebt_ulog_packet_msg_t *pm;  	size_t size, copy_len;  	struct nlmsghdr *nlh; +	struct net *net = dev_net(in ? 
in : out); +	struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);  	unsigned int group = uloginfo->nlgroup; -	ebt_ulog_buff_t *ub = &ulog_buffers[group]; +	ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group];  	spinlock_t *lock = &ub->lock;  	ktime_t kt; @@ -134,7 +151,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,  	else  		copy_len = uloginfo->cprange; -	size = NLMSG_SPACE(sizeof(*pm) + copy_len); +	size = nlmsg_total_size(sizeof(*pm) + copy_len);  	if (size > nlbufsiz) {  		pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz);  		return; @@ -146,7 +163,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,  		if (!(ub->skb = ulog_alloc_skb(size)))  			goto unlock;  	} else if (size > skb_tailroom(ub->skb)) { -		ulog_send(group); +		ulog_send(ebt, group);  		if (!(ub->skb = ulog_alloc_skb(size)))  			goto unlock; @@ -205,7 +222,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,  	ub->lastnlh = nlh;  	if (ub->qlen >= uloginfo->qthreshold) -		ulog_send(group); +		ulog_send(ebt, group);  	else if (!timer_pending(&ub->timer)) {  		ub->timer.expires = jiffies + flushtimeout * HZ / 100;  		add_timer(&ub->timer); @@ -277,56 +294,89 @@ static struct nf_logger ebt_ulog_logger __read_mostly = {  	.me		= THIS_MODULE,  }; -static int __init ebt_ulog_init(void) +static int __net_init ebt_ulog_net_init(struct net *net)  { -	int ret;  	int i; +	struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); +  	struct netlink_kernel_cfg cfg = {  		.groups	= EBT_ULOG_MAXNLGROUPS,  	}; -	if (nlbufsiz >= 128*1024) { -		pr_warning("Netlink buffer has to be <= 128kB," -			   " please try a smaller nlbufsiz parameter.\n"); -		return -EINVAL; -	} -  	/* initialize ulog_buffers */  	for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { -		setup_timer(&ulog_buffers[i].timer, ulog_timer, i); -		spin_lock_init(&ulog_buffers[i].lock); +		ebt->nlgroup[i] = i; +		setup_timer(&ebt->ulog_buffers[i].timer, ulog_timer, +			    (unsigned long)&ebt->nlgroup[i]); +		spin_lock_init(&ebt->ulog_buffers[i].lock);  	} -	ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); -	if (!ebtulognl) -		ret = -ENOMEM; -	else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0) -		netlink_kernel_release(ebtulognl); - -	if (ret == 0) -		nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); +	ebt->ebtulognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg); +	if (!ebt->ebtulognl) +		return -ENOMEM; -	return ret; +	nf_log_set(net, NFPROTO_BRIDGE, &ebt_ulog_logger); +	return 0;  } -static void __exit ebt_ulog_fini(void) +static void __net_exit ebt_ulog_net_fini(struct net *net)  { -	ebt_ulog_buff_t *ub;  	int i; +	struct ebt_ulog_net *ebt = ebt_ulog_pernet(net); -	nf_log_unregister(&ebt_ulog_logger); -	xt_unregister_target(&ebt_ulog_tg_reg); +	nf_log_unset(net, &ebt_ulog_logger);  	for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { -		ub = &ulog_buffers[i]; +		ebt_ulog_buff_t *ub = &ebt->ulog_buffers[i];  		del_timer(&ub->timer); -		spin_lock_bh(&ub->lock); +  		if (ub->skb) {  			kfree_skb(ub->skb);  			ub->skb = NULL;  		} -		spin_unlock_bh(&ub->lock);  	} -	netlink_kernel_release(ebtulognl); +	netlink_kernel_release(ebt->ebtulognl); +} + +static struct pernet_operations ebt_ulog_net_ops = { +	.init = ebt_ulog_net_init, +	.exit = ebt_ulog_net_fini, +	.id   = &ebt_ulog_net_id, +	.size = sizeof(struct ebt_ulog_net), +}; + +static int __init ebt_ulog_init(void) +{ +	int ret; + +	if (nlbufsiz >= 128*1024) { +		pr_warn("Netlink buffer has to be <= 128kB," +			"please try a 
smaller nlbufsiz parameter.\n"); +		return -EINVAL; +	} + +	ret = register_pernet_subsys(&ebt_ulog_net_ops); +	if (ret) +		goto out_pernet; + +	ret = xt_register_target(&ebt_ulog_tg_reg); +	if (ret) +		goto out_target; + +	nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); + +	return 0; + +out_target: +	unregister_pernet_subsys(&ebt_ulog_net_ops); +out_pernet: +	return ret; +} + +static void __exit ebt_ulog_fini(void) +{ +	nf_log_unregister(&ebt_ulog_logger); +	xt_unregister_target(&ebt_ulog_tg_reg); +	unregister_pernet_subsys(&ebt_ulog_net_ops);  }  module_init(ebt_ulog_init); diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 40d8258bf74..70f656ce0f4 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -64,9 +64,7 @@ static int ebt_broute(struct sk_buff *skb)  static int __net_init broute_net_init(struct net *net)  {  	net->xt.broute_table = ebt_register_table(net, &broute_table); -	if (IS_ERR(net->xt.broute_table)) -		return PTR_ERR(net->xt.broute_table); -	return 0; +	return PTR_RET(net->xt.broute_table);  }  static void __net_exit broute_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 8d493c91a56..3d110c4fc78 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -138,7 +138,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,  		ethproto = h->h_proto;  	if (e->bitmask & EBT_802_3) { -		if (FWINV2(ntohs(ethproto) >= 1536, EBT_IPROTO)) +		if (FWINV2(ntohs(ethproto) >= ETH_P_802_3_MIN, EBT_IPROTO))  			return 1;  	} else if (!(e->bitmask & EBT_NOPROTO) &&  	   FWINV2(e->ethproto != ethproto, EBT_IPROTO)) diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c index 21760f00897..1f9ece1a9c3 100644 --- a/net/caif/caif_dev.c +++ b/net/caif/caif_dev.c @@ -1,7 +1,7 @@  /*   * CAIF Interface registration.   * Copyright (C) ST-Ericsson AB 2010 - * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com + * Author:	Sjur Brendeland   * License terms: GNU General Public License (GPL) version 2   *   * Borrowed heavily from file: pn_dev.c. 
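The ebt_log and ebt_ulog conversions above move the loggers' global state into per-network-namespace storage: the core allocates .size bytes for every namespace and the module reaches its slot through the id registered alongside. A minimal sketch of the idiom with hypothetical example_* names (the ebt_ulog code above is the real instance):

	static int example_net_id __read_mostly;

	struct example_net {
		struct sock *nlsk;	/* one netlink socket per netns */
	};

	static int __net_init example_net_init(struct net *net)
	{
		struct example_net *en = net_generic(net, example_net_id);

		en->nlsk = NULL;	/* set up per-namespace resources here */
		return 0;
	}

	static void __net_exit example_net_exit(struct net *net)
	{
		struct example_net *en = net_generic(net, example_net_id);

		if (en->nlsk)
			netlink_kernel_release(en->nlsk);
	}

	static struct pernet_operations example_net_ops = {
		.init = example_net_init,
		.exit = example_net_exit,
		.id   = &example_net_id,
		.size = sizeof(struct example_net),
	};

	/* at module init/exit:
	 *	err = register_pernet_subsys(&example_net_ops);
	 *	unregister_pernet_subsys(&example_net_ops);
	 */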
Thanks to Remi Denis-Courmont @@ -301,10 +301,11 @@ static void dev_flowctrl(struct net_device *dev, int on)  }  void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev, -			struct cflayer *link_support, int head_room, -			struct cflayer **layer, int (**rcv_func)( -				struct sk_buff *, struct net_device *, -				struct packet_type *, struct net_device *)) +		     struct cflayer *link_support, int head_room, +		     struct cflayer **layer, +		     int (**rcv_func)(struct sk_buff *, struct net_device *, +				      struct packet_type *, +				      struct net_device *))  {  	struct caif_device_entry *caifd;  	enum cfcnfg_phy_preference pref; diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index ff2ff3ce696..05a41c7ec30 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1,6 +1,6 @@  /*   * Copyright (C) ST-Ericsson AB 2010 - * Author:	Sjur Brendeland sjur.brandeland@stericsson.com + * Author:	Sjur Brendeland   * License terms: GNU General Public License (GPL) version 2   */ @@ -197,8 +197,8 @@ static void cfsk_put(struct cflayer *layr)  /* Packet Control Callback function called from CAIF */  static void caif_ctrl_cb(struct cflayer *layr, -				enum caif_ctrlcmd flow, -				int phyid) +			 enum caif_ctrlcmd flow, +			 int phyid)  {  	struct caifsock *cf_sk = container_of(layr, struct caifsock, layer);  	switch (flow) { @@ -274,7 +274,7 @@ static void caif_check_flow_release(struct sock *sk)   * changed locking, address handling and added MSG_TRUNC.   */  static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, -				struct msghdr *m, size_t len, int flags) +			       struct msghdr *m, size_t len, int flags)  {  	struct sock *sk = sock->sk; @@ -348,8 +348,8 @@ static long caif_stream_data_wait(struct sock *sk, long timeo)   * changed locking calls, changed address handling.   */  static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, -				struct msghdr *msg, size_t size, -				int flags) +			       struct msghdr *msg, size_t size, +			       int flags)  {  	struct sock *sk = sock->sk;  	int copied = 0; @@ -464,7 +464,7 @@ out:   * CAIF flow-on and sock_writable.   */  static long caif_wait_for_flow_on(struct caifsock *cf_sk, -				int wait_writeable, long timeo, int *err) +				  int wait_writeable, long timeo, int *err)  {  	struct sock *sk = &cf_sk->sk;  	DEFINE_WAIT(wait); @@ -518,7 +518,7 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk,  /* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */  static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock, -			struct msghdr *msg, size_t len) +			       struct msghdr *msg, size_t len)  {  	struct sock *sk = sock->sk;  	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); @@ -593,7 +593,7 @@ err:   * and other minor adaptations.   
*/  static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, -				struct msghdr *msg, size_t len) +			       struct msghdr *msg, size_t len)  {  	struct sock *sk = sock->sk;  	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); @@ -672,7 +672,7 @@ out_err:  }  static int setsockopt(struct socket *sock, -			int lvl, int opt, char __user *ov, unsigned int ol) +		      int lvl, int opt, char __user *ov, unsigned int ol)  {  	struct sock *sk = sock->sk;  	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); @@ -934,7 +934,7 @@ static int caif_release(struct socket *sock)  /* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */  static unsigned int caif_poll(struct file *file, -				struct socket *sock, poll_table *wait) +			      struct socket *sock, poll_table *wait)  {  	struct sock *sk = sock->sk;  	unsigned int mask; @@ -1024,7 +1024,7 @@ static void caif_sock_destructor(struct sock *sk)  }  static int caif_create(struct net *net, struct socket *sock, int protocol, -			int kern) +		       int kern)  {  	struct sock *sk = NULL;  	struct caifsock *cf_sk = NULL; diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c index ef8ebaa993c..942e00a425f 100644 --- a/net/caif/caif_usb.c +++ b/net/caif/caif_usb.c @@ -1,7 +1,7 @@  /*   * CAIF USB handler   * Copyright (C) ST-Ericsson AB 2011 - * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com + * Author:	Sjur Brendeland   * License terms: GNU General Public License (GPL) version 2   *   */ @@ -75,7 +75,7 @@ static int cfusbl_transmit(struct cflayer *layr, struct cfpkt *pkt)  }  static void cfusbl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, -					int phyid) +			   int phyid)  {  	if (layr->up && layr->up->ctrlcmd)  		layr->up->ctrlcmd(layr->up, ctrl, layr->id); @@ -121,7 +121,7 @@ static struct packet_type caif_usb_type __read_mostly = {  };  static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, -			      void *arg) +				void *arg)  {  	struct net_device *dev = arg;  	struct caif_dev_common common; diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c index f1dbddb95a6..fa39fc29870 100644 --- a/net/caif/cfcnfg.c +++ b/net/caif/cfcnfg.c @@ -1,6 +1,6 @@  /*   * Copyright (C) ST-Ericsson AB 2010 - * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com + * Author:	Sjur Brendeland   * License terms: GNU General Public License (GPL) version 2   */ @@ -61,11 +61,11 @@ struct cfcnfg {  };  static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, -			     enum cfctrl_srv serv, u8 phyid, -			     struct cflayer *adapt_layer); +			      enum cfctrl_srv serv, u8 phyid, +			      struct cflayer *adapt_layer);  static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id);  static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, -			     struct cflayer *adapt_layer); +			      struct cflayer *adapt_layer);  static void cfctrl_resp_func(void);  static void cfctrl_enum_resp(void); @@ -131,7 +131,7 @@ static void cfctrl_resp_func(void)  }  static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo_rcu(struct cfcnfg *cnfg, -							u8 phyid) +						     u8 phyid)  {  	struct cfcnfg_phyinfo *phy; @@ -216,8 +216,8 @@ static const int protohead[CFCTRL_SRV_MASK] = {  static int caif_connect_req_to_link_param(struct cfcnfg *cnfg, -				   struct caif_connect_request *s, -				   struct cfctrl_link_param *l) +					  struct caif_connect_request *s, +					  struct cfctrl_link_param *l)  {  	struct dev_info *dev_info;  	enum cfcnfg_phy_preference pref; @@ -301,8 +301,7 
@@ static int caif_connect_req_to_link_param(struct cfcnfg *cnfg,  int caif_connect_client(struct net *net, struct caif_connect_request *conn_req,  			struct cflayer *adap_layer, int *ifindex, -				int *proto_head, -				int *proto_tail) +			int *proto_head, int *proto_tail)  {  	struct cflayer *frml;  	struct cfcnfg_phyinfo *phy; @@ -364,7 +363,7 @@ unlock:  EXPORT_SYMBOL(caif_connect_client);  static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id, -			     struct cflayer *adapt_layer) +			      struct cflayer *adapt_layer)  {  	if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL)  		adapt_layer->ctrlcmd(adapt_layer, @@ -526,7 +525,7 @@ out_err:  EXPORT_SYMBOL(cfcnfg_add_phy_layer);  int cfcnfg_set_phy_state(struct cfcnfg *cnfg, struct cflayer *phy_layer, -		bool up) +			 bool up)  {  	struct cfcnfg_phyinfo *phyinfo; diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index a376ec1ac0a..2bd4b58f437 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -1,6 +1,6 @@  /*   * Copyright (C) ST-Ericsson AB 2010 - * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com + * Author:	Sjur Brendeland   * License terms: GNU General Public License (GPL) version 2   */ @@ -20,12 +20,12 @@  #ifdef CAIF_NO_LOOP  static int handle_loop(struct cfctrl *ctrl, -			      int cmd, struct cfpkt *pkt){ +		       int cmd, struct cfpkt *pkt){  	return -1;  }  #else  static int handle_loop(struct cfctrl *ctrl, -		int cmd, struct cfpkt *pkt); +		       int cmd, struct cfpkt *pkt);  #endif  static int cfctrl_recv(struct cflayer *layr, struct cfpkt *pkt);  static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, @@ -72,7 +72,7 @@ void cfctrl_remove(struct cflayer *layer)  }  static bool param_eq(const struct cfctrl_link_param *p1, -			const struct cfctrl_link_param *p2) +		     const struct cfctrl_link_param *p2)  {  	bool eq =  	    p1->linktype == p2->linktype && @@ -197,8 +197,8 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)  }  int cfctrl_linkup_request(struct cflayer *layer, -			   struct cfctrl_link_param *param, -			   struct cflayer *user_layer) +			  struct cfctrl_link_param *param, +			  struct cflayer *user_layer)  {  	struct cfctrl *cfctrl = container_obj(layer);  	u32 tmp32; @@ -301,7 +301,7 @@ int cfctrl_linkup_request(struct cflayer *layer,  }  int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid, -				struct cflayer *client) +			struct cflayer *client)  {  	int ret;  	struct cfpkt *pkt; @@ -555,7 +555,7 @@ error:  }  static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, -			int phyid) +			   int phyid)  {  	struct cfctrl *this = container_obj(layr);  	switch (ctrl) { diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c index 2914659eb9b..7aae0b56829 100644 --- a/net/caif/cfdbgl.c +++ b/net/caif/cfdbgl.c @@ -1,6 +1,6 @@  /*   * Copyright (C) ST-Ericsson AB 2010 - * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com + * Author:	Sjur Brendeland   * License terms: GNU General Public License (GPL) version 2   */ diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c index a63f4a5f5af..3bdddb32d55 100644 --- a/net/caif/cfdgml.c +++ b/net/caif/cfdgml.c @@ -1,6 +1,6 @@  /*   * Copyright (C) ST-Ericsson AB 2010 - * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com + * Author:	Sjur Brendeland   * License terms: GNU General Public License (GPL) version 2   */ diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c index 0a7df7ef062..8bc7caa28e6 100644 --- a/net/caif/cffrml.c +++ b/net/caif/cffrml.c @@ -2,7 +2,7 @@   * CAIF Framing Layer.   
*   * Copyright (C) ST-Ericsson AB 2010 - * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com + * Author:	Sjur Brendeland   * License terms: GNU General Public License (GPL) version 2   */ @@ -28,7 +28,7 @@ struct cffrml {  static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt);  static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt);  static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, -				int phyid); +			   int phyid);  static u32 cffrml_rcv_error;  static u32 cffrml_rcv_checsum_error; @@ -167,7 +167,7 @@ static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt)  }  static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, -					int phyid) +			   int phyid)  {  	if (layr->up && layr->up->ctrlcmd)  		layr->up->ctrlcmd(layr->up, ctrl, layr->id); diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c index 94b08612a4d..8c5d6386319 100644 --- a/net/caif/cfmuxl.c +++ b/net/caif/cfmuxl.c @@ -1,6 +1,6 @@  /*   * Copyright (C) ST-Ericsson AB 2010 - * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com + * Author:	Sjur Brendeland   * License terms: GNU General Public License (GPL) version 2   */ @@ -42,7 +42,7 @@ struct cfmuxl {  static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt);  static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt);  static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, -				int phyid); +			   int phyid);  static struct cflayer *get_up(struct cfmuxl *muxl, u16 id);  struct cflayer *cfmuxl_create(void) @@ -244,7 +244,7 @@ static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt)  }  static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, -				int phyid) +			   int phyid)  {  	struct cfmuxl *muxl = container_obj(layr);  	struct cflayer *layer; diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 863dedd91bb..6493351f39c 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -1,6 +1,6 @@  /*   * Copyright (C) ST-Ericsson AB 2010 - * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com + * Author:	Sjur Brendeland   * License terms: GNU General Public License (GPL) version 2   */ @@ -266,8 +266,8 @@ inline u16 cfpkt_getlen(struct cfpkt *pkt)  }  inline u16 cfpkt_iterate(struct cfpkt *pkt, -			    u16 (*iter_func)(u16, void *, u16), -			    u16 data) +			 u16 (*iter_func)(u16, void *, u16), +			 u16 data)  {  	/*  	 * Don't care about the performance hit of linearizing, @@ -307,8 +307,8 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len)  }  struct cfpkt *cfpkt_append(struct cfpkt *dstpkt, -			     struct cfpkt *addpkt, -			     u16 expectlen) +			   struct cfpkt *addpkt, +			   u16 expectlen)  {  	struct sk_buff *dst = pkt_to_skb(dstpkt);  	struct sk_buff *add = pkt_to_skb(addpkt); diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c index 2b563ad0459..61d7617d924 100644 --- a/net/caif/cfrfml.c +++ b/net/caif/cfrfml.c @@ -1,6 +1,6 @@  /*   * Copyright (C) ST-Ericsson AB 2010 - * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com + * Author:	Sjur Brendeland   * License terms: GNU General Public License (GPL) version 2   */ @@ -43,7 +43,7 @@ static void cfrfml_release(struct cflayer *layer)  }  struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info, -					int mtu_size) +			      int mtu_size)  {  	int tmp;  	struct cfrfml *this = kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); @@ -69,7 +69,7 @@ struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,  }  static struct cfpkt 
*rfm_append(struct cfrfml *rfml, char *seghead, -			struct cfpkt *pkt, int *err) +				struct cfpkt *pkt, int *err)  {  	struct cfpkt *tmppkt;  	*err = -EPROTO; diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c index 8e68b97f13e..ce60f06d76d 100644 --- a/net/caif/cfserl.c +++ b/net/caif/cfserl.c @@ -1,6 +1,6 @@  /*   * Copyright (C) ST-Ericsson AB 2010 - * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com + * Author:	Sjur Brendeland   * License terms: GNU General Public License (GPL) version 2   */ @@ -29,7 +29,7 @@ struct cfserl {  static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt);  static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);  static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, -				int phyid); +			   int phyid);  struct cflayer *cfserl_create(int instance, bool use_stx)  { @@ -182,7 +182,7 @@ static int cfserl_transmit(struct cflayer *layer, struct cfpkt *newpkt)  }  static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, -				int phyid) +			   int phyid)  {  	layr->up->ctrlcmd(layr->up, ctrl, phyid);  } diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index ba217e90765..353f793d1b3 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -1,6 +1,6 @@  /*   * Copyright (C) ST-Ericsson AB 2010 - * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com + * Author:	Sjur Brendeland   * License terms: GNU General Public License (GPL) version 2   */ @@ -25,7 +25,7 @@  #define container_obj(layr) container_of(layr, struct cfsrvl, layer)  static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl, -				int phyid) +			    int phyid)  {  	struct cfsrvl *service = container_obj(layr); @@ -158,10 +158,9 @@ static void cfsrvl_release(struct cflayer *layer)  }  void cfsrvl_init(struct cfsrvl *service, -			u8 channel_id, -			struct dev_info *dev_info, -			bool supports_flowctrl -			) +		 u8 channel_id, +		 struct dev_info *dev_info, +		 bool supports_flowctrl)  {  	caif_assert(offsetof(struct cfsrvl, layer) == 0);  	service->open = false; @@ -207,8 +206,8 @@ void caif_free_client(struct cflayer *adap_layer)  EXPORT_SYMBOL(caif_free_client);  void caif_client_register_refcnt(struct cflayer *adapt_layer, -					void (*hold)(struct cflayer *lyr), -					void (*put)(struct cflayer *lyr)) +				 void (*hold)(struct cflayer *lyr), +				 void (*put)(struct cflayer *lyr))  {  	struct cfsrvl *service; diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c index 86d2dadb4b7..1728fa4471c 100644 --- a/net/caif/cfutill.c +++ b/net/caif/cfutill.c @@ -1,6 +1,6 @@  /*   * Copyright (C) ST-Ericsson AB 2010 - * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com + * Author:	Sjur Brendeland   * License terms: GNU General Public License (GPL) version 2   */ diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c index 910ab0661f6..262224581ef 100644 --- a/net/caif/cfveil.c +++ b/net/caif/cfveil.c @@ -1,6 +1,6 @@  /*   * Copyright (C) ST-Ericsson AB 2010 - * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com + * Author:	Sjur Brendeland   * License terms: GNU General Public License (GPL) version 2   */ diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c index a8e2a2d758a..b3b110e8a35 100644 --- a/net/caif/cfvidl.c +++ b/net/caif/cfvidl.c @@ -1,6 +1,6 @@  /*   * Copyright (C) ST-Ericsson AB 2010 - * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com + * Author:	Sjur Brendeland   * License terms: GNU General Public License (GPL) version 2   */ diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 
e597733affb..7344a8fa1bb 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -1,7 +1,7 @@  /*   * Copyright (C) ST-Ericsson AB 2010 - * Authors:	Sjur Brendeland/sjur.brandeland@stericsson.com - *		Daniel Martensson / Daniel.Martensson@stericsson.com + * Authors:	Sjur Brendeland + *		Daniel Martensson   * License terms: GNU General Public License (GPL) version 2   */ @@ -167,7 +167,7 @@ static void chnl_put(struct cflayer *lyr)  }  static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow, -				int phyid) +			     int phyid)  {  	struct chnl_net *priv = container_of(layr, struct chnl_net, chnl);  	pr_debug("NET flowctrl func called flow: %s\n", @@ -443,7 +443,7 @@ nla_put_failure:  }  static void caif_netlink_parms(struct nlattr *data[], -				struct caif_connect_request *conn_req) +			       struct caif_connect_request *conn_req)  {  	if (!data) {  		pr_warn("no params data found\n"); @@ -488,7 +488,7 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev,  }  static int ipcaif_changelink(struct net_device *dev, struct nlattr *tb[], -				struct nlattr *data[]) +			     struct nlattr *data[])  {  	struct chnl_net *caifdev;  	ASSERT_RTNL(); diff --git a/net/can/af_can.c b/net/can/af_can.c index c48e5220bba..c4e50852c9f 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -525,7 +525,7 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,  	d = find_dev_rcv_lists(dev);  	if (!d) { -		printk(KERN_ERR "BUG: receive list not found for " +		pr_err("BUG: receive list not found for "  		       "dev %s, id %03X, mask %03X\n",  		       DNAME(dev), can_id, mask);  		goto out; @@ -546,16 +546,13 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,  	}  	/* -	 * Check for bugs in CAN protocol implementations: -	 * If no matching list item was found, the list cursor variable next -	 * will be NULL, while r will point to the last item of the list. +	 * Check for bugs in CAN protocol implementations using af_can.c: +	 * 'r' will be NULL if no matching list item was found for removal.  	 
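
A note on the can_rx_unregister() hunk above: the old error path combined a printk(KERN_ERR) with a dead store (`r = NULL;` immediately before `goto out`, after which r is never read). WARN(1, ...) keeps the message and adds a backtrace, which is what actually identifies the CAN protocol implementation that unregistered a receiver it never registered. Since WARN() evaluates to its condition, the check and the report could even be fused; a hedged sketch of that equivalent form:

	/* WARN() returns the truth value of its condition, so the NULL
	 * check and the report can be combined in one statement. */
	if (WARN(!r, "BUG: receive list entry not found for dev %s, id %03X, mask %03X\n",
		 DNAME(dev), can_id, mask))
		goto out;
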
*/  	if (!r) { -		printk(KERN_ERR "BUG: receive list entry not found for " -		       "dev %s, id %03X, mask %03X\n", -		       DNAME(dev), can_id, mask); -		r = NULL; +		WARN(1, "BUG: receive list entry not found for dev %s, " +		     "id %03X, mask %03X\n", DNAME(dev), can_id, mask);  		goto out;  	} @@ -749,8 +746,7 @@ int can_proto_register(const struct can_proto *cp)  	int err = 0;  	if (proto < 0 || proto >= CAN_NPROTO) { -		printk(KERN_ERR "can: protocol number %d out of range\n", -		       proto); +		pr_err("can: protocol number %d out of range\n", proto);  		return -EINVAL;  	} @@ -761,8 +757,7 @@ int can_proto_register(const struct can_proto *cp)  	mutex_lock(&proto_tab_lock);  	if (proto_tab[proto]) { -		printk(KERN_ERR "can: protocol %d already registered\n", -		       proto); +		pr_err("can: protocol %d already registered\n", proto);  		err = -EBUSY;  	} else  		RCU_INIT_POINTER(proto_tab[proto], cp); @@ -816,11 +811,8 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,  		/* create new dev_rcv_lists for this device */  		d = kzalloc(sizeof(*d), GFP_KERNEL); -		if (!d) { -			printk(KERN_ERR -			       "can: allocation of receive list failed\n"); +		if (!d)  			return NOTIFY_DONE; -		}  		BUG_ON(dev->ml_priv);  		dev->ml_priv = d; @@ -838,8 +830,8 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,  				dev->ml_priv = NULL;  			}  		} else -			printk(KERN_ERR "can: notifier: receive list not " -			       "found for dev %s\n", dev->name); +			pr_err("can: notifier: receive list not found for dev " +			       "%s\n", dev->name);  		spin_unlock(&can_rcvlists_lock); @@ -927,7 +919,7 @@ static __exit void can_exit(void)  	/* remove created dev_rcv_lists from still registered CAN devices */  	rcu_read_lock();  	for_each_netdev_rcu(&init_net, dev) { -		if (dev->type == ARPHRD_CAN && dev->ml_priv){ +		if (dev->type == ARPHRD_CAN && dev->ml_priv) {  			struct dev_rcv_lists *d = dev->ml_priv; diff --git a/net/can/gw.c b/net/can/gw.c index 117814a7e73..3ee690e8c7d 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -778,8 +778,7 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,  	return 0;  } -static int cgw_create_job(struct sk_buff *skb,  struct nlmsghdr *nlh, -			  void *arg) +static int cgw_create_job(struct sk_buff *skb,  struct nlmsghdr *nlh)  {  	struct rtcanmsg *r;  	struct cgw_job *gwj; @@ -868,7 +867,7 @@ static void cgw_remove_all_jobs(void)  	}  } -static int cgw_remove_job(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg) +static int cgw_remove_job(struct sk_buff *skb,  struct nlmsghdr *nlh)  {  	struct cgw_job *gwj = NULL;  	struct hlist_node *nx; diff --git a/net/can/raw.c b/net/can/raw.c index c1764e41dda..1085e65f848 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -711,9 +711,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,  	err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);  	if (err < 0)  		goto free_skb; -	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); -	if (err < 0) -		goto free_skb; + +	sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);  	skb->dev = dev;  	skb->sk  = sk; diff --git a/net/core/datagram.c b/net/core/datagram.c index 368f9c3f9dc..b71423db778 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -78,9 +78,10 @@ static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int syn  	return autoremove_wake_function(wait, mode, sync, key);  }  /* - * Wait for a packet.. 
+ * Wait for the last received packet to be different from skb   */ -static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) +static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, +				 const struct sk_buff *skb)  {  	int error;  	DEFINE_WAIT_FUNC(wait, receiver_wake_function); @@ -92,7 +93,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)  	if (error)  		goto out_err; -	if (!skb_queue_empty(&sk->sk_receive_queue)) +	if (sk->sk_receive_queue.prev != skb)  		goto out;  	/* Socket shut down? */ @@ -131,9 +132,9 @@ out_noerr:   *	__skb_recv_datagram - Receive a datagram skbuff   *	@sk: socket   *	@flags: MSG_ flags + *	@peeked: returns non-zero if this packet has been seen before   *	@off: an offset in bytes to peek skb from. Returns an offset   *	      within an skb where data actually starts - *	@peeked: returns non-zero if this packet has been seen before   *	@err: error code returned   *   *	Get a datagram skbuff, understands the peeking, nonblocking wakeups @@ -161,7 +162,7 @@ out_noerr:  struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,  				    int *peeked, int *off, int *err)  { -	struct sk_buff *skb; +	struct sk_buff *skb, *last;  	long timeo;  	/*  	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram() @@ -182,13 +183,17 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,  		 */  		unsigned long cpu_flags;  		struct sk_buff_head *queue = &sk->sk_receive_queue; +		int _off = *off; +		last = (struct sk_buff *)queue;  		spin_lock_irqsave(&queue->lock, cpu_flags);  		skb_queue_walk(queue, skb) { +			last = skb;  			*peeked = skb->peeked;  			if (flags & MSG_PEEK) { -				if (*off >= skb->len && skb->len) { -					*off -= skb->len; +				if (_off >= skb->len && (skb->len || _off || +							 skb->peeked)) { +					_off -= skb->len;  					continue;  				}  				skb->peeked = 1; @@ -197,6 +202,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,  				__skb_unlink(skb, queue);  			spin_unlock_irqrestore(&queue->lock, cpu_flags); +			*off = _off;  			return skb;  		}  		spin_unlock_irqrestore(&queue->lock, cpu_flags); @@ -206,7 +212,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,  		if (!timeo)  			goto no_packet; -	} while (!wait_for_packet(sk, err, &timeo)); +	} while (!wait_for_more_packets(sk, err, &timeo, last));  	return NULL; @@ -749,7 +755,9 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,  	/* exceptional events? */  	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) -		mask |= POLLERR; +		mask |= POLLERR | +			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); +  	if (sk->sk_shutdown & RCV_SHUTDOWN)  		mask |= POLLRDHUP | POLLIN | POLLRDNORM;  	if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/core/dev.c b/net/core/dev.c index b24ab0e98eb..4040673f806 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -200,7 +200,7 @@ static inline void rps_unlock(struct softnet_data *sd)  }  /* Device list insertion */ -static int list_netdevice(struct net_device *dev) +static void list_netdevice(struct net_device *dev)  {  	struct net *net = dev_net(dev); @@ -214,8 +214,6 @@ static int list_netdevice(struct net_device *dev)  	write_unlock_bh(&dev_base_lock);  	dev_base_seq_inc(net); - -	return 0;  }  /* Device list removal @@ -2210,30 +2208,40 @@ out:  }  EXPORT_SYMBOL(skb_checksum_help); -/** - *	skb_mac_gso_segment - mac layer segmentation handler. 
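
Back to the datagram.c hunk: the old wait predicate, !skb_queue_empty(), would let a MSG_PEEK reader that had already inspected every queued packet return immediately and busy-loop, since peeking dequeues nothing. The fix is to remember the last skb the walk visited and sleep until the queue tail (queue.prev in the circular list) moves past it. A userspace model of the new predicate, assuming the same circular doubly-linked layout as the kernel's sk_buff_head:

	struct node { struct node *next, *prev; };

	/* 'queue' is the list head; 'last' is the tail the reader saw
	 * before sleeping, or the head itself if it saw an empty queue.
	 * More data exists iff the tail is no longer what it last saw. */
	static int have_more_packets(const struct node *queue,
				     const struct node *last)
	{
		return queue->prev != last;
	}
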
- *	@skb: buffer to segment - *	@features: features for the output path (see dev->features) - */ -struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, -				    netdev_features_t features) +__be16 skb_network_protocol(struct sk_buff *skb)  { -	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); -	struct packet_offload *ptype;  	__be16 type = skb->protocol;  	int vlan_depth = ETH_HLEN; -	while (type == htons(ETH_P_8021Q)) { +	while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {  		struct vlan_hdr *vh;  		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN))) -			return ERR_PTR(-EINVAL); +			return 0;  		vh = (struct vlan_hdr *)(skb->data + vlan_depth);  		type = vh->h_vlan_encapsulated_proto;  		vlan_depth += VLAN_HLEN;  	} +	return type; +} + +/** + *	skb_mac_gso_segment - mac layer segmentation handler. + *	@skb: buffer to segment + *	@features: features for the output path (see dev->features) + */ +struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, +				    netdev_features_t features) +{ +	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); +	struct packet_offload *ptype; +	__be16 type = skb_network_protocol(skb); + +	if (unlikely(!type)) +		return ERR_PTR(-EINVAL); +  	__skb_pull(skb, skb->mac_len);  	rcu_read_lock(); @@ -2400,24 +2408,12 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)  	return 0;  } -static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) -{ -	return ((features & NETIF_F_GEN_CSUM) || -		((features & NETIF_F_V4_CSUM) && -		 protocol == htons(ETH_P_IP)) || -		((features & NETIF_F_V6_CSUM) && -		 protocol == htons(ETH_P_IPV6)) || -		((features & NETIF_F_FCOE_CRC) && -		 protocol == htons(ETH_P_FCOE))); -} -  static netdev_features_t harmonize_features(struct sk_buff *skb,  	__be16 protocol, netdev_features_t features)  {  	if (skb->ip_summed != CHECKSUM_NONE &&  	    !can_checksum_protocol(features, protocol)) {  		features &= ~NETIF_F_ALL_CSUM; -		features &= ~NETIF_F_SG;  	} else if (illegal_highdma(skb->dev, skb)) {  		features &= ~NETIF_F_SG;  	} @@ -2433,20 +2429,22 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)  	if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)  		features &= ~NETIF_F_GSO_MASK; -	if (protocol == htons(ETH_P_8021Q)) { +	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {  		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;  		protocol = veh->h_vlan_encapsulated_proto;  	} else if (!vlan_tx_tag_present(skb)) {  		return harmonize_features(skb, protocol, features);  	} -	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX); +	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX | +					       NETIF_F_HW_VLAN_STAG_TX); -	if (protocol != htons(ETH_P_8021Q)) { +	if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) {  		return harmonize_features(skb, protocol, features);  	} else {  		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | -				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX; +				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX | +				NETIF_F_HW_VLAN_STAG_TX;  		return harmonize_features(skb, protocol, features);  	}  } @@ -2487,8 +2485,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,  		features = netif_skb_features(skb);  		if (vlan_tx_tag_present(skb) && -		    !(features & NETIF_F_HW_VLAN_TX)) { -			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); +		    !vlan_hw_offload_capable(features, skb->vlan_proto)) { +			skb = __vlan_put_tag(skb, skb->vlan_proto, +					
     vlan_tx_tag_get(skb));  			if (unlikely(!skb))  				goto out; @@ -2547,13 +2546,6 @@ gso:  		skb->next = nskb->next;  		nskb->next = NULL; -		/* -		 * If device doesn't need nskb->dst, release it right now while -		 * its hot in this cpu cache -		 */ -		if (dev->priv_flags & IFF_XMIT_DST_RELEASE) -			skb_dst_drop(nskb); -  		if (!list_empty(&ptype_all))  			dev_queue_xmit_nit(nskb, dev); @@ -2573,8 +2565,11 @@ gso:  	} while (skb->next);  out_kfree_gso_skb: -	if (likely(skb->next == NULL)) +	if (likely(skb->next == NULL)) {  		skb->destructor = DEV_GSO_CB(skb)->destructor; +		consume_skb(skb); +		return rc; +	}  out_kfree_skb:  	kfree_skb(skb);  out: @@ -2592,6 +2587,7 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)  	 */  	if (shinfo->gso_size)  {  		unsigned int hdr_len; +		u16 gso_segs = shinfo->gso_segs;  		/* mac layer + network layer */  		hdr_len = skb_transport_header(skb) - skb_mac_header(skb); @@ -2601,7 +2597,12 @@ static void qdisc_pkt_len_init(struct sk_buff *skb)  			hdr_len += tcp_hdrlen(skb);  		else  			hdr_len += sizeof(struct udphdr); -		qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len; + +		if (shinfo->gso_type & SKB_GSO_DODGY) +			gso_segs = DIV_ROUND_UP(skb->len - hdr_len, +						shinfo->gso_size); + +		qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;  	}  } @@ -3329,7 +3330,7 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_register);   *	netdev_rx_handler_unregister - unregister receive handler   *	@dev: device to unregister a handler from   * - *	Unregister a receive hander from a device. + *	Unregister a receive handler from a device.   *   *	The caller must hold the rtnl_mutex.   */ @@ -3358,6 +3359,7 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)  	case __constant_htons(ETH_P_IP):  	case __constant_htons(ETH_P_IPV6):  	case __constant_htons(ETH_P_8021Q): +	case __constant_htons(ETH_P_8021AD):  		return true;  	default:  		return false; @@ -3398,7 +3400,8 @@ another_round:  	__this_cpu_inc(softnet_data.processed); -	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { +	if (skb->protocol == cpu_to_be16(ETH_P_8021Q) || +	    skb->protocol == cpu_to_be16(ETH_P_8021AD)) {  		skb = vlan_untag(skb);  		if (unlikely(!skb))  			goto unlock; @@ -4066,6 +4069,9 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,  	napi->gro_list = NULL;  	napi->skb = NULL;  	napi->poll = poll; +	if (weight > NAPI_POLL_WEIGHT) +		pr_err_once("netif_napi_add() called with weight %d on device %s\n", +			    weight, dev->name);  	napi->weight = weight;  	list_add(&napi->dev_list, &dev->napi_list);  	napi->dev = dev; @@ -4927,20 +4933,25 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,  		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);  	} -	/* Fix illegal SG+CSUM combinations. */ -	if ((features & NETIF_F_SG) && -	    !(features & NETIF_F_ALL_CSUM)) { -		netdev_dbg(dev, -			"Dropping NETIF_F_SG since no checksum feature.\n"); -		features &= ~NETIF_F_SG; -	} -  	/* TSO requires that SG is present as well. 
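
One more note on the dev.c hunks above: qdisc_pkt_len_init() charges a GSO packet for the headers of every segment it will become, and it now recomputes the segment count for SKB_GSO_DODGY packets (e.g. from untrusted virtio-net senders) rather than trusting the gso_segs value supplied with the skb. A self-contained model of the accounting, with illustrative numbers:

	#include <stdio.h>

	#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

	/* Model of qdisc_pkt_len_init(): each extra segment repeats the
	 * headers on the wire, and an untrusted (dodgy) segment count is
	 * recomputed from the payload length. */
	static unsigned int qdisc_pkt_len(unsigned int skb_len,
					  unsigned int hdr_len,
					  unsigned int gso_size,
					  int dodgy, unsigned int gso_segs)
	{
		if (dodgy)
			gso_segs = DIV_ROUND_UP(skb_len - hdr_len, gso_size);
		return skb_len + (gso_segs - 1) * hdr_len;
	}

	int main(void)
	{
		/* 45 x 1448-byte segments behind 66 bytes of headers:
		 * 65226 bytes in the skb, 68130 accounted on the wire. */
		printf("%u\n", qdisc_pkt_len(66 + 45 * 1448, 66, 1448, 1, 1));
		return 0;
	}
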
*/  	if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {  		netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");  		features &= ~NETIF_F_ALL_TSO;  	} +	if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) && +					!(features & NETIF_F_IP_CSUM)) { +		netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n"); +		features &= ~NETIF_F_TSO; +		features &= ~NETIF_F_TSO_ECN; +	} + +	if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) && +					 !(features & NETIF_F_IPV6_CSUM)) { +		netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n"); +		features &= ~NETIF_F_TSO6; +	} +  	/* TSO ECN requires that TSO is present as well. */  	if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)  		features &= ~NETIF_F_TSO_ECN; @@ -5171,7 +5182,8 @@ int register_netdevice(struct net_device *dev)  		}  	} -	if (((dev->hw_features | dev->features) & NETIF_F_HW_VLAN_FILTER) && +	if (((dev->hw_features | dev->features) & +	     NETIF_F_HW_VLAN_CTAG_FILTER) &&  	    (!dev->netdev_ops->ndo_vlan_rx_add_vid ||  	     !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {  		netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n"); @@ -5208,6 +5220,10 @@ int register_netdevice(struct net_device *dev)  	 */  	dev->vlan_features |= NETIF_F_HIGHDMA; +	/* Make NETIF_F_SG inheritable to tunnel devices. +	 */ +	dev->hw_enc_features |= NETIF_F_SG; +  	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);  	ret = notifier_to_errno(ret);  	if (ret) diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index abdc9e6ef33..c013f38482a 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -22,7 +22,8 @@  static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,  			       const unsigned char *addr, int addr_len, -			       unsigned char addr_type, bool global) +			       unsigned char addr_type, bool global, +			       bool sync)  {  	struct netdev_hw_addr *ha;  	int alloc_size; @@ -37,7 +38,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,  	ha->type = addr_type;  	ha->refcount = 1;  	ha->global_use = global; -	ha->synced = 0; +	ha->synced = sync;  	list_add_tail_rcu(&ha->list, &list->list);  	list->count++; @@ -46,7 +47,7 @@ static int __hw_addr_create_ex(struct netdev_hw_addr_list *list,  static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,  			    const unsigned char *addr, int addr_len, -			    unsigned char addr_type, bool global) +			    unsigned char addr_type, bool global, bool sync)  {  	struct netdev_hw_addr *ha; @@ -63,43 +64,62 @@ static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,  				else  					ha->global_use = true;  			} +			if (sync) { +				if (ha->synced) +					return 0; +				else +					ha->synced = true; +			}  			ha->refcount++;  			return 0;  		}  	} -	return __hw_addr_create_ex(list, addr, addr_len, addr_type, global); +	return __hw_addr_create_ex(list, addr, addr_len, addr_type, global, +				   sync);  }  static int __hw_addr_add(struct netdev_hw_addr_list *list,  			 const unsigned char *addr, int addr_len,  			 unsigned char addr_type)  { -	return __hw_addr_add_ex(list, addr, addr_len, addr_type, false); +	return __hw_addr_add_ex(list, addr, addr_len, addr_type, false, false); +} + +static int __hw_addr_del_entry(struct netdev_hw_addr_list *list, +			       struct netdev_hw_addr *ha, bool global, +			       bool sync) +{ +	if (global && !ha->global_use) +		return -ENOENT; + +	if (sync && !ha->synced) +		return -ENOENT; + +	if (global) +		ha->global_use = false; + +	if (sync) +		
ha->synced = false; + +	if (--ha->refcount) +		return 0; +	list_del_rcu(&ha->list); +	kfree_rcu(ha, rcu_head); +	list->count--; +	return 0;  }  static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,  			    const unsigned char *addr, int addr_len, -			    unsigned char addr_type, bool global) +			    unsigned char addr_type, bool global, bool sync)  {  	struct netdev_hw_addr *ha;  	list_for_each_entry(ha, &list->list, list) {  		if (!memcmp(ha->addr, addr, addr_len) && -		    (ha->type == addr_type || !addr_type)) { -			if (global) { -				if (!ha->global_use) -					break; -				else -					ha->global_use = false; -			} -			if (--ha->refcount) -				return 0; -			list_del_rcu(&ha->list); -			kfree_rcu(ha, rcu_head); -			list->count--; -			return 0; -		} +		    (ha->type == addr_type || !addr_type)) +			return __hw_addr_del_entry(list, ha, global, sync);  	}  	return -ENOENT;  } @@ -108,7 +128,57 @@ static int __hw_addr_del(struct netdev_hw_addr_list *list,  			 const unsigned char *addr, int addr_len,  			 unsigned char addr_type)  { -	return __hw_addr_del_ex(list, addr, addr_len, addr_type, false); +	return __hw_addr_del_ex(list, addr, addr_len, addr_type, false, false); +} + +static int __hw_addr_sync_one(struct netdev_hw_addr_list *to_list, +			       struct netdev_hw_addr *ha, +			       int addr_len) +{ +	int err; + +	err = __hw_addr_add_ex(to_list, ha->addr, addr_len, ha->type, +			       false, true); +	if (err) +		return err; +	ha->sync_cnt++; +	ha->refcount++; + +	return 0; +} + +static void __hw_addr_unsync_one(struct netdev_hw_addr_list *to_list, +				 struct netdev_hw_addr_list *from_list, +				 struct netdev_hw_addr *ha, +				 int addr_len) +{ +	int err; + +	err = __hw_addr_del_ex(to_list, ha->addr, addr_len, ha->type, +			       false, true); +	if (err) +		return; +	ha->sync_cnt--; +	__hw_addr_del_entry(from_list, ha, false, true); +} + +static int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list, +				   struct netdev_hw_addr_list *from_list, +				   int addr_len) +{ +	int err = 0; +	struct netdev_hw_addr *ha, *tmp; + +	list_for_each_entry_safe(ha, tmp, &from_list->list, list) { +		if (ha->sync_cnt == ha->refcount) { +			__hw_addr_unsync_one(to_list, from_list, ha, addr_len); +		} else { +			err = __hw_addr_sync_one(to_list, ha, addr_len); +			if (err) +				break; +		} +	} +	return err;  }  int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list, @@ -152,6 +222,11 @@ void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,  }  EXPORT_SYMBOL(__hw_addr_del_multiple); +/* This function only works where there is a strict 1-1 relationship + * between the source and destination of the sync. If you ever need to + * sync addresses to more than 1 destination, you need to use + * __hw_addr_sync_multiple().
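
The invariant behind __hw_addr_sync_multiple() above is worth spelling out: each address entry carries refcount (bumped once per user add and once per sync) and the new sync_cnt (how many destination devices the entry has been pushed to). When sync_cnt == refcount, every remaining reference is a sync reference, meaning all original users have deleted the address, so it is unsynced and released; otherwise it is (re-)synced. A compressed model of that per-entry decision:

	/* Model of the two counters kept in struct netdev_hw_addr. */
	struct hw_addr {
		int refcount;	/* one per user add, plus one per sync */
		int sync_cnt;	/* destination devices synced to */
	};

	/* Per-entry decision in __hw_addr_sync_multiple(): 1 means only
	 * sync references remain and the entry should be unsynced from
	 * the destination, 0 means it should be (re-)synced. */
	static int only_sync_refs_left(const struct hw_addr *ha)
	{
		return ha->sync_cnt == ha->refcount;
	}
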
+ */  int __hw_addr_sync(struct netdev_hw_addr_list *to_list,  		   struct netdev_hw_addr_list *from_list,  		   int addr_len) @@ -160,17 +235,12 @@ int __hw_addr_sync(struct netdev_hw_addr_list *to_list,  	struct netdev_hw_addr *ha, *tmp;  	list_for_each_entry_safe(ha, tmp, &from_list->list, list) { -		if (!ha->synced) { -			err = __hw_addr_add(to_list, ha->addr, -					    addr_len, ha->type); +		if (!ha->sync_cnt) { +			err = __hw_addr_sync_one(to_list, ha, addr_len);  			if (err)  				break; -			ha->synced++; -			ha->refcount++; -		} else if (ha->refcount == 1) { -			__hw_addr_del(to_list, ha->addr, addr_len, ha->type); -			__hw_addr_del(from_list, ha->addr, addr_len, ha->type); -		} +		} else if (ha->refcount == 1) +			__hw_addr_unsync_one(to_list, from_list, ha, addr_len);  	}  	return err;  } @@ -183,13 +253,8 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,  	struct netdev_hw_addr *ha, *tmp;  	list_for_each_entry_safe(ha, tmp, &from_list->list, list) { -		if (ha->synced) { -			__hw_addr_del(to_list, ha->addr, -				      addr_len, ha->type); -			ha->synced--; -			__hw_addr_del(from_list, ha->addr, -				      addr_len, ha->type); -		} +		if (ha->sync_cnt) +			__hw_addr_unsync_one(to_list, from_list, ha, addr_len);  	}  }  EXPORT_SYMBOL(__hw_addr_unsync); @@ -406,7 +471,7 @@ int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr)  		}  	}  	err = __hw_addr_create_ex(&dev->uc, addr, dev->addr_len, -				  NETDEV_HW_ADDR_T_UNICAST, true); +				  NETDEV_HW_ADDR_T_UNICAST, true, false);  	if (!err)  		__dev_set_rx_mode(dev);  out: @@ -469,7 +534,8 @@ EXPORT_SYMBOL(dev_uc_del);   *	locked by netif_addr_lock_bh.   *   *	This function is intended to be called from the dev->set_rx_mode - *	function of layered software devices. + *	function of layered software devices.  This function assumes that + *	addresses will only ever be synced to the @to devices and no other.   */  int dev_uc_sync(struct net_device *to, struct net_device *from)  { @@ -488,6 +554,36 @@ int dev_uc_sync(struct net_device *to, struct net_device *from)  EXPORT_SYMBOL(dev_uc_sync);  /** + *	dev_uc_sync_multiple - Synchronize device's unicast list to another + *	device, but allow for multiple calls to sync to multiple devices. + *	@to: destination device + *	@from: source device + * + *	Add newly added addresses to the destination device and release + *	addresses that have been deleted from the source. The source device + *	must be locked by netif_addr_lock_bh. + * + *	This function is intended to be called from the dev->set_rx_mode + *	function of layered software devices.  It allows for a single source + *	device to be synced to multiple destination devices. 
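
dev_uc_sync_multiple() exists because, as the comment above says, dev_uc_sync() assumes a strict 1:1 source-to-destination pairing; a bonding- or team-style master that mirrors its filter lists into several slave ports needs the multiple-destination variant. A hedged sketch of how such a master's ndo_set_rx_mode might look (the driver structures are hypothetical; the core already holds the master's address lock when it calls ndo_set_rx_mode, so the sketch takes none itself). Note in passing that the dev_mc_sync_multiple() added further below still delegates to __hw_addr_sync() rather than __hw_addr_sync_multiple() for the multicast list:

	/* Hypothetical layered driver: propagate the master's unicast
	 * and multicast filters to every slave port. */
	static void master_set_rx_mode(struct net_device *master)
	{
		struct master_priv *priv = netdev_priv(master);
		struct slave *s;

		rcu_read_lock();
		list_for_each_entry_rcu(s, &priv->slaves, list) {
			dev_uc_sync_multiple(s->dev, master);
			dev_mc_sync_multiple(s->dev, master);
		}
		rcu_read_unlock();
	}
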
+ */ +int dev_uc_sync_multiple(struct net_device *to, struct net_device *from) +{ +	int err = 0; + +	if (to->addr_len != from->addr_len) +		return -EINVAL; + +	netif_addr_lock_nested(to); +	err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len); +	if (!err) +		__dev_set_rx_mode(to); +	netif_addr_unlock(to); +	return err; +} +EXPORT_SYMBOL(dev_uc_sync_multiple); + +/**   *	dev_uc_unsync - Remove synchronized addresses from the destination device   *	@to: destination device   *	@from: source device @@ -559,7 +655,7 @@ int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr)  		}  	}  	err = __hw_addr_create_ex(&dev->mc, addr, dev->addr_len, -				  NETDEV_HW_ADDR_T_MULTICAST, true); +				  NETDEV_HW_ADDR_T_MULTICAST, true, false);  	if (!err)  		__dev_set_rx_mode(dev);  out: @@ -575,7 +671,7 @@ static int __dev_mc_add(struct net_device *dev, const unsigned char *addr,  	netif_addr_lock_bh(dev);  	err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len, -			       NETDEV_HW_ADDR_T_MULTICAST, global); +			       NETDEV_HW_ADDR_T_MULTICAST, global, false);  	if (!err)  		__dev_set_rx_mode(dev);  	netif_addr_unlock_bh(dev); @@ -615,7 +711,7 @@ static int __dev_mc_del(struct net_device *dev, const unsigned char *addr,  	netif_addr_lock_bh(dev);  	err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len, -			       NETDEV_HW_ADDR_T_MULTICAST, global); +			       NETDEV_HW_ADDR_T_MULTICAST, global, false);  	if (!err)  		__dev_set_rx_mode(dev);  	netif_addr_unlock_bh(dev); @@ -679,6 +775,36 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)  EXPORT_SYMBOL(dev_mc_sync);  /** + *	dev_mc_sync_multiple - Synchronize device's multicast list to another + *	device, but allow for multiple calls to sync to multiple devices. + *	@to: destination device + *	@from: source device + * + *	Add newly added addresses to the destination device and release + *	addresses that have no users left. The source device must be + *	locked by netif_addr_lock_bh. + * + *	This function is intended to be called from the ndo_set_rx_mode + *	function of layered software devices.  It allows for a single + *	source device to be synced to multiple destination devices.
+ */ +int dev_mc_sync_multiple(struct net_device *to, struct net_device *from) +{ +	int err = 0; + +	if (to->addr_len != from->addr_len) +		return -EINVAL; + +	netif_addr_lock_nested(to); +	err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); +	if (!err) +		__dev_set_rx_mode(to); +	netif_addr_unlock(to); +	return err; +} +EXPORT_SYMBOL(dev_mc_sync_multiple); + +/**   *	dev_mc_unsync - Remove synchronized addresses from the destination device   *	@to: destination device   *	@from: source device diff --git a/net/core/dst.c b/net/core/dst.c index 35fd12f1a69..df9cc810ec8 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -320,27 +320,28 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)  EXPORT_SYMBOL(__dst_destroy_metrics_generic);  /** - * skb_dst_set_noref - sets skb dst, without a reference + * __skb_dst_set_noref - sets skb dst, without a reference   * @skb: buffer   * @dst: dst entry + * @force: if force is set, use noref version even for DST_NOCACHE entries   *   * Sets skb dst, assuming a reference was not taken on dst   * skb_dst_drop() should not dst_release() this dst   */ -void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) +void __skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst, bool force)  {  	WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());  	/* If dst not in cache, we must take a reference, because  	 * dst_release() will destroy dst as soon as its refcount becomes zero  	 */ -	if (unlikely(dst->flags & DST_NOCACHE)) { +	if (unlikely((dst->flags & DST_NOCACHE) && !force)) {  		dst_hold(dst);  		skb_dst_set(skb, dst);  	} else {  		skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;  	}  } -EXPORT_SYMBOL(skb_dst_set_noref); +EXPORT_SYMBOL(__skb_dst_set_noref);  /* Dirty hack. 
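
__skb_dst_set_noref() gains a force flag so callers that can otherwise guarantee the dst's lifetime may store a noref pointer even for DST_NOCACHE entries, instead of paying for an atomic dst_hold(). The old entry point presumably survives as a thin inline wrapper, along with a forced variant, in include/net/dst.h (outside this hunk); a sketch of that assumption:

	/* Assumed wrappers around the reworked __skb_dst_set_noref(). */
	static inline void skb_dst_set_noref(struct sk_buff *skb,
					     struct dst_entry *dst)
	{
		__skb_dst_set_noref(skb, dst, false);
	}

	static inline void skb_dst_set_noref_force(struct sk_buff *skb,
						   struct dst_entry *dst)
	{
		__skb_dst_set_noref(skb, dst, true);
	}
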
We did it in 2.2 (in __dst_free),   * we have _very_ good reasons not to repeat diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 3e9b2c3e30f..5a934ef90f8 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -60,10 +60,13 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]  	[NETIF_F_IPV6_CSUM_BIT] =        "tx-checksum-ipv6",  	[NETIF_F_HIGHDMA_BIT] =          "highdma",  	[NETIF_F_FRAGLIST_BIT] =         "tx-scatter-gather-fraglist", -	[NETIF_F_HW_VLAN_TX_BIT] =       "tx-vlan-hw-insert", +	[NETIF_F_HW_VLAN_CTAG_TX_BIT] =  "tx-vlan-ctag-hw-insert", -	[NETIF_F_HW_VLAN_RX_BIT] =       "rx-vlan-hw-parse", -	[NETIF_F_HW_VLAN_FILTER_BIT] =   "rx-vlan-filter", +	[NETIF_F_HW_VLAN_CTAG_RX_BIT] =  "rx-vlan-ctag-hw-parse", +	[NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-ctag-filter", +	[NETIF_F_HW_VLAN_STAG_TX_BIT] =  "tx-vlan-stag-hw-insert", +	[NETIF_F_HW_VLAN_STAG_RX_BIT] =  "rx-vlan-stag-hw-parse", +	[NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter",  	[NETIF_F_VLAN_CHALLENGED_BIT] =  "vlan-challenged",  	[NETIF_F_GSO_BIT] =              "tx-generic-segmentation",  	[NETIF_F_LLTX_BIT] =             "tx-lockless", @@ -78,6 +81,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]  	[NETIF_F_TSO6_BIT] =             "tx-tcp6-segmentation",  	[NETIF_F_FSO_BIT] =              "tx-fcoe-segmentation",  	[NETIF_F_GSO_GRE_BIT] =		 "tx-gre-segmentation", +	[NETIF_F_GSO_UDP_TUNNEL_BIT] =	 "tx-udp_tnl-segmentation",  	[NETIF_F_FCOE_CRC_BIT] =         "tx-checksum-fcoe-crc",  	[NETIF_F_SCTP_CSUM_BIT] =        "tx-checksum-sctp", @@ -266,18 +270,19 @@ static int ethtool_set_one_feature(struct net_device *dev,  #define ETH_ALL_FLAGS    (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \  			  ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH) -#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_RX | \ -			  NETIF_F_HW_VLAN_TX | NETIF_F_NTUPLE | NETIF_F_RXHASH) +#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_CTAG_RX | \ +			  NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_NTUPLE | \ +			  NETIF_F_RXHASH)  static u32 __ethtool_get_flags(struct net_device *dev)  {  	u32 flags = 0; -	if (dev->features & NETIF_F_LRO)	flags |= ETH_FLAG_LRO; -	if (dev->features & NETIF_F_HW_VLAN_RX)	flags |= ETH_FLAG_RXVLAN; -	if (dev->features & NETIF_F_HW_VLAN_TX)	flags |= ETH_FLAG_TXVLAN; -	if (dev->features & NETIF_F_NTUPLE)	flags |= ETH_FLAG_NTUPLE; -	if (dev->features & NETIF_F_RXHASH)	flags |= ETH_FLAG_RXHASH; +	if (dev->features & NETIF_F_LRO)	     flags |= ETH_FLAG_LRO; +	if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) flags |= ETH_FLAG_RXVLAN; +	if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) flags |= ETH_FLAG_TXVLAN; +	if (dev->features & NETIF_F_NTUPLE)	     flags |= ETH_FLAG_NTUPLE; +	if (dev->features & NETIF_F_RXHASH)	     flags |= ETH_FLAG_RXHASH;  	return flags;  } @@ -290,8 +295,8 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)  		return -EINVAL;  	if (data & ETH_FLAG_LRO)	features |= NETIF_F_LRO; -	if (data & ETH_FLAG_RXVLAN)	features |= NETIF_F_HW_VLAN_RX; -	if (data & ETH_FLAG_TXVLAN)	features |= NETIF_F_HW_VLAN_TX; +	if (data & ETH_FLAG_RXVLAN)	features |= NETIF_F_HW_VLAN_CTAG_RX; +	if (data & ETH_FLAG_TXVLAN)	features |= NETIF_F_HW_VLAN_CTAG_TX;  	if (data & ETH_FLAG_NTUPLE)	features |= NETIF_F_NTUPLE;  	if (data & ETH_FLAG_RXHASH)	features |= NETIF_F_RXHASH; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 58a4ba27dfe..d5a9f8ead0d 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -266,7 +266,7 
@@ errout:  	return err;  } -static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)  {  	struct net *net = sock_net(skb->sk);  	struct fib_rule_hdr *frh = nlmsg_data(nlh); @@ -415,7 +415,7 @@ errout:  	return err;  } -static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)  {  	struct net *net = sock_net(skb->sk);  	struct fib_rule_hdr *frh = nlmsg_data(nlh); diff --git a/net/core/filter.c b/net/core/filter.c index 2e20b55a783..dad2a178f9f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -348,6 +348,9 @@ load_b:  		case BPF_S_ANC_VLAN_TAG_PRESENT:  			A = !!vlan_tx_tag_present(skb);  			continue; +		case BPF_S_ANC_PAY_OFFSET: +			A = __skb_get_poff(skb); +			continue;  		case BPF_S_ANC_NLATTR: {  			struct nlattr *nla; @@ -612,6 +615,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)  			ANCILLARY(ALU_XOR_X);  			ANCILLARY(VLAN_TAG);  			ANCILLARY(VLAN_TAG_PRESENT); +			ANCILLARY(PAY_OFFSET);  			}  			/* ancillary operation unknown or unsupported */ @@ -814,6 +818,7 @@ static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)  		[BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,  		[BPF_S_ANC_VLAN_TAG]	= BPF_LD|BPF_B|BPF_ABS,  		[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, +		[BPF_S_ANC_PAY_OFFSET]	= BPF_LD|BPF_B|BPF_ABS,  		[BPF_S_LD_W_LEN]	= BPF_LD|BPF_W|BPF_LEN,  		[BPF_S_LD_W_IND]	= BPF_LD|BPF_W|BPF_IND,  		[BPF_S_LD_H_IND]	= BPF_LD|BPF_H|BPF_IND, diff --git a/net/core/flow.c b/net/core/flow.c index 2bfd081c59f..7102f166482 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -323,6 +323,24 @@ static void flow_cache_flush_tasklet(unsigned long data)  		complete(&info->completion);  } +/* + * Return whether a cpu needs flushing.  Conservatively, we assume + * the presence of any entries means the core may require flushing, + * since the flow_cache_ops.check() function may assume it's running + * on the same core as the per-cpu cache component. + */ +static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu) +{ +	struct flow_cache_percpu *fcp; +	int i; + +	fcp = per_cpu_ptr(fc->percpu, cpu); +	for (i = 0; i < flow_cache_hash_size(fc); i++) +		if (!hlist_empty(&fcp->hash_table[i])) +			return 0; +	return 1; +} +  static void flow_cache_flush_per_cpu(void *data)  {  	struct flow_flush_info *info = data; @@ -337,22 +355,40 @@ void flow_cache_flush(void)  {  	struct flow_flush_info info;  	static DEFINE_MUTEX(flow_flush_sem); +	cpumask_var_t mask; +	int i, self; + +	/* Track which cpus need flushing to avoid disturbing all cores. */ +	if (!alloc_cpumask_var(&mask, GFP_KERNEL)) +		return; +	cpumask_clear(mask);  	/* Don't want cpus going down or up during this. 
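
The flow_cache_flush() rework in progress here replaces an unconditional smp_call_function() on every online cpu with a targeted IPI: scan each per-cpu hash table first, build a cpumask of the cpus that actually hold cache entries, and skip the flush outright when that mask comes out empty, which matters on large machines where most cores never touch the flow cache. The essential pattern, reduced to a kernel-context sketch (per_cpu_state_empty(), cache, flush_fn and info stand in for the names used above):

	cpumask_var_t mask;
	int cpu;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return;
	cpumask_clear(mask);

	/* only cpus with populated per-cpu state need an IPI */
	for_each_online_cpu(cpu)
		if (!per_cpu_state_empty(cache, cpu))
			cpumask_set_cpu(cpu, mask);

	if (!cpumask_empty(mask))
		on_each_cpu_mask(mask, flush_fn, &info, 0);

	free_cpumask_var(mask);
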
*/  	get_online_cpus();  	mutex_lock(&flow_flush_sem);  	info.cache = &flow_cache_global; -	atomic_set(&info.cpuleft, num_online_cpus()); +	for_each_online_cpu(i) +		if (!flow_cache_percpu_empty(info.cache, i)) +			cpumask_set_cpu(i, mask); +	atomic_set(&info.cpuleft, cpumask_weight(mask)); +	if (atomic_read(&info.cpuleft) == 0) +		goto done; +  	init_completion(&info.completion);  	local_bh_disable(); -	smp_call_function(flow_cache_flush_per_cpu, &info, 0); -	flow_cache_flush_tasklet((unsigned long)&info); +	self = cpumask_test_and_clear_cpu(smp_processor_id(), mask); +	on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0); +	if (self) +		flow_cache_flush_tasklet((unsigned long)&info);  	local_bh_enable();  	wait_for_completion(&info.completion); + +done:  	mutex_unlock(&flow_flush_sem);  	put_online_cpus(); +	free_cpumask_var(mask);  }  static void flow_cache_flush_task(struct work_struct *work) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index e187bf06d67..00ee068efc1 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -5,6 +5,10 @@  #include <linux/if_vlan.h>  #include <net/ip.h>  #include <net/ipv6.h> +#include <linux/igmp.h> +#include <linux/icmp.h> +#include <linux/sctp.h> +#include <linux/dccp.h>  #include <linux/if_tunnel.h>  #include <linux/if_pppox.h>  #include <linux/ppp_defs.h> @@ -119,6 +123,17 @@ ipv6:  				nhoff += 4;  			if (hdr->flags & GRE_SEQ)  				nhoff += 4; +			if (proto == htons(ETH_P_TEB)) { +				const struct ethhdr *eth; +				struct ethhdr _eth; + +				eth = skb_header_pointer(skb, nhoff, +							 sizeof(_eth), &_eth); +				if (!eth) +					return false; +				proto = eth->h_proto; +				nhoff += sizeof(*eth); +			}  			goto again;  		}  		break; @@ -217,6 +232,59 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,  }  EXPORT_SYMBOL(__skb_tx_hash); +/* __skb_get_poff() returns the offset to the payload as far as it could + * be dissected. The main user is currently BPF, so that we can dynamically + * truncate packets without needing to push actual payload to the user + * space and can analyze headers only, instead. + */ +u32 __skb_get_poff(const struct sk_buff *skb) +{ +	struct flow_keys keys; +	u32 poff = 0; + +	if (!skb_flow_dissect(skb, &keys)) +		return 0; + +	poff += keys.thoff; +	switch (keys.ip_proto) { +	case IPPROTO_TCP: { +		const struct tcphdr *tcph; +		struct tcphdr _tcph; + +		tcph = skb_header_pointer(skb, poff, sizeof(_tcph), &_tcph); +		if (!tcph) +			return poff; + +		poff += max_t(u32, sizeof(struct tcphdr), tcph->doff * 4); +		break; +	} +	case IPPROTO_UDP: +	case IPPROTO_UDPLITE: +		poff += sizeof(struct udphdr); +		break; +	/* For the rest, we do not really care about header +	 * extensions at this point for now. 
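
__skb_get_poff() gives BPF, via the new PAY_OFFSET ancillary load, the offset at which payload begins, so a filter can truncate a capture right after the headers without pushing payload to user space. Everything but TCP uses a fixed transport-header size; TCP's is read from the data-offset field. A self-contained model of the sizing logic:

	#include <stdint.h>

	enum { P_ICMP = 1, P_TCP = 6, P_UDP = 17, P_SCTP = 132 };

	/* Model: payload offset = transport header offset plus transport
	 * header size.  'tcp_doff' is TCP's data-offset field (header
	 * length in 32-bit words), used only for P_TCP. */
	static uint32_t payload_off(uint32_t thoff, int ip_proto,
				    uint8_t tcp_doff)
	{
		uint32_t tcp_hdrlen = tcp_doff * 4;

		switch (ip_proto) {
		case P_TCP:
			/* never less than the fixed 20-byte header */
			return thoff + (tcp_hdrlen > 20 ? tcp_hdrlen : 20);
		case P_UDP:
			return thoff + 8;	/* sizeof(struct udphdr) */
		case P_ICMP:
			return thoff + 8;	/* sizeof(struct icmphdr) */
		case P_SCTP:
			return thoff + 12;	/* sizeof(struct sctphdr) */
		default:
			return thoff;		/* stop at L4 */
		}
	}
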
+	 */ +	case IPPROTO_ICMP: +		poff += sizeof(struct icmphdr); +		break; +	case IPPROTO_ICMPV6: +		poff += sizeof(struct icmp6hdr); +		break; +	case IPPROTO_IGMP: +		poff += sizeof(struct igmphdr); +		break; +	case IPPROTO_DCCP: +		poff += sizeof(struct dccp_hdr); +		break; +	case IPPROTO_SCTP: +		poff += sizeof(struct sctphdr); +		break; +	} + +	return poff; +} +  static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)  {  	if (unlikely(queue_index >= dev->real_num_tx_queues)) { diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3863b8f639c..89a3a07d85f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -39,21 +39,13 @@  #include <linux/string.h>  #include <linux/log2.h> +#define DEBUG  #define NEIGH_DEBUG 1 - -#define NEIGH_PRINTK(x...) printk(x) -#define NEIGH_NOPRINTK(x...) do { ; } while(0) -#define NEIGH_PRINTK1 NEIGH_NOPRINTK -#define NEIGH_PRINTK2 NEIGH_NOPRINTK - -#if NEIGH_DEBUG >= 1 -#undef NEIGH_PRINTK1 -#define NEIGH_PRINTK1 NEIGH_PRINTK -#endif -#if NEIGH_DEBUG >= 2 -#undef NEIGH_PRINTK2 -#define NEIGH_PRINTK2 NEIGH_PRINTK -#endif +#define neigh_dbg(level, fmt, ...)		\ +do {						\ +	if (level <= NEIGH_DEBUG)		\ +		pr_debug(fmt, ##__VA_ARGS__);	\ +} while (0)  #define PNEIGH_HASHMASK		0xF @@ -246,7 +238,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)  					n->nud_state = NUD_NOARP;  				else  					n->nud_state = NUD_NONE; -				NEIGH_PRINTK2("neigh %p is stray.\n", n); +				neigh_dbg(2, "neigh %p is stray\n", n);  			}  			write_unlock(&n->lock);  			neigh_cleanup_and_release(n); @@ -542,7 +534,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,  						     lockdep_is_held(&tbl->lock)));  	rcu_assign_pointer(nht->hash_buckets[hash_val], n);  	write_unlock_bh(&tbl->lock); -	NEIGH_PRINTK2("neigh %p is created.\n", n); +	neigh_dbg(2, "neigh %p is created\n", n);  	rc = n;  out:  	return rc; @@ -725,7 +717,7 @@ void neigh_destroy(struct neighbour *neigh)  	dev_put(dev);  	neigh_parms_put(neigh->parms); -	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); +	neigh_dbg(2, "neigh %p is destroyed\n", neigh);  	atomic_dec(&neigh->tbl->entries);  	kfree_rcu(neigh, rcu); @@ -739,7 +731,7 @@ EXPORT_SYMBOL(neigh_destroy);   */  static void neigh_suspect(struct neighbour *neigh)  { -	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); +	neigh_dbg(2, "neigh %p is suspected\n", neigh);  	neigh->output = neigh->ops->output;  } @@ -751,7 +743,7 @@ static void neigh_suspect(struct neighbour *neigh)   */  static void neigh_connect(struct neighbour *neigh)  { -	NEIGH_PRINTK2("neigh %p is connected.\n", neigh); +	neigh_dbg(2, "neigh %p is connected\n", neigh);  	neigh->output = neigh->ops->connected_output;  } @@ -852,7 +844,7 @@ static void neigh_invalidate(struct neighbour *neigh)  	struct sk_buff *skb;  	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed); -	NEIGH_PRINTK2("neigh %p is failed.\n", neigh); +	neigh_dbg(2, "neigh %p is failed\n", neigh);  	neigh->updated = jiffies;  	/* It is very thin place. 
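
The neigh_dbg() macro introduced at the top of the neighbour.c hunk replaces the old NEIGH_PRINTK level ladder with a single pr_debug()-backed gate: level-2 messages compile away whenever NEIGH_DEBUG is below 2, and the unconditional #define DEBUG makes pr_debug() actually emit in this file even without dynamic debug. A userspace analogue of the level gate:

	#include <stdio.h>

	#define NEIGH_DEBUG 1

	/* Level-gated debug macro: when 'level' is a constant greater
	 * than NEIGH_DEBUG, the whole statement folds away at compile
	 * time. */
	#define neigh_dbg(level, fmt, ...)			\
	do {							\
		if ((level) <= NEIGH_DEBUG)			\
			fprintf(stderr, fmt, ##__VA_ARGS__);	\
	} while (0)

	int main(void)
	{
		neigh_dbg(1, "neigh %p is created\n", (void *)0);
		neigh_dbg(2, "dead code at this debug level\n");
		return 0;
	}
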
report_unreachable is very complicated @@ -904,17 +896,17 @@ static void neigh_timer_handler(unsigned long arg)  	if (state & NUD_REACHABLE) {  		if (time_before_eq(now,  				   neigh->confirmed + neigh->parms->reachable_time)) { -			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh); +			neigh_dbg(2, "neigh %p is still alive\n", neigh);  			next = neigh->confirmed + neigh->parms->reachable_time;  		} else if (time_before_eq(now,  					  neigh->used + neigh->parms->delay_probe_time)) { -			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); +			neigh_dbg(2, "neigh %p is delayed\n", neigh);  			neigh->nud_state = NUD_DELAY;  			neigh->updated = jiffies;  			neigh_suspect(neigh);  			next = now + neigh->parms->delay_probe_time;  		} else { -			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh); +			neigh_dbg(2, "neigh %p is suspected\n", neigh);  			neigh->nud_state = NUD_STALE;  			neigh->updated = jiffies;  			neigh_suspect(neigh); @@ -923,14 +915,14 @@ static void neigh_timer_handler(unsigned long arg)  	} else if (state & NUD_DELAY) {  		if (time_before_eq(now,  				   neigh->confirmed + neigh->parms->delay_probe_time)) { -			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh); +			neigh_dbg(2, "neigh %p is now reachable\n", neigh);  			neigh->nud_state = NUD_REACHABLE;  			neigh->updated = jiffies;  			neigh_connect(neigh);  			notify = 1;  			next = neigh->confirmed + neigh->parms->reachable_time;  		} else { -			NEIGH_PRINTK2("neigh %p is probed.\n", neigh); +			neigh_dbg(2, "neigh %p is probed\n", neigh);  			neigh->nud_state = NUD_PROBE;  			neigh->updated = jiffies;  			atomic_set(&neigh->probes, 0); @@ -997,7 +989,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)  			return 1;  		}  	} else if (neigh->nud_state & NUD_STALE) { -		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh); +		neigh_dbg(2, "neigh %p is delayed\n", neigh);  		neigh->nud_state = NUD_DELAY;  		neigh->updated = jiffies;  		neigh_add_timer(neigh, @@ -1320,8 +1312,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)  out:  	return rc;  discard: -	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", -		      dst, neigh); +	neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);  out_kfree_skb:  	rc = -EINVAL;  	kfree_skb(skb); @@ -1498,7 +1489,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)  		}  	}  	write_unlock_bh(&tbl->lock); -	NEIGH_PRINTK1("neigh_parms_release: not found\n"); +	neigh_dbg(1, "%s: not found\n", __func__);  }  EXPORT_SYMBOL(neigh_parms_release); @@ -1613,7 +1604,7 @@ int neigh_table_clear(struct neigh_table *tbl)  }  EXPORT_SYMBOL(neigh_table_clear); -static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct ndmsg *ndm; @@ -1677,7 +1668,7 @@ out:  	return err;  } -static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct ndmsg *ndm; @@ -1955,7 +1946,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {  	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },  }; -static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct neigh_table *tbl; diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 
3174f1998ee..569d355fec3 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -271,7 +271,7 @@ static int ptype_seq_show(struct seq_file *seq, void *v)  		else  			seq_printf(seq, "%04x", ntohs(pt->type)); -		seq_printf(seq, " %-8s %pF\n", +		seq_printf(seq, " %-8s %pf\n",  			   pt->dev ? pt->dev->name : "", pt->func);  	} diff --git a/net/core/netpoll.c b/net/core/netpoll.c index fa32899006a..a5802a8b652 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -47,7 +47,7 @@ static struct sk_buff_head skb_pool;  static atomic_t trapped; -static struct srcu_struct netpoll_srcu; +DEFINE_STATIC_SRCU(netpoll_srcu);  #define USEC_PER_POLL	50  #define NETPOLL_RX_ENABLED  1 @@ -206,17 +206,17 @@ static void netpoll_poll_dev(struct net_device *dev)  	 * the dev_open/close paths use this to block netpoll activity  	 * while changing device state  	 */ -	if (!mutex_trylock(&ni->dev_lock)) +	if (down_trylock(&ni->dev_lock))  		return;  	if (!netif_running(dev)) { -		mutex_unlock(&ni->dev_lock); +		up(&ni->dev_lock);  		return;  	}  	ops = dev->netdev_ops;  	if (!ops->ndo_poll_controller) { -		mutex_unlock(&ni->dev_lock); +		up(&ni->dev_lock);  		return;  	} @@ -225,7 +225,7 @@ static void netpoll_poll_dev(struct net_device *dev)  	poll_napi(dev); -	mutex_unlock(&ni->dev_lock); +	up(&ni->dev_lock);  	if (dev->flags & IFF_SLAVE) {  		if (ni) { @@ -255,7 +255,7 @@ int netpoll_rx_disable(struct net_device *dev)  	idx = srcu_read_lock(&netpoll_srcu);  	ni = srcu_dereference(dev->npinfo, &netpoll_srcu);  	if (ni) -		mutex_lock(&ni->dev_lock); +		down(&ni->dev_lock);  	srcu_read_unlock(&netpoll_srcu, idx);  	return 0;  } @@ -267,7 +267,7 @@ void netpoll_rx_enable(struct net_device *dev)  	rcu_read_lock();  	ni = rcu_dereference(dev->npinfo);  	if (ni) -		mutex_unlock(&ni->dev_lock); +		up(&ni->dev_lock);  	rcu_read_unlock();  }  EXPORT_SYMBOL(netpoll_rx_enable); @@ -383,8 +383,9 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,  			if (__netif_tx_trylock(txq)) {  				if (!netif_xmit_stopped(txq)) {  					if (vlan_tx_tag_present(skb) && -					    !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) { -						skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb)); +					    !vlan_hw_offload_capable(netif_skb_features(skb), +								     skb->vlan_proto)) { +						skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));  						if (unlikely(!skb))  							break;  						skb->vlan_tci = 0; @@ -1046,7 +1047,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)  		INIT_LIST_HEAD(&npinfo->rx_np);  		spin_lock_init(&npinfo->rx_lock); -		mutex_init(&npinfo->dev_lock); +		sema_init(&npinfo->dev_lock, 1);  		skb_queue_head_init(&npinfo->neigh_tx);  		skb_queue_head_init(&npinfo->txq);  		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); @@ -1212,7 +1213,6 @@ EXPORT_SYMBOL(netpoll_setup);  static int __init netpoll_init(void)  {  	skb_queue_head_init(&skb_pool); -	init_srcu_struct(&netpoll_srcu);  	return 0;  }  core_initcall(netpoll_init); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 23854b51a25..a08bd2b7fe3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -517,32 +517,6 @@ out:  	return err;  } -static const int rtm_min[RTM_NR_FAMILIES] = -{ -	[RTM_FAM(RTM_NEWLINK)]      = NLMSG_LENGTH(sizeof(struct ifinfomsg)), -	[RTM_FAM(RTM_NEWADDR)]      = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), -	[RTM_FAM(RTM_NEWROUTE)]     = NLMSG_LENGTH(sizeof(struct rtmsg)), -	[RTM_FAM(RTM_NEWRULE)]      = NLMSG_LENGTH(sizeof(struct 
fib_rule_hdr)), -	[RTM_FAM(RTM_NEWQDISC)]     = NLMSG_LENGTH(sizeof(struct tcmsg)), -	[RTM_FAM(RTM_NEWTCLASS)]    = NLMSG_LENGTH(sizeof(struct tcmsg)), -	[RTM_FAM(RTM_NEWTFILTER)]   = NLMSG_LENGTH(sizeof(struct tcmsg)), -	[RTM_FAM(RTM_NEWACTION)]    = NLMSG_LENGTH(sizeof(struct tcamsg)), -	[RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), -	[RTM_FAM(RTM_GETANYCAST)]   = NLMSG_LENGTH(sizeof(struct rtgenmsg)), -}; - -static const int rta_max[RTM_NR_FAMILIES] = -{ -	[RTM_FAM(RTM_NEWLINK)]      = IFLA_MAX, -	[RTM_FAM(RTM_NEWADDR)]      = IFA_MAX, -	[RTM_FAM(RTM_NEWROUTE)]     = RTA_MAX, -	[RTM_FAM(RTM_NEWRULE)]      = FRA_MAX, -	[RTM_FAM(RTM_NEWQDISC)]     = TCA_MAX, -	[RTM_FAM(RTM_NEWTCLASS)]    = TCA_MAX, -	[RTM_FAM(RTM_NEWTFILTER)]   = TCA_MAX, -	[RTM_FAM(RTM_NEWACTION)]    = TCAA_MAX, -}; -  int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo)  {  	struct sock *rtnl = net->rtnl; @@ -1539,7 +1513,7 @@ errout:  	return err;  } -static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct ifinfomsg *ifm; @@ -1580,7 +1554,7 @@ errout:  	return err;  } -static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	const struct rtnl_link_ops *ops; @@ -1711,7 +1685,7 @@ static int rtnl_group_changelink(struct net *net, int group,  	return 0;  } -static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	const struct rtnl_link_ops *ops; @@ -1866,7 +1840,7 @@ out:  	}  } -static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh)  {  	struct net *net = sock_net(skb->sk);  	struct ifinfomsg *ifm; @@ -1957,8 +1931,11 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)  		if (rtnl_msg_handlers[idx] == NULL ||  		    rtnl_msg_handlers[idx][type].dumpit == NULL)  			continue; -		if (idx > s_idx) +		if (idx > s_idx) {  			memset(&cb->args[0], 0, sizeof(cb->args)); +			cb->prev_seq = 0; +			cb->seq = 0; +		}  		if (rtnl_msg_handlers[idx][type].dumpit(skb, cb))  			break;  	} @@ -2051,7 +2028,39 @@ errout:  	rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);  } -static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +/** + * ndo_dflt_fdb_add - default netdevice operation to add an FDB entry + */ +int ndo_dflt_fdb_add(struct ndmsg *ndm, +		     struct nlattr *tb[], +		     struct net_device *dev, +		     const unsigned char *addr, +		     u16 flags) +{ +	int err = -EINVAL; + +	/* If aging addresses are supported device will need to +	 * implement its own handler for this. 
+	 */ +	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { +		pr_info("%s: FDB only supports static addresses\n", dev->name); +		return err; +	} + +	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) +		err = dev_uc_add_excl(dev, addr); +	else if (is_multicast_ether_addr(addr)) +		err = dev_mc_add_excl(dev, addr); + +	/* Only return duplicate errors if NLM_F_EXCL is set */ +	if (err == -EEXIST && !(flags & NLM_F_EXCL)) +		err = 0; + +	return err; +} +EXPORT_SYMBOL(ndo_dflt_fdb_add); + +static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct ndmsg *ndm; @@ -2082,7 +2091,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)  	}  	addr = nla_data(tb[NDA_LLADDR]); -	if (!is_valid_ether_addr(addr)) { +	if (is_zero_ether_addr(addr)) {  		pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n");  		return -EINVAL;  	} @@ -2103,10 +2112,13 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)  	}  	/* Embedded bridge, macvlan, and any other device support */ -	if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_add) { -		err = dev->netdev_ops->ndo_fdb_add(ndm, tb, -						   dev, addr, -						   nlh->nlmsg_flags); +	if ((ndm->ndm_flags & NTF_SELF)) { +		if (dev->netdev_ops->ndo_fdb_add) +			err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr, +							   nlh->nlmsg_flags); +		else +			err = ndo_dflt_fdb_add(ndm, tb, dev, addr, +					       nlh->nlmsg_flags);  		if (!err) {  			rtnl_fdb_notify(dev, addr, RTM_NEWNEIGH); @@ -2117,7 +2129,36 @@ out:  	return err;  } -static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +/** + * ndo_dflt_fdb_del - default netdevice operation to delete an FDB entry + */ +int ndo_dflt_fdb_del(struct ndmsg *ndm, +		     struct nlattr *tb[], +		     struct net_device *dev, +		     const unsigned char *addr) +{ +	int err = -EOPNOTSUPP; + +	/* If aging addresses are supported device will need to +	 * implement its own handler for this. 
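The new ndo_dflt_fdb_add() gives devices without their own FDB ops a usable default: static entries only, the address class picks the unicast or multicast list, and a duplicate only surfaces as an error when the request carried NLM_F_EXCL. A userspace model of that decision tree; the two list operations are stubs, and the link-local special case is folded into the unicast path for brevity.

/* Userspace model of the default FDB-add policy above. NUD_PERMANENT
 * and NLM_F_EXCL carry their real uapi values; the list ops are stubs. */
#include <errno.h>
#include <stdio.h>
#include <stdint.h>

#define NUD_PERMANENT 0x80
#define NLM_F_EXCL    0x200

static int is_multicast(const uint8_t *a) { return a[0] & 1; }

/* Stand-ins for dev_uc_add_excl()/dev_mc_add_excl(). */
static int uc_add_excl(const uint8_t *a) { (void)a; return -EEXIST; }
static int mc_add_excl(const uint8_t *a) { (void)a; return 0; }

static int fdb_add(uint16_t ndm_state, const uint8_t *addr, uint16_t flags)
{
	int err;

	if (ndm_state && !(ndm_state & NUD_PERMANENT))
		return -EINVAL;                 /* only static entries */

	err = is_multicast(addr) ? mc_add_excl(addr) : uc_add_excl(addr);

	if (err == -EEXIST && !(flags & NLM_F_EXCL))
		err = 0;                        /* duplicate is fine here */
	return err;
}

int main(void)
{
	uint8_t uc[6] = { 0x02, 0, 0, 0, 0, 1 };
	printf("dup, no EXCL: %d\n", fdb_add(NUD_PERMANENT, uc, 0));          /* 0 */
	printf("dup, EXCL:    %d\n", fdb_add(NUD_PERMANENT, uc, NLM_F_EXCL)); /* -17 */
	return 0;
}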
+	 */ +	if (ndm->ndm_state & NUD_PERMANENT) { +		pr_info("%s: FDB only supports static addresses\n", dev->name); +		return -EINVAL; +	} + +	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) +		err = dev_uc_del(dev, addr); +	else if (is_multicast_ether_addr(addr)) +		err = dev_mc_del(dev, addr); +	else +		err = -EINVAL; + +	return err; +} +EXPORT_SYMBOL(ndo_dflt_fdb_del); + +static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct ndmsg *ndm; @@ -2151,7 +2192,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)  	}  	addr = nla_data(tb[NDA_LLADDR]); -	if (!is_valid_ether_addr(addr)) { +	if (is_zero_ether_addr(addr)) {  		pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ether address\n");  		return -EINVAL;  	} @@ -2174,8 +2215,11 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)  	}  	/* Embedded bridge, macvlan, and any other device support */ -	if ((ndm->ndm_flags & NTF_SELF) && dev->netdev_ops->ndo_fdb_del) { -		err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); +	if (ndm->ndm_flags & NTF_SELF) { +		if (dev->netdev_ops->ndo_fdb_del) +			err = dev->netdev_ops->ndo_fdb_del(ndm, tb, dev, addr); +		else +			err = ndo_dflt_fdb_del(ndm, tb, dev, addr);  		if (!err) {  			rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); @@ -2220,7 +2264,7 @@ skip:   * @dev: netdevice   *   * Default netdevice operation to dump the existing unicast address list. - * Returns zero on success. + * Returns number of addresses from list put in skb.   */  int ndo_dflt_fdb_dump(struct sk_buff *skb,  		      struct netlink_callback *cb, @@ -2260,6 +2304,8 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb)  		if (dev->netdev_ops->ndo_fdb_dump)  			idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, idx); +		else +			idx = ndo_dflt_fdb_dump(skb, cb, dev, idx);  	}  	rcu_read_unlock(); @@ -2411,8 +2457,7 @@ errout:  	return err;  } -static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, -			       void *arg) +static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct ifinfomsg *ifm; @@ -2482,8 +2527,7 @@ out:  	return err;  } -static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, -			       void *arg) +static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct ifinfomsg *ifm; @@ -2553,10 +2597,6 @@ out:  	return err;  } -/* Protected by RTNL sempahore.  */ -static struct rtattr **rta_buf; -static int rtattr_max; -  /* Process one rtnetlink message. 
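Both FDB paths above also relax the address check from is_valid_ether_addr() to a bare is_zero_ether_addr() test, because is_valid_ether_addr() rejects multicast MACs and FDB entries for multicast groups are legitimate. A standalone comparison of what the relaxation admits:

/* Why the validity check was relaxed: the old test rejects multicast
 * MACs, the new one forbids only the all-zeros address. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

static int is_zero_ether(const uint8_t *a)
{
	static const uint8_t z[6];
	return memcmp(a, z, 6) == 0;
}

static int is_valid_ether(const uint8_t *a)   /* old, stricter test */
{
	return !(a[0] & 1) && !is_zero_ether(a);  /* no multicast, no zero */
}

int main(void)
{
	uint8_t mcast[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
	printf("old test accepts multicast? %d\n", is_valid_ether(mcast));  /* 0 */
	printf("new test accepts multicast? %d\n", !is_zero_ether(mcast));  /* 1 */
	return 0;
}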
*/  static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) @@ -2564,7 +2604,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)  	struct net *net = sock_net(skb->sk);  	rtnl_doit_func doit;  	int sz_idx, kind; -	int min_len;  	int family;  	int type;  	int err; @@ -2576,10 +2615,10 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)  	type -= RTM_BASE;  	/* All the messages must have at least 1 byte length */ -	if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct rtgenmsg))) +	if (nlmsg_len(nlh) < sizeof(struct rtgenmsg))  		return 0; -	family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family; +	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;  	sz_idx = type>>2;  	kind = type&3; @@ -2612,32 +2651,11 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)  		return err;  	} -	memset(rta_buf, 0, (rtattr_max * sizeof(struct rtattr *))); - -	min_len = rtm_min[sz_idx]; -	if (nlh->nlmsg_len < min_len) -		return -EINVAL; - -	if (nlh->nlmsg_len > min_len) { -		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); -		struct rtattr *attr = (void *)nlh + NLMSG_ALIGN(min_len); - -		while (RTA_OK(attr, attrlen)) { -			unsigned int flavor = attr->rta_type & NLA_TYPE_MASK; -			if (flavor) { -				if (flavor > rta_max[sz_idx]) -					return -EINVAL; -				rta_buf[flavor-1] = attr; -			} -			attr = RTA_NEXT(attr, attrlen); -		} -	} -  	doit = rtnl_get_doit(family, type);  	if (doit == NULL)  		return -EOPNOTSUPP; -	return doit(skb, nlh, (void *)&rta_buf[0]); +	return doit(skb, nlh);  }  static void rtnetlink_rcv(struct sk_buff *skb) @@ -2707,16 +2725,6 @@ static struct pernet_operations rtnetlink_net_ops = {  void __init rtnetlink_init(void)  { -	int i; - -	rtattr_max = 0; -	for (i = 0; i < ARRAY_SIZE(rta_max); i++) -		if (rta_max[i] > rtattr_max) -			rtattr_max = rta_max[i]; -	rta_buf = kmalloc(rtattr_max * sizeof(struct rtattr *), GFP_KERNEL); -	if (!rta_buf) -		panic("rtnetlink_init: cannot allocate rta_buf\n"); -  	if (register_pernet_subsys(&rtnetlink_net_ops))  		panic("rtnetlink_init: cannot initialize rtnetlink\n"); diff --git a/net/core/scm.c b/net/core/scm.c index 2dc6cdaaae8..03795d0147f 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -187,22 +187,6 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p)  			p->creds.uid = uid;  			p->creds.gid = gid; - -			if (!p->cred || -			    !uid_eq(p->cred->euid, uid) || -			    !gid_eq(p->cred->egid, gid)) { -				struct cred *cred; -				err = -ENOMEM; -				cred = prepare_creds(); -				if (!cred) -					goto error; - -				cred->uid = cred->euid = uid; -				cred->gid = cred->egid = gid; -				if (p->cred) -					put_cred(p->cred); -				p->cred = cred; -			}  			break;  		}  		default: @@ -306,8 +290,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)  		/* Bump the usage count and install the file. 
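The slimmed-down rtnetlink_rcv_msg() leans on the RTM_* numbering convention: message types are allocated in groups of four verbs, so sz_idx = type >> 2 picks the object family and kind = type & 3 picks the verb, exactly as before. A small decoder over the real RTM values:

/* Decoder for the RTM_* grouping the dispatcher above relies on:
 * types come in fours (NEW/DEL/GET/SET) counted from RTM_BASE. */
#include <stdio.h>

#define RTM_BASE    16
#define RTM_NEWLINK 16
#define RTM_GETLINK 18
#define RTM_NEWADDR 20

static void decode(int type)
{
	int off = type - RTM_BASE;
	printf("type %d -> group %d, kind %d (%s)\n",
	       type, off >> 2, off & 3,
	       (const char *[]){ "NEW", "DEL", "GET", "SET" }[off & 3]);
}

int main(void)
{
	decode(RTM_NEWLINK);  /* group 0, kind 0 */
	decode(RTM_GETLINK);  /* group 0, kind 2 */
	decode(RTM_NEWADDR);  /* group 1, kind 0 */
	return 0;
}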
*/  		sock = sock_from_file(fp[i], &err);  		if (sock) { -			sock_update_netprioidx(sock->sk, current); -			sock_update_classid(sock->sk, current); +			sock_update_netprioidx(sock->sk); +			sock_update_classid(sock->sk);  		}  		fd_install(new_fd, get_file(fp[i]));  	} diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index e61a8bb7fce..6a2f13cee86 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -12,12 +12,10 @@  static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; -static int __init net_secret_init(void) +void net_secret_init(void)  {  	get_random_bytes(net_secret, sizeof(net_secret)); -	return 0;  } -late_initcall(net_secret_init);  #ifdef CONFIG_INET  static u32 seq_scale(u32 seq) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 33245ef54c3..af9185d0be6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -179,6 +179,33 @@ out:   *   */ +struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) +{ +	struct sk_buff *skb; + +	/* Get the HEAD */ +	skb = kmem_cache_alloc_node(skbuff_head_cache, +				    gfp_mask & ~__GFP_DMA, node); +	if (!skb) +		goto out; + +	/* +	 * Only clear those fields we need to clear, not those that we will +	 * actually initialise below. Hence, don't put any more fields after +	 * the tail pointer in struct sk_buff! +	 */ +	memset(skb, 0, offsetof(struct sk_buff, tail)); +	skb->data = NULL; +	skb->truesize = sizeof(struct sk_buff); +	atomic_set(&skb->users, 1); + +#ifdef NET_SKBUFF_DATA_USES_OFFSET +	skb->mac_header = ~0U; +#endif +out: +	return skb; +} +  /**   *	__alloc_skb	-	allocate a network buffer   *	@size: size to allocate @@ -584,7 +611,8 @@ static void skb_release_head_state(struct sk_buff *skb)  static void skb_release_all(struct sk_buff *skb)  {  	skb_release_head_state(skb); -	skb_release_data(skb); +	if (likely(skb->data)) +		skb_release_data(skb);  }  /** @@ -673,6 +701,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)  	new->mac_header		= old->mac_header;  	new->inner_transport_header = old->inner_transport_header;  	new->inner_network_header = old->inner_network_header; +	new->inner_mac_header = old->inner_mac_header;  	skb_dst_copy(new, old);  	new->rxhash		= old->rxhash;  	new->ooo_okay		= old->ooo_okay; @@ -706,6 +735,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)  	new->tc_verd		= old->tc_verd;  #endif  #endif +	new->vlan_proto		= old->vlan_proto;  	new->vlan_tci		= old->vlan_tci;  	skb_copy_secmark(new, old); @@ -867,6 +897,18 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)  }  EXPORT_SYMBOL(skb_clone); +static void skb_headers_offset_update(struct sk_buff *skb, int off) +{ +	/* {transport,network,mac}_header and tail are relative to skb->head */ +	skb->transport_header += off; +	skb->network_header   += off; +	if (skb_mac_header_was_set(skb)) +		skb->mac_header += off; +	skb->inner_transport_header += off; +	skb->inner_network_header += off; +	skb->inner_mac_header += off; +} +  static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)  {  #ifndef NET_SKBUFF_DATA_USES_OFFSET @@ -879,13 +921,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)  	__copy_skb_header(new, old);  #ifndef NET_SKBUFF_DATA_USES_OFFSET -	/* {transport,network,mac}_header are relative to skb->head */ -	new->transport_header += offset; -	new->network_header   += offset; -	if (skb_mac_header_was_set(new)) -		new->mac_header	      += offset; -	new->inner_transport_header += offset; -	
new->inner_network_header   += offset; +	skb_headers_offset_update(new, offset);  #endif  	skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;  	skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; @@ -1077,14 +1113,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,  #else  	skb->end      = skb->head + size;  #endif -	/* {transport,network,mac}_header and tail are relative to skb->head */  	skb->tail	      += off; -	skb->transport_header += off; -	skb->network_header   += off; -	if (skb_mac_header_was_set(skb)) -		skb->mac_header += off; -	skb->inner_transport_header += off; -	skb->inner_network_header += off; +	skb_headers_offset_update(skb, off);  	/* Only adjust this if it actually is csum_start rather than csum */  	if (skb->ip_summed == CHECKSUM_PARTIAL)  		skb->csum_start += nhead; @@ -1180,12 +1210,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,  	if (n->ip_summed == CHECKSUM_PARTIAL)  		n->csum_start += off;  #ifdef NET_SKBUFF_DATA_USES_OFFSET -	n->transport_header += off; -	n->network_header   += off; -	if (skb_mac_header_was_set(skb)) -		n->mac_header += off; -	n->inner_transport_header += off; -	n->inner_network_header	   += off; +	skb_headers_offset_update(n, off);  #endif  	return n; @@ -2741,12 +2766,19 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)  	unsigned int tnl_hlen = skb_tnl_header_len(skb);  	unsigned int headroom;  	unsigned int len; +	__be16 proto; +	bool csum;  	int sg = !!(features & NETIF_F_SG);  	int nfrags = skb_shinfo(skb)->nr_frags;  	int err = -ENOMEM;  	int i = 0;  	int pos; +	proto = skb_network_protocol(skb); +	if (unlikely(!proto)) +		return ERR_PTR(-EINVAL); + +	csum = !!can_checksum_protocol(features, proto);  	__skb_push(skb, doffset);  	headroom = skb_headroom(skb);  	pos = skb_headlen(skb); @@ -2884,6 +2916,12 @@ skip_fraglist:  		nskb->data_len = len - hsize;  		nskb->len += nskb->data_len;  		nskb->truesize += nskb->data_len; + +		if (!csum) { +			nskb->csum = skb_checksum(nskb, doffset, +						  nskb->len - doffset, 0); +			nskb->ip_summed = CHECKSUM_NONE; +		}  	} while ((offset += len) < skb->len);  	return segs; @@ -3289,12 +3327,8 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,  	if (!sk)  		return; -	skb = skb_clone(orig_skb, GFP_ATOMIC); -	if (!skb) -		return; -  	if (hwtstamps) { -		*skb_hwtstamps(skb) = +		*skb_hwtstamps(orig_skb) =  			*hwtstamps;  	} else {  		/* @@ -3302,9 +3336,13 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,  		 * so keep the shared tx_flags and only  		 * store software time stamp  		 */ -		skb->tstamp = ktime_get_real(); +		orig_skb->tstamp = ktime_get_real();  	} +	skb = skb_clone(orig_skb, GFP_ATOMIC); +	if (!skb) +		return; +  	serr = SKB_EXT_ERR(skb);  	memset(serr, 0, sizeof(*serr));  	serr->ee.ee_errno = ENOMSG; @@ -3361,6 +3399,7 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)  	skb->ip_summed = CHECKSUM_PARTIAL;  	skb->csum_start = skb_headroom(skb) + start;  	skb->csum_offset = off; +	skb_set_transport_header(skb, start);  	return true;  }  EXPORT_SYMBOL_GPL(skb_partial_csum_set); diff --git a/net/core/sock.c b/net/core/sock.c index b261a797774..d4f4cea726e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -907,6 +907,10 @@ set_rcvbuf:  		sock_valbool_flag(sk, SOCK_NOFCS, valbool);  		break; +	case SO_SELECT_ERR_QUEUE: +		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool); +		break; +  	default:  		ret = -ENOPROTOOPT;  		break; @@ -1160,6 +1164,10 @@ int sock_getsockopt(struct socket *sock, int level, 
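skb_headers_offset_update() collapses three hand-maintained copies of the same fixup block, which is how inner_mac_header had gone missing from two of them. A toy model of why keeping header positions as offsets from skb->head makes relocation one addition per field; the struct and values here are illustrative only.

/* Toy model of the consolidated header fixup: when the buffer is
 * relocated or grows headroom, every stored offset shifts by "off". */
#include <stdio.h>

struct pkt {
	int transport, network, mac;
	int inner_transport, inner_network, inner_mac;
};

static void headers_offset_update(struct pkt *p, int off)
{
	p->transport += off;        p->network += off;        p->mac += off;
	p->inner_transport += off;  p->inner_network += off;
	p->inner_mac += off;        /* the field the old copies forgot */
}

int main(void)
{
	struct pkt p = { 34, 14, 0, 70, 50, 40 };
	headers_offset_update(&p, 16);  /* buffer gained 16 bytes of headroom */
	printf("transport now at %d\n", p.transport);  /* 50 */
	return 0;
}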
int optname,  		v.val = sock_flag(sk, SOCK_FILTER_LOCKED);  		break; +	case SO_SELECT_ERR_QUEUE: +		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); +		break; +  	default:  		return -ENOPROTOOPT;  	} @@ -1298,13 +1306,12 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)  	module_put(owner);  } -#ifdef CONFIG_CGROUPS  #if IS_ENABLED(CONFIG_NET_CLS_CGROUP) -void sock_update_classid(struct sock *sk, struct task_struct *task) +void sock_update_classid(struct sock *sk)  {  	u32 classid; -	classid = task_cls_classid(task); +	classid = task_cls_classid(current);  	if (classid != sk->sk_classid)  		sk->sk_classid = classid;  } @@ -1312,16 +1319,15 @@ EXPORT_SYMBOL(sock_update_classid);  #endif  #if IS_ENABLED(CONFIG_NETPRIO_CGROUP) -void sock_update_netprioidx(struct sock *sk, struct task_struct *task) +void sock_update_netprioidx(struct sock *sk)  {  	if (in_interrupt())  		return; -	sk->sk_cgrp_prioidx = task_netprioidx(task); +	sk->sk_cgrp_prioidx = task_netprioidx(current);  }  EXPORT_SYMBOL_GPL(sock_update_netprioidx);  #endif -#endif  /**   *	sk_alloc - All socket objects are allocated here @@ -1347,8 +1353,8 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,  		sock_net_set(sk, get_net(net));  		atomic_set(&sk->sk_wmem_alloc, 1); -		sock_update_classid(sk, current); -		sock_update_netprioidx(sk, current); +		sock_update_classid(sk); +		sock_update_netprioidx(sk);  	}  	return sk; diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index a29e90cf36b..d5bef0b0f63 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -49,6 +49,39 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)  }  EXPORT_SYMBOL_GPL(sock_diag_put_meminfo); +int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk, +			     struct sk_buff *skb, int attrtype) +{ +	struct nlattr *attr; +	struct sk_filter *filter; +	unsigned int len; +	int err = 0; + +	if (!ns_capable(user_ns, CAP_NET_ADMIN)) { +		nla_reserve(skb, attrtype, 0); +		return 0; +	} + +	rcu_read_lock(); + +	filter = rcu_dereference(sk->sk_filter); +	len = filter ? filter->len * sizeof(struct sock_filter) : 0; + +	attr = nla_reserve(skb, attrtype, len); +	if (attr == NULL) { +		err = -EMSGSIZE; +		goto out; +	} + +	if (filter) +		memcpy(nla_data(attr), filter->insns, len); + +out: +	rcu_read_unlock(); +	return err; +} +EXPORT_SYMBOL(sock_diag_put_filterinfo); +  void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))  {  	mutex_lock(&sock_diag_table_mutex); diff --git a/net/core/utils.c b/net/core/utils.c index e3487e46193..3c7f5b51b97 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -17,6 +17,7 @@  #include <linux/module.h>  #include <linux/jiffies.h>  #include <linux/kernel.h> +#include <linux/ctype.h>  #include <linux/inet.h>  #include <linux/mm.h>  #include <linux/net.h> @@ -348,9 +349,7 @@ int mac_pton(const char *s, u8 *mac)  	/* Don't dirty result unless string is valid MAC. 
*/  	for (i = 0; i < ETH_ALEN; i++) { -		if (!strchr("0123456789abcdefABCDEF", s[i * 3])) -			return 0; -		if (!strchr("0123456789abcdefABCDEF", s[i * 3 + 1])) +		if (!isxdigit(s[i * 3]) || !isxdigit(s[i * 3 + 1]))  			return 0;  		if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':')  			return 0; diff --git a/net/dcb/dcbevent.c b/net/dcb/dcbevent.c index 1d9eb7c60a6..4f72fc40bf0 100644 --- a/net/dcb/dcbevent.c +++ b/net/dcb/dcbevent.c @@ -20,6 +20,7 @@  #include <linux/rtnetlink.h>  #include <linux/notifier.h>  #include <linux/export.h> +#include <net/dcbevent.h>  static ATOMIC_NOTIFIER_HEAD(dcbevent_notif_chain); diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 21291f1abcd..40d5829ed36 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1658,7 +1658,7 @@ static const struct reply_func reply_funcs[DCB_CMD_MAX+1] = {  	[DCB_CMD_CEE_GET]	= { RTM_GETDCB, dcbnl_cee_get },  }; -static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct net_device *netdev; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 4f9f5eb478f..ebc54fef85a 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -500,8 +500,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,  	return &rt->dst;  } -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, -				 struct request_values *rv_unused) +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req)  {  	int err = -1;  	struct sk_buff *skb; @@ -658,7 +657,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)  	dreq->dreq_gss     = dreq->dreq_iss;  	dreq->dreq_service = service; -	if (dccp_v4_send_response(sk, req, NULL)) +	if (dccp_v4_send_response(sk, req))  		goto drop_and_free;  	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6e05981f271..9c61f9c02fd 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -213,8 +213,7 @@ out:  } -static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, -				 struct request_values *rv_unused) +static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)  {  	struct inet6_request_sock *ireq6 = inet6_rsk(req);  	struct ipv6_pinfo *np = inet6_sk(sk); @@ -428,7 +427,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)  	dreq->dreq_gss     = dreq->dreq_iss;  	dreq->dreq_service = service; -	if (dccp_v6_send_response(sk, req, NULL)) +	if (dccp_v6_send_response(sk, req))  		goto drop_and_free;  	inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index c8da116d84a..7d9197063eb 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -563,7 +563,7 @@ static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = {  				    .len = IFNAMSIZ - 1 },  }; -static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct nlattr *tb[IFA_MAX+1]; @@ -607,7 +607,7 @@ errout:  	return err;  } -static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct nlattr *tb[IFA_MAX+1]; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index e36614eccc0..57dc159245e 100644 --- a/net/decnet/dn_fib.c +++ 
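The utils.c change swaps two strchr() scans over a hex alphabet for isxdigit() while keeping the validate-before-write contract. A self-contained userspace rendering of that mac_pton() shape, assuming the canonical aa:bb:cc:dd:ee:ff input layout:

/* Userspace rendering of the mac_pton() shape above: validate the
 * whole string first so the output is never half-written. */
#include <ctype.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#define ETH_ALEN 6

static int mac_pton(const char *s, uint8_t *mac)
{
	int i;

	if (strlen(s) < 3 * ETH_ALEN - 1)
		return 0;

	for (i = 0; i < ETH_ALEN; i++) {
		if (!isxdigit((unsigned char)s[i * 3]) ||
		    !isxdigit((unsigned char)s[i * 3 + 1]))
			return 0;
		if (i != ETH_ALEN - 1 && s[i * 3 + 2] != ':')
			return 0;
	}
	for (i = 0; i < ETH_ALEN; i++)
		mac[i] = (uint8_t)strtoul((char[]){ s[i * 3], s[i * 3 + 1], 0 },
					  NULL, 16);
	return 1;
}

int main(void)
{
	uint8_t mac[ETH_ALEN];
	if (mac_pton("00:1a:2b:3c:4d:5e", mac))
		printf("last octet: %02x\n", mac[ETH_ALEN - 1]);  /* 5e */
	return 0;
}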
b/net/decnet/dn_fib.c @@ -145,22 +145,10 @@ static inline struct dn_fib_info *dn_fib_find_info(const struct dn_fib_info *nfi  	return NULL;  } -__le16 dn_fib_get_attr16(struct rtattr *attr, int attrlen, int type) +static int dn_fib_count_nhs(const struct nlattr *attr)  { -	while(RTA_OK(attr,attrlen)) { -		if (attr->rta_type == type) -			return *(__le16*)RTA_DATA(attr); -		attr = RTA_NEXT(attr, attrlen); -	} - -	return 0; -} - -static int dn_fib_count_nhs(struct rtattr *rta) -{ -	int nhs = 0; -	struct rtnexthop *nhp = RTA_DATA(rta); -	int nhlen = RTA_PAYLOAD(rta); +	struct rtnexthop *nhp = nla_data(attr); +	int nhs = 0, nhlen = nla_len(attr);  	while(nhlen >= (int)sizeof(struct rtnexthop)) {  		if ((nhlen -= nhp->rtnh_len) < 0) @@ -172,10 +160,11 @@ static int dn_fib_count_nhs(struct rtattr *rta)  	return nhs;  } -static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) +static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr, +			  const struct rtmsg *r)  { -	struct rtnexthop *nhp = RTA_DATA(rta); -	int nhlen = RTA_PAYLOAD(rta); +	struct rtnexthop *nhp = nla_data(attr); +	int nhlen = nla_len(attr);  	change_nexthops(fi) {  		int attrlen = nhlen - sizeof(struct rtnexthop); @@ -187,7 +176,10 @@ static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct rtattr *rta, cons  		nh->nh_weight = nhp->rtnh_hops + 1;  		if (attrlen) { -			nh->nh_gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); +			struct nlattr *gw_attr; + +			gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); +			nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0;  		}  		nhp = RTNH_NEXT(nhp);  	} endfor_nexthops(fi); @@ -268,7 +260,8 @@ out:  } -struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta *rta, const struct nlmsghdr *nlh, int *errp) +struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *attrs[], +				       const struct nlmsghdr *nlh, int *errp)  {  	int err;  	struct dn_fib_info *fi = NULL; @@ -281,11 +274,9 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta  	if (dn_fib_props[r->rtm_type].scope > r->rtm_scope)  		goto err_inval; -	if (rta->rta_mp) { -		nhs = dn_fib_count_nhs(rta->rta_mp); -		if (nhs == 0) -			goto err_inval; -	} +	if (attrs[RTA_MULTIPATH] && +	    (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0) +		goto err_inval;  	fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL);  	err = -ENOBUFS; @@ -295,53 +286,65 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta  	fi->fib_protocol = r->rtm_protocol;  	fi->fib_nhs = nhs;  	fi->fib_flags = r->rtm_flags; -	if (rta->rta_priority) -		fi->fib_priority = *rta->rta_priority; -	if (rta->rta_mx) { -		int attrlen = RTA_PAYLOAD(rta->rta_mx); -		struct rtattr *attr = RTA_DATA(rta->rta_mx); -		while(RTA_OK(attr, attrlen)) { -			unsigned int flavour = attr->rta_type; +	if (attrs[RTA_PRIORITY]) +		fi->fib_priority = nla_get_u32(attrs[RTA_PRIORITY]); + +	if (attrs[RTA_METRICS]) { +		struct nlattr *attr; +		int rem; -			if (flavour) { -				if (flavour > RTAX_MAX) +		nla_for_each_nested(attr, attrs[RTA_METRICS], rem) { +			int type = nla_type(attr); + +			if (type) { +				if (type > RTAX_MAX || nla_len(attr) < 4)  					goto err_inval; -				fi->fib_metrics[flavour-1] = *(unsigned int *)RTA_DATA(attr); + +				fi->fib_metrics[type-1] = nla_get_u32(attr);  			} -			attr = RTA_NEXT(attr, attrlen);  		}  	} -	if (rta->rta_prefsrc) -		memcpy(&fi->fib_prefsrc, 
rta->rta_prefsrc, 2); -	if (rta->rta_mp) { -		if ((err = dn_fib_get_nhs(fi, rta->rta_mp, r)) != 0) +	if (attrs[RTA_PREFSRC]) +		fi->fib_prefsrc = nla_get_le16(attrs[RTA_PREFSRC]); + +	if (attrs[RTA_MULTIPATH]) { +		if ((err = dn_fib_get_nhs(fi, attrs[RTA_MULTIPATH], r)) != 0)  			goto failure; -		if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) + +		if (attrs[RTA_OIF] && +		    fi->fib_nh->nh_oif != nla_get_u32(attrs[RTA_OIF]))  			goto err_inval; -		if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 2)) + +		if (attrs[RTA_GATEWAY] && +		    fi->fib_nh->nh_gw != nla_get_le16(attrs[RTA_GATEWAY]))  			goto err_inval;  	} else {  		struct dn_fib_nh *nh = fi->fib_nh; -		if (rta->rta_oif) -			nh->nh_oif = *rta->rta_oif; -		if (rta->rta_gw) -			memcpy(&nh->nh_gw, rta->rta_gw, 2); + +		if (attrs[RTA_OIF]) +			nh->nh_oif = nla_get_u32(attrs[RTA_OIF]); + +		if (attrs[RTA_GATEWAY]) +			nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]); +  		nh->nh_flags = r->rtm_flags;  		nh->nh_weight = 1;  	}  	if (r->rtm_type == RTN_NAT) { -		if (rta->rta_gw == NULL || nhs != 1 || rta->rta_oif) +		if (!attrs[RTA_GATEWAY] || nhs != 1 || attrs[RTA_OIF])  			goto err_inval; -		memcpy(&fi->fib_nh->nh_gw, rta->rta_gw, 2); + +		fi->fib_nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]);  		goto link_it;  	}  	if (dn_fib_props[r->rtm_type].error) { -		if (rta->rta_gw || rta->rta_oif || rta->rta_mp) +		if (attrs[RTA_GATEWAY] || attrs[RTA_OIF] || attrs[RTA_MULTIPATH])  			goto err_inval; +  		goto link_it;  	} @@ -367,8 +370,8 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta  	}  	if (fi->fib_prefsrc) { -		if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || -		    memcmp(&fi->fib_prefsrc, rta->rta_dst, 2)) +		if (r->rtm_type != RTN_LOCAL || !attrs[RTA_DST] || +		    fi->fib_prefsrc != nla_get_le16(attrs[RTA_DST]))  			if (dnet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)  				goto err_inval;  	} @@ -486,39 +489,21 @@ void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res)  	spin_unlock_bh(&dn_fib_multipath_lock);  } - -static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta) -{ -	int i; - -	for(i = 1; i <= RTA_MAX; i++) { -		struct rtattr *attr = rta[i-1]; -		if (attr) { -			if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2) -				return -EINVAL; -			if (i != RTA_MULTIPATH && i != RTA_METRICS && -			    i != RTA_TABLE) -				rta[i-1] = (struct rtattr *)RTA_DATA(attr); -		} -	} - -	return 0; -} - -static inline u32 rtm_get_table(struct rtattr **rta, u8 table) +static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table)  { -	if (rta[RTA_TABLE - 1]) -		table = nla_get_u32((struct nlattr *) rta[RTA_TABLE - 1]); +	if (attrs[RTA_TABLE]) +		table = nla_get_u32(attrs[RTA_TABLE]);  	return table;  } -static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct dn_fib_table *tb; -	struct rtattr **rta = arg; -	struct rtmsg *r = NLMSG_DATA(nlh); +	struct rtmsg *r = nlmsg_data(nlh); +	struct nlattr *attrs[RTA_MAX+1]; +	int err;  	if (!capable(CAP_NET_ADMIN))  		return -EPERM; @@ -526,22 +511,24 @@ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *  	if (!net_eq(net, &init_net))  		return -EINVAL; -	if (dn_fib_check_attr(r, rta)) -		return -EINVAL; +	err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy); +	if (err < 0) +		return err; -	tb = 
dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0); -	if (tb) -		return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); +	tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 0); +	if (!tb) +		return -ESRCH; -	return -ESRCH; +	return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb));  } -static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct dn_fib_table *tb; -	struct rtattr **rta = arg; -	struct rtmsg *r = NLMSG_DATA(nlh); +	struct rtmsg *r = nlmsg_data(nlh); +	struct nlattr *attrs[RTA_MAX+1]; +	int err;  	if (!capable(CAP_NET_ADMIN))  		return -EPERM; @@ -549,14 +536,15 @@ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *  	if (!net_eq(net, &init_net))  		return -EINVAL; -	if (dn_fib_check_attr(r, rta)) -		return -EINVAL; +	err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy); +	if (err < 0) +		return err; -	tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1); -	if (tb) -		return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); +	tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 1); +	if (!tb) +		return -ENOBUFS; -	return -ENOBUFS; +	return tb->insert(tb, r, attrs, nlh, &NETLINK_CB(skb));  }  static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa) @@ -566,10 +554,31 @@ static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifad  		struct nlmsghdr nlh;  		struct rtmsg rtm;  	} req; -	struct dn_kern_rta rta; +	struct { +		struct nlattr hdr; +		__le16 dst; +	} dst_attr = { +		.dst = dst, +	}; +	struct { +		struct nlattr hdr; +		__le16 prefsrc; +	} prefsrc_attr = { +		.prefsrc = ifa->ifa_local, +	}; +	struct { +		struct nlattr hdr; +		u32 oif; +	} oif_attr = { +		.oif = ifa->ifa_dev->dev->ifindex, +	}; +	struct nlattr *attrs[RTA_MAX+1] = { +		[RTA_DST] = (struct nlattr *) &dst_attr, +		[RTA_PREFSRC] = (struct nlattr * ) &prefsrc_attr, +		[RTA_OIF] = (struct nlattr *) &oif_attr, +	};  	memset(&req.rtm, 0, sizeof(req.rtm)); -	memset(&rta, 0, sizeof(rta));  	if (type == RTN_UNICAST)  		tb = dn_fib_get_table(RT_MIN_TABLE, 1); @@ -591,14 +600,10 @@ static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifad  	req.rtm.rtm_scope = (type != RTN_LOCAL ? 
RT_SCOPE_LINK : RT_SCOPE_HOST);  	req.rtm.rtm_type = type; -	rta.rta_dst = &dst; -	rta.rta_prefsrc = &ifa->ifa_local; -	rta.rta_oif = &ifa->ifa_dev->dev->ifindex; -  	if (cmd == RTM_NEWROUTE) -		tb->insert(tb, &req.rtm, &rta, &req.nlh, NULL); +		tb->insert(tb, &req.rtm, attrs, &req.nlh, NULL);  	else -		tb->delete(tb, &req.rtm, &rta, &req.nlh, NULL); +		tb->delete(tb, &req.rtm, attrs, &req.nlh, NULL);  }  static void dn_fib_add_ifaddr(struct dn_ifaddr *ifa) diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5ac0e153ef8..fe32388ea24 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1613,23 +1613,41 @@ errout:  	return -EMSGSIZE;  } +const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = { +	[RTA_DST]		= { .type = NLA_U16 }, +	[RTA_SRC]		= { .type = NLA_U16 }, +	[RTA_IIF]		= { .type = NLA_U32 }, +	[RTA_OIF]		= { .type = NLA_U32 }, +	[RTA_GATEWAY]		= { .type = NLA_U16 }, +	[RTA_PRIORITY]		= { .type = NLA_U32 }, +	[RTA_PREFSRC]		= { .type = NLA_U16 }, +	[RTA_METRICS]		= { .type = NLA_NESTED }, +	[RTA_MULTIPATH]		= { .type = NLA_NESTED }, +	[RTA_TABLE]		= { .type = NLA_U32 }, +	[RTA_MARK]		= { .type = NLA_U32 }, +}; +  /*   * This is called by both endnodes and routers now.   */ -static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) +static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(in_skb->sk); -	struct rtattr **rta = arg;  	struct rtmsg *rtm = nlmsg_data(nlh);  	struct dn_route *rt = NULL;  	struct dn_skb_cb *cb;  	int err;  	struct sk_buff *skb;  	struct flowidn fld; +	struct nlattr *tb[RTA_MAX+1];  	if (!net_eq(net, &init_net))  		return -EINVAL; +	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy); +	if (err < 0) +		return err; +  	memset(&fld, 0, sizeof(fld));  	fld.flowidn_proto = DNPROTO_NSP; @@ -1639,12 +1657,14 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void  	skb_reset_mac_header(skb);  	cb = DN_SKB_CB(skb); -	if (rta[RTA_SRC-1]) -		memcpy(&fld.saddr, RTA_DATA(rta[RTA_SRC-1]), 2); -	if (rta[RTA_DST-1]) -		memcpy(&fld.daddr, RTA_DATA(rta[RTA_DST-1]), 2); -	if (rta[RTA_IIF-1]) -		memcpy(&fld.flowidn_iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); +	if (tb[RTA_SRC]) +		fld.saddr = nla_get_le16(tb[RTA_SRC]); + +	if (tb[RTA_DST]) +		fld.daddr = nla_get_le16(tb[RTA_DST]); + +	if (tb[RTA_IIF]) +		fld.flowidn_iif = nla_get_u32(tb[RTA_IIF]);  	if (fld.flowidn_iif) {  		struct net_device *dev; @@ -1669,10 +1689,9 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void  		if (!err && -rt->dst.error)  			err = rt->dst.error;  	} else { -		int oif = 0; -		if (rta[RTA_OIF - 1]) -			memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); -		fld.flowidn_oif = oif; +		if (tb[RTA_OIF]) +			fld.flowidn_oif = nla_get_u32(tb[RTA_OIF]); +  		err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0);  	} diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 6c2445bcaba..86e3807052e 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -19,7 +19,6 @@  #include <linux/sockios.h>  #include <linux/init.h>  #include <linux/skbuff.h> -#include <linux/netlink.h>  #include <linux/rtnetlink.h>  #include <linux/proc_fs.h>  #include <linux/netdevice.h> @@ -224,26 +223,27 @@ static struct dn_zone *dn_new_zone(struct dn_hash *table, int z)  } -static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern_rta *rta, struct dn_fib_info *fi) +static int dn_fib_nh_match(struct rtmsg *r, struct 
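fib_magic() above fakes a parsed attribute table by laying struct nlattr headers directly in front of their payloads on the stack, so the table insert/delete paths can read attributes the same way whether they came off the wire or not. A userspace model of the trick; the accessor is a local stand-in, not the kernel's nla_get_le16():

/* A struct that places an attribute header immediately before its
 * payload is indistinguishable from a wire-format netlink attribute,
 * which is what fib_magic() exploits. Local model, not kernel code. */
#include <stdio.h>
#include <stdint.h>

struct nlattr { uint16_t nla_len; uint16_t nla_type; };

/* Stand-in getter: payload starts right after the 4-byte header. */
static uint16_t attr_get_u16(const struct nlattr *nla)
{
	return *(const uint16_t *)(nla + 1);
}

int main(void)
{
	struct {
		struct nlattr hdr;
		uint16_t dst;
	} dst_attr = { { sizeof(dst_attr), 1 /* hypothetical RTA_DST */ }, 0x04aa };

	const struct nlattr *attrs[2] = { NULL, &dst_attr.hdr };

	printf("RTA_DST payload: 0x%04x\n", attr_get_u16(attrs[1]));
	return 0;
}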
nlmsghdr *nlh, struct nlattr *attrs[], struct dn_fib_info *fi)  {  	struct rtnexthop *nhp;  	int nhlen; -	if (rta->rta_priority && *rta->rta_priority != fi->fib_priority) +	if (attrs[RTA_PRIORITY] && +	    nla_get_u32(attrs[RTA_PRIORITY]) != fi->fib_priority)  		return 1; -	if (rta->rta_oif || rta->rta_gw) { -		if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && -		    (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 2) == 0)) +	if (attrs[RTA_OIF] || attrs[RTA_GATEWAY]) { +		if ((!attrs[RTA_OIF] || nla_get_u32(attrs[RTA_OIF]) == fi->fib_nh->nh_oif) && +		    (!attrs[RTA_GATEWAY]  || nla_get_le16(attrs[RTA_GATEWAY]) == fi->fib_nh->nh_gw))  			return 0;  		return 1;  	} -	if (rta->rta_mp == NULL) +	if (!attrs[RTA_MULTIPATH])  		return 0; -	nhp = RTA_DATA(rta->rta_mp); -	nhlen = RTA_PAYLOAD(rta->rta_mp); +	nhp = nla_data(attrs[RTA_MULTIPATH]); +	nhlen = nla_len(attrs[RTA_MULTIPATH]);  	for_nexthops(fi) {  		int attrlen = nhlen - sizeof(struct rtnexthop); @@ -254,7 +254,10 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern  		if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)  			return 1;  		if (attrlen) { -			gw = dn_fib_get_attr16(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); +			struct nlattr *gw_attr; + +			gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY); +			gw = gw_attr ? nla_get_le16(gw_attr) : 0;  			if (gw && gw != nh->nh_gw)  				return 1; @@ -488,7 +491,7 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)  	if (!net_eq(net, &init_net))  		return 0; -	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && +	if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&  		((struct rtmsg *)nlmsg_data(cb->nlh))->rtm_flags&RTM_F_CLONED)  			return dn_cache_dump(skb, cb); @@ -517,7 +520,8 @@ out:  	return skb->len;  } -static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) +static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[], +			       struct nlmsghdr *n, struct netlink_skb_parms *req)  {  	struct dn_hash *table = (struct dn_hash *)tb->data;  	struct dn_fib_node *new_f, *f, **fp, **del_fp; @@ -536,15 +540,14 @@ static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct  		return -ENOBUFS;  	dz_key_0(key); -	if (rta->rta_dst) { -		__le16 dst; -		memcpy(&dst, rta->rta_dst, 2); +	if (attrs[RTA_DST]) { +		__le16 dst = nla_get_le16(attrs[RTA_DST]);  		if (dst & ~DZ_MASK(dz))  			return -EINVAL;  		key = dz_key(dst, dz);  	} -	if ((fi = dn_fib_create_info(r, rta, n, &err)) == NULL) +	if ((fi = dn_fib_create_info(r, attrs, n, &err)) == NULL)  		return err;  	if (dz->dz_nent > (dz->dz_divisor << 2) && @@ -654,7 +657,8 @@ out:  } -static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) +static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[], +			       struct nlmsghdr *n, struct netlink_skb_parms *req)  {  	struct dn_hash *table = (struct dn_hash*)tb->data;  	struct dn_fib_node **fp, **del_fp, *f; @@ -671,9 +675,8 @@ static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct  		return -ESRCH;  	dz_key_0(key); -	if (rta->rta_dst) { -		__le16 dst; -		memcpy(&dst, rta->rta_dst, 2); +	if (attrs[RTA_DST]) { +		__le16 dst = nla_get_le16(attrs[RTA_DST]);  		if (dst & ~DZ_MASK(dz))  			return -EINVAL;  		key = 
dz_key(dst, dz); @@ -703,7 +706,7 @@ static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct  				(r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) &&  				(!r->rtm_protocol ||  					fi->fib_protocol == r->rtm_protocol) && -				dn_fib_nh_match(r, n, rta, fi) == 0) +				dn_fib_nh_match(r, n, attrs, fi) == 0)  			del_fp = fp;  	} diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index dfe42012a04..2a7efe38834 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -19,7 +19,7 @@  #include <linux/netdevice.h>  #include <linux/netfilter.h>  #include <linux/spinlock.h> -#include <linux/netlink.h> +#include <net/netlink.h>  #include <linux/netfilter_decnet.h>  #include <net/sock.h> @@ -39,21 +39,21 @@ static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)  	unsigned char *ptr;  	struct nf_dn_rtmsg *rtm; -	size = NLMSG_SPACE(rt_skb->len); -	size += NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg)); -	skb = alloc_skb(size, GFP_ATOMIC); +	size = NLMSG_ALIGN(rt_skb->len) + +	       NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg)); +	skb = nlmsg_new(size, GFP_ATOMIC);  	if (!skb) {  		*errp = -ENOMEM;  		return NULL;  	}  	old_tail = skb->tail; -	nlh = nlmsg_put(skb, 0, 0, 0, size - sizeof(*nlh), 0); +	nlh = nlmsg_put(skb, 0, 0, 0, size, 0);  	if (!nlh) {  		kfree_skb(skb);  		*errp = -ENOMEM;  		return NULL;  	} -	rtm = (struct nf_dn_rtmsg *)NLMSG_DATA(nlh); +	rtm = (struct nf_dn_rtmsg *)nlmsg_data(nlh);  	rtm->nfdn_ifindex = rt_skb->dev->ifindex;  	ptr = NFDN_RTMSG(rtm);  	skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 2bc62ea857c..0eb5d5e76df 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1,6 +1,7 @@  /*   * net/dsa/dsa.c - Hardware switch handling   * Copyright (c) 2008-2009 Marvell Semiconductor + * Copyright (c) 2013 Florian Fainelli <florian@openwrt.org>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License as published by @@ -14,6 +15,9 @@  #include <linux/slab.h>  #include <linux/module.h>  #include <net/dsa.h> +#include <linux/of.h> +#include <linux/of_mdio.h> +#include <linux/of_platform.h>  #include "dsa_priv.h"  char dsa_driver_version[] = "0.1"; @@ -287,34 +291,239 @@ static struct net_device *dev_to_net_device(struct device *dev)  	return NULL;  } +#ifdef CONFIG_OF +static int dsa_of_setup_routing_table(struct dsa_platform_data *pd, +					struct dsa_chip_data *cd, +					int chip_index, +					struct device_node *link) +{ +	int ret; +	const __be32 *reg; +	int link_port_addr; +	int link_sw_addr; +	struct device_node *parent_sw; +	int len; + +	parent_sw = of_get_parent(link); +	if (!parent_sw) +		return -EINVAL; + +	reg = of_get_property(parent_sw, "reg", &len); +	if (!reg || (len != sizeof(*reg) * 2)) +		return -EINVAL; + +	link_sw_addr = be32_to_cpup(reg + 1); + +	if (link_sw_addr >= pd->nr_chips) +		return -EINVAL; + +	/* First time routing table allocation */ +	if (!cd->rtable) { +		cd->rtable = kmalloc(pd->nr_chips * sizeof(s8), GFP_KERNEL); +		if (!cd->rtable) +			return -ENOMEM; + +		/* default to no valid uplink/downlink */ +		memset(cd->rtable, -1, pd->nr_chips * sizeof(s8)); +	} + +	reg = of_get_property(link, "reg", NULL); +	if (!reg) { +		ret = -EINVAL; +		goto out; +	} + +	link_port_addr = be32_to_cpup(reg); + +	cd->rtable[link_sw_addr] = link_port_addr; + +	return 0; +out: +	kfree(cd->rtable); +	return ret; +} + +static void 
dsa_of_free_platform_data(struct dsa_platform_data *pd) +{ +	int i; +	int port_index; + +	for (i = 0; i < pd->nr_chips; i++) { +		port_index = 0; +		while (port_index < DSA_MAX_PORTS) { +			if (pd->chip[i].port_names[port_index]) +				kfree(pd->chip[i].port_names[port_index]); +			port_index++; +		} +		kfree(pd->chip[i].rtable); +	} +	kfree(pd->chip); +} + +static int dsa_of_probe(struct platform_device *pdev) +{ +	struct device_node *np = pdev->dev.of_node; +	struct device_node *child, *mdio, *ethernet, *port, *link; +	struct mii_bus *mdio_bus; +	struct platform_device *ethernet_dev; +	struct dsa_platform_data *pd; +	struct dsa_chip_data *cd; +	const char *port_name; +	int chip_index, port_index; +	const unsigned int *sw_addr, *port_reg; +	int ret; + +	mdio = of_parse_phandle(np, "dsa,mii-bus", 0); +	if (!mdio) +		return -EINVAL; + +	mdio_bus = of_mdio_find_bus(mdio); +	if (!mdio_bus) +		return -EINVAL; + +	ethernet = of_parse_phandle(np, "dsa,ethernet", 0); +	if (!ethernet) +		return -EINVAL; + +	ethernet_dev = of_find_device_by_node(ethernet); +	if (!ethernet_dev) +		return -ENODEV; + +	pd = kzalloc(sizeof(*pd), GFP_KERNEL); +	if (!pd) +		return -ENOMEM; + +	pdev->dev.platform_data = pd; +	pd->netdev = &ethernet_dev->dev; +	pd->nr_chips = of_get_child_count(np); +	if (pd->nr_chips > DSA_MAX_SWITCHES) +		pd->nr_chips = DSA_MAX_SWITCHES; + +	pd->chip = kzalloc(pd->nr_chips * sizeof(struct dsa_chip_data), +			GFP_KERNEL); +	if (!pd->chip) { +		ret = -ENOMEM; +		goto out_free; +	} + +	chip_index = 0; +	for_each_available_child_of_node(np, child) { +		cd = &pd->chip[chip_index]; + +		cd->mii_bus = &mdio_bus->dev; + +		sw_addr = of_get_property(child, "reg", NULL); +		if (!sw_addr) +			continue; + +		cd->sw_addr = be32_to_cpup(sw_addr); +		if (cd->sw_addr > PHY_MAX_ADDR) +			continue; + +		for_each_available_child_of_node(child, port) { +			port_reg = of_get_property(port, "reg", NULL); +			if (!port_reg) +				continue; + +			port_index = be32_to_cpup(port_reg); + +			port_name = of_get_property(port, "label", NULL); +			if (!port_name) +				continue; + +			cd->port_names[port_index] = kstrdup(port_name, +					GFP_KERNEL); +			if (!cd->port_names[port_index]) { +				ret = -ENOMEM; +				goto out_free_chip; +			} + +			link = of_parse_phandle(port, "link", 0); + +			if (!strcmp(port_name, "dsa") && link && +					pd->nr_chips > 1) { +				ret = dsa_of_setup_routing_table(pd, cd, +						chip_index, link); +				if (ret) +					goto out_free_chip; +			} + +			if (port_index == DSA_MAX_PORTS) +				break; +		} +	} + +	return 0; + +out_free_chip: +	dsa_of_free_platform_data(pd); +out_free: +	kfree(pd); +	pdev->dev.platform_data = NULL; +	return ret; +} + +static void dsa_of_remove(struct platform_device *pdev) +{ +	struct dsa_platform_data *pd = pdev->dev.platform_data; + +	if (!pdev->dev.of_node) +		return; + +	dsa_of_free_platform_data(pd); +	kfree(pd); +} +#else +static inline int dsa_of_probe(struct platform_device *pdev) +{ +	return 0; +} + +static inline void dsa_of_remove(struct platform_device *pdev) +{ +} +#endif +  static int dsa_probe(struct platform_device *pdev)  {  	static int dsa_version_printed;  	struct dsa_platform_data *pd = pdev->dev.platform_data;  	struct net_device *dev;  	struct dsa_switch_tree *dst; -	int i; +	int i, ret;  	if (!dsa_version_printed++)  		printk(KERN_NOTICE "Distributed Switch Architecture "  			"driver version %s\n", dsa_driver_version); +	if (pdev->dev.of_node) { +		ret = dsa_of_probe(pdev); +		if (ret) +			return ret; + +		pd = pdev->dev.platform_data; +	} +  	if 
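dsa_of_setup_routing_table() fills, per chip, an array in which entry i is the local port that reaches switch i, with -1 meaning no link; the first link parsed triggers the allocation and the -1 fill. A minimal model of that table, assuming two daisy-chained switches:

/* Model of the per-chip DSA routing table: rtable[other_sw] is the
 * local port that reaches switch other_sw, -1 when unreachable. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
	int nr_chips = 2;
	signed char *rtable = malloc(nr_chips);

	if (!rtable)
		return 1;
	memset(rtable, -1, nr_chips);   /* default: no valid uplink/downlink */
	rtable[1] = 5;                  /* chip 0 reaches chip 1 via port 5 */

	printf("to sw1 via port %d, to sw0 via port %d\n",
	       rtable[1], rtable[0]);   /* 5 and -1 */
	free(rtable);
	return 0;
}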
(pd == NULL || pd->netdev == NULL)  		return -EINVAL;  	dev = dev_to_net_device(pd->netdev); -	if (dev == NULL) -		return -EINVAL; +	if (dev == NULL) { +		ret = -EINVAL; +		goto out; +	}  	if (dev->dsa_ptr != NULL) {  		dev_put(dev); -		return -EEXIST; +		ret = -EEXIST; +		goto out;  	}  	dst = kzalloc(sizeof(*dst), GFP_KERNEL);  	if (dst == NULL) {  		dev_put(dev); -		return -ENOMEM; +		ret = -ENOMEM; +		goto out;  	}  	platform_set_drvdata(pdev, dst); @@ -366,6 +575,11 @@ static int dsa_probe(struct platform_device *pdev)  	}  	return 0; + +out: +	dsa_of_remove(pdev); + +	return ret;  }  static int dsa_remove(struct platform_device *pdev) @@ -385,6 +599,8 @@ static int dsa_remove(struct platform_device *pdev)  			dsa_switch_destroy(ds);  	} +	dsa_of_remove(pdev); +  	return 0;  } @@ -392,6 +608,12 @@ static void dsa_shutdown(struct platform_device *pdev)  {  } +static const struct of_device_id dsa_of_match_table[] = { +	{ .compatible = "marvell,dsa", }, +	{} +}; +MODULE_DEVICE_TABLE(of, dsa_of_match_table); +  static struct platform_driver dsa_driver = {  	.probe		= dsa_probe,  	.remove		= dsa_remove, @@ -399,6 +621,7 @@ static struct platform_driver dsa_driver = {  	.driver = {  		.name	= "dsa",  		.owner	= THIS_MODULE, +		.of_match_table = dsa_of_match_table,  	},  }; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index a36c85eab5b..5359560926b 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -195,7 +195,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)  	if (netdev_uses_trailer_tags(dev))  		return htons(ETH_P_TRAILER); -	if (ntohs(eth->h_proto) >= 1536) +	if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)  		return eth->h_proto;  	/* diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 43b95ca6111..55e1fd5b3e5 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -104,6 +104,7 @@ static const u8 lowpan_llprefix[] = {0xfe, 0x80};  struct lowpan_dev_info {  	struct net_device	*real_dev; /* real WPAN device ptr */  	struct mutex		dev_list_mtx; /* mutex for list ops */ +	unsigned short		fragment_tag;  };  struct lowpan_dev_record { @@ -120,7 +121,6 @@ struct lowpan_fragment {  	struct list_head	list;		/* fragments list */  }; -static unsigned short fragment_tag;  static LIST_HEAD(lowpan_fragments);  static DEFINE_SPINLOCK(flist_lock); @@ -284,6 +284,9 @@ lowpan_compress_udp_header(u8 **hc06_ptr, struct sk_buff *skb)  	/* checksum is always inline */  	memcpy(*hc06_ptr, &uh->check, 2);  	*hc06_ptr += 2; + +	/* skip the UDP header */ +	skb_pull(skb, sizeof(struct udphdr));  }  static inline int lowpan_fetch_skb_u8(struct sk_buff *skb, u8 *val) @@ -309,9 +312,8 @@ static inline int lowpan_fetch_skb_u16(struct sk_buff *skb, u16 *val)  }  static int -lowpan_uncompress_udp_header(struct sk_buff *skb) +lowpan_uncompress_udp_header(struct sk_buff *skb, struct udphdr *uh)  { -	struct udphdr *uh = udp_hdr(skb);  	u8 tmp;  	if (!uh) @@ -358,6 +360,14 @@ lowpan_uncompress_udp_header(struct sk_buff *skb)  		/* copy checksum */  		memcpy(&uh->check, &skb->data[0], 2);  		skb_pull(skb, 2); + +		/* +		 * UDP length needs to be inferred from the lower layers +		 * here, we obtain the hint from the remaining size of the +		 * frame +		 */ +		uh->len = htons(skb->len + sizeof(struct udphdr)); +		pr_debug("uncompressed UDP length: src = %d", uh->len);  	} else {  		pr_debug("ERROR: unsupported NH format\n");  		goto err; @@ -572,17 +582,31 @@ static int lowpan_header_create(struct sk_buff *skb,  	 * this isn't implemented in mainline yet, so 
currently we assign 0xff  	 */  	{ +		mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; +		mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev); +  		/* prepare wpan address data */  		sa.addr_type = IEEE802154_ADDR_LONG; -		sa.pan_id = 0xff; - -		da.addr_type = IEEE802154_ADDR_LONG; -		da.pan_id = 0xff; +		sa.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); -		memcpy(&(da.hwaddr), daddr, 8);  		memcpy(&(sa.hwaddr), saddr, 8); +		/* intra-PAN communications */ +		da.pan_id = ieee802154_mlme_ops(dev)->get_pan_id(dev); -		mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; +		/* +		 * if the destination address is the broadcast address, use the +		 * corresponding short address +		 */ +		if (lowpan_is_addr_broadcast(daddr)) { +			da.addr_type = IEEE802154_ADDR_SHORT; +			da.short_addr = IEEE802154_ADDR_BROADCAST; +		} else { +			da.addr_type = IEEE802154_ADDR_LONG; +			memcpy(&(da.hwaddr), daddr, IEEE802154_ADDR_LEN); + +			/* request acknowledgment */ +			mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; +		}  		return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev,  				type, (void *)&da, (void *)&sa, skb->len); @@ -650,7 +674,7 @@ static void lowpan_fragment_timer_expired(unsigned long entry_addr)  }  static struct lowpan_fragment * -lowpan_alloc_new_frame(struct sk_buff *skb, u8 len, u16 tag) +lowpan_alloc_new_frame(struct sk_buff *skb, u16 len, u16 tag)  {  	struct lowpan_fragment *frame; @@ -720,7 +744,7 @@ lowpan_process_data(struct sk_buff *skb)  	{  		struct lowpan_fragment *frame;  		/* slen stores the rightmost 8 bits of the 11 bits length */ -		u8 slen, offset; +		u8 slen, offset = 0;  		u16 len, tag;  		bool found = false; @@ -731,6 +755,18 @@ lowpan_process_data(struct sk_buff *skb)  		/* adds the 3 MSB to the 8 LSB to retrieve the 11 bits length */  		len = ((iphc0 & 7) << 8) | slen; +		if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1) { +			pr_debug("%s received a FRAG1 packet (tag: %d, " +				 "size of the entire IP packet: %d)", +				 __func__, tag, len); +		} else { /* FRAGN */ +			if (lowpan_fetch_skb_u8(skb, &offset)) +				goto unlock_and_drop; +			pr_debug("%s received a FRAGN packet (tag: %d, " +				 "size of the entire IP packet: %d, " +				 "offset: %d)", __func__, tag, len, offset * 8); +		} +  		/*  		 * check if frame assembling with the same tag is  		 * already in progress @@ -745,17 +781,13 @@ lowpan_process_data(struct sk_buff *skb)  		/* alloc new frame structure */  		if (!found) { +			pr_debug("%s first fragment received for tag %d, " +				 "begin packet reassembly", __func__, tag);  			frame = lowpan_alloc_new_frame(skb, len, tag);  			if (!frame)  				goto unlock_and_drop;  		} -		if ((iphc0 & LOWPAN_DISPATCH_MASK) == LOWPAN_DISPATCH_FRAG1) -			goto unlock_and_drop; - -		if (lowpan_fetch_skb_u8(skb, &offset)) /* fetch offset */ -			goto unlock_and_drop; -  		/* if payload fits buffer, copy it */  		if (likely((offset * 8 + skb->len) <= frame->length))  			skb_copy_to_linear_data_offset(frame->skb, offset * 8, @@ -773,6 +805,9 @@ lowpan_process_data(struct sk_buff *skb)  			list_del(&frame->list);  			spin_unlock_bh(&flist_lock); +			pr_debug("%s successfully reassembled fragment " +				 "(tag %d)", __func__, tag); +  			dev_kfree_skb(skb);  			skb = frame->skb;  			kfree(frame); @@ -918,10 +953,35 @@ lowpan_process_data(struct sk_buff *skb)  	}  	/* UDP data uncompression */ -	if (iphc0 & LOWPAN_IPHC_NH_C) -		if (lowpan_uncompress_udp_header(skb)) +	if (iphc0 & LOWPAN_IPHC_NH_C) { +		struct udphdr uh; +		struct sk_buff *new; +		if 
(lowpan_uncompress_udp_header(skb, &uh))  			goto drop; +		/* +		 * replace the compressed UDP head by the uncompressed UDP +		 * header +		 */ +		new = skb_copy_expand(skb, sizeof(struct udphdr), +				      skb_tailroom(skb), GFP_ATOMIC); +		kfree_skb(skb); + +		if (!new) +			return -ENOMEM; + +		skb = new; + +		skb_push(skb, sizeof(struct udphdr)); +		skb_reset_transport_header(skb); +		skb_copy_to_linear_data(skb, &uh, sizeof(struct udphdr)); + +		lowpan_raw_dump_table(__func__, "raw UDP header dump", +				      (u8 *)&uh, sizeof(uh)); + +		hdr.nexthdr = UIP_PROTO_UDP; +	} +  	/* Not fragmented package */  	hdr.payload_len = htons(skb->len); @@ -969,13 +1029,13 @@ static int lowpan_get_mac_header_length(struct sk_buff *skb)  static int  lowpan_fragment_xmit(struct sk_buff *skb, u8 *head, -			int mlen, int plen, int offset) +			int mlen, int plen, int offset, int type)  {  	struct sk_buff *frag;  	int hlen, ret; -	/* if payload length is zero, therefore it's a first fragment */ -	hlen = (plen == 0 ? LOWPAN_FRAG1_HEAD_SIZE :  LOWPAN_FRAGN_HEAD_SIZE); +	hlen = (type == LOWPAN_DISPATCH_FRAG1) ? +			LOWPAN_FRAG1_HEAD_SIZE : LOWPAN_FRAGN_HEAD_SIZE;  	lowpan_raw_dump_inline(__func__, "6lowpan fragment header", head, hlen); @@ -1003,14 +1063,14 @@ lowpan_fragment_xmit(struct sk_buff *skb, u8 *head,  }  static int -lowpan_skb_fragmentation(struct sk_buff *skb) +lowpan_skb_fragmentation(struct sk_buff *skb, struct net_device *dev)  {  	int  err, header_length, payload_length, tag, offset = 0;  	u8 head[5];  	header_length = lowpan_get_mac_header_length(skb);  	payload_length = skb->len - header_length; -	tag = fragment_tag++; +	tag = lowpan_dev_info(dev)->fragment_tag++;  	/* first fragment header */  	head[0] = LOWPAN_DISPATCH_FRAG1 | ((payload_length >> 8) & 0x7); @@ -1018,7 +1078,16 @@ lowpan_skb_fragmentation(struct sk_buff *skb)  	head[2] = tag >> 8;  	head[3] = tag & 0xff; -	err = lowpan_fragment_xmit(skb, head, header_length, 0, 0); +	err = lowpan_fragment_xmit(skb, head, header_length, LOWPAN_FRAG_SIZE, +				   0, LOWPAN_DISPATCH_FRAG1); + +	if (err) { +		pr_debug("%s unable to send FRAG1 packet (tag: %d)", +			 __func__, tag); +		goto exit; +	} + +	offset = LOWPAN_FRAG_SIZE;  	/* next fragment header */  	head[0] &= ~LOWPAN_DISPATCH_FRAG1; @@ -1033,10 +1102,17 @@ lowpan_skb_fragmentation(struct sk_buff *skb)  			len = payload_length - offset;  		err = lowpan_fragment_xmit(skb, head, header_length, -							len, offset); +					   len, offset, LOWPAN_DISPATCH_FRAGN); +		if (err) { +			pr_debug("%s unable to send a subsequent FRAGN packet " +				 "(tag: %d, offset: %d)", __func__, tag, offset); +			goto exit; +		} +  		offset += len;  	} +exit:  	return err;  } @@ -1059,14 +1135,14 @@ static netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev)  	}  	pr_debug("frame is too big, fragmentation is needed\n"); -	err = lowpan_skb_fragmentation(skb); +	err = lowpan_skb_fragmentation(skb, dev);  error:  	dev_kfree_skb(skb);  out: -	if (err < 0) +	if (err)  		pr_debug("ERROR: xmit failed\n"); -	return (err < 0 ? NETDEV_TX_BUSY : NETDEV_TX_OK); +	return (err < 0) ? 
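The fragmentation rework keeps the RFC 4944 header layout: an 11-bit datagram size split across the dispatch byte and the byte after it, a 16-bit tag, and, for FRAGN only, an offset counted in 8-byte units. A sketch that packs and re-parses those headers, with the dispatch values taken from RFC 4944:

/* Pack and parse 6LoWPAN fragment headers as used above (RFC 4944):
 * FRAG1 = dispatch|size(11 bits), tag(16); FRAGN adds offset/8. */
#include <stdio.h>
#include <stdint.h>

#define DISPATCH_FRAG1 0xc0
#define DISPATCH_FRAGN 0xe0

static int pack_frag(uint8_t *head, int fragn, uint16_t size,
		     uint16_t tag, uint16_t offset)
{
	head[0] = (fragn ? DISPATCH_FRAGN : DISPATCH_FRAG1) | ((size >> 8) & 0x7);
	head[1] = size & 0xff;
	head[2] = tag >> 8;
	head[3] = tag & 0xff;
	if (!fragn)
		return 4;
	head[4] = offset / 8;          /* offset counted in 8-byte units */
	return 5;
}

int main(void)
{
	uint8_t h[5];
	int len = pack_frag(h, 1, 1280, 0x1234, 96);
	uint16_t size = ((h[0] & 0x7) << 8) | h[1];

	printf("hdr %d bytes, size %u, tag %02x%02x, offset %u\n",
	       len, size, h[2], h[3], h[4] * 8);
	return 0;
}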
static struct wpan_phy *lowpan_get_phy(const struct net_device *dev) @@ -1087,6 +1163,12 @@ static u16 lowpan_get_short_addr(const struct net_device *dev)  	return ieee802154_mlme_ops(real_dev)->get_short_addr(real_dev);  } +static u8 lowpan_get_dsn(const struct net_device *dev) +{ +	struct net_device *real_dev = lowpan_dev_info(dev)->real_dev; +	return ieee802154_mlme_ops(real_dev)->get_dsn(real_dev); +} +  static struct header_ops lowpan_header_ops = {  	.create	= lowpan_header_create,  }; @@ -1100,6 +1182,7 @@ static struct ieee802154_mlme_ops lowpan_mlme = {  	.get_pan_id = lowpan_get_pan_id,  	.get_phy = lowpan_get_phy,  	.get_short_addr = lowpan_get_short_addr, +	.get_dsn = lowpan_get_dsn,  };  static void lowpan_setup(struct net_device *dev) @@ -1203,6 +1286,7 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,  		return -ENODEV;  	lowpan_dev_info(dev)->real_dev = real_dev; +	lowpan_dev_info(dev)->fragment_tag = 0;  	mutex_init(&lowpan_dev_info(dev)->dev_list_mtx);  	entry = kzalloc(sizeof(struct lowpan_dev_record), GFP_KERNEL); diff --git a/net/ieee802154/6lowpan.h b/net/ieee802154/6lowpan.h index bba5f833631..4b8f917658b 100644 --- a/net/ieee802154/6lowpan.h +++ b/net/ieee802154/6lowpan.h @@ -92,9 +92,10 @@   */  #define lowpan_is_iid_16_bit_compressable(a)	\  	((((a)->s6_addr16[4]) == 0) &&		\ -	 (((a)->s6_addr16[5]) == 0) &&		\ -	 (((a)->s6_addr16[6]) == 0) &&		\ -	 ((((a)->s6_addr[14]) & 0x80) == 0)) +	 (((a)->s6_addr[10]) == 0) &&		\ +	 (((a)->s6_addr[11]) == 0xff) &&	\ +	 (((a)->s6_addr[12]) == 0xfe) &&	\ +	 (((a)->s6_addr[13]) == 0))  /* multicast address */  #define is_addr_mcast(a) (((a)->s6_addr[0]) == 0xFF) diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index e0da175f8e5..581a59504bd 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -291,6 +291,9 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,  	size_t copied = 0;  	int err = -EOPNOTSUPP;  	struct sk_buff *skb; +	struct sockaddr_ieee802154 *saddr; + +	saddr = (struct sockaddr_ieee802154 *)msg->msg_name;  	skb = skb_recv_datagram(sk, flags, noblock, &err);  	if (!skb) @@ -309,6 +312,13 @@  	sock_recv_ts_and_drops(msg, sk, skb); +	if (saddr) { +		saddr->family = AF_IEEE802154; +		saddr->addr = mac_cb(skb)->sa; +	} +	if (addr_len) +		*addr_len = sizeof(*saddr); +  	if (flags & MSG_TRUNC)  		copied = skb->len; done: diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 97351e1d07a..7e49bbcc696 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -64,8 +64,8 @@ struct sk_buff *ieee802154_nl_create(int flags, u8 req)  int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group)  { -	/* XXX: nlh is right at the start of msg */ -	void *hdr = genlmsg_data(NLMSG_DATA(msg->data)); +	struct nlmsghdr *nlh = nlmsg_hdr(msg); +	void *hdr = genlmsg_data(nlmsg_data(nlh));  	if (genlmsg_end(msg, hdr) < 0)  		goto out; @@ -97,8 +97,8 @@ struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info,  int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info)  { -	/* XXX: nlh is right at the start of msg */ -	void *hdr = genlmsg_data(NLMSG_DATA(msg->data)); +	struct nlmsghdr *nlh = nlmsg_hdr(msg); +	void *hdr = genlmsg_data(nlmsg_data(nlh));  	if (genlmsg_end(msg, hdr) < 0)  		goto out; diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 96bb08abece..b0bdd8c51e9 100644 --- a/net/ieee802154/nl-mac.c +++ 
b/net/ieee802154/nl-mac.c @@ -315,7 +315,7 @@ static int ieee802154_associate_req(struct sk_buff *skb,  	struct net_device *dev;  	struct ieee802154_addr addr;  	u8 page; -	int ret = -EINVAL; +	int ret = -EOPNOTSUPP;  	if (!info->attrs[IEEE802154_ATTR_CHANNEL] ||  	    !info->attrs[IEEE802154_ATTR_COORD_PAN_ID] || @@ -327,6 +327,8 @@ static int ieee802154_associate_req(struct sk_buff *skb,  	dev = ieee802154_nl_get_dev(info);  	if (!dev)  		return -ENODEV; +	if (!ieee802154_mlme_ops(dev)->assoc_req) +		goto out;  	if (info->attrs[IEEE802154_ATTR_COORD_HW_ADDR]) {  		addr.addr_type = IEEE802154_ADDR_LONG; @@ -350,6 +352,7 @@ static int ieee802154_associate_req(struct sk_buff *skb,  			page,  			nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY])); +out:  	dev_put(dev);  	return ret;  } @@ -359,7 +362,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb,  {  	struct net_device *dev;  	struct ieee802154_addr addr; -	int ret = -EINVAL; +	int ret = -EOPNOTSUPP;  	if (!info->attrs[IEEE802154_ATTR_STATUS] ||  	    !info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] || @@ -369,6 +372,8 @@ static int ieee802154_associate_resp(struct sk_buff *skb,  	dev = ieee802154_nl_get_dev(info);  	if (!dev)  		return -ENODEV; +	if (!ieee802154_mlme_ops(dev)->assoc_resp) +		goto out;  	addr.addr_type = IEEE802154_ADDR_LONG;  	nla_memcpy(addr.hwaddr, info->attrs[IEEE802154_ATTR_DEST_HW_ADDR], @@ -380,6 +385,7 @@ static int ieee802154_associate_resp(struct sk_buff *skb,  		nla_get_u16(info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]),  		nla_get_u8(info->attrs[IEEE802154_ATTR_STATUS])); +out:  	dev_put(dev);  	return ret;  } @@ -389,7 +395,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb,  {  	struct net_device *dev;  	struct ieee802154_addr addr; -	int ret = -EINVAL; +	int ret = -EOPNOTSUPP;  	if ((!info->attrs[IEEE802154_ATTR_DEST_HW_ADDR] &&  		!info->attrs[IEEE802154_ATTR_DEST_SHORT_ADDR]) || @@ -399,6 +405,8 @@ static int ieee802154_disassociate_req(struct sk_buff *skb,  	dev = ieee802154_nl_get_dev(info);  	if (!dev)  		return -ENODEV; +	if (!ieee802154_mlme_ops(dev)->disassoc_req) +		goto out;  	if (info->attrs[IEEE802154_ATTR_DEST_HW_ADDR]) {  		addr.addr_type = IEEE802154_ADDR_LONG; @@ -415,6 +423,7 @@ static int ieee802154_disassociate_req(struct sk_buff *skb,  	ret = ieee802154_mlme_ops(dev)->disassoc_req(dev, &addr,  			nla_get_u8(info->attrs[IEEE802154_ATTR_REASON])); +out:  	dev_put(dev);  	return ret;  } @@ -432,7 +441,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)  	u8 channel, bcn_ord, sf_ord;  	u8 page;  	int pan_coord, blx, coord_realign; -	int ret; +	int ret = -EOPNOTSUPP;  	if (!info->attrs[IEEE802154_ATTR_COORD_PAN_ID] ||  	    !info->attrs[IEEE802154_ATTR_COORD_SHORT_ADDR] || @@ -448,6 +457,8 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)  	dev = ieee802154_nl_get_dev(info);  	if (!dev)  		return -ENODEV; +	if (!ieee802154_mlme_ops(dev)->start_req) +		goto out;  	addr.addr_type = IEEE802154_ADDR_SHORT;  	addr.short_addr = nla_get_u16( @@ -476,6 +487,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)  	ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, page,  		bcn_ord, sf_ord, pan_coord, blx, coord_realign); +out:  	dev_put(dev);  	return ret;  } @@ -483,7 +495,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info)  static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)  {  	struct net_device *dev; -	int ret; +	int ret = 
-EOPNOTSUPP;  	u8 type;  	u32 channels;  	u8 duration; @@ -497,6 +509,8 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)  	dev = ieee802154_nl_get_dev(info);  	if (!dev)  		return -ENODEV; +	if (!ieee802154_mlme_ops(dev)->scan_req) +		goto out;  	type = nla_get_u8(info->attrs[IEEE802154_ATTR_SCAN_TYPE]);  	channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]); @@ -511,6 +525,7 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info)  	ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page,  			duration); +out:  	dev_put(dev);  	return ret;  } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 7944df76845..8603ca82710 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -166,6 +166,7 @@ config IP_PNP_RARP  config NET_IPIP  	tristate "IP: tunneling"  	select INET_TUNNEL +	select NET_IP_TUNNEL  	---help---  	  Tunneling means encapsulating data of one protocol type within  	  another protocol and sending it over a channel that understands the @@ -186,9 +187,14 @@ config NET_IPGRE_DEMUX  	 This is helper module to demultiplex GRE packets on GRE version field criteria.  	 Required by ip_gre and pptp modules. +config NET_IP_TUNNEL +	tristate +	default n +  config NET_IPGRE  	tristate "IP: GRE tunnels over IP"  	depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX +	select NET_IP_TUNNEL  	help  	  Tunneling means encapsulating data of one protocol type within  	  another protocol and sending it over a channel that understands the @@ -313,6 +319,7 @@ config SYN_COOKIES  config NET_IPVTI  	tristate "Virtual (secure) IP: tunneling"  	select INET_TUNNEL +	select NET_IP_TUNNEL  	depends on INET_XFRM_MODE_TUNNEL  	---help---  	  Tunneling means encapsulating data of one protocol type within diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 15ca63ec604..089cb9f3638 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -13,6 +13,7 @@ obj-y     := route.o inetpeer.o protocol.o \  	     fib_frontend.o fib_semantics.o fib_trie.o \  	     inet_fragment.o ping.o +obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o  obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o  obj-$(CONFIG_PROC_FS) += proc.o  obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c929d9c1c4b..c61b3bb87a1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -111,10 +111,10 @@  #include <net/sock.h>  #include <net/raw.h>  #include <net/icmp.h> -#include <net/ipip.h>  #include <net/inet_common.h>  #include <net/xfrm.h>  #include <net/net_namespace.h> +#include <net/secure_seq.h>  #ifdef CONFIG_IP_MROUTE  #include <linux/mroute.h>  #endif @@ -263,8 +263,10 @@ void build_ehash_secret(void)  		get_random_bytes(&rnd, sizeof(rnd));  	} while (rnd == 0); -	if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) +	if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) {  		get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); +		net_secret_init(); +	}  }  EXPORT_SYMBOL(build_ehash_secret); @@ -1283,9 +1285,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,  	int ihl;  	int id;  	unsigned int offset = 0; - -	if (!(features & NETIF_F_V4_CSUM)) -		features &= ~NETIF_F_SG; +	bool tunnel;  	if (unlikely(skb_shinfo(skb)->gso_type &  		     ~(SKB_GSO_TCPV4 | @@ -1293,6 +1293,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,  		       SKB_GSO_DODGY |  		       SKB_GSO_TCP_ECN |  		       SKB_GSO_GRE | +		       SKB_GSO_UDP_TUNNEL |  		       0)))  		goto out; @@ -1307,6 +1308,8 @@ static struct sk_buff 
*inet_gso_segment(struct sk_buff *skb,  	if (unlikely(!pskb_may_pull(skb, ihl)))  		goto out; +	tunnel = !!skb->encapsulation; +  	__skb_pull(skb, ihl);  	skb_reset_transport_header(skb);  	iph = ip_hdr(skb); @@ -1326,7 +1329,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,  	skb = segs;  	do {  		iph = ip_hdr(skb); -		if (proto == IPPROTO_UDP) { +		if (!tunnel && proto == IPPROTO_UDP) {  			iph->id = htons(id);  			iph->frag_off = htons(offset >> 3);  			if (skb->next != NULL) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index fea4929f620..247ec1951c3 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -654,11 +654,19 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,  	arp_ptr += dev->addr_len;  	memcpy(arp_ptr, &src_ip, 4);  	arp_ptr += 4; -	if (target_hw != NULL) -		memcpy(arp_ptr, target_hw, dev->addr_len); -	else -		memset(arp_ptr, 0, dev->addr_len); -	arp_ptr += dev->addr_len; + +	switch (dev->type) { +#if IS_ENABLED(CONFIG_FIREWIRE_NET) +	case ARPHRD_IEEE1394: +		break; +#endif +	default: +		if (target_hw != NULL) +			memcpy(arp_ptr, target_hw, dev->addr_len); +		else +			memset(arp_ptr, 0, dev->addr_len); +		arp_ptr += dev->addr_len; +	}  	memcpy(arp_ptr, &dest_ip, 4);  	return skb; @@ -781,7 +789,14 @@ static int arp_process(struct sk_buff *skb)  	arp_ptr += dev->addr_len;  	memcpy(&sip, arp_ptr, 4);  	arp_ptr += 4; -	arp_ptr += dev->addr_len; +	switch (dev_type) { +#if IS_ENABLED(CONFIG_FIREWIRE_NET) +	case ARPHRD_IEEE1394: +		break; +#endif +	default: +		arp_ptr += dev->addr_len; +	}  	memcpy(&tip, arp_ptr, 4);  /*   *	Check for bad requests for 127.x.x.x and requests for multicast diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c6287cd978c..dfc39d4d48b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -536,7 +536,7 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,  	return NULL;  } -static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct nlattr *tb[IFA_MAX+1]; @@ -801,7 +801,7 @@ static struct in_ifaddr *find_matching_ifa(struct in_ifaddr *ifa)  	return NULL;  } -static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct in_ifaddr *ifa; @@ -1529,6 +1529,8 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)  		idx = 0;  		head = &net->dev_index_head[h];  		rcu_read_lock(); +		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ +			  net->dev_base_seq;  		hlist_for_each_entry_rcu(dev, head, index_hlist) {  			if (idx < s_idx)  				goto cont; @@ -1549,6 +1551,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)  					rcu_read_unlock();  					goto done;  				} +				nl_dump_check_consistent(cb, nlmsg_hdr(skb));  			}  cont:  			idx++; @@ -1760,8 +1763,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {  };  static int inet_netconf_get_devconf(struct sk_buff *in_skb, -				    struct nlmsghdr *nlh, -				    void *arg) +				    struct nlmsghdr *nlh)  {  	struct net *net = sock_net(in_skb->sk);  	struct nlattr *tb[NETCONFA_MAX+1]; @@ -1821,6 +1823,77 @@ errout:  	return err;  } +static int inet_netconf_dump_devconf(struct sk_buff *skb, +				     struct netlink_callback *cb) +{ +	struct net *net = sock_net(skb->sk); +	int h, s_h; +	
int idx, s_idx; +	struct net_device *dev; +	struct in_device *in_dev; +	struct hlist_head *head; + +	s_h = cb->args[0]; +	s_idx = idx = cb->args[1]; + +	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { +		idx = 0; +		head = &net->dev_index_head[h]; +		rcu_read_lock(); +		cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^ +			  net->dev_base_seq; +		hlist_for_each_entry_rcu(dev, head, index_hlist) { +			if (idx < s_idx) +				goto cont; +			in_dev = __in_dev_get_rcu(dev); +			if (!in_dev) +				goto cont; + +			if (inet_netconf_fill_devconf(skb, dev->ifindex, +						      &in_dev->cnf, +						      NETLINK_CB(cb->skb).portid, +						      cb->nlh->nlmsg_seq, +						      RTM_NEWNETCONF, +						      NLM_F_MULTI, +						      -1) <= 0) { +				rcu_read_unlock(); +				goto done; +			} +			nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: +			idx++; +		} +		rcu_read_unlock(); +	} +	if (h == NETDEV_HASHENTRIES) { +		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, +					      net->ipv4.devconf_all, +					      NETLINK_CB(cb->skb).portid, +					      cb->nlh->nlmsg_seq, +					      RTM_NEWNETCONF, NLM_F_MULTI, +					      -1) <= 0) +			goto done; +		else +			h++; +	} +	if (h == NETDEV_HASHENTRIES + 1) { +		if (inet_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, +					      net->ipv4.devconf_dflt, +					      NETLINK_CB(cb->skb).portid, +					      cb->nlh->nlmsg_seq, +					      RTM_NEWNETCONF, NLM_F_MULTI, +					      -1) <= 0) +			goto done; +		else +			h++; +	} +done: +	cb->args[0] = h; +	cb->args[1] = idx; + +	return skb->len; +} +  #ifdef CONFIG_SYSCTL  static void devinet_copy_dflt_conf(struct net *net, int i) @@ -2225,6 +2298,6 @@ void __init devinet_init(void)  	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);  	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);  	rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, -		      NULL, NULL); +		      inet_netconf_dump_devconf, NULL);  } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index eb4bb12b3eb..c7629a209f9 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -604,7 +604,7 @@ errout:  	return err;  } -static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct fib_config cfg; @@ -626,7 +626,7 @@ errout:  	return err;  } -static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct fib_config cfg; @@ -957,8 +957,8 @@ static void nl_fib_input(struct sk_buff *skb)  	net = sock_net(skb->sk);  	nlh = nlmsg_hdr(skb); -	if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || -	    nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) +	if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len || +	    nlmsg_len(nlh) < sizeof(*frn))  		return;  	skb = skb_clone(skb, GFP_KERNEL); @@ -966,7 +966,7 @@ static void nl_fib_input(struct sk_buff *skb)  		return;  	nlh = nlmsg_hdr(skb); -	frn = (struct fib_result_nl *) NLMSG_DATA(nlh); +	frn = (struct fib_result_nl *) nlmsg_data(nlh);  	tb = fib_get_table(net, frn->tb_id_in);  	nl_fib_lookup(frn, tb); diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c index 7a4c710c4cd..d2d5a99fba0 100644 --- a/net/ipv4/gre.c +++ b/net/ipv4/gre.c @@ -27,11 +27,6 @@  static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;  
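An aside on the removal just below: gre.c loses its file-private copy of struct gre_base_hdr and the GRE_HEADER_SECTION constant, which with this refactor presumably move to a header shared with the new common tunnel code. For orientation, a hedged sketch of the sizing rule those 4-byte sections encode (the helper name is ours, not from the patch; cf. RFC 2784/2890):

/* A GRE header is a fixed 4-byte base (flags + protocol) followed by
 * one optional 4-byte section per flag bit that is set.
 */
static int example_gre_header_len(__be16 gre_flags)
{
	int len = 4;		/* flags + protocol */

	if (gre_flags & GRE_CSUM)
		len += 4;	/* checksum + reserved1 */
	if (gre_flags & GRE_KEY)
		len += 4;	/* key */
	if (gre_flags & GRE_SEQ)
		len += 4;	/* sequence number */
	return len;
}

The same arithmetic reappears further down in ip_gre.c's ip_gre_calc_hlen(), expressed over the new TUNNEL_* flag encoding.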
static DEFINE_SPINLOCK(gre_proto_lock); -struct gre_base_hdr { -	__be16 flags; -	__be16 protocol; -}; -#define GRE_HEADER_SECTION 4  int gre_add_protocol(const struct gre_protocol *proto, u8 version)  { diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 3ac5dff7962..76e10b47e05 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -881,7 +881,7 @@ int icmp_rcv(struct sk_buff *skb)  	case CHECKSUM_NONE:  		skb->csum = 0;  		if (__skb_checksum_complete(skb)) -			goto error; +			goto csum_error;  	}  	if (!pskb_pull(skb, sizeof(*icmph))) @@ -929,6 +929,8 @@ int icmp_rcv(struct sk_buff *skb)  drop:  	kfree_skb(skb);  	return 0; +csum_error: +	ICMP_INC_STATS_BH(net, ICMP_MIB_CSUMERRORS);  error:  	ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);  	goto drop; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 786d97aee75..6acb541c909 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -559,7 +559,7 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh,  int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)  { -	int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL); +	int err = req->rsk_ops->rtx_syn_ack(parent, req);  	if (!err)  		req->num_retrans++; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 7afa2c3c788..5f648751fce 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -158,7 +158,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,  #define EXPIRES_IN_MS(tmo)  DIV_ROUND_UP((tmo - jiffies) * 1000, HZ) -	if (icsk->icsk_pending == ICSK_TIME_RETRANS) { +	if (icsk->icsk_pending == ICSK_TIME_RETRANS || +	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || +	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {  		r->idiag_timer = 1;  		r->idiag_retrans = icsk->icsk_retransmits;  		r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); @@ -322,7 +324,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s  	}  	err = sk_diag_fill(sk, rep, req, -			   sk_user_ns(NETLINK_CB(in_skb).ssk), +			   sk_user_ns(NETLINK_CB(in_skb).sk),  			   NETLINK_CB(in_skb).portid,  			   nlh->nlmsg_seq, 0, nlh);  	if (err < 0) { @@ -628,7 +630,7 @@ static int inet_csk_diag_dump(struct sock *sk,  		return 0;  	return inet_csk_diag_fill(sk, skb, r, -				  sk_user_ns(NETLINK_CB(cb->skb).ssk), +				  sk_user_ns(NETLINK_CB(cb->skb).sk),  				  NETLINK_CB(cb->skb).portid,  				  cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);  } @@ -803,7 +805,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,  			}  			err = inet_diag_fill_req(skb, sk, req, -					       sk_user_ns(NETLINK_CB(cb->skb).ssk), +					       sk_user_ns(NETLINK_CB(cb->skb).sk),  					       NETLINK_CB(cb->skb).portid,  					       cb->nlh->nlmsg_seq, cb->nlh);  			if (err < 0) { diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index f4fd23de9b1..e97d66a1fdd 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -23,6 +23,28 @@  #include <net/sock.h>  #include <net/inet_frag.h> +#include <net/inet_ecn.h> + +/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements + * Value : 0xff if frame should be dropped. + *         0 or INET_ECN_CE value, to be ORed in to final iph->tos field + */ +const u8 ip_frag_ecn_table[16] = { +	/* at least one fragment had CE, and others ECT_0 or ECT_1 */ +	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0]			= INET_ECN_CE, +	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1]			= INET_ECN_CE, +	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1]	= INET_ECN_CE, + +	/* invalid combinations : drop frame */ +	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, +	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, +	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, +	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, +	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, +	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, +	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, +}; +EXPORT_SYMBOL(ip_frag_ecn_table);
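To make the table's use concrete: a reassembler ORs ip4_frag_ecn(iph->tos), i.e. 1 << (tos & INET_ECN_MASK), over all fragments of a datagram and indexes the table once at the end. A hedged sketch (the helper is ours; the ip_fragment.c hunk further down performs the equivalent inline):

/* ecn_bits is the OR of the per-fragment IPFRAG_ECN_* values. */
static int example_reasm_ecn(u8 ecn_bits, u8 first_frag_tos, u8 *tos)
{
	u8 ecn = ip_frag_ecn_table[ecn_bits & 0xf];

	if (ecn == 0xff)		/* e.g. Not-ECT mixed with CE */
		return -EINVAL;		/* drop the whole datagram */
	*tos = first_frag_tos | ecn;	/* possibly promote to CE */
	return 0;
}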
static void inet_frag_secret_rebuild(unsigned long dummy)  { @@ -30,20 +52,27 @@ static void inet_frag_secret_rebuild(unsigned long dummy)  	unsigned long now = jiffies;  	int i; +	/* Per bucket lock NOT needed here, due to write lock protection */  	write_lock(&f->lock); +  	get_random_bytes(&f->rnd, sizeof(u32));  	for (i = 0; i < INETFRAGS_HASHSZ; i++) { +		struct inet_frag_bucket *hb;  		struct inet_frag_queue *q;  		struct hlist_node *n; -		hlist_for_each_entry_safe(q, n, &f->hash[i], list) { +		hb = &f->hash[i]; +		hlist_for_each_entry_safe(q, n, &hb->chain, list) {  			unsigned int hval = f->hashfn(q);  			if (hval != i) { +				struct inet_frag_bucket *hb_dest; +  				hlist_del(&q->list);  				/* Relink to new hash chain. */ -				hlist_add_head(&q->list, &f->hash[hval]); +				hb_dest = &f->hash[hval]; +				hlist_add_head(&q->list, &hb_dest->chain);  			}  		}  	} @@ -56,9 +85,12 @@ void inet_frags_init(struct inet_frags *f)  {  	int i; -	for (i = 0; i < INETFRAGS_HASHSZ; i++) -		INIT_HLIST_HEAD(&f->hash[i]); +	for (i = 0; i < INETFRAGS_HASHSZ; i++) { +		struct inet_frag_bucket *hb = &f->hash[i]; +		spin_lock_init(&hb->chain_lock); +		INIT_HLIST_HEAD(&hb->chain); +	}  	rwlock_init(&f->lock);  	f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ @@ -100,10 +132,18 @@ EXPORT_SYMBOL(inet_frags_exit_net);  static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)  { -	write_lock(&f->lock); +	struct inet_frag_bucket *hb; +	unsigned int hash; + +	read_lock(&f->lock); +	hash = f->hashfn(fq); +	hb = &f->hash[hash]; + +	spin_lock(&hb->chain_lock);  	hlist_del(&fq->list); -	fq->net->nqueues--; -	write_unlock(&f->lock); +	spin_unlock(&hb->chain_lock); + +	read_unlock(&f->lock);  	inet_frag_lru_del(fq);  } @@ -182,6 +222,9 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)  		q = list_first_entry(&nf->lru_list,  				struct inet_frag_queue, lru_list);  		atomic_inc(&q->refcnt); +		/* Remove q from list to avoid several CPUs grabbing it */ +		list_del_init(&q->lru_list); +  		spin_unlock(&nf->lru_lock);  		spin_lock(&q->lock); @@ -202,27 +245,32 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,  		struct inet_frag_queue *qp_in, struct inet_frags *f,  		void *arg)  { +	struct inet_frag_bucket *hb;  	struct inet_frag_queue *qp;  #ifdef CONFIG_SMP  #endif  	unsigned int hash; -	write_lock(&f->lock); +	read_lock(&f->lock); /* Protects against hash rebuild */  	/*  	 * While we stayed w/o the lock other CPU could update  	 * the rnd seed, so we need to re-calculate the hash  	 * chain. Fortunatelly the qp_in can be used to get one.  	 
*/  	hash = f->hashfn(qp_in); +	hb = &f->hash[hash]; +	spin_lock(&hb->chain_lock); +  #ifdef CONFIG_SMP  	/* With SMP race we have to recheck hash table, because  	 * such entry could be created on other cpu, while we -	 * promoted read lock to write lock. +	 * released the hash bucket lock.  	 */ -	hlist_for_each_entry(qp, &f->hash[hash], list) { +	hlist_for_each_entry(qp, &hb->chain, list) {  		if (qp->net == nf && f->match(qp, arg)) {  			atomic_inc(&qp->refcnt); -			write_unlock(&f->lock); +			spin_unlock(&hb->chain_lock); +			read_unlock(&f->lock);  			qp_in->last_in |= INET_FRAG_COMPLETE;  			inet_frag_put(qp_in, f);  			return qp; @@ -234,9 +282,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,  		atomic_inc(&qp->refcnt);  	atomic_inc(&qp->refcnt); -	hlist_add_head(&qp->list, &f->hash[hash]); -	nf->nqueues++; -	write_unlock(&f->lock); +	hlist_add_head(&qp->list, &hb->chain); +	spin_unlock(&hb->chain_lock); +	read_unlock(&f->lock);  	inet_frag_lru_add(nf, qp);  	return qp;  } @@ -277,17 +325,23 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,  		struct inet_frags *f, void *key, unsigned int hash)  	__releases(&f->lock)  { +	struct inet_frag_bucket *hb;  	struct inet_frag_queue *q;  	int depth = 0; -	hlist_for_each_entry(q, &f->hash[hash], list) { +	hb = &f->hash[hash]; + +	spin_lock(&hb->chain_lock); +	hlist_for_each_entry(q, &hb->chain, list) {  		if (q->net == nf && f->match(q, key)) {  			atomic_inc(&q->refcnt); +			spin_unlock(&hb->chain_lock);  			read_unlock(&f->lock);  			return q;  		}  		depth++;  	} +	spin_unlock(&hb->chain_lock);  	read_unlock(&f->lock);  	if (depth <= INETFRAGS_MAXDEPTH) diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index cc280a3f4f9..1975f52933c 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -29,6 +29,7 @@  #include <linux/module.h>  #include <linux/if_vlan.h>  #include <linux/inet_lro.h> +#include <net/checksum.h>  MODULE_LICENSE("GPL");  MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>"); @@ -114,11 +115,9 @@ static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)  		*(p+2) = lro_desc->tcp_rcv_tsecr;  	} +	csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len));  	iph->tot_len = htons(lro_desc->ip_tot_len); -	iph->check = 0; -	iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl); -  	tcph->check = 0;  	tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);  	lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 52c273ea05c..b66910aaef4 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -79,40 +79,11 @@ struct ipq {  	struct inet_peer *peer;  }; -/* RFC 3168 support : - * We want to check ECN values of all fragments, do detect invalid combinations. - * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value. - */ -#define	IPFRAG_ECN_NOT_ECT	0x01 /* one frag had ECN_NOT_ECT */ -#define	IPFRAG_ECN_ECT_1	0x02 /* one frag had ECN_ECT_1 */ -#define	IPFRAG_ECN_ECT_0	0x04 /* one frag had ECN_ECT_0 */ -#define	IPFRAG_ECN_CE		0x08 /* one frag had ECN_CE */ -  static inline u8 ip4_frag_ecn(u8 tos)  {  	return 1 << (tos & INET_ECN_MASK);  } -/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements - * Value : 0xff if frame should be dropped. 
- *         0 or INET_ECN_CE value, to be ORed in to final iph->tos field - */ -static const u8 ip4_frag_ecn_table[16] = { -	/* at least one fragment had CE, and others ECT_0 or ECT_1 */ -	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0]			= INET_ECN_CE, -	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1]			= INET_ECN_CE, -	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1]	= INET_ECN_CE, - -	/* invalid combinations : drop frame */ -	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, -	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, -	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, -	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, -	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, -	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, -	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, -}; -  static struct inet_frags ip4_frags;  int ip_frag_nqueues(struct net *net) @@ -557,7 +528,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,  	ipq_kill(qp); -	ecn = ip4_frag_ecn_table[qp->ecn]; +	ecn = ip_frag_ecn_table[qp->ecn];  	if (unlikely(ecn == 0xff)) {  		err = -EINVAL;  		goto out_fail; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 91d66dbde9c..c625e4dad4b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -37,7 +37,7 @@  #include <net/ip.h>  #include <net/icmp.h>  #include <net/protocol.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h>  #include <net/arp.h>  #include <net/checksum.h>  #include <net/dsfield.h> @@ -108,15 +108,6 @@     fatal route to network, even if it were you who configured     fatal static route: you are innocent. :-) - - -   3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain -   practically identical code. It would be good to glue them -   together, but it is not very evident, how to make them modular. -   sit is integral part of IPv6, ipip and gre are naturally modular. -   We could extract common parts (hash table, ioctl etc) -   to a separate module (ip_tunnel.c). -     Alexey Kuznetsov.   */ @@ -126,400 +117,137 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");  static struct rtnl_link_ops ipgre_link_ops __read_mostly;  static int ipgre_tunnel_init(struct net_device *dev); -static void ipgre_tunnel_setup(struct net_device *dev); -static int ipgre_tunnel_bind_dev(struct net_device *dev); - -/* Fallback tunnel: no source, no destination, no key, no options */ - -#define HASH_SIZE  16  static int ipgre_net_id __read_mostly; -struct ipgre_net { -	struct ip_tunnel __rcu *tunnels[4][HASH_SIZE]; - -	struct net_device *fb_tunnel_dev; -}; +static int gre_tap_net_id __read_mostly; -/* Tunnel hash table */ - -/* -   4 hash tables: - -   3: (remote,local) -   2: (remote,*) -   1: (*,local) -   0: (*,*) - -   We require exact key match i.e. if a key is present in packet -   it will match only tunnel with the same key; if it is not present, -   it will match only keyless tunnel. - -   All keysless packets, if not matched configured keyless tunnels -   will match fallback tunnel. 
- */ - -#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) - -#define tunnels_r_l	tunnels[3] -#define tunnels_r	tunnels[2] -#define tunnels_l	tunnels[1] -#define tunnels_wc	tunnels[0] - -static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, -						   struct rtnl_link_stats64 *tot) +static __sum16 check_checksum(struct sk_buff *skb)  { -	int i; +	__sum16 csum = 0; -	for_each_possible_cpu(i) { -		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); -		u64 rx_packets, rx_bytes, tx_packets, tx_bytes; -		unsigned int start; +	switch (skb->ip_summed) { +	case CHECKSUM_COMPLETE: +		csum = csum_fold(skb->csum); -		do { -			start = u64_stats_fetch_begin_bh(&tstats->syncp); -			rx_packets = tstats->rx_packets; -			tx_packets = tstats->tx_packets; -			rx_bytes = tstats->rx_bytes; -			tx_bytes = tstats->tx_bytes; -		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); +		if (!csum) +			break; +		/* Fall through. */ -		tot->rx_packets += rx_packets; -		tot->tx_packets += tx_packets; -		tot->rx_bytes   += rx_bytes; -		tot->tx_bytes   += tx_bytes; +	case CHECKSUM_NONE: +		skb->csum = 0; +		csum = __skb_checksum_complete(skb); +		skb->ip_summed = CHECKSUM_COMPLETE; +		break;  	} -	tot->multicast = dev->stats.multicast; -	tot->rx_crc_errors = dev->stats.rx_crc_errors; -	tot->rx_fifo_errors = dev->stats.rx_fifo_errors; -	tot->rx_length_errors = dev->stats.rx_length_errors; -	tot->rx_frame_errors = dev->stats.rx_frame_errors; -	tot->rx_errors = dev->stats.rx_errors; - -	tot->tx_fifo_errors = dev->stats.tx_fifo_errors; -	tot->tx_carrier_errors = dev->stats.tx_carrier_errors; -	tot->tx_dropped = dev->stats.tx_dropped; -	tot->tx_aborted_errors = dev->stats.tx_aborted_errors; -	tot->tx_errors = dev->stats.tx_errors; - -	return tot; +	return csum;  } -/* Does key in tunnel parameters match packet */ -static bool ipgre_key_match(const struct ip_tunnel_parm *p, -			    __be16 flags, __be32 key) +static int ip_gre_calc_hlen(__be16 o_flags)  { -	if (p->i_flags & GRE_KEY) { -		if (flags & GRE_KEY) -			return key == p->i_key; -		else -			return false;	/* key expected, none present */ -	} else -		return !(flags & GRE_KEY); -} +	int addend = 4; -/* Given src, dst and key, find appropriate for input tunnel. */ +	if (o_flags&TUNNEL_CSUM) +		addend += 4; +	if (o_flags&TUNNEL_KEY) +		addend += 4; +	if (o_flags&TUNNEL_SEQ) +		addend += 4; +	return addend; +} -static struct ip_tunnel *ipgre_tunnel_lookup(struct net_device *dev, -					     __be32 remote, __be32 local, -					     __be16 flags, __be32 key, -					     __be16 gre_proto) +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, +			    bool *csum_err, int *hdr_len)  { -	struct net *net = dev_net(dev); -	int link = dev->ifindex; -	unsigned int h0 = HASH(remote); -	unsigned int h1 = HASH(key); -	struct ip_tunnel *t, *cand = NULL; -	struct ipgre_net *ign = net_generic(net, ipgre_net_id); -	int dev_type = (gre_proto == htons(ETH_P_TEB)) ? 
-		       ARPHRD_ETHER : ARPHRD_IPGRE; -	int score, cand_score = 4; - -	for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) { -		if (local != t->parms.iph.saddr || -		    remote != t->parms.iph.daddr || -		    !(t->dev->flags & IFF_UP)) -			continue; - -		if (!ipgre_key_match(&t->parms, flags, key)) -			continue; +	unsigned int ip_hlen = ip_hdrlen(skb); +	const struct gre_base_hdr *greh; +	__be32 *options; -		if (t->dev->type != ARPHRD_IPGRE && -		    t->dev->type != dev_type) -			continue; - -		score = 0; -		if (t->parms.link != link) -			score |= 1; -		if (t->dev->type != dev_type) -			score |= 2; -		if (score == 0) -			return t; - -		if (score < cand_score) { -			cand = t; -			cand_score = score; -		} -	} - -	for_each_ip_tunnel_rcu(t, ign->tunnels_r[h0 ^ h1]) { -		if (remote != t->parms.iph.daddr || -		    !(t->dev->flags & IFF_UP)) -			continue; - -		if (!ipgre_key_match(&t->parms, flags, key)) -			continue; - -		if (t->dev->type != ARPHRD_IPGRE && -		    t->dev->type != dev_type) -			continue; - -		score = 0; -		if (t->parms.link != link) -			score |= 1; -		if (t->dev->type != dev_type) -			score |= 2; -		if (score == 0) -			return t; - -		if (score < cand_score) { -			cand = t; -			cand_score = score; -		} -	} - -	for_each_ip_tunnel_rcu(t, ign->tunnels_l[h1]) { -		if ((local != t->parms.iph.saddr && -		     (local != t->parms.iph.daddr || -		      !ipv4_is_multicast(local))) || -		    !(t->dev->flags & IFF_UP)) -			continue; - -		if (!ipgre_key_match(&t->parms, flags, key)) -			continue; - -		if (t->dev->type != ARPHRD_IPGRE && -		    t->dev->type != dev_type) -			continue; +	if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) +		return -EINVAL; -		score = 0; -		if (t->parms.link != link) -			score |= 1; -		if (t->dev->type != dev_type) -			score |= 2; -		if (score == 0) -			return t; +	greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); +	if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) +		return -EINVAL; -		if (score < cand_score) { -			cand = t; -			cand_score = score; -		} -	} +	tpi->flags = gre_flags_to_tnl_flags(greh->flags); +	*hdr_len = ip_gre_calc_hlen(tpi->flags); -	for_each_ip_tunnel_rcu(t, ign->tunnels_wc[h1]) { -		if (t->parms.i_key != key || -		    !(t->dev->flags & IFF_UP)) -			continue; +	if (!pskb_may_pull(skb, *hdr_len)) +		return -EINVAL; -		if (t->dev->type != ARPHRD_IPGRE && -		    t->dev->type != dev_type) -			continue; +	greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); -		score = 0; -		if (t->parms.link != link) -			score |= 1; -		if (t->dev->type != dev_type) -			score |= 2; -		if (score == 0) -			return t; +	tpi->proto = greh->protocol; -		if (score < cand_score) { -			cand = t; -			cand_score = score; +	options = (__be32 *)(greh + 1); +	if (greh->flags & GRE_CSUM) { +		if (check_checksum(skb)) { +			*csum_err = true; +			return -EINVAL;  		} +		options++;  	} -	if (cand != NULL) -		return cand; - -	dev = ign->fb_tunnel_dev; -	if (dev->flags & IFF_UP) -		return netdev_priv(dev); - -	return NULL; -} - -static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign, -		struct ip_tunnel_parm *parms) -{ -	__be32 remote = parms->iph.daddr; -	__be32 local = parms->iph.saddr; -	__be32 key = parms->i_key; -	unsigned int h = HASH(key); -	int prio = 0; - -	if (local) -		prio |= 1; -	if (remote && !ipv4_is_multicast(remote)) { -		prio |= 2; -		h ^= HASH(remote); -	} - -	return &ign->tunnels[prio][h]; -} - -static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign, -		struct ip_tunnel *t) -{ -	
return __ipgre_bucket(ign, &t->parms); -} - -static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) -{ -	struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t); - -	rcu_assign_pointer(t->next, rtnl_dereference(*tp)); -	rcu_assign_pointer(*tp, t); -} +	if (greh->flags & GRE_KEY) { +		tpi->key = *options; +		options++; +	} else +		tpi->key = 0; -static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) -{ -	struct ip_tunnel __rcu **tp; -	struct ip_tunnel *iter; +	if (unlikely(greh->flags & GRE_SEQ)) { +		tpi->seq = *options; +		options++; +	} else +		tpi->seq = 0; -	for (tp = ipgre_bucket(ign, t); -	     (iter = rtnl_dereference(*tp)) != NULL; -	     tp = &iter->next) { -		if (t == iter) { -			rcu_assign_pointer(*tp, t->next); -			break; +	/* WCCP version 1 and 2 protocol decoding. +	 * - Change protocol to IP +	 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header +	 */ +	if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { +		tpi->proto = htons(ETH_P_IP); +		if ((*(u8 *)options & 0xF0) != 0x40) { +			*hdr_len += 4; +			if (!pskb_may_pull(skb, *hdr_len)) +				return -EINVAL;  		}  	} -} - -static struct ip_tunnel *ipgre_tunnel_find(struct net *net, -					   struct ip_tunnel_parm *parms, -					   int type) -{ -	__be32 remote = parms->iph.daddr; -	__be32 local = parms->iph.saddr; -	__be32 key = parms->i_key; -	int link = parms->link; -	struct ip_tunnel *t; -	struct ip_tunnel __rcu **tp; -	struct ipgre_net *ign = net_generic(net, ipgre_net_id); - -	for (tp = __ipgre_bucket(ign, parms); -	     (t = rtnl_dereference(*tp)) != NULL; -	     tp = &t->next) -		if (local == t->parms.iph.saddr && -		    remote == t->parms.iph.daddr && -		    key == t->parms.i_key && -		    link == t->parms.link && -		    type == t->dev->type) -			break; - -	return t; -} -static struct ip_tunnel *ipgre_tunnel_locate(struct net *net, -		struct ip_tunnel_parm *parms, int create) -{ -	struct ip_tunnel *t, *nt; -	struct net_device *dev; -	char name[IFNAMSIZ]; -	struct ipgre_net *ign = net_generic(net, ipgre_net_id); - -	t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE); -	if (t || !create) -		return t; - -	if (parms->name[0]) -		strlcpy(name, parms->name, IFNAMSIZ); -	else -		strcpy(name, "gre%d"); - -	dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup); -	if (!dev) -		return NULL; - -	dev_net_set(dev, net); - -	nt = netdev_priv(dev); -	nt->parms = *parms; -	dev->rtnl_link_ops = &ipgre_link_ops; - -	dev->mtu = ipgre_tunnel_bind_dev(dev); - -	if (register_netdevice(dev) < 0) -		goto failed_free; - -	/* Can use a lockless transmit, unless we generate output sequences */ -	if (!(nt->parms.o_flags & GRE_SEQ)) -		dev->features |= NETIF_F_LLTX; - -	dev_hold(dev); -	ipgre_tunnel_link(ign, nt); -	return nt; - -failed_free: -	free_netdev(dev); -	return NULL; -} - -static void ipgre_tunnel_uninit(struct net_device *dev) -{ -	struct net *net = dev_net(dev); -	struct ipgre_net *ign = net_generic(net, ipgre_net_id); - -	ipgre_tunnel_unlink(ign, netdev_priv(dev)); -	dev_put(dev); +	return 0;  } -  static void ipgre_err(struct sk_buff *skb, u32 info)  { -/* All the routers (except for Linux) return only -   8 bytes of packet payload. It means, that precise relaying of -   ICMP in the real Internet is absolutely infeasible. - -   Moreover, Cisco "wise men" put GRE key to the third word -   in GRE header. It makes impossible maintaining even soft state for keyed -   GRE tunnels with enabled checksum. Tell them "thank you". 
+	/* All the routers (except for Linux) return only +	   8 bytes of packet payload. It means, that precise relaying of +	   ICMP in the real Internet is absolutely infeasible. -   Well, I wonder, rfc1812 was written by Cisco employee, -   what the hell these idiots break standards established -   by themselves??? - */ +	   Moreover, Cisco "wise men" put GRE key to the third word +	   in GRE header. It makes impossible maintaining even soft +	   state for keyed GRE tunnels with enabled checksum. Tell +	   them "thank you". +	   Well, I wonder, rfc1812 was written by Cisco employee, +	   what the hell these idiots break standards established +	   by themselves??? +	   */ +	struct net *net = dev_net(skb->dev); +	struct ip_tunnel_net *itn;  	const struct iphdr *iph = (const struct iphdr *)skb->data; -	__be16	     *p = (__be16 *)(skb->data+(iph->ihl<<2)); -	int grehlen = (iph->ihl<<2) + 4;  	const int type = icmp_hdr(skb)->type;  	const int code = icmp_hdr(skb)->code;  	struct ip_tunnel *t; -	__be16 flags; -	__be32 key = 0; +	struct tnl_ptk_info tpi; +	int hdr_len; +	bool csum_err = false; -	flags = p[0]; -	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { -		if (flags&(GRE_VERSION|GRE_ROUTING)) +	if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len)) { +		if (!csum_err)          /* ignore csum errors. */  			return; -		if (flags&GRE_KEY) { -			grehlen += 4; -			if (flags&GRE_CSUM) -				grehlen += 4; -		}  	} -	/* If only 8 bytes returned, keyed message will be dropped here */ -	if (skb_headlen(skb) < grehlen) -		return; - -	if (flags & GRE_KEY) -		key = *(((__be32 *)p) + (grehlen / 4) - 1); -  	switch (type) {  	default:  	case ICMP_PARAMETERPROB: @@ -548,8 +276,13 @@ static void ipgre_err(struct sk_buff *skb, u32 info)  		break;  	} -	t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, -				flags, key, p[1]); +	if (tpi.proto == htons(ETH_P_TEB)) +		itn = net_generic(net, gre_tap_net_id); +	else +		itn = net_generic(net, ipgre_net_id); + +	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, +			     iph->daddr, iph->saddr, tpi.key);  	if (t == NULL)  		return; @@ -578,158 +311,33 @@ static void ipgre_err(struct sk_buff *skb, u32 info)  	t->err_time = jiffies;  } -static inline u8 -ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb) -{ -	u8 inner = 0; -	if (skb->protocol == htons(ETH_P_IP)) -		inner = old_iph->tos; -	else if (skb->protocol == htons(ETH_P_IPV6)) -		inner = ipv6_get_dsfield((const struct ipv6hdr *)old_iph); -	return INET_ECN_encapsulate(tos, inner); -} -  static int ipgre_rcv(struct sk_buff *skb)  { +	struct net *net = dev_net(skb->dev); +	struct ip_tunnel_net *itn;  	const struct iphdr *iph; -	u8     *h; -	__be16    flags; -	__sum16   csum = 0; -	__be32 key = 0; -	u32    seqno = 0;  	struct ip_tunnel *tunnel; -	int    offset = 4; -	__be16 gre_proto; -	int    err; +	struct tnl_ptk_info tpi; +	int hdr_len; +	bool csum_err = false; -	if (!pskb_may_pull(skb, 16)) +	if (parse_gre_header(skb, &tpi, &csum_err, &hdr_len) < 0)  		goto drop; -	iph = ip_hdr(skb); -	h = skb->data; -	flags = *(__be16 *)h; - -	if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { -		/* - Version must be 0. -		   - We do not support routing headers. 
-		 */ -		if (flags&(GRE_VERSION|GRE_ROUTING)) -			goto drop; - -		if (flags&GRE_CSUM) { -			switch (skb->ip_summed) { -			case CHECKSUM_COMPLETE: -				csum = csum_fold(skb->csum); -				if (!csum) -					break; -				/* fall through */ -			case CHECKSUM_NONE: -				skb->csum = 0; -				csum = __skb_checksum_complete(skb); -				skb->ip_summed = CHECKSUM_COMPLETE; -			} -			offset += 4; -		} -		if (flags&GRE_KEY) { -			key = *(__be32 *)(h + offset); -			offset += 4; -		} -		if (flags&GRE_SEQ) { -			seqno = ntohl(*(__be32 *)(h + offset)); -			offset += 4; -		} -	} +	if (tpi.proto == htons(ETH_P_TEB)) +		itn = net_generic(net, gre_tap_net_id); +	else +		itn = net_generic(net, ipgre_net_id); -	gre_proto = *(__be16 *)(h + 2); +	iph = ip_hdr(skb); +	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi.flags, +				  iph->saddr, iph->daddr, tpi.key); -	tunnel = ipgre_tunnel_lookup(skb->dev, -				     iph->saddr, iph->daddr, flags, key, -				     gre_proto);  	if (tunnel) { -		struct pcpu_tstats *tstats; - -		secpath_reset(skb); - -		skb->protocol = gre_proto; -		/* WCCP version 1 and 2 protocol decoding. -		 * - Change protocol to IP -		 * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header -		 */ -		if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { -			skb->protocol = htons(ETH_P_IP); -			if ((*(h + offset) & 0xF0) != 0x40) -				offset += 4; -		} - -		skb->mac_header = skb->network_header; -		__pskb_pull(skb, offset); -		skb_postpull_rcsum(skb, skb_transport_header(skb), offset); -		skb->pkt_type = PACKET_HOST; -#ifdef CONFIG_NET_IPGRE_BROADCAST -		if (ipv4_is_multicast(iph->daddr)) { -			/* Looped back packet, drop it! */ -			if (rt_is_output_route(skb_rtable(skb))) -				goto drop; -			tunnel->dev->stats.multicast++; -			skb->pkt_type = PACKET_BROADCAST; -		} -#endif - -		if (((flags&GRE_CSUM) && csum) || -		    (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { -			tunnel->dev->stats.rx_crc_errors++; -			tunnel->dev->stats.rx_errors++; -			goto drop; -		} -		if (tunnel->parms.i_flags&GRE_SEQ) { -			if (!(flags&GRE_SEQ) || -			    (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { -				tunnel->dev->stats.rx_fifo_errors++; -				tunnel->dev->stats.rx_errors++; -				goto drop; -			} -			tunnel->i_seqno = seqno + 1; -		} - -		/* Warning: All skb pointers will be invalidated! 
*/ -		if (tunnel->dev->type == ARPHRD_ETHER) { -			if (!pskb_may_pull(skb, ETH_HLEN)) { -				tunnel->dev->stats.rx_length_errors++; -				tunnel->dev->stats.rx_errors++; -				goto drop; -			} - -			iph = ip_hdr(skb); -			skb->protocol = eth_type_trans(skb, tunnel->dev); -			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); -		} - -		__skb_tunnel_rx(skb, tunnel->dev); - -		skb_reset_network_header(skb); -		err = IP_ECN_decapsulate(iph, skb); -		if (unlikely(err)) { -			if (log_ecn_error) -				net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", -						     &iph->saddr, iph->tos); -			if (err > 1) { -				++tunnel->dev->stats.rx_frame_errors; -				++tunnel->dev->stats.rx_errors; -				goto drop; -			} -		} - -		tstats = this_cpu_ptr(tunnel->dev->tstats); -		u64_stats_update_begin(&tstats->syncp); -		tstats->rx_packets++; -		tstats->rx_bytes += skb->len; -		u64_stats_update_end(&tstats->syncp); - -		gro_cells_receive(&tunnel->gro_cells, skb); +		ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);  		return 0;  	}  	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); -  drop:  	kfree_skb(skb);  	return 0; @@ -746,7 +354,7 @@ static struct sk_buff *handle_offloads(struct ip_tunnel *tunnel, struct sk_buff  		skb_shinfo(skb)->gso_type |= SKB_GSO_GRE;  		return skb;  	} else if (skb->ip_summed == CHECKSUM_PARTIAL && -		   tunnel->parms.o_flags&GRE_CSUM) { +		   tunnel->parms.o_flags&TUNNEL_CSUM) {  		err = skb_checksum_help(skb);  		if (unlikely(err))  			goto error; @@ -760,494 +368,157 @@ error:  	return ERR_PTR(err);  } -static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) +static struct sk_buff *gre_build_header(struct sk_buff *skb, +					const struct tnl_ptk_info *tpi, +					int hdr_len)  { -	struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats); -	struct ip_tunnel *tunnel = netdev_priv(dev); -	const struct iphdr  *old_iph; -	const struct iphdr  *tiph; -	struct flowi4 fl4; -	u8     tos; -	__be16 df; -	struct rtable *rt;     			/* Route to the other host */ -	struct net_device *tdev;		/* Device to other host */ -	struct iphdr  *iph;			/* Our new IP header */ -	unsigned int max_headroom;		/* The extra header space needed */ -	int    gre_hlen; -	__be32 dst; -	int    mtu; -	u8     ttl; -	int    err; -	int    pkt_len; - -	skb = handle_offloads(tunnel, skb); -	if (IS_ERR(skb)) { -		dev->stats.tx_dropped++; -		return NETDEV_TX_OK; -	} - -	if (!skb->encapsulation) { -		skb_reset_inner_headers(skb); -		skb->encapsulation = 1; -	} - -	old_iph = ip_hdr(skb); - -	if (dev->type == ARPHRD_ETHER) -		IPCB(skb)->flags = 0; +	struct gre_base_hdr *greh; -	if (dev->header_ops && dev->type == ARPHRD_IPGRE) { -		gre_hlen = 0; -		tiph = (const struct iphdr *)skb->data; -	} else { -		gre_hlen = tunnel->hlen; -		tiph = &tunnel->parms.iph; -	} +	skb_push(skb, hdr_len); -	if ((dst = tiph->daddr) == 0) { -		/* NBMA tunnel */ +	greh = (struct gre_base_hdr *)skb->data; +	greh->flags = tnl_flags_to_gre_flags(tpi->flags); +	greh->protocol = tpi->proto; -		if (skb_dst(skb) == NULL) { -			dev->stats.tx_fifo_errors++; -			goto tx_error; -		} +	if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { +		__be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); -		if (skb->protocol == htons(ETH_P_IP)) { -			rt = skb_rtable(skb); -			dst = rt_nexthop(rt, old_iph->daddr); +		if (tpi->flags&TUNNEL_SEQ) { +			*ptr = tpi->seq; +			ptr--;  		} -#if IS_ENABLED(CONFIG_IPV6) -		else if (skb->protocol == htons(ETH_P_IPV6)) { -			const struct in6_addr *addr6; -			struct neighbour *neigh; -			bool do_tx_error_icmp; 
-			int addr_type; - -			neigh = dst_neigh_lookup(skb_dst(skb), &ipv6_hdr(skb)->daddr); -			if (neigh == NULL) -				goto tx_error; - -			addr6 = (const struct in6_addr *)&neigh->primary_key; -			addr_type = ipv6_addr_type(addr6); - -			if (addr_type == IPV6_ADDR_ANY) { -				addr6 = &ipv6_hdr(skb)->daddr; -				addr_type = ipv6_addr_type(addr6); -			} - -			if ((addr_type & IPV6_ADDR_COMPATv4) == 0) -				do_tx_error_icmp = true; -			else { -				do_tx_error_icmp = false; -				dst = addr6->s6_addr32[3]; -			} -			neigh_release(neigh); -			if (do_tx_error_icmp) -				goto tx_error_icmp; +		if (tpi->flags&TUNNEL_KEY) { +			*ptr = tpi->key; +			ptr--;  		} -#endif -		else -			goto tx_error; -	} - -	ttl = tiph->ttl; -	tos = tiph->tos; -	if (tos & 0x1) { -		tos &= ~0x1; -		if (skb->protocol == htons(ETH_P_IP)) -			tos = old_iph->tos; -		else if (skb->protocol == htons(ETH_P_IPV6)) -			tos = ipv6_get_dsfield((const struct ipv6hdr *)old_iph); -	} - -	rt = ip_route_output_gre(dev_net(dev), &fl4, dst, tiph->saddr, -				 tunnel->parms.o_key, RT_TOS(tos), -				 tunnel->parms.link); -	if (IS_ERR(rt)) { -		dev->stats.tx_carrier_errors++; -		goto tx_error; -	} -	tdev = rt->dst.dev; - -	if (tdev == dev) { -		ip_rt_put(rt); -		dev->stats.collisions++; -		goto tx_error; -	} - -	df = tiph->frag_off; -	if (df) -		mtu = dst_mtu(&rt->dst) - dev->hard_header_len - tunnel->hlen; -	else -		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - -	if (skb_dst(skb)) -		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - -	if (skb->protocol == htons(ETH_P_IP)) { -		df |= (old_iph->frag_off&htons(IP_DF)); - -		if (!skb_is_gso(skb) && -		    (old_iph->frag_off&htons(IP_DF)) && -		    mtu < ntohs(old_iph->tot_len)) { -			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); -			ip_rt_put(rt); -			goto tx_error; +		if (tpi->flags&TUNNEL_CSUM && +		    !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { +			*(__sum16 *)ptr = 0; +			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, +								 skb->len, 0));  		}  	} -#if IS_ENABLED(CONFIG_IPV6) -	else if (skb->protocol == htons(ETH_P_IPV6)) { -		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); -		if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) { -			if ((tunnel->parms.iph.daddr && -			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) || -			    rt6->rt6i_dst.plen == 128) { -				rt6->rt6i_flags |= RTF_MODIFIED; -				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); -			} -		} - -		if (!skb_is_gso(skb) && -		    mtu >= IPV6_MIN_MTU && -		    mtu < skb->len - tunnel->hlen + gre_hlen) { -			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); -			ip_rt_put(rt); -			goto tx_error; -		} -	} -#endif +	return skb; +} -	if (tunnel->err_count > 0) { -		if (time_before(jiffies, -				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { -			tunnel->err_count--; +static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, +		       const struct iphdr *tnl_params, +		       __be16 proto) +{ +	struct ip_tunnel *tunnel = netdev_priv(dev); +	struct tnl_ptk_info tpi; -			dst_link_failure(skb); -		} else -			tunnel->err_count = 0; +	if (likely(!skb->encapsulation)) { +		skb_reset_inner_headers(skb); +		skb->encapsulation = 1;  	} -	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->dst.header_len; +	tpi.flags = tunnel->parms.o_flags; +	tpi.proto = proto; +	tpi.key = tunnel->parms.o_key; +	if (tunnel->parms.o_flags & TUNNEL_SEQ) +		tunnel->o_seqno++; +	tpi.seq = htonl(tunnel->o_seqno); -	if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| -	    (skb_cloned(skb) && 
!skb_clone_writable(skb, 0))) { -		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); -		if (max_headroom > dev->needed_headroom) -			dev->needed_headroom = max_headroom; -		if (!new_skb) { -			ip_rt_put(rt); -			dev->stats.tx_dropped++; -			dev_kfree_skb(skb); -			return NETDEV_TX_OK; -		} -		if (skb->sk) -			skb_set_owner_w(new_skb, skb->sk); -		dev_kfree_skb(skb); -		skb = new_skb; -		old_iph = ip_hdr(skb); -		/* Warning : tiph value might point to freed memory */ +	/* Push GRE header. */ +	skb = gre_build_header(skb, &tpi, tunnel->hlen); +	if (unlikely(!skb)) { +		dev->stats.tx_dropped++; +		return;  	} -	skb_push(skb, gre_hlen); -	skb_reset_network_header(skb); -	skb_set_transport_header(skb, sizeof(*iph)); -	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); -	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | -			      IPSKB_REROUTED); -	skb_dst_drop(skb); -	skb_dst_set(skb, &rt->dst); - -	/* -	 *	Push down and install the IPIP header. -	 */ - -	iph 			=	ip_hdr(skb); -	iph->version		=	4; -	iph->ihl		=	sizeof(struct iphdr) >> 2; -	iph->frag_off		=	df; -	iph->protocol		=	IPPROTO_GRE; -	iph->tos		=	ipgre_ecn_encapsulate(tos, old_iph, skb); -	iph->daddr		=	fl4.daddr; -	iph->saddr		=	fl4.saddr; -	iph->ttl		=	ttl; - -	tunnel_ip_select_ident(skb, old_iph, &rt->dst); +	ip_tunnel_xmit(skb, dev, tnl_params); +} -	if (ttl == 0) { -		if (skb->protocol == htons(ETH_P_IP)) -			iph->ttl = old_iph->ttl; -#if IS_ENABLED(CONFIG_IPV6) -		else if (skb->protocol == htons(ETH_P_IPV6)) -			iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit; -#endif -		else -			iph->ttl = ip4_dst_hoplimit(&rt->dst); -	} +static netdev_tx_t ipgre_xmit(struct sk_buff *skb, +			      struct net_device *dev) +{ +	struct ip_tunnel *tunnel = netdev_priv(dev); +	const struct iphdr *tnl_params; -	((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; -	((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ? -				   htons(ETH_P_TEB) : skb->protocol; +	skb = handle_offloads(tunnel, skb); +	if (IS_ERR(skb)) +		goto out; -	if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { -		__be32 *ptr = (__be32 *)(((u8 *)iph) + tunnel->hlen - 4); +	if (dev->header_ops) { +		/* Need space for new headers */ +		if (skb_cow_head(skb, dev->needed_headroom - +				      (tunnel->hlen + sizeof(struct iphdr)))) +			goto free_skb; -		if (tunnel->parms.o_flags&GRE_SEQ) { -			++tunnel->o_seqno; -			*ptr = htonl(tunnel->o_seqno); -			ptr--; -		} -		if (tunnel->parms.o_flags&GRE_KEY) { -			*ptr = tunnel->parms.o_key; -			ptr--; -		} -		/* Skip GRE checksum if skb is getting offloaded. */ -		if (!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE) && -		    (tunnel->parms.o_flags&GRE_CSUM)) { -			int offset = skb_transport_offset(skb); +		tnl_params = (const struct iphdr *)skb->data; -			if (skb_has_shared_frag(skb)) { -				err = __skb_linearize(skb); -				if (err) -					goto tx_error; -			} +		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing +		 * to gre header. 
+		 */ +		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); +	} else { +		if (skb_cow_head(skb, dev->needed_headroom)) +			goto free_skb; -			*ptr = 0; -			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, offset, -								 skb->len - offset, -								 0)); -		} +		tnl_params = &tunnel->parms.iph;  	} -	nf_reset(skb); +	__gre_xmit(skb, dev, tnl_params, skb->protocol); -	pkt_len = skb->len - skb_transport_offset(skb); -	err = ip_local_out(skb); -	if (likely(net_xmit_eval(err) == 0)) { -		u64_stats_update_begin(&tstats->syncp); -		tstats->tx_bytes += pkt_len; -		tstats->tx_packets++; -		u64_stats_update_end(&tstats->syncp); -	} else { -		dev->stats.tx_errors++; -		dev->stats.tx_aborted_errors++; -	}  	return NETDEV_TX_OK; -#if IS_ENABLED(CONFIG_IPV6) -tx_error_icmp: -	dst_link_failure(skb); -#endif -tx_error: -	dev->stats.tx_errors++; +free_skb:  	dev_kfree_skb(skb); +out: +	dev->stats.tx_dropped++;  	return NETDEV_TX_OK;  } -static int ipgre_tunnel_bind_dev(struct net_device *dev) +static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, +				struct net_device *dev)  { -	struct net_device *tdev = NULL; -	struct ip_tunnel *tunnel; -	const struct iphdr *iph; -	int hlen = LL_MAX_HEADER; -	int mtu = ETH_DATA_LEN; -	int addend = sizeof(struct iphdr) + 4; - -	tunnel = netdev_priv(dev); -	iph = &tunnel->parms.iph; - -	/* Guess output device to choose reasonable mtu and needed_headroom */ - -	if (iph->daddr) { -		struct flowi4 fl4; -		struct rtable *rt; - -		rt = ip_route_output_gre(dev_net(dev), &fl4, -					 iph->daddr, iph->saddr, -					 tunnel->parms.o_key, -					 RT_TOS(iph->tos), -					 tunnel->parms.link); -		if (!IS_ERR(rt)) { -			tdev = rt->dst.dev; -			ip_rt_put(rt); -		} - -		if (dev->type != ARPHRD_ETHER) -			dev->flags |= IFF_POINTOPOINT; -	} - -	if (!tdev && tunnel->parms.link) -		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); +	struct ip_tunnel *tunnel = netdev_priv(dev); -	if (tdev) { -		hlen = tdev->hard_header_len + tdev->needed_headroom; -		mtu = tdev->mtu; -	} -	dev->iflink = tunnel->parms.link; +	skb = handle_offloads(tunnel, skb); +	if (IS_ERR(skb)) +		goto out; -	/* Precalculate GRE options length */ -	if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { -		if (tunnel->parms.o_flags&GRE_CSUM) -			addend += 4; -		if (tunnel->parms.o_flags&GRE_KEY) -			addend += 4; -		if (tunnel->parms.o_flags&GRE_SEQ) -			addend += 4; -	} -	dev->needed_headroom = addend + hlen; -	mtu -= dev->hard_header_len + addend; +	if (skb_cow_head(skb, dev->needed_headroom)) +		goto free_skb; -	if (mtu < 68) -		mtu = 68; +	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB)); -	tunnel->hlen = addend; -	/* TCP offload with GRE SEQ is not supported. 
*/ -	if (!(tunnel->parms.o_flags & GRE_SEQ)) { -		dev->features		|= NETIF_F_GSO_SOFTWARE; -		dev->hw_features	|= NETIF_F_GSO_SOFTWARE; -	} +	return NETDEV_TX_OK; -	return mtu; +free_skb: +	dev_kfree_skb(skb); +out: +	dev->stats.tx_dropped++; +	return NETDEV_TX_OK;  } -static int -ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) +static int ipgre_tunnel_ioctl(struct net_device *dev, +			      struct ifreq *ifr, int cmd)  {  	int err = 0;  	struct ip_tunnel_parm p; -	struct ip_tunnel *t; -	struct net *net = dev_net(dev); -	struct ipgre_net *ign = net_generic(net, ipgre_net_id); - -	switch (cmd) { -	case SIOCGETTUNNEL: -		t = NULL; -		if (dev == ign->fb_tunnel_dev) { -			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { -				err = -EFAULT; -				break; -			} -			t = ipgre_tunnel_locate(net, &p, 0); -		} -		if (t == NULL) -			t = netdev_priv(dev); -		memcpy(&p, &t->parms, sizeof(p)); -		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) -			err = -EFAULT; -		break; - -	case SIOCADDTUNNEL: -	case SIOCCHGTUNNEL: -		err = -EPERM; -		if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) -			goto done; - -		err = -EFAULT; -		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) -			goto done; - -		err = -EINVAL; -		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || -		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || -		    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) -			goto done; -		if (p.iph.ttl) -			p.iph.frag_off |= htons(IP_DF); - -		if (!(p.i_flags&GRE_KEY)) -			p.i_key = 0; -		if (!(p.o_flags&GRE_KEY)) -			p.o_key = 0; - -		t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); - -		if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { -			if (t != NULL) { -				if (t->dev != dev) { -					err = -EEXIST; -					break; -				} -			} else { -				unsigned int nflags = 0; -				t = netdev_priv(dev); - -				if (ipv4_is_multicast(p.iph.daddr)) -					nflags = IFF_BROADCAST; -				else if (p.iph.daddr) -					nflags = IFF_POINTOPOINT; - -				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { -					err = -EINVAL; -					break; -				} -				ipgre_tunnel_unlink(ign, t); -				synchronize_net(); -				t->parms.iph.saddr = p.iph.saddr; -				t->parms.iph.daddr = p.iph.daddr; -				t->parms.i_key = p.i_key; -				t->parms.o_key = p.o_key; -				memcpy(dev->dev_addr, &p.iph.saddr, 4); -				memcpy(dev->broadcast, &p.iph.daddr, 4); -				ipgre_tunnel_link(ign, t); -				netdev_state_change(dev); -			} -		} - -		if (t) { -			err = 0; -			if (cmd == SIOCCHGTUNNEL) { -				t->parms.iph.ttl = p.iph.ttl; -				t->parms.iph.tos = p.iph.tos; -				t->parms.iph.frag_off = p.iph.frag_off; -				if (t->parms.link != p.link) { -					t->parms.link = p.link; -					dev->mtu = ipgre_tunnel_bind_dev(dev); -					netdev_state_change(dev); -				} -			} -			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) -				err = -EFAULT; -		} else -			err = (cmd == SIOCADDTUNNEL ? 
-ENOBUFS : -ENOENT); -		break; - -	case SIOCDELTUNNEL: -		err = -EPERM; -		if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) -			goto done; - -		if (dev == ign->fb_tunnel_dev) { -			err = -EFAULT; -			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) -				goto done; -			err = -ENOENT; -			if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL) -				goto done; -			err = -EPERM; -			if (t == netdev_priv(ign->fb_tunnel_dev)) -				goto done; -			dev = t->dev; -		} -		unregister_netdevice(dev); -		err = 0; -		break; - -	default: -		err = -EINVAL; +	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) +		return -EFAULT; +	if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || +	    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || +	    ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) { +		return -EINVAL;  	} +	p.i_flags = gre_flags_to_tnl_flags(p.i_flags); +	p.o_flags = gre_flags_to_tnl_flags(p.o_flags); -done: -	return err; -} +	err = ip_tunnel_ioctl(dev, &p, cmd); +	if (err) +		return err; -static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) -{ -	struct ip_tunnel *tunnel = netdev_priv(dev); -	if (new_mtu < 68 || -	    new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) -		return -EINVAL; -	dev->mtu = new_mtu; +	p.i_flags = tnl_flags_to_gre_flags(p.i_flags); +	p.o_flags = tnl_flags_to_gre_flags(p.o_flags); + +	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) +		return -EFAULT;  	return 0;  } @@ -1277,25 +548,23 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)     ...     ftp fec0:6666:6666::193.233.7.65     ... -   */ -  static int ipgre_header(struct sk_buff *skb, struct net_device *dev,  			unsigned short type,  			const void *daddr, const void *saddr, unsigned int len)  {  	struct ip_tunnel *t = netdev_priv(dev); -	struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); -	__be16 *p = (__be16 *)(iph+1); +	struct iphdr *iph; +	struct gre_base_hdr *greh; -	memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); -	p[0]		= t->parms.o_flags; -	p[1]		= htons(type); +	iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph)); +	greh = (struct gre_base_hdr *)(iph+1); +	greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags); +	greh->protocol = htons(type); -	/* -	 *	Set the source hardware address. -	 */ +	memcpy(iph, &t->parms.iph, sizeof(struct iphdr)); +	/* Set the source hardware address. 
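
For reference, this is the header ipgre_header() now assembles, with the GRE base header placed immediately after the outer iphdr; the optional csum/key/seq words are reserved by t->hlen but not filled in here:

	/*
	 *   +--------------+-------------------+----------------+---------+
	 *   | outer iphdr  | gre_base_hdr      | optional words | payload |
	 *   | (20 bytes)   | flags | protocol  | csum/key/seq   |         |
	 *   +--------------+-------------------+----------------+---------+
	 *
	 * Returning a negative length when daddr is not yet known follows
	 * the header_ops->create convention (compare eth_header()): the
	 * header is partial and the destination may be filled in later.
	 */
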
*/  	if (saddr)  		memcpy(&iph->saddr, saddr, 4);  	if (daddr) @@ -1303,7 +572,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,  	if (iph->daddr)  		return t->hlen; -	return -t->hlen; +	return -(t->hlen + sizeof(*iph));  }  static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr) @@ -1357,31 +626,21 @@ static int ipgre_close(struct net_device *dev)  	}  	return 0;  } -  #endif  static const struct net_device_ops ipgre_netdev_ops = {  	.ndo_init		= ipgre_tunnel_init, -	.ndo_uninit		= ipgre_tunnel_uninit, +	.ndo_uninit		= ip_tunnel_uninit,  #ifdef CONFIG_NET_IPGRE_BROADCAST  	.ndo_open		= ipgre_open,  	.ndo_stop		= ipgre_close,  #endif -	.ndo_start_xmit		= ipgre_tunnel_xmit, +	.ndo_start_xmit		= ipgre_xmit,  	.ndo_do_ioctl		= ipgre_tunnel_ioctl, -	.ndo_change_mtu		= ipgre_tunnel_change_mtu, -	.ndo_get_stats64	= ipgre_get_stats64, +	.ndo_change_mtu		= ip_tunnel_change_mtu, +	.ndo_get_stats64	= ip_tunnel_get_stats64,  }; -static void ipgre_dev_free(struct net_device *dev) -{ -	struct ip_tunnel *tunnel = netdev_priv(dev); - -	gro_cells_destroy(&tunnel->gro_cells); -	free_percpu(dev->tstats); -	free_netdev(dev); -} -  #define GRE_FEATURES (NETIF_F_SG |		\  		      NETIF_F_FRAGLIST |	\  		      NETIF_F_HIGHDMA |		\ @@ -1390,35 +649,48 @@ static void ipgre_dev_free(struct net_device *dev)  static void ipgre_tunnel_setup(struct net_device *dev)  {  	dev->netdev_ops		= &ipgre_netdev_ops; -	dev->destructor 	= ipgre_dev_free; +	ip_tunnel_setup(dev, ipgre_net_id); +} -	dev->type		= ARPHRD_IPGRE; -	dev->needed_headroom 	= LL_MAX_HEADER + sizeof(struct iphdr) + 4; +static void __gre_tunnel_init(struct net_device *dev) +{ +	struct ip_tunnel *tunnel; + +	tunnel = netdev_priv(dev); +	tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags); +	tunnel->parms.iph.protocol = IPPROTO_GRE; + +	dev->needed_headroom	= LL_MAX_HEADER + sizeof(struct iphdr) + 4;  	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 4; -	dev->flags		= IFF_NOARP; -	dev->iflink		= 0; -	dev->addr_len		= 4; -	dev->features		|= NETIF_F_NETNS_LOCAL; -	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE; -	dev->features		|= GRE_FEATURES; +	dev->features		|= NETIF_F_NETNS_LOCAL | GRE_FEATURES;  	dev->hw_features	|= GRE_FEATURES; + +	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { +		/* TCP offload with GRE SEQ is not supported. 
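
A brief rationale for the feature wiring just below, inferred from the code comments rather than stated in the patch itself:

	/* Software GSO would emit several GRE packets from one super-packet,
	 * but a per-packet sequence number can only be stamped once per
	 * ->ndo_start_xmit() call, so TUNNEL_SEQ forecloses
	 * NETIF_F_GSO_SOFTWARE.  The same counter is why LLTX (lockless
	 * transmit) is only safe for unsequenced tunnels: o_seqno updates
	 * need the tx lock to stay ordered. */
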
*/ +		dev->features    |= NETIF_F_GSO_SOFTWARE; +		dev->hw_features |= NETIF_F_GSO_SOFTWARE; +		/* Can use a lockless transmit, unless we generate +		 * output sequences +		 */ +		dev->features |= NETIF_F_LLTX; +	}  }  static int ipgre_tunnel_init(struct net_device *dev)  { -	struct ip_tunnel *tunnel; -	struct iphdr *iph; -	int err; +	struct ip_tunnel *tunnel = netdev_priv(dev); +	struct iphdr *iph = &tunnel->parms.iph; -	tunnel = netdev_priv(dev); -	iph = &tunnel->parms.iph; +	__gre_tunnel_init(dev); -	tunnel->dev = dev; -	strcpy(tunnel->parms.name, dev->name); +	memcpy(dev->dev_addr, &iph->saddr, 4); +	memcpy(dev->broadcast, &iph->daddr, 4); -	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); -	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); +	dev->type		= ARPHRD_IPGRE; +	dev->flags		= IFF_NOARP; +	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE; +	dev->addr_len		= 4;  	if (iph->daddr) {  #ifdef CONFIG_NET_IPGRE_BROADCAST @@ -1432,106 +704,30 @@ static int ipgre_tunnel_init(struct net_device *dev)  	} else  		dev->header_ops = &ipgre_header_ops; -	dev->tstats = alloc_percpu(struct pcpu_tstats); -	if (!dev->tstats) -		return -ENOMEM; - -	err = gro_cells_init(&tunnel->gro_cells, dev); -	if (err) { -		free_percpu(dev->tstats); -		return err; -	} - -	return 0; +	return ip_tunnel_init(dev);  } -static void ipgre_fb_tunnel_init(struct net_device *dev) -{ -	struct ip_tunnel *tunnel = netdev_priv(dev); -	struct iphdr *iph = &tunnel->parms.iph; - -	tunnel->dev = dev; -	strcpy(tunnel->parms.name, dev->name); - -	iph->version		= 4; -	iph->protocol		= IPPROTO_GRE; -	iph->ihl		= 5; -	tunnel->hlen		= sizeof(struct iphdr) + 4; - -	dev_hold(dev); -} - -  static const struct gre_protocol ipgre_protocol = {  	.handler     = ipgre_rcv,  	.err_handler = ipgre_err,  }; -static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) -{ -	int prio; - -	for (prio = 0; prio < 4; prio++) { -		int h; -		for (h = 0; h < HASH_SIZE; h++) { -			struct ip_tunnel *t; - -			t = rtnl_dereference(ign->tunnels[prio][h]); - -			while (t != NULL) { -				unregister_netdevice_queue(t->dev, head); -				t = rtnl_dereference(t->next); -			} -		} -	} -} -  static int __net_init ipgre_init_net(struct net *net)  { -	struct ipgre_net *ign = net_generic(net, ipgre_net_id); -	int err; - -	ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", -					   ipgre_tunnel_setup); -	if (!ign->fb_tunnel_dev) { -		err = -ENOMEM; -		goto err_alloc_dev; -	} -	dev_net_set(ign->fb_tunnel_dev, net); - -	ipgre_fb_tunnel_init(ign->fb_tunnel_dev); -	ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; - -	if ((err = register_netdev(ign->fb_tunnel_dev))) -		goto err_reg_dev; - -	rcu_assign_pointer(ign->tunnels_wc[0], -			   netdev_priv(ign->fb_tunnel_dev)); -	return 0; - -err_reg_dev: -	ipgre_dev_free(ign->fb_tunnel_dev); -err_alloc_dev: -	return err; +	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);  }  static void __net_exit ipgre_exit_net(struct net *net)  { -	struct ipgre_net *ign; -	LIST_HEAD(list); - -	ign = net_generic(net, ipgre_net_id); -	rtnl_lock(); -	ipgre_destroy_tunnels(ign, &list); -	unregister_netdevice_many(&list); -	rtnl_unlock(); +	struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id); +	ip_tunnel_delete_net(itn);  }  static struct pernet_operations ipgre_net_ops = {  	.init = ipgre_init_net,  	.exit = ipgre_exit_net,  	.id   = &ipgre_net_id, -	.size = sizeof(struct ipgre_net), +	.size = sizeof(struct ip_tunnel_net),  };  static int ipgre_tunnel_validate(struct nlattr *tb[], 
struct nlattr *data[]) @@ -1576,8 +772,8 @@ out:  	return ipgre_tunnel_validate(tb, data);  } -static void ipgre_netlink_parms(struct nlattr *data[], -				struct ip_tunnel_parm *parms) +static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[], +			       struct ip_tunnel_parm *parms)  {  	memset(parms, 0, sizeof(*parms)); @@ -1590,10 +786,10 @@ static void ipgre_netlink_parms(struct nlattr *data[],  		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);  	if (data[IFLA_GRE_IFLAGS]) -		parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); +		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));  	if (data[IFLA_GRE_OFLAGS]) -		parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); +		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));  	if (data[IFLA_GRE_IKEY])  		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); @@ -1617,148 +813,46 @@ static void ipgre_netlink_parms(struct nlattr *data[],  		parms->iph.frag_off = htons(IP_DF);  } -static int ipgre_tap_init(struct net_device *dev) +static int gre_tap_init(struct net_device *dev)  { -	struct ip_tunnel *tunnel; - -	tunnel = netdev_priv(dev); - -	tunnel->dev = dev; -	strcpy(tunnel->parms.name, dev->name); +	__gre_tunnel_init(dev); -	ipgre_tunnel_bind_dev(dev); - -	dev->tstats = alloc_percpu(struct pcpu_tstats); -	if (!dev->tstats) -		return -ENOMEM; - -	return 0; +	return ip_tunnel_init(dev);  } -static const struct net_device_ops ipgre_tap_netdev_ops = { -	.ndo_init		= ipgre_tap_init, -	.ndo_uninit		= ipgre_tunnel_uninit, -	.ndo_start_xmit		= ipgre_tunnel_xmit, +static const struct net_device_ops gre_tap_netdev_ops = { +	.ndo_init		= gre_tap_init, +	.ndo_uninit		= ip_tunnel_uninit, +	.ndo_start_xmit		= gre_tap_xmit,  	.ndo_set_mac_address 	= eth_mac_addr,  	.ndo_validate_addr	= eth_validate_addr, -	.ndo_change_mtu		= ipgre_tunnel_change_mtu, -	.ndo_get_stats64	= ipgre_get_stats64, +	.ndo_change_mtu		= ip_tunnel_change_mtu, +	.ndo_get_stats64	= ip_tunnel_get_stats64,  };  static void ipgre_tap_setup(struct net_device *dev)  { -  	ether_setup(dev); - -	dev->netdev_ops		= &ipgre_tap_netdev_ops; -	dev->destructor 	= ipgre_dev_free; - -	dev->iflink		= 0; -	dev->features		|= NETIF_F_NETNS_LOCAL; - -	dev->features		|= GRE_FEATURES; -	dev->hw_features	|= GRE_FEATURES; +	dev->netdev_ops		= &gre_tap_netdev_ops; +	ip_tunnel_setup(dev, gre_tap_net_id);  } -static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], -			 struct nlattr *data[]) +static int ipgre_newlink(struct net *src_net, struct net_device *dev, +			 struct nlattr *tb[], struct nlattr *data[])  { -	struct ip_tunnel *nt; -	struct net *net = dev_net(dev); -	struct ipgre_net *ign = net_generic(net, ipgre_net_id); -	int mtu; -	int err; - -	nt = netdev_priv(dev); -	ipgre_netlink_parms(data, &nt->parms); - -	if (ipgre_tunnel_find(net, &nt->parms, dev->type)) -		return -EEXIST; - -	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) -		eth_hw_addr_random(dev); - -	mtu = ipgre_tunnel_bind_dev(dev); -	if (!tb[IFLA_MTU]) -		dev->mtu = mtu; - -	/* Can use a lockless transmit, unless we generate output sequences */ -	if (!(nt->parms.o_flags & GRE_SEQ)) -		dev->features |= NETIF_F_LLTX; - -	err = register_netdevice(dev); -	if (err) -		goto out; - -	dev_hold(dev); -	ipgre_tunnel_link(ign, nt); +	struct ip_tunnel_parm p; -out: -	return err; +	ipgre_netlink_parms(data, tb, &p); +	return ip_tunnel_newlink(dev, tb, &p);  }  static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],  			    struct 
nlattr *data[])  { -	struct ip_tunnel *t, *nt; -	struct net *net = dev_net(dev); -	struct ipgre_net *ign = net_generic(net, ipgre_net_id);  	struct ip_tunnel_parm p; -	int mtu; - -	if (dev == ign->fb_tunnel_dev) -		return -EINVAL; - -	nt = netdev_priv(dev); -	ipgre_netlink_parms(data, &p); -	t = ipgre_tunnel_locate(net, &p, 0); - -	if (t) { -		if (t->dev != dev) -			return -EEXIST; -	} else { -		t = nt; - -		if (dev->type != ARPHRD_ETHER) { -			unsigned int nflags = 0; - -			if (ipv4_is_multicast(p.iph.daddr)) -				nflags = IFF_BROADCAST; -			else if (p.iph.daddr) -				nflags = IFF_POINTOPOINT; - -			if ((dev->flags ^ nflags) & -			    (IFF_POINTOPOINT | IFF_BROADCAST)) -				return -EINVAL; -		} - -		ipgre_tunnel_unlink(ign, t); -		t->parms.iph.saddr = p.iph.saddr; -		t->parms.iph.daddr = p.iph.daddr; -		t->parms.i_key = p.i_key; -		if (dev->type != ARPHRD_ETHER) { -			memcpy(dev->dev_addr, &p.iph.saddr, 4); -			memcpy(dev->broadcast, &p.iph.daddr, 4); -		} -		ipgre_tunnel_link(ign, t); -		netdev_state_change(dev); -	} - -	t->parms.o_key = p.o_key; -	t->parms.iph.ttl = p.iph.ttl; -	t->parms.iph.tos = p.iph.tos; -	t->parms.iph.frag_off = p.iph.frag_off; - -	if (t->parms.link != p.link) { -		t->parms.link = p.link; -		mtu = ipgre_tunnel_bind_dev(dev); -		if (!tb[IFLA_MTU]) -			dev->mtu = mtu; -		netdev_state_change(dev); -	} - -	return 0; +	ipgre_netlink_parms(data, tb, &p); +	return ip_tunnel_changelink(dev, tb, &p);  }  static size_t ipgre_get_size(const struct net_device *dev) @@ -1793,8 +887,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)  	struct ip_tunnel_parm *p = &t->parms;  	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || -	    nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || -	    nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || +	    nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) || +	    nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||  	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||  	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||  	    nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) || @@ -1832,6 +926,7 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = {  	.validate	= ipgre_tunnel_validate,  	.newlink	= ipgre_newlink,  	.changelink	= ipgre_changelink, +	.dellink	= ip_tunnel_dellink,  	.get_size	= ipgre_get_size,  	.fill_info	= ipgre_fill_info,  }; @@ -1845,13 +940,28 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {  	.validate	= ipgre_tap_validate,  	.newlink	= ipgre_newlink,  	.changelink	= ipgre_changelink, +	.dellink	= ip_tunnel_dellink,  	.get_size	= ipgre_get_size,  	.fill_info	= ipgre_fill_info,  }; -/* - *	And now the modules code and kernel interface. 
- */ +static int __net_init ipgre_tap_init_net(struct net *net) +{ +	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL); +} + +static void __net_exit ipgre_tap_exit_net(struct net *net) +{ +	struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id); +	ip_tunnel_delete_net(itn); +} + +static struct pernet_operations ipgre_tap_net_ops = { +	.init = ipgre_tap_init_net, +	.exit = ipgre_tap_exit_net, +	.id   = &gre_tap_net_id, +	.size = sizeof(struct ip_tunnel_net), +};  static int __init ipgre_init(void)  { @@ -1863,6 +973,10 @@ static int __init ipgre_init(void)  	if (err < 0)  		return err; +	err = register_pernet_device(&ipgre_tap_net_ops); +	if (err < 0) +		goto pnet_tap_faied; +  	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);  	if (err < 0) {  		pr_info("%s: can't add protocol\n", __func__); @@ -1877,16 +991,17 @@ static int __init ipgre_init(void)  	if (err < 0)  		goto tap_ops_failed; -out: -	return err; +	return 0;  tap_ops_failed:  	rtnl_link_unregister(&ipgre_link_ops);  rtnl_link_failed:  	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);  add_proto_failed: +	unregister_pernet_device(&ipgre_tap_net_ops); +pnet_tap_faied:  	unregister_pernet_device(&ipgre_net_ops); -	goto out; +	return err;  }  static void __exit ipgre_fini(void) @@ -1895,6 +1010,7 @@ static void __exit ipgre_fini(void)  	rtnl_link_unregister(&ipgre_link_ops);  	if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)  		pr_info("%s: can't remove protocol\n", __func__); +	unregister_pernet_device(&ipgre_tap_net_ops);  	unregister_pernet_device(&ipgre_net_ops);  } @@ -1904,3 +1020,4 @@ MODULE_LICENSE("GPL");  MODULE_ALIAS_RTNL_LINK("gre");  MODULE_ALIAS_RTNL_LINK("gretap");  MODULE_ALIAS_NETDEV("gre0"); +MODULE_ALIAS_NETDEV("gretap0"); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 2bdf802e28e..3da817b89e9 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -419,7 +419,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,  	iph = ip_hdr(skb);  	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) -		goto inhdr_error; +		goto csum_error;  	len = ntohs(iph->tot_len);  	if (skb->len < len) { @@ -446,6 +446,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,  	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL,  		       ip_rcv_finish); +csum_error: +	IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_CSUMERRORS);  inhdr_error:  	IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);  drop: diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 5e12dca7b3d..147abf5275a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -430,8 +430,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)  	to->tc_index = from->tc_index;  #endif  	nf_copy(to, from); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ -    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)  	to->nf_trace = from->nf_trace;  #endif  #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c new file mode 100644 index 00000000000..e4147ec1665 --- /dev/null +++ b/net/ipv4/ip_tunnel.c @@ -0,0 +1,1035 @@ +/* + * Copyright (c) 2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/capability.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/in.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/if_arp.h> +#include <linux/mroute.h> +#include <linux/init.h> +#include <linux/in6.h> +#include <linux/inetdevice.h> +#include <linux/igmp.h> +#include <linux/netfilter_ipv4.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/rculist.h> + +#include <net/sock.h> +#include <net/ip.h> +#include <net/icmp.h> +#include <net/protocol.h> +#include <net/ip_tunnels.h> +#include <net/arp.h> +#include <net/checksum.h> +#include <net/dsfield.h> +#include <net/inet_ecn.h> +#include <net/xfrm.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <net/rtnetlink.h> + +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ipv6.h> +#include <net/ip6_fib.h> +#include <net/ip6_route.h> +#endif + +static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn, +				   __be32 key, __be32 remote) +{ +	return hash_32((__force u32)key ^ (__force u32)remote, +			 IP_TNL_HASH_BITS); +} + +/* Often modified stats are per cpu, other are shared (netdev->stats) */ +struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, +						struct rtnl_link_stats64 *tot) +{ +	int i; + +	for_each_possible_cpu(i) { +		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); +		u64 rx_packets, rx_bytes, tx_packets, tx_bytes; +		unsigned int start; + +		do { +			start = u64_stats_fetch_begin_bh(&tstats->syncp); +			rx_packets = tstats->rx_packets; +			tx_packets = tstats->tx_packets; +			rx_bytes = tstats->rx_bytes; +			tx_bytes = tstats->tx_bytes; +		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + +		tot->rx_packets += rx_packets; +		tot->tx_packets += tx_packets; +		tot->rx_bytes   += rx_bytes; +		tot->tx_bytes   += tx_bytes; +	} + +	tot->multicast = dev->stats.multicast; + +	tot->rx_crc_errors = dev->stats.rx_crc_errors; +	tot->rx_fifo_errors = dev->stats.rx_fifo_errors; +	tot->rx_length_errors = dev->stats.rx_length_errors; +	tot->rx_frame_errors = dev->stats.rx_frame_errors; +	tot->rx_errors = dev->stats.rx_errors; + +	tot->tx_fifo_errors = dev->stats.tx_fifo_errors; +	tot->tx_carrier_errors = dev->stats.tx_carrier_errors; +	tot->tx_dropped = dev->stats.tx_dropped; +	tot->tx_aborted_errors = dev->stats.tx_aborted_errors; +	tot->tx_errors = dev->stats.tx_errors; + +	tot->collisions  = dev->stats.collisions; + +	return tot; +} +EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64); + +static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, +				__be16 flags, __be32 key) +{ +	if (p->i_flags & TUNNEL_KEY) { +		if (flags & TUNNEL_KEY) +			return key == p->i_key; +		else +			/* key expected, none present */ +			return false; +	} else +		return !(flags & TUNNEL_KEY); +} + +/* Fallback tunnel: no source, no destination, 
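
The exact-key rule in ip_tunnel_key_match() above, spelled out; each row derives directly from the branches of the function:

	/* tunnel has TUNNEL_KEY | packet carries key | result
	 * ----------------------+--------------------+----------------------
	 *          yes          |        yes         | key == p->i_key
	 *          yes          |        no          | false, key expected
	 *          no           |        yes         | false, unexpected key
	 *          no           |        no          | true
	 */
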
no key, no options + +   Tunnel hash table: +   We require exact key match i.e. if a key is present in packet +   it will match only tunnel with the same key; if it is not present, +   it will match only keyless tunnel. + +   All keysless packets, if not matched configured keyless tunnels +   will match fallback tunnel. +   Given src, dst and key, find appropriate for input tunnel. +*/ +struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, +				   int link, __be16 flags, +				   __be32 remote, __be32 local, +				   __be32 key) +{ +	unsigned int hash; +	struct ip_tunnel *t, *cand = NULL; +	struct hlist_head *head; + +	hash = ip_tunnel_hash(itn, key, remote); +	head = &itn->tunnels[hash]; + +	hlist_for_each_entry_rcu(t, head, hash_node) { +		if (local != t->parms.iph.saddr || +		    remote != t->parms.iph.daddr || +		    !(t->dev->flags & IFF_UP)) +			continue; + +		if (!ip_tunnel_key_match(&t->parms, flags, key)) +			continue; + +		if (t->parms.link == link) +			return t; +		else +			cand = t; +	} + +	hlist_for_each_entry_rcu(t, head, hash_node) { +		if (remote != t->parms.iph.daddr || +		    !(t->dev->flags & IFF_UP)) +			continue; + +		if (!ip_tunnel_key_match(&t->parms, flags, key)) +			continue; + +		if (t->parms.link == link) +			return t; +		else if (!cand) +			cand = t; +	} + +	hash = ip_tunnel_hash(itn, key, 0); +	head = &itn->tunnels[hash]; + +	hlist_for_each_entry_rcu(t, head, hash_node) { +		if ((local != t->parms.iph.saddr && +		     (local != t->parms.iph.daddr || +		      !ipv4_is_multicast(local))) || +		    !(t->dev->flags & IFF_UP)) +			continue; + +		if (!ip_tunnel_key_match(&t->parms, flags, key)) +			continue; + +		if (t->parms.link == link) +			return t; +		else if (!cand) +			cand = t; +	} + +	if (flags & TUNNEL_NO_KEY) +		goto skip_key_lookup; + +	hlist_for_each_entry_rcu(t, head, hash_node) { +		if (t->parms.i_key != key || +		    !(t->dev->flags & IFF_UP)) +			continue; + +		if (t->parms.link == link) +			return t; +		else if (!cand) +			cand = t; +	} + +skip_key_lookup: +	if (cand) +		return cand; + +	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) +		return netdev_priv(itn->fb_tunnel_dev); + + +	return NULL; +} +EXPORT_SYMBOL_GPL(ip_tunnel_lookup); + +static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn, +				    struct ip_tunnel_parm *parms) +{ +	unsigned int h; +	__be32 remote; + +	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr)) +		remote = parms->iph.daddr; +	else +		remote = 0; + +	h = ip_tunnel_hash(itn, parms->i_key, remote); +	return &itn->tunnels[h]; +} + +static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t) +{ +	struct hlist_head *head = ip_bucket(itn, &t->parms); + +	hlist_add_head_rcu(&t->hash_node, head); +} + +static void ip_tunnel_del(struct ip_tunnel *t) +{ +	hlist_del_init_rcu(&t->hash_node); +} + +static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, +					struct ip_tunnel_parm *parms, +					int type) +{ +	__be32 remote = parms->iph.daddr; +	__be32 local = parms->iph.saddr; +	__be32 key = parms->i_key; +	int link = parms->link; +	struct ip_tunnel *t = NULL; +	struct hlist_head *head = ip_bucket(itn, parms); + +	hlist_for_each_entry_rcu(t, head, hash_node) { +		if (local == t->parms.iph.saddr && +		    remote == t->parms.iph.daddr && +		    key == t->parms.i_key && +		    link == t->parms.link && +		    type == t->dev->type) +			break; +	} +	return t; +} + +static struct net_device *__ip_tunnel_create(struct net *net, +					     const struct rtnl_link_ops *ops, +					    
 struct ip_tunnel_parm *parms) +{ +	int err; +	struct ip_tunnel *tunnel; +	struct net_device *dev; +	char name[IFNAMSIZ]; + +	if (parms->name[0]) +		strlcpy(name, parms->name, IFNAMSIZ); +	else { +		if (strlen(ops->kind) > (IFNAMSIZ - 3)) { +			err = -E2BIG; +			goto failed; +		} +		strlcpy(name, ops->kind, IFNAMSIZ); +		strncat(name, "%d", 2); +	} + +	ASSERT_RTNL(); +	dev = alloc_netdev(ops->priv_size, name, ops->setup); +	if (!dev) { +		err = -ENOMEM; +		goto failed; +	} +	dev_net_set(dev, net); + +	dev->rtnl_link_ops = ops; + +	tunnel = netdev_priv(dev); +	tunnel->parms = *parms; + +	err = register_netdevice(dev); +	if (err) +		goto failed_free; + +	return dev; + +failed_free: +	free_netdev(dev); +failed: +	return ERR_PTR(err); +} + +static inline struct rtable *ip_route_output_tunnel(struct net *net, +						    struct flowi4 *fl4, +						    int proto, +						    __be32 daddr, __be32 saddr, +						    __be32 key, __u8 tos, int oif) +{ +	memset(fl4, 0, sizeof(*fl4)); +	fl4->flowi4_oif = oif; +	fl4->daddr = daddr; +	fl4->saddr = saddr; +	fl4->flowi4_tos = tos; +	fl4->flowi4_proto = proto; +	fl4->fl4_gre_key = key; +	return ip_route_output_key(net, fl4); +} + +static int ip_tunnel_bind_dev(struct net_device *dev) +{ +	struct net_device *tdev = NULL; +	struct ip_tunnel *tunnel = netdev_priv(dev); +	const struct iphdr *iph; +	int hlen = LL_MAX_HEADER; +	int mtu = ETH_DATA_LEN; +	int t_hlen = tunnel->hlen + sizeof(struct iphdr); + +	iph = &tunnel->parms.iph; + +	/* Guess output device to choose reasonable mtu and needed_headroom */ +	if (iph->daddr) { +		struct flowi4 fl4; +		struct rtable *rt; + +		rt = ip_route_output_tunnel(dev_net(dev), &fl4, +					    tunnel->parms.iph.protocol, +					    iph->daddr, iph->saddr, +					    tunnel->parms.o_key, +					    RT_TOS(iph->tos), +					    tunnel->parms.link); +		if (!IS_ERR(rt)) { +			tdev = rt->dst.dev; +			ip_rt_put(rt); +		} +		if (dev->type != ARPHRD_ETHER) +			dev->flags |= IFF_POINTOPOINT; +	} + +	if (!tdev && tunnel->parms.link) +		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); + +	if (tdev) { +		hlen = tdev->hard_header_len + tdev->needed_headroom; +		mtu = tdev->mtu; +	} +	dev->iflink = tunnel->parms.link; + +	dev->needed_headroom = t_hlen + hlen; +	mtu -= (dev->hard_header_len + t_hlen); + +	if (mtu < 68) +		mtu = 68; + +	return mtu; +} + +static struct ip_tunnel *ip_tunnel_create(struct net *net, +					  struct ip_tunnel_net *itn, +					  struct ip_tunnel_parm *parms) +{ +	struct ip_tunnel *nt, *fbt; +	struct net_device *dev; + +	BUG_ON(!itn->fb_tunnel_dev); +	fbt = netdev_priv(itn->fb_tunnel_dev); +	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms); +	if (IS_ERR(dev)) +		return NULL; + +	dev->mtu = ip_tunnel_bind_dev(dev); + +	nt = netdev_priv(dev); +	ip_tunnel_add(itn, nt); +	return nt; +} + +int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, +		  const struct tnl_ptk_info *tpi, bool log_ecn_error) +{ +	struct pcpu_tstats *tstats; +	const struct iphdr *iph = ip_hdr(skb); +	int err; + +	secpath_reset(skb); + +	skb->protocol = tpi->proto; + +	skb->mac_header = skb->network_header; +	__pskb_pull(skb, tunnel->hlen); +	skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen); +#ifdef CONFIG_NET_IPGRE_BROADCAST +	if (ipv4_is_multicast(iph->daddr)) { +		/* Looped back packet, drop it! 
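
A worked example for the MTU guess in ip_tunnel_bind_dev() above, assuming an Ethernet underlay with mtu 1500 and an L3 GRE device (hard_header_len 0) configured with a key:

	/*
	 *   hlen  (GRE part) = 4 base + 4 key              =    8
	 *   t_hlen           = hlen + sizeof(struct iphdr) =   28
	 *   mtu              = 1500 - (0 + 28)             = 1472
	 *
	 * i.e. the familiar 1472-byte MTU of a keyed GRE tunnel; dropping
	 * the key gives the classic 1476, and the result is clamped to a
	 * floor of 68 either way.
	 */
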
*/ +		if (rt_is_output_route(skb_rtable(skb))) +			goto drop; +		tunnel->dev->stats.multicast++; +		skb->pkt_type = PACKET_BROADCAST; +	} +#endif + +	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) || +	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) { +		tunnel->dev->stats.rx_crc_errors++; +		tunnel->dev->stats.rx_errors++; +		goto drop; +	} + +	if (tunnel->parms.i_flags&TUNNEL_SEQ) { +		if (!(tpi->flags&TUNNEL_SEQ) || +		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) { +			tunnel->dev->stats.rx_fifo_errors++; +			tunnel->dev->stats.rx_errors++; +			goto drop; +		} +		tunnel->i_seqno = ntohl(tpi->seq) + 1; +	} + +	/* Warning: All skb pointers will be invalidated! */ +	if (tunnel->dev->type == ARPHRD_ETHER) { +		if (!pskb_may_pull(skb, ETH_HLEN)) { +			tunnel->dev->stats.rx_length_errors++; +			tunnel->dev->stats.rx_errors++; +			goto drop; +		} + +		iph = ip_hdr(skb); +		skb->protocol = eth_type_trans(skb, tunnel->dev); +		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); +	} + +	skb->pkt_type = PACKET_HOST; +	__skb_tunnel_rx(skb, tunnel->dev); + +	skb_reset_network_header(skb); +	err = IP_ECN_decapsulate(iph, skb); +	if (unlikely(err)) { +		if (log_ecn_error) +			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", +					&iph->saddr, iph->tos); +		if (err > 1) { +			++tunnel->dev->stats.rx_frame_errors; +			++tunnel->dev->stats.rx_errors; +			goto drop; +		} +	} + +	tstats = this_cpu_ptr(tunnel->dev->tstats); +	u64_stats_update_begin(&tstats->syncp); +	tstats->rx_packets++; +	tstats->rx_bytes += skb->len; +	u64_stats_update_end(&tstats->syncp); + +	gro_cells_receive(&tunnel->gro_cells, skb); +	return 0; + +drop: +	kfree_skb(skb); +	return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_rcv); + +void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, +		    const struct iphdr *tnl_params) +{ +	struct ip_tunnel *tunnel = netdev_priv(dev); +	const struct iphdr *inner_iph; +	struct iphdr *iph; +	struct flowi4 fl4; +	u8     tos, ttl; +	__be16 df; +	struct rtable *rt;		/* Route to the other host */ +	struct net_device *tdev;	/* Device to other host */ +	unsigned int max_headroom;	/* The extra header space needed */ +	__be32 dst; +	int mtu; + +	inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + +	dst = tnl_params->daddr; +	if (dst == 0) { +		/* NBMA tunnel */ + +		if (skb_dst(skb) == NULL) { +			dev->stats.tx_fifo_errors++; +			goto tx_error; +		} + +		if (skb->protocol == htons(ETH_P_IP)) { +			rt = skb_rtable(skb); +			dst = rt_nexthop(rt, inner_iph->daddr); +		} +#if IS_ENABLED(CONFIG_IPV6) +		else if (skb->protocol == htons(ETH_P_IPV6)) { +			const struct in6_addr *addr6; +			struct neighbour *neigh; +			bool do_tx_error_icmp; +			int addr_type; + +			neigh = dst_neigh_lookup(skb_dst(skb), +						 &ipv6_hdr(skb)->daddr); +			if (neigh == NULL) +				goto tx_error; + +			addr6 = (const struct in6_addr *)&neigh->primary_key; +			addr_type = ipv6_addr_type(addr6); + +			if (addr_type == IPV6_ADDR_ANY) { +				addr6 = &ipv6_hdr(skb)->daddr; +				addr_type = ipv6_addr_type(addr6); +			} + +			if ((addr_type & IPV6_ADDR_COMPATv4) == 0) +				do_tx_error_icmp = true; +			else { +				do_tx_error_icmp = false; +				dst = addr6->s6_addr32[3]; +			} +			neigh_release(neigh); +			if (do_tx_error_icmp) +				goto tx_error_icmp; +		} +#endif +		else +			goto tx_error; +	} + +	tos = tnl_params->tos; +	if (tos & 0x1) { +		tos &= ~0x1; +		if (skb->protocol == htons(ETH_P_IP)) +			tos = inner_iph->tos; +		else if 
(skb->protocol == htons(ETH_P_IPV6)) +			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); +	} + +	rt = ip_route_output_tunnel(dev_net(dev), &fl4, +				    tunnel->parms.iph.protocol, +				    dst, tnl_params->saddr, +				    tunnel->parms.o_key, +				    RT_TOS(tos), +				    tunnel->parms.link); +	if (IS_ERR(rt)) { +		dev->stats.tx_carrier_errors++; +		goto tx_error; +	} +	tdev = rt->dst.dev; + +	if (tdev == dev) { +		ip_rt_put(rt); +		dev->stats.collisions++; +		goto tx_error; +	} + +	df = tnl_params->frag_off; + +	if (df) +		mtu = dst_mtu(&rt->dst) - dev->hard_header_len +					- sizeof(struct iphdr); +	else +		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + +	if (skb_dst(skb)) +		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + +	if (skb->protocol == htons(ETH_P_IP)) { +		df |= (inner_iph->frag_off&htons(IP_DF)); + +		if (!skb_is_gso(skb) && +		    (inner_iph->frag_off&htons(IP_DF)) && +		     mtu < ntohs(inner_iph->tot_len)) { +			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); +			ip_rt_put(rt); +			goto tx_error; +		} +	} +#if IS_ENABLED(CONFIG_IPV6) +	else if (skb->protocol == htons(ETH_P_IPV6)) { +		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); + +		if (rt6 && mtu < dst_mtu(skb_dst(skb)) && +		    mtu >= IPV6_MIN_MTU) { +			if ((tunnel->parms.iph.daddr && +			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) || +			    rt6->rt6i_dst.plen == 128) { +				rt6->rt6i_flags |= RTF_MODIFIED; +				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); +			} +		} + +		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && +		    mtu < skb->len) { +			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); +			ip_rt_put(rt); +			goto tx_error; +		} +	} +#endif + +	if (tunnel->err_count > 0) { +		if (time_before(jiffies, +				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { +			tunnel->err_count--; + +			dst_link_failure(skb); +		} else +			tunnel->err_count = 0; +	} + +	ttl = tnl_params->ttl; +	if (ttl == 0) { +		if (skb->protocol == htons(ETH_P_IP)) +			ttl = inner_iph->ttl; +#if IS_ENABLED(CONFIG_IPV6) +		else if (skb->protocol == htons(ETH_P_IPV6)) +			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; +#endif +		else +			ttl = ip4_dst_hoplimit(&rt->dst); +	} + +	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr) +					       + rt->dst.header_len; +	if (max_headroom > dev->needed_headroom) { +		dev->needed_headroom = max_headroom; +		if (skb_cow_head(skb, dev->needed_headroom)) { +			dev->stats.tx_dropped++; +			dev_kfree_skb(skb); +			return; +		} +	} + +	skb_dst_drop(skb); +	skb_dst_set(skb, &rt->dst); +	memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + +	/* Push down and install the IP header. 
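
In summary, each field of the outer IPv4 header installed below is derived as follows:

	/*
	 *   field      source
	 *   ---------  ----------------------------------------------------
	 *   tos        ip_tunnel_ecn_encap(tos, inner_iph, skb): configured
	 *              or inherited DSCP with the inner ECN bits folded in
	 *   frag_off   tnl_params->frag_off, OR'ed with the inner IP_DF
	 *              for IPv4 payloads
	 *   ttl        configured ttl; if 0, inherited from the inner ttl
	 *              or hop_limit, else ip4_dst_hoplimit() on the route
	 *   saddr,     fl4.saddr / fl4.daddr as resolved by
	 *   daddr      ip_route_output_tunnel()
	 *   protocol   tnl_params->protocol (IPPROTO_GRE for ip_gre)
	 */
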
*/ +	skb_push(skb, sizeof(struct iphdr)); +	skb_reset_network_header(skb); + +	iph = ip_hdr(skb); +	inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + +	iph->version	=	4; +	iph->ihl	=	sizeof(struct iphdr) >> 2; +	iph->frag_off	=	df; +	iph->protocol	=	tnl_params->protocol; +	iph->tos	=	ip_tunnel_ecn_encap(tos, inner_iph, skb); +	iph->daddr	=	fl4.daddr; +	iph->saddr	=	fl4.saddr; +	iph->ttl	=	ttl; +	tunnel_ip_select_ident(skb, inner_iph, &rt->dst); + +	iptunnel_xmit(skb, dev); +	return; + +#if IS_ENABLED(CONFIG_IPV6) +tx_error_icmp: +	dst_link_failure(skb); +#endif +tx_error: +	dev->stats.tx_errors++; +	dev_kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(ip_tunnel_xmit); + +static void ip_tunnel_update(struct ip_tunnel_net *itn, +			     struct ip_tunnel *t, +			     struct net_device *dev, +			     struct ip_tunnel_parm *p, +			     bool set_mtu) +{ +	ip_tunnel_del(t); +	t->parms.iph.saddr = p->iph.saddr; +	t->parms.iph.daddr = p->iph.daddr; +	t->parms.i_key = p->i_key; +	t->parms.o_key = p->o_key; +	if (dev->type != ARPHRD_ETHER) { +		memcpy(dev->dev_addr, &p->iph.saddr, 4); +		memcpy(dev->broadcast, &p->iph.daddr, 4); +	} +	ip_tunnel_add(itn, t); + +	t->parms.iph.ttl = p->iph.ttl; +	t->parms.iph.tos = p->iph.tos; +	t->parms.iph.frag_off = p->iph.frag_off; + +	if (t->parms.link != p->link) { +		int mtu; + +		t->parms.link = p->link; +		mtu = ip_tunnel_bind_dev(dev); +		if (set_mtu) +			dev->mtu = mtu; +	} +	netdev_state_change(dev); +} + +int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd) +{ +	int err = 0; +	struct ip_tunnel *t; +	struct net *net = dev_net(dev); +	struct ip_tunnel *tunnel = netdev_priv(dev); +	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); + +	BUG_ON(!itn->fb_tunnel_dev); +	switch (cmd) { +	case SIOCGETTUNNEL: +		t = NULL; +		if (dev == itn->fb_tunnel_dev) +			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); +		if (t == NULL) +			t = netdev_priv(dev); +		memcpy(p, &t->parms, sizeof(*p)); +		break; + +	case SIOCADDTUNNEL: +	case SIOCCHGTUNNEL: +		err = -EPERM; +		if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) +			goto done; +		if (p->iph.ttl) +			p->iph.frag_off |= htons(IP_DF); +		if (!(p->i_flags&TUNNEL_KEY)) +			p->i_key = 0; +		if (!(p->o_flags&TUNNEL_KEY)) +			p->o_key = 0; + +		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); + +		if (!t && (cmd == SIOCADDTUNNEL)) +			t = ip_tunnel_create(net, itn, p); + +		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { +			if (t != NULL) { +				if (t->dev != dev) { +					err = -EEXIST; +					break; +				} +			} else { +				unsigned int nflags = 0; + +				if (ipv4_is_multicast(p->iph.daddr)) +					nflags = IFF_BROADCAST; +				else if (p->iph.daddr) +					nflags = IFF_POINTOPOINT; + +				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) { +					err = -EINVAL; +					break; +				} + +				t = netdev_priv(dev); +			} +		} + +		if (t) { +			err = 0; +			ip_tunnel_update(itn, t, dev, p, true); +		} else +			err = (cmd == SIOCADDTUNNEL ? 
-ENOBUFS : -ENOENT); +		break; + +	case SIOCDELTUNNEL: +		err = -EPERM; +		if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) +			goto done; + +		if (dev == itn->fb_tunnel_dev) { +			err = -ENOENT; +			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); +			if (t == NULL) +				goto done; +			err = -EPERM; +			if (t == netdev_priv(itn->fb_tunnel_dev)) +				goto done; +			dev = t->dev; +		} +		unregister_netdevice(dev); +		err = 0; +		break; + +	default: +		err = -EINVAL; +	} + +done: +	return err; +} +EXPORT_SYMBOL_GPL(ip_tunnel_ioctl); + +int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +{ +	struct ip_tunnel *tunnel = netdev_priv(dev); +	int t_hlen = tunnel->hlen + sizeof(struct iphdr); + +	if (new_mtu < 68 || +	    new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen) +		return -EINVAL; +	dev->mtu = new_mtu; +	return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu); + +static void ip_tunnel_dev_free(struct net_device *dev) +{ +	struct ip_tunnel *tunnel = netdev_priv(dev); + +	gro_cells_destroy(&tunnel->gro_cells); +	free_percpu(dev->tstats); +	free_netdev(dev); +} + +void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) +{ +	struct net *net = dev_net(dev); +	struct ip_tunnel *tunnel = netdev_priv(dev); +	struct ip_tunnel_net *itn; + +	itn = net_generic(net, tunnel->ip_tnl_net_id); + +	if (itn->fb_tunnel_dev != dev) { +		ip_tunnel_del(netdev_priv(dev)); +		unregister_netdevice_queue(dev, head); +	} +} +EXPORT_SYMBOL_GPL(ip_tunnel_dellink); + +int __net_init ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, +				  struct rtnl_link_ops *ops, char *devname) +{ +	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id); +	struct ip_tunnel_parm parms; + +	itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL); +	if (!itn->tunnels) +		return -ENOMEM; + +	if (!ops) { +		itn->fb_tunnel_dev = NULL; +		return 0; +	} +	memset(&parms, 0, sizeof(parms)); +	if (devname) +		strlcpy(parms.name, devname, IFNAMSIZ); + +	rtnl_lock(); +	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms); +	rtnl_unlock(); +	if (IS_ERR(itn->fb_tunnel_dev)) { +		kfree(itn->tunnels); +		return PTR_ERR(itn->fb_tunnel_dev); +	} + +	return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_init_net); + +static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head) +{ +	int h; + +	for (h = 0; h < IP_TNL_HASH_SIZE; h++) { +		struct ip_tunnel *t; +		struct hlist_node *n; +		struct hlist_head *thead = &itn->tunnels[h]; + +		hlist_for_each_entry_safe(t, n, thead, hash_node) +			unregister_netdevice_queue(t->dev, head); +	} +	if (itn->fb_tunnel_dev) +		unregister_netdevice_queue(itn->fb_tunnel_dev, head); +} + +void __net_exit ip_tunnel_delete_net(struct ip_tunnel_net *itn) +{ +	LIST_HEAD(list); + +	rtnl_lock(); +	ip_tunnel_destroy(itn, &list); +	unregister_netdevice_many(&list); +	rtnl_unlock(); +	kfree(itn->tunnels); +} +EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); + +int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], +		      struct ip_tunnel_parm *p) +{ +	struct ip_tunnel *nt; +	struct net *net = dev_net(dev); +	struct ip_tunnel_net *itn; +	int mtu; +	int err; + +	nt = netdev_priv(dev); +	itn = net_generic(net, nt->ip_tnl_net_id); + +	if (ip_tunnel_find(itn, p, dev->type)) +		return -EEXIST; + +	nt->parms = *p; +	err = register_netdevice(dev); +	if (err) +		goto out; + +	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) +		eth_hw_addr_random(dev); + +	mtu = ip_tunnel_bind_dev(dev); +	if (!tb[IFLA_MTU]) +		dev->mtu = mtu; + +	ip_tunnel_add(itn, nt); + +out: 
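
The limits in ip_tunnel_change_mtu() above are the usual IPv4 tunnel bounds; a worked check, assuming a keyed GRE device (t_hlen 28, hard_header_len 0):

	/*
	 *   lower bound: 68, the minimum MTU every IPv4 link must support
	 *                (60-byte header + 8 bytes of payload, RFC 791)
	 *   upper bound: 0xFFF8 - hard_header_len - t_hlen
	 *              = 65528 - 0 - 28 = 65500
	 *
	 * 0xFFF8 keeps the outer tot_len within 16 bits while leaving
	 * 8 bytes of slack below the 65535 limit.
	 */
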
+	return err; +} +EXPORT_SYMBOL_GPL(ip_tunnel_newlink); + +int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], +			 struct ip_tunnel_parm *p) +{ +	struct ip_tunnel *t, *nt; +	struct net *net = dev_net(dev); +	struct ip_tunnel *tunnel = netdev_priv(dev); +	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); + +	if (dev == itn->fb_tunnel_dev) +		return -EINVAL; + +	nt = netdev_priv(dev); + +	t = ip_tunnel_find(itn, p, dev->type); + +	if (t) { +		if (t->dev != dev) +			return -EEXIST; +	} else { +		t = nt; + +		if (dev->type != ARPHRD_ETHER) { +			unsigned int nflags = 0; + +			if (ipv4_is_multicast(p->iph.daddr)) +				nflags = IFF_BROADCAST; +			else if (p->iph.daddr) +				nflags = IFF_POINTOPOINT; + +			if ((dev->flags ^ nflags) & +			    (IFF_POINTOPOINT | IFF_BROADCAST)) +				return -EINVAL; +		} +	} + +	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]); +	return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_changelink); + +int ip_tunnel_init(struct net_device *dev) +{ +	struct ip_tunnel *tunnel = netdev_priv(dev); +	struct iphdr *iph = &tunnel->parms.iph; +	int err; + +	dev->destructor	= ip_tunnel_dev_free; +	dev->tstats = alloc_percpu(struct pcpu_tstats); +	if (!dev->tstats) +		return -ENOMEM; + +	err = gro_cells_init(&tunnel->gro_cells, dev); +	if (err) { +		free_percpu(dev->tstats); +		return err; +	} + +	tunnel->dev = dev; +	strcpy(tunnel->parms.name, dev->name); +	iph->version		= 4; +	iph->ihl		= 5; + +	return 0; +} +EXPORT_SYMBOL_GPL(ip_tunnel_init); + +void ip_tunnel_uninit(struct net_device *dev) +{ +	struct net *net = dev_net(dev); +	struct ip_tunnel *tunnel = netdev_priv(dev); +	struct ip_tunnel_net *itn; + +	itn = net_generic(net, tunnel->ip_tnl_net_id); +	/* fb_tunnel_dev will be unregisted in net-exit call. */ +	if (itn->fb_tunnel_dev != dev) +		ip_tunnel_del(netdev_priv(dev)); +} +EXPORT_SYMBOL_GPL(ip_tunnel_uninit); + +/* Do least required initialization, rest of init is done in tunnel_init call */ +void ip_tunnel_setup(struct net_device *dev, int net_id) +{ +	struct ip_tunnel *tunnel = netdev_priv(dev); +	tunnel->ip_tnl_net_id = net_id; +} +EXPORT_SYMBOL_GPL(ip_tunnel_setup); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index c3a4233c0ac..9d2bdb2c1d3 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -38,7 +38,7 @@  #include <net/sock.h>  #include <net/ip.h>  #include <net/icmp.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h>  #include <net/inet_ecn.h>  #include <net/xfrm.h>  #include <net/net_namespace.h> @@ -82,44 +82,6 @@ static int vti_tunnel_bind_dev(struct net_device *dev);  } while (0) -static struct rtnl_link_stats64 *vti_get_stats64(struct net_device *dev, -						 struct rtnl_link_stats64 *tot) -{ -	int i; - -	for_each_possible_cpu(i) { -		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); -		u64 rx_packets, rx_bytes, tx_packets, tx_bytes; -		unsigned int start; - -		do { -			start = u64_stats_fetch_begin_bh(&tstats->syncp); -			rx_packets = tstats->rx_packets; -			tx_packets = tstats->tx_packets; -			rx_bytes = tstats->rx_bytes; -			tx_bytes = tstats->tx_bytes; -		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - -		tot->rx_packets += rx_packets; -		tot->tx_packets += tx_packets; -		tot->rx_bytes   += rx_bytes; -		tot->tx_bytes   += tx_bytes; -	} - -	tot->multicast = dev->stats.multicast; -	tot->rx_crc_errors = dev->stats.rx_crc_errors; -	tot->rx_fifo_errors = dev->stats.rx_fifo_errors; -	tot->rx_length_errors = dev->stats.rx_length_errors; -	tot->rx_errors = 
dev->stats.rx_errors; -	tot->tx_fifo_errors = dev->stats.tx_fifo_errors; -	tot->tx_carrier_errors = dev->stats.tx_carrier_errors; -	tot->tx_dropped = dev->stats.tx_dropped; -	tot->tx_aborted_errors = dev->stats.tx_aborted_errors; -	tot->tx_errors = dev->stats.tx_errors; - -	return tot; -} -  static struct ip_tunnel *vti_tunnel_lookup(struct net *net,  					   __be32 remote, __be32 local)  { @@ -597,7 +559,7 @@ static const struct net_device_ops vti_netdev_ops = {  	.ndo_start_xmit	= vti_tunnel_xmit,  	.ndo_do_ioctl	= vti_tunnel_ioctl,  	.ndo_change_mtu	= vti_tunnel_change_mtu, -	.ndo_get_stats64 = vti_get_stats64, +	.ndo_get_stats64 = ip_tunnel_get_stats64,  };  static void vti_dev_free(struct net_device *dev) diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index f01d1b1aff7..59cb8c76905 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -75,6 +75,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)  	t->props.mode = x->props.mode;  	t->props.saddr.a4 = x->props.saddr.a4;  	t->props.flags = x->props.flags; +	t->props.extra_flags = x->props.extra_flags;  	memcpy(&t->mark, &x->mark, sizeof(t->mark));  	if (xfrm_init_state(t)) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index bf6c5cf31ae..efa1138fa52 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -206,7 +206,7 @@ static int __init ic_open_devs(void)  	struct ic_device *d, **last;  	struct net_device *dev;  	unsigned short oflags; -	unsigned long start; +	unsigned long start, next_msg;  	last = &ic_first_dev;  	rtnl_lock(); @@ -263,12 +263,23 @@ static int __init ic_open_devs(void)  	/* wait for a carrier on at least one device */  	start = jiffies; +	next_msg = start + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12);  	while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) { +		int wait, elapsed; +  		for_each_netdev(&init_net, dev)  			if (ic_is_init_dev(dev) && netif_carrier_ok(dev))  				goto have_carrier;  		msleep(1); + +		if time_before(jiffies, next_msg) +			continue; + +		elapsed = jiffies_to_msecs(jiffies - start); +		wait = (CONF_CARRIER_TIMEOUT - elapsed + 500)/1000; +		pr_info("Waiting up to %d more seconds for network.\n", wait); +		next_msg = jiffies + msecs_to_jiffies(CONF_CARRIER_TIMEOUT/12);  	}  have_carrier:  	rtnl_unlock(); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 8f024d41eef..77bfcce64fe 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -111,227 +111,21 @@  #include <net/sock.h>  #include <net/ip.h>  #include <net/icmp.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h>  #include <net/inet_ecn.h>  #include <net/xfrm.h>  #include <net/net_namespace.h>  #include <net/netns/generic.h> -#define HASH_SIZE  16 -#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) -  static bool log_ecn_error = true;  module_param(log_ecn_error, bool, 0644);  MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");  static int ipip_net_id __read_mostly; -struct ipip_net { -	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; -	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE]; -	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE]; -	struct ip_tunnel __rcu *tunnels_wc[1]; -	struct ip_tunnel __rcu **tunnels[4]; - -	struct net_device *fb_tunnel_dev; -};  static int ipip_tunnel_init(struct net_device *dev); -static void ipip_tunnel_setup(struct net_device *dev); -static void ipip_dev_free(struct net_device *dev);  static struct rtnl_link_ops ipip_link_ops __read_mostly; -static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev, -						  
struct rtnl_link_stats64 *tot) -{ -	int i; - -	for_each_possible_cpu(i) { -		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); -		u64 rx_packets, rx_bytes, tx_packets, tx_bytes; -		unsigned int start; - -		do { -			start = u64_stats_fetch_begin_bh(&tstats->syncp); -			rx_packets = tstats->rx_packets; -			tx_packets = tstats->tx_packets; -			rx_bytes = tstats->rx_bytes; -			tx_bytes = tstats->tx_bytes; -		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - -		tot->rx_packets += rx_packets; -		tot->tx_packets += tx_packets; -		tot->rx_bytes   += rx_bytes; -		tot->tx_bytes   += tx_bytes; -	} - -	tot->tx_fifo_errors = dev->stats.tx_fifo_errors; -	tot->tx_carrier_errors = dev->stats.tx_carrier_errors; -	tot->tx_dropped = dev->stats.tx_dropped; -	tot->tx_aborted_errors = dev->stats.tx_aborted_errors; -	tot->tx_errors = dev->stats.tx_errors; -	tot->collisions = dev->stats.collisions; - -	return tot; -} - -static struct ip_tunnel *ipip_tunnel_lookup(struct net *net, -		__be32 remote, __be32 local) -{ -	unsigned int h0 = HASH(remote); -	unsigned int h1 = HASH(local); -	struct ip_tunnel *t; -	struct ipip_net *ipn = net_generic(net, ipip_net_id); - -	for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1]) -		if (local == t->parms.iph.saddr && -		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) -			return t; - -	for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0]) -		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) -			return t; - -	for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1]) -		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) -			return t; - -	t = rcu_dereference(ipn->tunnels_wc[0]); -	if (t && (t->dev->flags&IFF_UP)) -		return t; -	return NULL; -} - -static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn, -		struct ip_tunnel_parm *parms) -{ -	__be32 remote = parms->iph.daddr; -	__be32 local = parms->iph.saddr; -	unsigned int h = 0; -	int prio = 0; - -	if (remote) { -		prio |= 2; -		h ^= HASH(remote); -	} -	if (local) { -		prio |= 1; -		h ^= HASH(local); -	} -	return &ipn->tunnels[prio][h]; -} - -static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn, -		struct ip_tunnel *t) -{ -	return __ipip_bucket(ipn, &t->parms); -} - -static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) -{ -	struct ip_tunnel __rcu **tp; -	struct ip_tunnel *iter; - -	for (tp = ipip_bucket(ipn, t); -	     (iter = rtnl_dereference(*tp)) != NULL; -	     tp = &iter->next) { -		if (t == iter) { -			rcu_assign_pointer(*tp, t->next); -			break; -		} -	} -} - -static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) -{ -	struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t); - -	rcu_assign_pointer(t->next, rtnl_dereference(*tp)); -	rcu_assign_pointer(*tp, t); -} - -static int ipip_tunnel_create(struct net_device *dev) -{ -	struct ip_tunnel *t = netdev_priv(dev); -	struct net *net = dev_net(dev); -	struct ipip_net *ipn = net_generic(net, ipip_net_id); -	int err; - -	err = ipip_tunnel_init(dev); -	if (err < 0) -		goto out; - -	err = register_netdevice(dev); -	if (err < 0) -		goto out; - -	strcpy(t->parms.name, dev->name); -	dev->rtnl_link_ops = &ipip_link_ops; - -	dev_hold(dev); -	ipip_tunnel_link(ipn, t); -	return 0; - -out: -	return err; -} - -static struct ip_tunnel *ipip_tunnel_locate(struct net *net, -		struct ip_tunnel_parm *parms, int create) -{ -	__be32 remote = parms->iph.daddr; -	__be32 local = parms->iph.saddr; -	struct ip_tunnel *t, *nt; -	struct ip_tunnel __rcu **tp; -	struct net_device *dev; -	char name[IFNAMSIZ]; -	
struct ipip_net *ipn = net_generic(net, ipip_net_id); - -	for (tp = __ipip_bucket(ipn, parms); -		 (t = rtnl_dereference(*tp)) != NULL; -		 tp = &t->next) { -		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) -			return t; -	} -	if (!create) -		return NULL; - -	if (parms->name[0]) -		strlcpy(name, parms->name, IFNAMSIZ); -	else -		strcpy(name, "tunl%d"); - -	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup); -	if (dev == NULL) -		return NULL; - -	dev_net_set(dev, net); - -	nt = netdev_priv(dev); -	nt->parms = *parms; - -	if (ipip_tunnel_create(dev) < 0) -		goto failed_free; - -	return nt; - -failed_free: -	ipip_dev_free(dev); -	return NULL; -} - -/* called with RTNL */ -static void ipip_tunnel_uninit(struct net_device *dev) -{ -	struct net *net = dev_net(dev); -	struct ipip_net *ipn = net_generic(net, ipip_net_id); - -	if (dev == ipn->fb_tunnel_dev) -		RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL); -	else -		ipip_tunnel_unlink(ipn, netdev_priv(dev)); -	dev_put(dev); -} -  static int ipip_err(struct sk_buff *skb, u32 info)  { @@ -339,41 +133,17 @@ static int ipip_err(struct sk_buff *skb, u32 info)     8 bytes of packet payload. It means, that precise relaying of     ICMP in the real Internet is absolutely infeasible.   */ +	struct net *net = dev_net(skb->dev); +	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);  	const struct iphdr *iph = (const struct iphdr *)skb->data; -	const int type = icmp_hdr(skb)->type; -	const int code = icmp_hdr(skb)->code;  	struct ip_tunnel *t;  	int err; - -	switch (type) { -	default: -	case ICMP_PARAMETERPROB: -		return 0; - -	case ICMP_DEST_UNREACH: -		switch (code) { -		case ICMP_SR_FAILED: -		case ICMP_PORT_UNREACH: -			/* Impossible event. */ -			return 0; -		default: -			/* All others are translated to HOST_UNREACH. -			   rfc2003 contains "deep thoughts" about NET_UNREACH, -			   I believe they are just ether pollution. --ANK -			 */ -			break; -		} -		break; -	case ICMP_TIME_EXCEEDED: -		if (code != ICMP_EXC_TTL) -			return 0; -		break; -	case ICMP_REDIRECT: -		break; -	} +	const int type = icmp_hdr(skb)->type; +	const int code = icmp_hdr(skb)->code;  	err = -ENOENT; -	t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); +	t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, +			     iph->daddr, iph->saddr, 0);  	if (t == NULL)  		goto out; @@ -403,53 +173,29 @@ static int ipip_err(struct sk_buff *skb, u32 info)  	else  		t->err_count = 1;  	t->err_time = jiffies; -out: +out:  	return err;  } +static const struct tnl_ptk_info tpi = { +	/* no tunnel info required for ipip. 
*/ +	.proto = htons(ETH_P_IP), +}; +  static int ipip_rcv(struct sk_buff *skb)  { +	struct net *net = dev_net(skb->dev); +	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);  	struct ip_tunnel *tunnel;  	const struct iphdr *iph = ip_hdr(skb); -	int err; - -	tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); -	if (tunnel != NULL) { -		struct pcpu_tstats *tstats; +	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, +			iph->saddr, iph->daddr, 0); +	if (tunnel) {  		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))  			goto drop; - -		secpath_reset(skb); - -		skb->mac_header = skb->network_header; -		skb_reset_network_header(skb); -		skb->protocol = htons(ETH_P_IP); -		skb->pkt_type = PACKET_HOST; - -		__skb_tunnel_rx(skb, tunnel->dev); - -		err = IP_ECN_decapsulate(iph, skb); -		if (unlikely(err)) { -			if (log_ecn_error) -				net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", -						     &iph->saddr, iph->tos); -			if (err > 1) { -				++tunnel->dev->stats.rx_frame_errors; -				++tunnel->dev->stats.rx_errors; -				goto drop; -			} -		} - -		tstats = this_cpu_ptr(tunnel->dev->tstats); -		u64_stats_update_begin(&tstats->syncp); -		tstats->rx_packets++; -		tstats->rx_bytes += skb->len; -		u64_stats_update_end(&tstats->syncp); - -		netif_rx(skb); -		return 0; +		return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);  	}  	return -1; @@ -463,329 +209,64 @@ drop:   *	This function assumes it is being called from dev_queue_xmit()   *	and that skb is filled properly by that function.   */ -  static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)  {  	struct ip_tunnel *tunnel = netdev_priv(dev);  	const struct iphdr  *tiph = &tunnel->parms.iph; -	u8     tos = tunnel->parms.iph.tos; -	__be16 df = tiph->frag_off; -	struct rtable *rt;     			/* Route to the other host */ -	struct net_device *tdev;		/* Device to other host */ -	const struct iphdr  *old_iph; -	struct iphdr  *iph;			/* Our new IP header */ -	unsigned int max_headroom;		/* The extra header space needed */ -	__be32 dst = tiph->daddr; -	struct flowi4 fl4; -	int    mtu; - -	if (skb->protocol != htons(ETH_P_IP)) -		goto tx_error; - -	if (skb->ip_summed == CHECKSUM_PARTIAL && -	    skb_checksum_help(skb)) -		goto tx_error; - -	old_iph = ip_hdr(skb); - -	if (tos & 1) -		tos = old_iph->tos; - -	if (!dst) { -		/* NBMA tunnel */ -		if ((rt = skb_rtable(skb)) == NULL) { -			dev->stats.tx_fifo_errors++; -			goto tx_error; -		} -		dst = rt_nexthop(rt, old_iph->daddr); -	} - -	rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, -				   dst, tiph->saddr, -				   0, 0, -				   IPPROTO_IPIP, RT_TOS(tos), -				   tunnel->parms.link); -	if (IS_ERR(rt)) { -		dev->stats.tx_carrier_errors++; -		goto tx_error_icmp; -	} -	tdev = rt->dst.dev; -	if (tdev == dev) { -		ip_rt_put(rt); -		dev->stats.collisions++; +	if (unlikely(skb->protocol != htons(ETH_P_IP)))  		goto tx_error; -	} - -	df |= old_iph->frag_off & htons(IP_DF); - -	if (df) { -		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); - -		if (mtu < 68) { -			dev->stats.collisions++; -			ip_rt_put(rt); -			goto tx_error; -		} - -		if (skb_dst(skb)) -			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - -		if ((old_iph->frag_off & htons(IP_DF)) && -		    mtu < ntohs(old_iph->tot_len)) { -			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, -				  htonl(mtu)); -			ip_rt_put(rt); -			goto tx_error; -		} -	} - -	if (tunnel->err_count > 0) { -		if (time_before(jiffies, -				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { -			
tunnel->err_count--; -			dst_link_failure(skb); -		} else -			tunnel->err_count = 0; -	} - -	/* -	 * Okay, now see if we can stuff it in the buffer as-is. -	 */ -	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr)); -	if (skb_headroom(skb) < max_headroom || skb_shared(skb) || -	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { -		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); -		if (!new_skb) { -			ip_rt_put(rt); -			dev->stats.tx_dropped++; -			dev_kfree_skb(skb); -			return NETDEV_TX_OK; -		} -		if (skb->sk) -			skb_set_owner_w(new_skb, skb->sk); -		dev_kfree_skb(skb); -		skb = new_skb; -		old_iph = ip_hdr(skb); +	if (likely(!skb->encapsulation)) { +		skb_reset_inner_headers(skb); +		skb->encapsulation = 1;  	} -	skb->transport_header = skb->network_header; -	skb_push(skb, sizeof(struct iphdr)); -	skb_reset_network_header(skb); -	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); -	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | -			      IPSKB_REROUTED); -	skb_dst_drop(skb); -	skb_dst_set(skb, &rt->dst); - -	/* -	 *	Push down and install the IPIP header. -	 */ - -	iph 			=	ip_hdr(skb); -	iph->version		=	4; -	iph->ihl		=	sizeof(struct iphdr)>>2; -	iph->frag_off		=	df; -	iph->protocol		=	IPPROTO_IPIP; -	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos); -	iph->daddr		=	fl4.daddr; -	iph->saddr		=	fl4.saddr; - -	if ((iph->ttl = tiph->ttl) == 0) -		iph->ttl	=	old_iph->ttl; - -	iptunnel_xmit(skb, dev); +	ip_tunnel_xmit(skb, dev, tiph);  	return NETDEV_TX_OK; -tx_error_icmp: -	dst_link_failure(skb);  tx_error:  	dev->stats.tx_errors++;  	dev_kfree_skb(skb);  	return NETDEV_TX_OK;  } -static void ipip_tunnel_bind_dev(struct net_device *dev) -{ -	struct net_device *tdev = NULL; -	struct ip_tunnel *tunnel; -	const struct iphdr *iph; - -	tunnel = netdev_priv(dev); -	iph = &tunnel->parms.iph; - -	if (iph->daddr) { -		struct rtable *rt; -		struct flowi4 fl4; - -		rt = ip_route_output_ports(dev_net(dev), &fl4, NULL, -					   iph->daddr, iph->saddr, -					   0, 0, -					   IPPROTO_IPIP, -					   RT_TOS(iph->tos), -					   tunnel->parms.link); -		if (!IS_ERR(rt)) { -			tdev = rt->dst.dev; -			ip_rt_put(rt); -		} -		dev->flags |= IFF_POINTOPOINT; -	} - -	if (!tdev && tunnel->parms.link) -		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); - -	if (tdev) { -		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); -		dev->mtu = tdev->mtu - sizeof(struct iphdr); -	} -	dev->iflink = tunnel->parms.link; -} - -static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) -{ -	struct net *net = dev_net(t->dev); -	struct ipip_net *ipn = net_generic(net, ipip_net_id); - -	ipip_tunnel_unlink(ipn, t); -	synchronize_net(); -	t->parms.iph.saddr = p->iph.saddr; -	t->parms.iph.daddr = p->iph.daddr; -	memcpy(t->dev->dev_addr, &p->iph.saddr, 4); -	memcpy(t->dev->broadcast, &p->iph.daddr, 4); -	ipip_tunnel_link(ipn, t); -	t->parms.iph.ttl = p->iph.ttl; -	t->parms.iph.tos = p->iph.tos; -	t->parms.iph.frag_off = p->iph.frag_off; -	if (t->parms.link != p->link) { -		t->parms.link = p->link; -		ipip_tunnel_bind_dev(t->dev); -	} -	netdev_state_change(t->dev); -} -  static int -ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) +ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)  {  	int err = 0;  	struct ip_tunnel_parm p; -	struct ip_tunnel *t; -	struct net *net = dev_net(dev); -	struct ipip_net *ipn = net_generic(net, ipip_net_id); - -	switch (cmd) { -	case SIOCGETTUNNEL: -		t = 
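Stripped of the routing, PMTU and headroom handling, the hand-rolled encapsulation this hunk deletes (and that the shared ip_tunnel_xmit() now performs for every IP tunnel type) reduces to prepending and filling one outer IPv4 header, using the names from the deleted code:

	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	iph           = ip_hdr(skb);
	iph->version  = 4;
	iph->ihl      = sizeof(struct iphdr) >> 2;
	iph->frag_off = df;
	iph->protocol = IPPROTO_IPIP;
	iph->tos      = INET_ECN_encapsulate(tos, old_iph->tos);
	iph->saddr    = fl4.saddr;
	iph->daddr    = fl4.daddr;
	iph->ttl      = tiph->ttl ? : old_iph->ttl;	/* inherit the inner TTL if unset */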
NULL; -		if (dev == ipn->fb_tunnel_dev) { -			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { -				err = -EFAULT; -				break; -			} -			t = ipip_tunnel_locate(net, &p, 0); -		} -		if (t == NULL) -			t = netdev_priv(dev); -		memcpy(&p, &t->parms, sizeof(p)); -		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) -			err = -EFAULT; -		break; - -	case SIOCADDTUNNEL: -	case SIOCCHGTUNNEL: -		err = -EPERM; -		if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) -			goto done; - -		err = -EFAULT; -		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) -			goto done; - -		err = -EINVAL; -		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || -		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) -			goto done; -		if (p.iph.ttl) -			p.iph.frag_off |= htons(IP_DF); -		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); +	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) +		return -EFAULT; -		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { -			if (t != NULL) { -				if (t->dev != dev) { -					err = -EEXIST; -					break; -				} -			} else { -				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || -				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { -					err = -EINVAL; -					break; -				} -				t = netdev_priv(dev); -			} - -			ipip_tunnel_update(t, &p); -		} - -		if (t) { -			err = 0; -			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) -				err = -EFAULT; -		} else -			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); -		break; - -	case SIOCDELTUNNEL: -		err = -EPERM; -		if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) -			goto done; - -		if (dev == ipn->fb_tunnel_dev) { -			err = -EFAULT; -			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) -				goto done; -			err = -ENOENT; -			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL) -				goto done; -			err = -EPERM; -			if (t->dev == ipn->fb_tunnel_dev) -				goto done; -			dev = t->dev; -		} -		unregister_netdevice(dev); -		err = 0; -		break; +	if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || +			p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) +		return -EINVAL; +	if (p.i_key || p.o_key || p.i_flags || p.o_flags) +		return -EINVAL; +	if (p.iph.ttl) +		p.iph.frag_off |= htons(IP_DF); -	default: -		err = -EINVAL; -	} +	err = ip_tunnel_ioctl(dev, &p, cmd); +	if (err) +		return err; -done: -	return err; -} +	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) +		return -EFAULT; -static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) -{ -	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) -		return -EINVAL; -	dev->mtu = new_mtu;  	return 0;  }  static const struct net_device_ops ipip_netdev_ops = { -	.ndo_uninit	= ipip_tunnel_uninit, +	.ndo_init       = ipip_tunnel_init, +	.ndo_uninit     = ip_tunnel_uninit,  	.ndo_start_xmit	= ipip_tunnel_xmit,  	.ndo_do_ioctl	= ipip_tunnel_ioctl, -	.ndo_change_mtu	= ipip_tunnel_change_mtu, -	.ndo_get_stats64 = ipip_get_stats64, +	.ndo_change_mtu = ip_tunnel_change_mtu, +	.ndo_get_stats64 = ip_tunnel_get_stats64,  }; -static void ipip_dev_free(struct net_device *dev) -{ -	free_percpu(dev->tstats); -	free_netdev(dev); -} -  #define IPIP_FEATURES (NETIF_F_SG |		\  		       NETIF_F_FRAGLIST |	\  		       NETIF_F_HIGHDMA |	\ @@ -794,11 +275,8 @@ static void ipip_dev_free(struct net_device *dev)  static void ipip_tunnel_setup(struct net_device *dev)  {  	dev->netdev_ops		= &ipip_netdev_ops; -	dev->destructor		= ipip_dev_free;  	dev->type		= ARPHRD_TUNNEL; -	dev->hard_header_len 	= LL_MAX_HEADER + 
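The ioctl contract is unchanged even though the kernel side now delegates to ip_tunnel_ioctl(): userspace still passes a struct ip_tunnel_parm through ifr_ifru.ifru_data. A minimal, hypothetical user-space caller querying tunl0 (requires the ipip module to be loaded):

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <linux/if.h>
	#include <linux/if_tunnel.h>

	int main(void)
	{
		struct ip_tunnel_parm p;
		struct ifreq ifr;
		int fd = socket(AF_INET, SOCK_DGRAM, 0);

		if (fd < 0)
			return 1;
		memset(&p, 0, sizeof(p));
		memset(&ifr, 0, sizeof(ifr));
		strncpy(ifr.ifr_name, "tunl0", IFNAMSIZ - 1);
		ifr.ifr_ifru.ifru_data = (void *)&p;

		if (ioctl(fd, SIOCGETTUNNEL, &ifr) == 0)
			printf("%s: proto %u ttl %u\n", p.name, p.iph.protocol, p.iph.ttl);
		close(fd);
		return 0;
	}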
sizeof(struct iphdr); -	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);  	dev->flags		= IFF_NOARP;  	dev->iflink		= 0;  	dev->addr_len		= 4; @@ -808,46 +286,19 @@ static void ipip_tunnel_setup(struct net_device *dev)  	dev->features		|= IPIP_FEATURES;  	dev->hw_features	|= IPIP_FEATURES; +	ip_tunnel_setup(dev, ipip_net_id);  }  static int ipip_tunnel_init(struct net_device *dev)  {  	struct ip_tunnel *tunnel = netdev_priv(dev); -	tunnel->dev = dev; -  	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);  	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); -	ipip_tunnel_bind_dev(dev); - -	dev->tstats = alloc_percpu(struct pcpu_tstats); -	if (!dev->tstats) -		return -ENOMEM; - -	return 0; -} - -static int __net_init ipip_fb_tunnel_init(struct net_device *dev) -{ -	struct ip_tunnel *tunnel = netdev_priv(dev); -	struct iphdr *iph = &tunnel->parms.iph; -	struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id); - -	tunnel->dev = dev; -	strcpy(tunnel->parms.name, dev->name); - -	iph->version		= 4; -	iph->protocol		= IPPROTO_IPIP; -	iph->ihl		= 5; - -	dev->tstats = alloc_percpu(struct pcpu_tstats); -	if (!dev->tstats) -		return -ENOMEM; - -	dev_hold(dev); -	rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); -	return 0; +	tunnel->hlen = 0; +	tunnel->parms.iph.protocol = IPPROTO_IPIP; +	return ip_tunnel_init(dev);  }  static void ipip_netlink_parms(struct nlattr *data[], @@ -887,28 +338,16 @@ static void ipip_netlink_parms(struct nlattr *data[],  static int ipip_newlink(struct net *src_net, struct net_device *dev,  			struct nlattr *tb[], struct nlattr *data[])  { -	struct net *net = dev_net(dev); -	struct ip_tunnel *nt; - -	nt = netdev_priv(dev); -	ipip_netlink_parms(data, &nt->parms); - -	if (ipip_tunnel_locate(net, &nt->parms, 0)) -		return -EEXIST; +	struct ip_tunnel_parm p; -	return ipip_tunnel_create(dev); +	ipip_netlink_parms(data, &p); +	return ip_tunnel_newlink(dev, tb, &p);  }  static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],  			   struct nlattr *data[])  { -	struct ip_tunnel *t;  	struct ip_tunnel_parm p; -	struct net *net = dev_net(dev); -	struct ipip_net *ipn = net_generic(net, ipip_net_id); - -	if (dev == ipn->fb_tunnel_dev) -		return -EINVAL;  	ipip_netlink_parms(data, &p); @@ -916,16 +355,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],  	    (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))  		return -EINVAL; -	t = ipip_tunnel_locate(net, &p, 0); - -	if (t) { -		if (t->dev != dev) -			return -EEXIST; -	} else -		t = netdev_priv(dev); - -	ipip_tunnel_update(t, &p); -	return 0; +	return ip_tunnel_changelink(dev, tb, &p);  }  static size_t ipip_get_size(const struct net_device *dev) @@ -982,6 +412,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = {  	.setup		= ipip_tunnel_setup,  	.newlink	= ipip_newlink,  	.changelink	= ipip_changelink, +	.dellink	= ip_tunnel_dellink,  	.get_size	= ipip_get_size,  	.fill_info	= ipip_fill_info,  }; @@ -992,90 +423,29 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {  	.priority	=	1,  }; -static const char banner[] __initconst = -	KERN_INFO "IPv4 over IPv4 tunneling driver\n"; - -static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) -{ -	int prio; - -	for (prio = 1; prio < 4; prio++) { -		int h; -		for (h = 0; h < HASH_SIZE; h++) { -			struct ip_tunnel *t; - -			t = rtnl_dereference(ipn->tunnels[prio][h]); -			while (t != NULL) { -				unregister_netdevice_queue(t->dev, head); -				t = rtnl_dereference(t->next); -			} -		} -	} -} -  static int 
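Configuration is likewise unaffected by folding the setup/init logic into ip_tunnel_setup()/ip_tunnel_init(): an ipip device is still created either through the legacy ioctl path ("ip tunnel add tunl1 mode ipip remote 192.0.2.2 local 192.0.2.1", addresses illustrative) or, with an iproute2 new enough to know the link type, through the rtnl_link ops registered below ("ip link add tunl1 type ipip"); both now funnel into the same shared newlink/ioctl code.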
__net_init ipip_init_net(struct net *net)  { -	struct ipip_net *ipn = net_generic(net, ipip_net_id); -	struct ip_tunnel *t; -	int err; - -	ipn->tunnels[0] = ipn->tunnels_wc; -	ipn->tunnels[1] = ipn->tunnels_l; -	ipn->tunnels[2] = ipn->tunnels_r; -	ipn->tunnels[3] = ipn->tunnels_r_l; - -	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), -					   "tunl0", -					   ipip_tunnel_setup); -	if (!ipn->fb_tunnel_dev) { -		err = -ENOMEM; -		goto err_alloc_dev; -	} -	dev_net_set(ipn->fb_tunnel_dev, net); - -	err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev); -	if (err) -		goto err_reg_dev; - -	if ((err = register_netdev(ipn->fb_tunnel_dev))) -		goto err_reg_dev; - -	t = netdev_priv(ipn->fb_tunnel_dev); - -	strcpy(t->parms.name, ipn->fb_tunnel_dev->name); -	return 0; - -err_reg_dev: -	ipip_dev_free(ipn->fb_tunnel_dev); -err_alloc_dev: -	/* nothing */ -	return err; +	return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");  }  static void __net_exit ipip_exit_net(struct net *net)  { -	struct ipip_net *ipn = net_generic(net, ipip_net_id); -	LIST_HEAD(list); - -	rtnl_lock(); -	ipip_destroy_tunnels(ipn, &list); -	unregister_netdevice_queue(ipn->fb_tunnel_dev, &list); -	unregister_netdevice_many(&list); -	rtnl_unlock(); +	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); +	ip_tunnel_delete_net(itn);  }  static struct pernet_operations ipip_net_ops = {  	.init = ipip_init_net,  	.exit = ipip_exit_net,  	.id   = &ipip_net_id, -	.size = sizeof(struct ipip_net), +	.size = sizeof(struct ip_tunnel_net),  };  static int __init ipip_init(void)  {  	int err; -	printk(banner); +	pr_info("ipip: IPv4 over IPv4 tunneling driver\n");  	err = register_pernet_device(&ipip_net_ops);  	if (err < 0) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5f95b3aa579..9d9610ae785 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -61,7 +61,7 @@  #include <linux/netfilter_ipv4.h>  #include <linux/compat.h>  #include <linux/export.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h>  #include <net/checksum.h>  #include <net/netlink.h>  #include <net/fib_rules.h> @@ -626,9 +626,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)  		if (ip_hdr(skb)->version == 0) {  			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));  			nlh->nlmsg_type = NLMSG_ERROR; -			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); +			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));  			skb_trim(skb, nlh->nlmsg_len); -			e = NLMSG_DATA(nlh); +			e = nlmsg_data(nlh);  			e->error = -ETIMEDOUT;  			memset(&e->msg, 0, sizeof(e->msg)); @@ -910,14 +910,14 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,  		if (ip_hdr(skb)->version == 0) {  			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); -			if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { +			if (__ipmr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {  				nlh->nlmsg_len = skb_tail_pointer(skb) -  						 (u8 *)nlh;  			} else {  				nlh->nlmsg_type = NLMSG_ERROR; -				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); +				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));  				skb_trim(skb, nlh->nlmsg_len); -				e = NLMSG_DATA(nlh); +				e = nlmsg_data(nlh);  				e->error = -EMSGSIZE;  				memset(&e->msg, 0, sizeof(e->msg));  			} diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 4c0cf63dd92..c3e0adea9c2 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -1,4 +1,9 @@ -/* IPv4 specific functions of 
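The pernet conversion above is the standard net_generic pattern: the core allocates .size bytes of zeroed per-namespace storage and hands the subsystem an id to find it again. Skeleton of the pattern with placeholder "foo" names (a sketch, not code from the patch):

	static int foo_net_id __read_mostly;

	struct foo_net {
		int counter;			/* arrives zeroed */
	};

	static int __net_init foo_init_net(struct net *net)
	{
		struct foo_net *fn = net_generic(net, foo_net_id);

		fn->counter = 0;		/* redundant, shown for clarity */
		return 0;
	}

	static void __net_exit foo_exit_net(struct net *net)
	{
		/* tear down whatever foo_init_net() set up */
	}

	static struct pernet_operations foo_net_ops = {
		.init = foo_init_net,
		.exit = foo_exit_net,
		.id   = &foo_net_id,
		.size = sizeof(struct foo_net),
	};

	/* module init: register_pernet_device(&foo_net_ops); */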
netfilter core */
+/*
+ * IPv4 specific functions of netfilter core
+ *
+ * Rusty Russell (C) 2000 -- This code is GPL.
+ * Patrick McHardy (C) 2006-2012
+ */
 #include <linux/kernel.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
@@ -40,14 +45,14 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
 	fl4.flowi4_flags = flags;
 	rt = ip_route_output_key(net, &fl4);
 	if (IS_ERR(rt))
-		return -1;
+		return PTR_ERR(rt);
 
 	/* Drop old route. */
 	skb_dst_drop(skb);
 	skb_dst_set(skb, &rt->dst);
 
 	if (skb_dst(skb)->error)
-		return -1;
+		return skb_dst(skb)->error;
 
 #ifdef CONFIG_XFRM
 	if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
@@ -56,7 +61,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
 		skb_dst_set(skb, NULL);
 		dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0);
 		if (IS_ERR(dst))
-			return -1;
+			return PTR_ERR(dst);
 		skb_dst_set(skb, dst);
 	}
 #endif
@@ -66,7 +71,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
 	if (skb_headroom(skb) < hh_len &&
 	    pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
 				0, GFP_ATOMIC))
-		return -1;
+		return -ENOMEM;
 
 	return 0;
 }
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 0d755c50994..e7916c19393 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -71,7 +71,7 @@ config IP_NF_MATCH_ECN
 
 config IP_NF_MATCH_RPFILTER
 	tristate '"rpfilter" reverse path filter match support'
-	depends on NETFILTER_ADVANCED
+	depends on NETFILTER_ADVANCED && (IP_NF_MANGLE || IP_NF_RAW)
 	---help---
 	  This option allows you to match packets whose replies would
 	  go out via the interface the packet came in.
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 7dc6a974359..85a4f21aac1 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -6,6 +6,7 @@
  * Some ARP specific bits are:
  *
  * Copyright (C) 2002 David S. Miller (davem@redhat.com)
+ * Copyright (C) 2006-2009 Patrick McHardy <kaber@trash.net>
  *
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 79ca5e70d49..eadab1ed650 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -48,9 +48,7 @@ static int __net_init arptable_filter_net_init(struct net *net)
 	net->ipv4.arptable_filter =
 		arpt_register_table(net, &packet_filter, repl);
 	kfree(repl);
-	if (IS_ERR(net->ipv4.arptable_filter))
-		return PTR_ERR(net->ipv4.arptable_filter);
-	return 0;
+	return PTR_RET(net->ipv4.arptable_filter);
 }
 
 static void __net_exit arptable_filter_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 3efcf87400c..d23118d95ff 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -3,6 +3,7 @@
  *
  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J.
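The PTR_RET() substitution in the arptable_filter hunk above is purely mechanical; the helper collapses the usual IS_ERR() dance into one line:

	/* open-coded form */
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);
	return 0;

	/* equivalent */
	return PTR_RET(ptr);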
Neuling   * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org> + * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -182,8 +183,7 @@ ipt_get_target_c(const struct ipt_entry *e)  	return ipt_get_target((struct ipt_entry *)e);  } -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ -    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)  static const char *const hooknames[] = {  	[NF_INET_PRE_ROUTING]		= "PREROUTING",  	[NF_INET_LOCAL_IN]		= "INPUT", @@ -259,6 +259,7 @@ static void trace_packet(const struct sk_buff *skb,  	const char *hookname, *chainname, *comment;  	const struct ipt_entry *iter;  	unsigned int rulenum = 0; +	struct net *net = dev_net(in ? in : out);  	table_base = private->entries[smp_processor_id()];  	root = get_entry(table_base, private->hook_entry[hook]); @@ -271,7 +272,7 @@ static void trace_packet(const struct sk_buff *skb,  		    &chainname, &comment, &rulenum) != 0)  			break; -	nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo, +	nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo,  		      "TRACE: %s:%s:%s:%u ",  		      tablename, chainname, comment, rulenum);  } @@ -361,8 +362,7 @@ ipt_do_table(struct sk_buff *skb,  		t = ipt_get_target(e);  		IP_NF_ASSERT(t->u.kernel.target); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ -    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)  		/* The packet is traced: log it */  		if (unlikely(skb->nf_trace))  			trace_packet(skb, hook, in, out, diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 7d168dcbd13..f8a222cb644 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -4,6 +4,7 @@   * (C) 2000-2004 by Harald Welte <laforge@netfilter.org>   * (C) 1999-2001 Paul `Rusty' Russell   * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2005-2007 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -37,7 +38,7 @@  #include <linux/skbuff.h>  #include <linux/kernel.h>  #include <linux/timer.h> -#include <linux/netlink.h> +#include <net/netlink.h>  #include <linux/netdevice.h>  #include <linux/mm.h>  #include <linux/moduleparam.h> @@ -45,6 +46,7 @@  #include <linux/netfilter/x_tables.h>  #include <linux/netfilter_ipv4/ipt_ULOG.h>  #include <net/netfilter/nf_log.h> +#include <net/netns/generic.h>  #include <net/sock.h>  #include <linux/bitops.h>  #include <asm/unaligned.h> @@ -78,15 +80,23 @@ typedef struct {  	struct timer_list timer;	/* the timer function */  } ulog_buff_t; -static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS];	/* array of buffers */ +static int ulog_net_id __read_mostly; +struct ulog_net { +	unsigned int nlgroup[ULOG_MAXNLGROUPS]; +	ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; +	struct sock *nflognl; +	spinlock_t lock; +}; -static struct sock *nflognl;		/* our socket */ -static DEFINE_SPINLOCK(ulog_lock);	/* spinlock */ +static struct ulog_net *ulog_pernet(struct net *net) +{ +	return net_generic(net, ulog_net_id); +}  /* send one ulog_buff_t to userspace */ -static void ulog_send(unsigned int nlgroupnum) +static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum)  { -	ulog_buff_t *ub = 
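The IS_ENABLED() conversions in ip_tables.c above are likewise equivalences, not behaviour changes; the macro is true for both built-in (=y) and modular (=m) configurations:

	/* before: both cases spelled out by hand */
	#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
	    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	#endif

	/* after: the same condition */
	#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
	#endif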
&ulog_buffers[nlgroupnum]; +	ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum];  	pr_debug("ulog_send: timer is deleting\n");  	del_timer(&ub->timer); @@ -103,7 +113,8 @@ static void ulog_send(unsigned int nlgroupnum)  	NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1;  	pr_debug("throwing %d packets to netlink group %u\n",  		 ub->qlen, nlgroupnum + 1); -	netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC); +	netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1, +			  GFP_ATOMIC);  	ub->qlen = 0;  	ub->skb = NULL; @@ -114,13 +125,16 @@ static void ulog_send(unsigned int nlgroupnum)  /* timer function to flush queue in flushtimeout time */  static void ulog_timer(unsigned long data)  { +	struct ulog_net *ulog = container_of((void *)data, +					     struct ulog_net, +					     nlgroup[*(unsigned int *)data]);  	pr_debug("timer function called, calling ulog_send\n");  	/* lock to protect against somebody modifying our structure  	 * from ipt_ulog_target at the same time */ -	spin_lock_bh(&ulog_lock); -	ulog_send(data); -	spin_unlock_bh(&ulog_lock); +	spin_lock_bh(&ulog->lock); +	ulog_send(ulog, data); +	spin_unlock_bh(&ulog->lock);  }  static struct sk_buff *ulog_alloc_skb(unsigned int size) @@ -160,6 +174,8 @@ static void ipt_ulog_packet(unsigned int hooknum,  	size_t size, copy_len;  	struct nlmsghdr *nlh;  	struct timeval tv; +	struct net *net = dev_net(in ? in : out); +	struct ulog_net *ulog = ulog_pernet(net);  	/* ffs == find first bit set, necessary because userspace  	 * is already shifting groupnumber, but we need unshifted. @@ -172,11 +188,11 @@ static void ipt_ulog_packet(unsigned int hooknum,  	else  		copy_len = loginfo->copy_range; -	size = NLMSG_SPACE(sizeof(*pm) + copy_len); +	size = nlmsg_total_size(sizeof(*pm) + copy_len); -	ub = &ulog_buffers[groupnum]; +	ub = &ulog->ulog_buffers[groupnum]; -	spin_lock_bh(&ulog_lock); +	spin_lock_bh(&ulog->lock);  	if (!ub->skb) {  		if (!(ub->skb = ulog_alloc_skb(size))) @@ -186,7 +202,7 @@ static void ipt_ulog_packet(unsigned int hooknum,  		/* either the queue len is too high or we don't have  		 * enough room in nlskb left. send it to userspace. 
*/ -		ulog_send(groupnum); +		ulog_send(ulog, groupnum);  		if (!(ub->skb = ulog_alloc_skb(size)))  			goto alloc_failure; @@ -260,16 +276,16 @@ static void ipt_ulog_packet(unsigned int hooknum,  	if (ub->qlen >= loginfo->qthreshold) {  		if (loginfo->qthreshold > 1)  			nlh->nlmsg_type = NLMSG_DONE; -		ulog_send(groupnum); +		ulog_send(ulog, groupnum);  	}  out_unlock: -	spin_unlock_bh(&ulog_lock); +	spin_unlock_bh(&ulog->lock);  	return;  alloc_failure:  	pr_debug("Error building netlink message\n"); -	spin_unlock_bh(&ulog_lock); +	spin_unlock_bh(&ulog->lock);  }  static unsigned int @@ -376,54 +392,43 @@ static struct nf_logger ipt_ulog_logger __read_mostly = {  	.me		= THIS_MODULE,  }; -static int __init ulog_tg_init(void) +static int __net_init ulog_tg_net_init(struct net *net)  { -	int ret, i; +	int i; +	struct ulog_net *ulog = ulog_pernet(net);  	struct netlink_kernel_cfg cfg = {  		.groups	= ULOG_MAXNLGROUPS,  	}; -	pr_debug("init module\n"); - -	if (nlbufsiz > 128*1024) { -		pr_warning("Netlink buffer has to be <= 128kB\n"); -		return -EINVAL; -	} - +	spin_lock_init(&ulog->lock);  	/* initialize ulog_buffers */  	for (i = 0; i < ULOG_MAXNLGROUPS; i++) -		setup_timer(&ulog_buffers[i].timer, ulog_timer, i); +		setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer, i); -	nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg); -	if (!nflognl) +	ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg); +	if (!ulog->nflognl)  		return -ENOMEM; -	ret = xt_register_target(&ulog_tg_reg); -	if (ret < 0) { -		netlink_kernel_release(nflognl); -		return ret; -	}  	if (nflog) -		nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); +		nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger);  	return 0;  } -static void __exit ulog_tg_exit(void) +static void __net_exit ulog_tg_net_exit(struct net *net)  {  	ulog_buff_t *ub;  	int i; - -	pr_debug("cleanup_module\n"); +	struct ulog_net *ulog = ulog_pernet(net);  	if (nflog) -		nf_log_unregister(&ipt_ulog_logger); -	xt_unregister_target(&ulog_tg_reg); -	netlink_kernel_release(nflognl); +		nf_log_unset(net, &ipt_ulog_logger); + +	netlink_kernel_release(ulog->nflognl);  	/* remove pending timers and free allocated skb's */  	for (i = 0; i < ULOG_MAXNLGROUPS; i++) { -		ub = &ulog_buffers[i]; +		ub = &ulog->ulog_buffers[i];  		pr_debug("timer is deleting\n");  		del_timer(&ub->timer); @@ -434,5 +439,50 @@ static void __exit ulog_tg_exit(void)  	}  } +static struct pernet_operations ulog_tg_net_ops = { +	.init = ulog_tg_net_init, +	.exit = ulog_tg_net_exit, +	.id   = &ulog_net_id, +	.size = sizeof(struct ulog_net), +}; + +static int __init ulog_tg_init(void) +{ +	int ret; +	pr_debug("init module\n"); + +	if (nlbufsiz > 128*1024) { +		pr_warn("Netlink buffer has to be <= 128kB\n"); +		return -EINVAL; +	} + +	ret = register_pernet_subsys(&ulog_tg_net_ops); +	if (ret) +		goto out_pernet; + +	ret = xt_register_target(&ulog_tg_reg); +	if (ret < 0) +		goto out_target; + +	if (nflog) +		nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); + +	return 0; + +out_target: +	unregister_pernet_subsys(&ulog_tg_net_ops); +out_pernet: +	return ret; +} + +static void __exit ulog_tg_exit(void) +{ +	pr_debug("cleanup_module\n"); +	if (nflog) +		nf_log_unregister(&ipt_ulog_logger); +	xt_unregister_target(&ulog_tg_reg); +	unregister_pernet_subsys(&ulog_tg_net_ops); +} +  module_init(ulog_tg_init);  module_exit(ulog_tg_exit); diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 85d88f20644..cba5658ec82 100644 --- 
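One wrinkle in the ULOG per-net conversion as shown: ulog_tg_net_init() still passes the raw group index i as the timer callback data, while the new ulog_timer() dereferences data as a pointer inside container_of(); the two cannot both be right. A self-consistent pairing (a sketch in the spirit of the fix that later went upstream, not the code above) stores the index in the per-net nlgroup[] slot and passes its address:

	/* in ulog_tg_net_init(): */
	for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
		ulog->nlgroup[i] = i;
		setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer,
			    (unsigned long)&ulog->nlgroup[i]);
	}

	static void ulog_timer(unsigned long data)
	{
		unsigned int groupnum = *((unsigned int *)data);
		struct ulog_net *ulog = container_of((void *)data,
						     struct ulog_net,
						     nlgroup[groupnum]);

		spin_lock_bh(&ulog->lock);
		ulog_send(ulog, groupnum);
		spin_unlock_bh(&ulog->lock);
	}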
a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -44,6 +44,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)  	u_int8_t tos;  	__be32 saddr, daddr;  	u_int32_t mark; +	int err;  	/* root is playing with raw sockets. */  	if (skb->len < sizeof(struct iphdr) || @@ -66,9 +67,11 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)  		if (iph->saddr != saddr ||  		    iph->daddr != daddr ||  		    skb->mark != mark || -		    iph->tos != tos) -			if (ip_route_me_harder(skb, RTN_UNSPEC)) -				ret = NF_DROP; +		    iph->tos != tos) { +			err = ip_route_me_harder(skb, RTN_UNSPEC); +			if (err < 0) +				ret = NF_DROP_ERR(err); +		}  	}  	return ret; diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index eeaff7e4acb..6383273d54e 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -176,6 +176,7 @@ nf_nat_ipv4_out(unsigned int hooknum,  #ifdef CONFIG_XFRM  	const struct nf_conn *ct;  	enum ip_conntrack_info ctinfo; +	int err;  #endif  	unsigned int ret; @@ -195,9 +196,11 @@ nf_nat_ipv4_out(unsigned int hooknum,  		     ct->tuplehash[!dir].tuple.dst.u3.ip) ||  		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&  		     ct->tuplehash[dir].tuple.src.u.all != -		     ct->tuplehash[!dir].tuple.dst.u.all)) -			if (nf_xfrm_me_harder(skb, AF_INET) < 0) -				ret = NF_DROP; +		     ct->tuplehash[!dir].tuple.dst.u.all)) { +			err = nf_xfrm_me_harder(skb, AF_INET); +			if (err < 0) +				ret = NF_DROP_ERR(err); +		}  	}  #endif  	return ret; @@ -213,6 +216,7 @@ nf_nat_ipv4_local_fn(unsigned int hooknum,  	const struct nf_conn *ct;  	enum ip_conntrack_info ctinfo;  	unsigned int ret; +	int err;  	/* root is playing with raw sockets. 
*/  	if (skb->len < sizeof(struct iphdr) || @@ -226,16 +230,19 @@ nf_nat_ipv4_local_fn(unsigned int hooknum,  		if (ct->tuplehash[dir].tuple.dst.u3.ip !=  		    ct->tuplehash[!dir].tuple.src.u3.ip) { -			if (ip_route_me_harder(skb, RTN_UNSPEC)) -				ret = NF_DROP; +			err = ip_route_me_harder(skb, RTN_UNSPEC); +			if (err < 0) +				ret = NF_DROP_ERR(err);  		}  #ifdef CONFIG_XFRM  		else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&  			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&  			 ct->tuplehash[dir].tuple.dst.u.all != -			 ct->tuplehash[!dir].tuple.src.u.all) -			if (nf_xfrm_me_harder(skb, AF_INET) < 0) -				ret = NF_DROP; +			 ct->tuplehash[!dir].tuple.src.u.all) { +			err = nf_xfrm_me_harder(skb, AF_INET); +			if (err < 0) +				ret = NF_DROP_ERR(err); +		}  #endif  	}  	return ret; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 2820aa18b54..567d84168bd 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -1,6 +1,7 @@  /* (C) 1999-2001 Paul `Rusty' Russell   * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index f2ca1279408..4c48e434bb1 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -2,6 +2,7 @@   *   * (C) 1999-2001 Paul `Rusty' Russell   * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2006-2010 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 5241d997ab7..a338dad41b7 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -1,5 +1,6 @@  /* (C) 1999-2001 Paul `Rusty' Russell   * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2006-2010 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -187,8 +188,8 @@ icmp_error(struct net *net, struct nf_conn *tmpl,  	icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);  	if (icmph == NULL) {  		if (LOG_INVALID(net, IPPROTO_ICMP)) -			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, -				      "nf_ct_icmp: short packet "); +			nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, +				      NULL, "nf_ct_icmp: short packet ");  		return -NF_ACCEPT;  	} @@ -196,7 +197,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,  	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&  	    nf_ip_checksum(skb, hooknum, dataoff, 0)) {  		if (LOG_INVALID(net, IPPROTO_ICMP)) -			nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,  				      "nf_ct_icmp: bad HW ICMP checksum ");  		return -NF_ACCEPT;  	} @@ -209,7 +210,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,  	 */  	if (icmph->type > NR_ICMP_TYPES) {  		if (LOG_INVALID(net, IPPROTO_ICMP)) -			
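NF_DROP_ERR(), used throughout these hooks in place of a bare NF_DROP, packs a negative errno into the upper 16 bits of the verdict (((-err) << 16) | NF_DROP), so locally generated packets dropped here report the real error, for example -ENOMEM from ip_route_me_harder(), back to the sending socket instead of failing silently. The caller pattern is uniform:

	err = ip_route_me_harder(skb, RTN_UNSPEC);
	if (err < 0)
		ret = NF_DROP_ERR(err);	/* verdict carries -err to the sender */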
nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,  				      "nf_ct_icmp: invalid ICMP type ");  		return -NF_ACCEPT;  	} diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 9c3db10b22d..9eea059dd62 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -2,6 +2,7 @@   * H.323 extension for NAT alteration.   *   * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net> + * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net>   *   * This source code is licensed under General Public License version 2.   * diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index a06d7d74817..657d2307f03 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -13,6 +13,8 @@   *   * Development of this code funded by Astaro AG (http://www.astaro.com/)   * + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> + *   * TODO: - NAT to a unique tuple, not to TCP source port   * 	   (needs netfilter tuple reservation)   */ diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index ea44f02563b..690d890111b 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -21,6 +21,8 @@   *   * Development of this code funded by Astaro AG (http://www.astaro.com/)   * + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> + *   */  #include <linux/module.h> diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index bac712293fd..5f011cc89cd 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -38,6 +38,8 @@   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA   *   * Author: James Morris <jmorris@intercode.com.au> + * + * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>   */  #include <linux/module.h>  #include <linux/moduleparam.h> diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2e91006d607..7d93d62cd5f 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -514,9 +514,8 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  	ipc.opt = NULL;  	ipc.oif = sk->sk_bound_dev_if;  	ipc.tx_flags = 0; -	err = sock_tx_timestamp(sk, &ipc.tx_flags); -	if (err) -		return err; + +	sock_tx_timestamp(sk, &ipc.tx_flags);  	if (msg->msg_controllen) {  		err = ip_cmsg_send(sock_net(sk), msg, &ipc); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 32030a24e77..2a5bf86d241 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -125,6 +125,7 @@ static const struct snmp_mib snmp4_ipextstats_list[] = {  	SNMP_MIB_ITEM("OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),  	SNMP_MIB_ITEM("InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),  	SNMP_MIB_ITEM("OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), +	SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS),  	SNMP_MIB_SENTINEL  }; @@ -162,6 +163,7 @@ static const struct snmp_mib snmp4_tcp_list[] = {  	SNMP_MIB_ITEM("RetransSegs", TCP_MIB_RETRANSSEGS),  	SNMP_MIB_ITEM("InErrs", TCP_MIB_INERRS),  	SNMP_MIB_ITEM("OutRsts", TCP_MIB_OUTRSTS), +	SNMP_MIB_ITEM("InCsumErrors", TCP_MIB_CSUMERRORS),  	SNMP_MIB_SENTINEL  }; @@ -172,6 +174,7 @@ static const struct snmp_mib snmp4_udp_list[] = {  	SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS),  	SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS),  	SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS), +	SNMP_MIB_ITEM("InCsumErrors", 
UDP_MIB_CSUMERRORS),  	SNMP_MIB_SENTINEL  }; @@ -224,6 +227,8 @@ static const struct snmp_mib snmp4_net_list[] = {  	SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS),  	SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS),  	SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS), +	SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES), +	SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY),  	SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL),  	SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL),  	SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED), @@ -267,6 +272,7 @@ static const struct snmp_mib snmp4_net_list[] = {  	SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL),  	SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW),  	SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD), +	SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES),  	SNMP_MIB_SENTINEL  }; @@ -319,15 +325,16 @@ static void icmp_put(struct seq_file *seq)  	struct net *net = seq->private;  	atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; -	seq_puts(seq, "\nIcmp: InMsgs InErrors"); +	seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors");  	for (i=0; icmpmibmap[i].name != NULL; i++)  		seq_printf(seq, " In%s", icmpmibmap[i].name);  	seq_printf(seq, " OutMsgs OutErrors");  	for (i=0; icmpmibmap[i].name != NULL; i++)  		seq_printf(seq, " Out%s", icmpmibmap[i].name); -	seq_printf(seq, "\nIcmp: %lu %lu", +	seq_printf(seq, "\nIcmp: %lu %lu %lu",  		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS), -		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); +		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS), +		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));  	for (i=0; icmpmibmap[i].name != NULL; i++)  		seq_printf(seq, " %lu",  			   atomic_long_read(ptr + icmpmibmap[i].index)); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6e2851464f8..550781a17b3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2311,7 +2311,7 @@ nla_put_failure:  	return -EMSGSIZE;  } -static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) +static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(in_skb->sk);  	struct rtmsg *rtm; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 397e0f69435..b05c96e7af8 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -267,7 +267,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,  			     struct ip_options *opt)  {  	struct tcp_options_received tcp_opt; -	const u8 *hash_location;  	struct inet_request_sock *ireq;  	struct tcp_request_sock *treq;  	struct tcp_sock *tp = tcp_sk(sk); @@ -294,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,  	/* check for timestamp cookie support */  	memset(&tcp_opt, 0, sizeof(tcp_opt)); -	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); +	tcp_parse_options(skb, &tcp_opt, 0, NULL);  	if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))  		goto out; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 960fd29d9b8..fa2f63fc453 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -28,7 +28,7 @@  static int zero;  static int one = 1; -static 
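With the icmp_put() change above, the Icmp block of /proc/net/snmp gains a third summary counter. A hypothetical excerpt (counter values invented) now begins:

	Icmp: InMsgs InErrors InCsumErrors InDestUnreachs InTimeExcds ...
	Icmp: 1042 3 0 217 12 ...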
int two = 2; +static int four = 4;  static int tcp_retr1_max = 255;  static int ip_local_port_range_min[] = { 1, 1 };  static int ip_local_port_range_max[] = { 65535, 65535 }; @@ -592,13 +592,6 @@ static struct ctl_table ipv4_table[] = {  		.proc_handler	= proc_dointvec  	},  	{ -		.procname	= "tcp_frto_response", -		.data		= &sysctl_tcp_frto_response, -		.maxlen		= sizeof(int), -		.mode		= 0644, -		.proc_handler	= proc_dointvec -	}, -	{  		.procname	= "tcp_low_latency",  		.data		= &sysctl_tcp_low_latency,  		.maxlen		= sizeof(int), @@ -733,13 +726,6 @@ static struct ctl_table ipv4_table[] = {  		.proc_handler	= proc_dointvec,  	},  	{ -		.procname	= "tcp_cookie_size", -		.data		= &sysctl_tcp_cookie_size, -		.maxlen		= sizeof(int), -		.mode		= 0644, -		.proc_handler	= proc_dointvec -	}, -	{  		.procname       = "tcp_thin_linear_timeouts",  		.data           = &sysctl_tcp_thin_linear_timeouts,  		.maxlen         = sizeof(int), @@ -760,7 +746,7 @@ static struct ctl_table ipv4_table[] = {  		.mode		= 0644,  		.proc_handler	= proc_dointvec_minmax,  		.extra1		= &zero, -		.extra2		= &two, +		.extra2		= &four,  	},  	{  		.procname	= "udp_mem", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e2202079070..dcb116dde21 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -409,15 +409,6 @@ void tcp_init_sock(struct sock *sk)  	icsk->icsk_sync_mss = tcp_sync_mss; -	/* TCP Cookie Transactions */ -	if (sysctl_tcp_cookie_size > 0) { -		/* Default, cookies without s_data_payload. */ -		tp->cookie_values = -			kzalloc(sizeof(*tp->cookie_values), -				sk->sk_allocation); -		if (tp->cookie_values != NULL) -			kref_init(&tp->cookie_values->kref); -	}  	/* Presumed zeroed, in order of appearance:  	 *	cookie_in_always, cookie_out_never,  	 *	s_data_constant, s_data_in, s_data_out @@ -2397,92 +2388,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level,  		release_sock(sk);  		return err;  	} -	case TCP_COOKIE_TRANSACTIONS: { -		struct tcp_cookie_transactions ctd; -		struct tcp_cookie_values *cvp = NULL; - -		if (sizeof(ctd) > optlen) -			return -EINVAL; -		if (copy_from_user(&ctd, optval, sizeof(ctd))) -			return -EFAULT; - -		if (ctd.tcpct_used > sizeof(ctd.tcpct_value) || -		    ctd.tcpct_s_data_desired > TCP_MSS_DESIRED) -			return -EINVAL; - -		if (ctd.tcpct_cookie_desired == 0) { -			/* default to global value */ -		} else if ((0x1 & ctd.tcpct_cookie_desired) || -			   ctd.tcpct_cookie_desired > TCP_COOKIE_MAX || -			   ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) { -			return -EINVAL; -		} - -		if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) { -			/* Supercedes all other values */ -			lock_sock(sk); -			if (tp->cookie_values != NULL) { -				kref_put(&tp->cookie_values->kref, -					 tcp_cookie_values_release); -				tp->cookie_values = NULL; -			} -			tp->rx_opt.cookie_in_always = 0; /* false */ -			tp->rx_opt.cookie_out_never = 1; /* true */ -			release_sock(sk); -			return err; -		} - -		/* Allocate ancillary memory before locking. 
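The &two to &four swap above belongs to a proc_dointvec_minmax entry, which rejects writes outside [*extra1, *extra2]; given the new sysctl_tcp_early_retrans default of 3 in tcp_input.c below, this is evidently the tcp_early_retrans knob widening from 0..2 to 0..4. Reconstructed shape of such an entry (a sketch; the procname line is not visible in the hunk):

	{
		.procname	= "tcp_early_retrans",
		.data		= &sysctl_tcp_early_retrans,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &zero,
		.extra2		= &four,
	},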
-		 */ -		if (ctd.tcpct_used > 0 || -		    (tp->cookie_values == NULL && -		     (sysctl_tcp_cookie_size > 0 || -		      ctd.tcpct_cookie_desired > 0 || -		      ctd.tcpct_s_data_desired > 0))) { -			cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used, -				      GFP_KERNEL); -			if (cvp == NULL) -				return -ENOMEM; - -			kref_init(&cvp->kref); -		} -		lock_sock(sk); -		tp->rx_opt.cookie_in_always = -			(TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags); -		tp->rx_opt.cookie_out_never = 0; /* false */ - -		if (tp->cookie_values != NULL) { -			if (cvp != NULL) { -				/* Changed values are recorded by a changed -				 * pointer, ensuring the cookie will differ, -				 * without separately hashing each value later. -				 */ -				kref_put(&tp->cookie_values->kref, -					 tcp_cookie_values_release); -			} else { -				cvp = tp->cookie_values; -			} -		} - -		if (cvp != NULL) { -			cvp->cookie_desired = ctd.tcpct_cookie_desired; - -			if (ctd.tcpct_used > 0) { -				memcpy(cvp->s_data_payload, ctd.tcpct_value, -				       ctd.tcpct_used); -				cvp->s_data_desired = ctd.tcpct_used; -				cvp->s_data_constant = 1; /* true */ -			} else { -				/* No constant payload data. */ -				cvp->s_data_desired = ctd.tcpct_s_data_desired; -				cvp->s_data_constant = 0; /* false */ -			} - -			tp->cookie_values = cvp; -		} -		release_sock(sk); -		return err; -	}  	default:  		/* fallthru */  		break; @@ -2902,41 +2807,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level,  			return -EFAULT;  		return 0; -	case TCP_COOKIE_TRANSACTIONS: { -		struct tcp_cookie_transactions ctd; -		struct tcp_cookie_values *cvp = tp->cookie_values; - -		if (get_user(len, optlen)) -			return -EFAULT; -		if (len < sizeof(ctd)) -			return -EINVAL; - -		memset(&ctd, 0, sizeof(ctd)); -		ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ? -				   TCP_COOKIE_IN_ALWAYS : 0) -				| (tp->rx_opt.cookie_out_never ? -				   TCP_COOKIE_OUT_NEVER : 0); - -		if (cvp != NULL) { -			ctd.tcpct_flags |= (cvp->s_data_in ? -					    TCP_S_DATA_IN : 0) -					 | (cvp->s_data_out ? 
-					    TCP_S_DATA_OUT : 0); - -			ctd.tcpct_cookie_desired = cvp->cookie_desired; -			ctd.tcpct_s_data_desired = cvp->s_data_desired; - -			memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0], -			       cvp->cookie_pair_size); -			ctd.tcpct_used = cvp->cookie_pair_size; -		} - -		if (put_user(sizeof(ctd), optlen)) -			return -EFAULT; -		if (copy_to_user(optval, &ctd, sizeof(ctd))) -			return -EFAULT; -		return 0; -	}  	case TCP_THIN_LINEAR_TIMEOUTS:  		val = tp->thin_lto;  		break; @@ -3015,6 +2885,8 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,  	__be32 delta;  	unsigned int oldlen;  	unsigned int mss; +	struct sk_buff *gso_skb = skb; +	__sum16 newcheck;  	if (!pskb_may_pull(skb, sizeof(*th)))  		goto out; @@ -3044,6 +2916,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,  			       SKB_GSO_TCP_ECN |  			       SKB_GSO_TCPV6 |  			       SKB_GSO_GRE | +			       SKB_GSO_UDP_TUNNEL |  			       0) ||  			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))  			goto out; @@ -3064,11 +2937,13 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,  	th = tcp_hdr(skb);  	seq = ntohl(th->seq); +	newcheck = ~csum_fold((__force __wsum)((__force u32)th->check + +					       (__force u32)delta)); +  	do {  		th->fin = th->psh = 0; +		th->check = newcheck; -		th->check = ~csum_fold((__force __wsum)((__force u32)th->check + -				       (__force u32)delta));  		if (skb->ip_summed != CHECKSUM_PARTIAL)  			th->check =  			     csum_fold(csum_partial(skb_transport_header(skb), @@ -3082,6 +2957,17 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,  		th->cwr = 0;  	} while (skb->next); +	/* Following permits TCP Small Queues to work well with GSO : +	 * The callback to TCP stack will be called at the time last frag +	 * is freed at TX completion, and not right now when gso_skb +	 * is freed by GSO engine +	 */ +	if (gso_skb->destructor == tcp_wfree) { +		swap(gso_skb->sk, skb->sk); +		swap(gso_skb->destructor, skb->destructor); +		swap(gso_skb->truesize, skb->truesize); +	} +  	delta = htonl(oldlen + (skb->tail - skb->transport_header) +  		      skb->data_len);  	th->check = ~csum_fold((__force __wsum)((__force u32)th->check + @@ -3408,134 +3294,6 @@ EXPORT_SYMBOL(tcp_md5_hash_key);  #endif -/* Each Responder maintains up to two secret values concurrently for - * efficient secret rollover.  Each secret value has 4 states: - * - * Generating.  (tcp_secret_generating != tcp_secret_primary) - *    Generates new Responder-Cookies, but not yet used for primary - *    verification.  This is a short-term state, typically lasting only - *    one round trip time (RTT). - * - * Primary.  (tcp_secret_generating == tcp_secret_primary) - *    Used both for generation and primary verification. - * - * Retiring.  (tcp_secret_retiring != tcp_secret_secondary) - *    Used for verification, until the first failure that can be - *    verified by the newer Generating secret.  At that time, this - *    cookie's state is changed to Secondary, and the Generating - *    cookie's state is changed to Primary.  This is a short-term state, - *    typically lasting only one round trip time (RTT). - * - * Secondary.  (tcp_secret_retiring == tcp_secret_secondary) - *    Used for secondary verification, after primary verification - *    failures.  This state lasts no more than twice the Maximum Segment - *    Lifetime (2MSL).  Then, the secret is discarded. - */ -struct tcp_cookie_secret { -	/* The secret is divided into two parts.  
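The hoisted newcheck in the tcp_tso_segment() hunk above is the classic one's-complement incremental update: the pseudo-header length term changes by the same delta for every produced segment, so the checksum is patched once instead of being recomputed per segment. As a helper, the expression reads:

	/* patch a folded one's-complement checksum after 'delta' has been
	 * added to the summed data (same expression as in the hunk above) */
	static inline __sum16 csum_patch(__sum16 check, __be32 delta)
	{
		return ~csum_fold((__force __wsum)((__force u32)check +
						   (__force u32)delta));
	}

The accompanying destructor/truesize swap simply moves the tcp_wfree() accounting onto the last produced segment, so TCP Small Queues sees the memory released when the final fragment is actually freed rather than when the GSO engine frees gso_skb.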
The digest part is the -	 * equivalent of previously hashing a secret and saving the state, -	 * and serves as an initialization vector (IV).  The message part -	 * serves as the trailing secret. -	 */ -	u32				secrets[COOKIE_WORKSPACE_WORDS]; -	unsigned long			expires; -}; - -#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL) -#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2) -#define TCP_SECRET_LIFE (HZ * 600) - -static struct tcp_cookie_secret tcp_secret_one; -static struct tcp_cookie_secret tcp_secret_two; - -/* Essentially a circular list, without dynamic allocation. */ -static struct tcp_cookie_secret *tcp_secret_generating; -static struct tcp_cookie_secret *tcp_secret_primary; -static struct tcp_cookie_secret *tcp_secret_retiring; -static struct tcp_cookie_secret *tcp_secret_secondary; - -static DEFINE_SPINLOCK(tcp_secret_locker); - -/* Select a pseudo-random word in the cookie workspace. - */ -static inline u32 tcp_cookie_work(const u32 *ws, const int n) -{ -	return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])]; -} - -/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed. - * Called in softirq context. - * Returns: 0 for success. - */ -int tcp_cookie_generator(u32 *bakery) -{ -	unsigned long jiffy = jiffies; - -	if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) { -		spin_lock_bh(&tcp_secret_locker); -		if (!time_after_eq(jiffy, tcp_secret_generating->expires)) { -			/* refreshed by another */ -			memcpy(bakery, -			       &tcp_secret_generating->secrets[0], -			       COOKIE_WORKSPACE_WORDS); -		} else { -			/* still needs refreshing */ -			get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS); - -			/* The first time, paranoia assumes that the -			 * randomization function isn't as strong.  But, -			 * this secret initialization is delayed until -			 * the last possible moment (packet arrival). -			 * Although that time is observable, it is -			 * unpredictably variable.  Mash in the most -			 * volatile clock bits available, and expire the -			 * secret extra quickly. -			 */ -			if (unlikely(tcp_secret_primary->expires == -				     tcp_secret_secondary->expires)) { -				struct timespec tv; - -				getnstimeofday(&tv); -				bakery[COOKIE_DIGEST_WORDS+0] ^= -					(u32)tv.tv_nsec; - -				tcp_secret_secondary->expires = jiffy -					+ TCP_SECRET_1MSL -					+ (0x0f & tcp_cookie_work(bakery, 0)); -			} else { -				tcp_secret_secondary->expires = jiffy -					+ TCP_SECRET_LIFE -					+ (0xff & tcp_cookie_work(bakery, 1)); -				tcp_secret_primary->expires = jiffy -					+ TCP_SECRET_2MSL -					+ (0x1f & tcp_cookie_work(bakery, 2)); -			} -			memcpy(&tcp_secret_secondary->secrets[0], -			       bakery, COOKIE_WORKSPACE_WORDS); - -			rcu_assign_pointer(tcp_secret_generating, -					   tcp_secret_secondary); -			rcu_assign_pointer(tcp_secret_retiring, -					   tcp_secret_primary); -			/* -			 * Neither call_rcu() nor synchronize_rcu() needed. -			 * Retiring data is not freed.  It is replaced after -			 * further (locked) pointer updates, and a quiet time -			 * (minimum 1MSL, maximum LIFE - 2MSL). 
-			 */ -		} -		spin_unlock_bh(&tcp_secret_locker); -	} else { -		rcu_read_lock_bh(); -		memcpy(bakery, -		       &rcu_dereference(tcp_secret_generating)->secrets[0], -		       COOKIE_WORKSPACE_WORDS); -		rcu_read_unlock_bh(); -	} -	return 0; -} -EXPORT_SYMBOL(tcp_cookie_generator); -  void tcp_done(struct sock *sk)  {  	struct request_sock *req = tcp_sk(sk)->fastopen_rsk; @@ -3590,7 +3348,6 @@ void __init tcp_init(void)  	unsigned long limit;  	int max_rshare, max_wshare, cnt;  	unsigned int i; -	unsigned long jiffy = jiffies;  	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); @@ -3666,13 +3423,5 @@ void __init tcp_init(void)  	tcp_register_congestion_control(&tcp_reno); -	memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets)); -	memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets)); -	tcp_secret_one.expires = jiffy; /* past due */ -	tcp_secret_two.expires = jiffy; /* past due */ -	tcp_secret_generating = &tcp_secret_one; -	tcp_secret_primary = &tcp_secret_one; -	tcp_secret_retiring = &tcp_secret_two; -	tcp_secret_secondary = &tcp_secret_two;  	tcp_tasklet_init();  } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 13b9c08fc15..08bbe609652 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -93,12 +93,11 @@ int sysctl_tcp_stdurg __read_mostly;  int sysctl_tcp_rfc1337 __read_mostly;  int sysctl_tcp_max_orphans __read_mostly = NR_FILE;  int sysctl_tcp_frto __read_mostly = 2; -int sysctl_tcp_frto_response __read_mostly;  int sysctl_tcp_thin_dupack __read_mostly;  int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; -int sysctl_tcp_early_retrans __read_mostly = 2; +int sysctl_tcp_early_retrans __read_mostly = 3;  #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/  #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/ @@ -108,10 +107,9 @@ int sysctl_tcp_early_retrans __read_mostly = 2;  #define FLAG_DATA_SACKED	0x20 /* New SACK.				
*/  #define FLAG_ECE		0x40 /* ECE in this ACK				*/  #define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/ -#define FLAG_ONLY_ORIG_SACKED	0x200 /* SACKs only non-rexmit sent before RTO */ +#define FLAG_ORIG_SACK_ACKED	0x200 /* Never retransmitted data are (s)acked	*/  #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */  #define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */ -#define FLAG_NONHEAD_RETRANS_ACKED	0x1000 /* Non-head rexmitted data was ACKed */  #define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */  #define FLAG_UPDATE_TS_RECENT	0x4000 /* tcp_replace_ts_recent() */ @@ -119,7 +117,6 @@ int sysctl_tcp_early_retrans __read_mostly = 2;  #define FLAG_NOT_DUP		(FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)  #define FLAG_CA_ALERT		(FLAG_DATA_SACKED|FLAG_ECE)  #define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED) -#define FLAG_ANY_PROGRESS	(FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)  #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)  #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) @@ -1160,10 +1157,8 @@ static u8 tcp_sacktag_one(struct sock *sk,  					   tcp_highest_sack_seq(tp)))  					state->reord = min(fack_count,  							   state->reord); - -				/* SACK enhanced F-RTO (RFC4138; Appendix B) */ -				if (!after(end_seq, tp->frto_highmark)) -					state->flag |= FLAG_ONLY_ORIG_SACKED; +				if (!after(end_seq, tp->high_seq)) +					state->flag |= FLAG_ORIG_SACK_ACKED;  			}  			if (sacked & TCPCB_LOST) { @@ -1556,7 +1551,6 @@ static int  tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,  			u32 prior_snd_una)  { -	const struct inet_connection_sock *icsk = inet_csk(sk);  	struct tcp_sock *tp = tcp_sk(sk);  	const unsigned char *ptr = (skb_transport_header(ack_skb) +  				    TCP_SKB_CB(ack_skb)->sacked); @@ -1729,12 +1723,6 @@ walk:  				       start_seq, end_seq, dup_sack);  advance_sp: -		/* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct -		 * due to in-order walk -		 */ -		if (after(end_seq, tp->frto_highmark)) -			state.flag &= ~FLAG_ONLY_ORIG_SACKED; -  		i++;  	} @@ -1751,8 +1739,7 @@ advance_sp:  	tcp_verify_left_out(tp);  	if ((state.reord < tp->fackets_out) && -	    ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) && -	    (!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark))) +	    ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))  		tcp_update_reordering(sk, tp->fackets_out - state.reord, 0);  out: @@ -1826,197 +1813,6 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)  	tp->sacked_out = 0;  } -static int tcp_is_sackfrto(const struct tcp_sock *tp) -{ -	return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp); -} - -/* F-RTO can only be used if TCP has never retransmitted anything other than - * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here) - */ -bool tcp_use_frto(struct sock *sk) -{ -	const struct tcp_sock *tp = tcp_sk(sk); -	const struct inet_connection_sock *icsk = inet_csk(sk); -	struct sk_buff *skb; - -	if (!sysctl_tcp_frto) -		return false; - -	/* MTU probe and F-RTO won't really play nicely along currently */ -	if (icsk->icsk_mtup.probe_size) -		return false; - -	if (tcp_is_sackfrto(tp)) -		return true; - -	/* Avoid expensive walking of rexmit queue if possible */ -	if (tp->retrans_out > 1) -		return false; - -	skb = tcp_write_queue_head(sk); -	if (tcp_skb_is_last(sk, skb)) -		return true; -	skb = tcp_write_queue_next(sk, skb);	/* Skips head */ -	
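The before()/after() helpers leaned on throughout these hunks (for example !after(end_seq, tp->high_seq) above) are wrap-safe comparisons of 32-bit sequence numbers via the signed-difference trick; an equivalent definition:

	static inline bool seq_before(u32 a, u32 b)	/* kernel: before() */
	{
		return (s32)(a - b) < 0;
	}
	#define seq_after(a, b)	seq_before(b, a)	/* kernel: after() */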
tcp_for_write_queue_from(skb, sk) { -		if (skb == tcp_send_head(sk)) -			break; -		if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) -			return false; -		/* Short-circuit when first non-SACKed skb has been checked */ -		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) -			break; -	} -	return true; -} - -/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO - * recovery a bit and use heuristics in tcp_process_frto() to detect if - * the RTO was spurious. Only clear SACKED_RETRANS of the head here to - * keep retrans_out counting accurate (with SACK F-RTO, other than head - * may still have that bit set); TCPCB_LOST and remaining SACKED_RETRANS - * bits are handled if the Loss state is really to be entered (in - * tcp_enter_frto_loss). - * - * Do like tcp_enter_loss() would; when RTO expires the second time it - * does: - *  "Reduce ssthresh if it has not yet been made inside this window." - */ -void tcp_enter_frto(struct sock *sk) -{ -	const struct inet_connection_sock *icsk = inet_csk(sk); -	struct tcp_sock *tp = tcp_sk(sk); -	struct sk_buff *skb; - -	if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) || -	    tp->snd_una == tp->high_seq || -	    ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) && -	     !icsk->icsk_retransmits)) { -		tp->prior_ssthresh = tcp_current_ssthresh(sk); -		/* Our state is too optimistic in ssthresh() call because cwnd -		 * is not reduced until tcp_enter_frto_loss() when previous F-RTO -		 * recovery has not yet completed. Pattern would be this: RTO, -		 * Cumulative ACK, RTO (2xRTO for the same segment does not end -		 * up here twice). -		 * RFC4138 should be more specific on what to do, even though -		 * RTO is quite unlikely to occur after the first Cumulative ACK -		 * due to back-off and complexity of triggering events ... -		 */ -		if (tp->frto_counter) { -			u32 stored_cwnd; -			stored_cwnd = tp->snd_cwnd; -			tp->snd_cwnd = 2; -			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); -			tp->snd_cwnd = stored_cwnd; -		} else { -			tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); -		} -		/* ... in theory, cong.control module could do "any tricks" in -		 * ssthresh(), which means that ca_state, lost bits and lost_out -		 * counter would have to be faked before the call occurs. We -		 * consider that too expensive, unlikely and hacky, so modules -		 * using these in ssthresh() must deal these incompatibility -		 * issues if they receives CA_EVENT_FRTO and frto_counter != 0 -		 */ -		tcp_ca_event(sk, CA_EVENT_FRTO); -	} - -	tp->undo_marker = tp->snd_una; -	tp->undo_retrans = 0; - -	skb = tcp_write_queue_head(sk); -	if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) -		tp->undo_marker = 0; -	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { -		TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; -		tp->retrans_out -= tcp_skb_pcount(skb); -	} -	tcp_verify_left_out(tp); - -	/* Too bad if TCP was application limited */ -	tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1); - -	/* Earlier loss recovery underway (see RFC4138; Appendix B). -	 * The last condition is necessary at least in tp->frto_counter case. -	 */ -	if (tcp_is_sackfrto(tp) && (tp->frto_counter || -	    ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) && -	    after(tp->high_seq, tp->snd_una)) { -		tp->frto_highmark = tp->high_seq; -	} else { -		tp->frto_highmark = tp->snd_nxt; -	} -	tcp_set_ca_state(sk, TCP_CA_Disorder); -	tp->high_seq = tp->snd_nxt; -	tp->frto_counter = 1; -} - -/* Enter Loss state after F-RTO was applied. 
Dupack arrived after RTO, - * which indicates that we should follow the traditional RTO recovery, - * i.e. mark everything lost and do go-back-N retransmission. - */ -static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) -{ -	struct tcp_sock *tp = tcp_sk(sk); -	struct sk_buff *skb; - -	tp->lost_out = 0; -	tp->retrans_out = 0; -	if (tcp_is_reno(tp)) -		tcp_reset_reno_sack(tp); - -	tcp_for_write_queue(skb, sk) { -		if (skb == tcp_send_head(sk)) -			break; - -		TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; -		/* -		 * Count the retransmission made on RTO correctly (only when -		 * waiting for the first ACK and did not get it)... -		 */ -		if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) { -			/* For some reason this R-bit might get cleared? */ -			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) -				tp->retrans_out += tcp_skb_pcount(skb); -			/* ...enter this if branch just for the first segment */ -			flag |= FLAG_DATA_ACKED; -		} else { -			if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS) -				tp->undo_marker = 0; -			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; -		} - -		/* Marking forward transmissions that were made after RTO lost -		 * can cause unnecessary retransmissions in some scenarios, -		 * SACK blocks will mitigate that in some but not in all cases. -		 * We used to not mark them but it was causing break-ups with -		 * receivers that do only in-order receival. -		 * -		 * TODO: we could detect presence of such receiver and select -		 * different behavior per flow. -		 */ -		if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { -			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; -			tp->lost_out += tcp_skb_pcount(skb); -			tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; -		} -	} -	tcp_verify_left_out(tp); - -	tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments; -	tp->snd_cwnd_cnt = 0; -	tp->snd_cwnd_stamp = tcp_time_stamp; -	tp->frto_counter = 0; - -	tp->reordering = min_t(unsigned int, tp->reordering, -			       sysctl_tcp_reordering); -	tcp_set_ca_state(sk, TCP_CA_Loss); -	tp->high_seq = tp->snd_nxt; -	TCP_ECN_queue_cwr(tp); - -	tcp_clear_all_retrans_hints(tp); -} -  static void tcp_clear_retrans_partial(struct tcp_sock *tp)  {  	tp->retrans_out = 0; @@ -2043,10 +1839,13 @@ void tcp_enter_loss(struct sock *sk, int how)  	const struct inet_connection_sock *icsk = inet_csk(sk);  	struct tcp_sock *tp = tcp_sk(sk);  	struct sk_buff *skb; +	bool new_recovery = false;  	/* Reduce ssthresh if it has not yet been made inside this window. */ -	if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || +	if (icsk->icsk_ca_state <= TCP_CA_Disorder || +	    !after(tp->high_seq, tp->snd_una) ||  	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { +		new_recovery = true;  		tp->prior_ssthresh = tcp_current_ssthresh(sk);  		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);  		tcp_ca_event(sk, CA_EVENT_LOSS); @@ -2088,8 +1887,14 @@ void tcp_enter_loss(struct sock *sk, int how)  	tcp_set_ca_state(sk, TCP_CA_Loss);  	tp->high_seq = tp->snd_nxt;  	TCP_ECN_queue_cwr(tp); -	/* Abort F-RTO algorithm if one is in progress */ -	tp->frto_counter = 0; + +	/* F-RTO RFC5682 sec 3.1 step 1: retransmit SND.UNA if no previous +	 * loss recovery is underway except recurring timeout(s) on +	 * the same SND.UNA (sec 3.2). 
Disable F-RTO on path MTU probing +	 */ +	tp->frto = sysctl_tcp_frto && +		   (new_recovery || icsk->icsk_retransmits) && +		   !inet_csk(sk)->icsk_mtup.probe_size;  }  /* If ACK arrived pointing to a remembered SACK, it means that our @@ -2148,15 +1953,16 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)  	 * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples  	 * available, or RTO is scheduled to fire first.  	 */ -	if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt) +	if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 || +	    (flag & FLAG_ECE) || !tp->srtt)  		return false;  	delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2));  	if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))  		return false; -	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX); -	tp->early_retrans_delayed = 1; +	inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay, +				  TCP_RTO_MAX);  	return true;  } @@ -2272,10 +2078,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)  	struct tcp_sock *tp = tcp_sk(sk);  	__u32 packets_out; -	/* Do not perform any recovery during F-RTO algorithm */ -	if (tp->frto_counter) -		return false; -  	/* Trick#1: The loss is proven. */  	if (tp->lost_out)  		return true; @@ -2319,7 +2121,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)  	 * interval if appropriate.  	 */  	if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && -	    (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) && +	    (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&  	    !tcp_may_send_now(sk))  		return !tcp_pause_early_retransmit(sk, flag); @@ -2636,12 +2438,12 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)  	return failed;  } -/* Undo during loss recovery after partial ACK. */ -static bool tcp_try_undo_loss(struct sock *sk) +/* Undo during loss recovery after partial ACK or using F-RTO. */ +static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)  {  	struct tcp_sock *tp = tcp_sk(sk); -	if (tcp_may_undo(tp)) { +	if (frto_undo || tcp_may_undo(tp)) {  		struct sk_buff *skb;  		tcp_for_write_queue(skb, sk) {  			if (skb == tcp_send_head(sk)) @@ -2655,9 +2457,12 @@ static bool tcp_try_undo_loss(struct sock *sk)  		tp->lost_out = 0;  		tcp_undo_cwr(sk, true);  		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO); +		if (frto_undo) +			NET_INC_STATS_BH(sock_net(sk), +					 LINUX_MIB_TCPSPURIOUSRTOS);  		inet_csk(sk)->icsk_retransmits = 0;  		tp->undo_marker = 0; -		if (tcp_is_sack(tp)) +		if (frto_undo || tcp_is_sack(tp))  			tcp_set_ca_state(sk, TCP_CA_Open);  		return true;  	} @@ -2679,6 +2484,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)  	struct tcp_sock *tp = tcp_sk(sk);  	tp->high_seq = tp->snd_nxt; +	tp->tlp_high_seq = 0;  	tp->snd_cwnd_cnt = 0;  	tp->prior_cwnd = tp->snd_cwnd;  	tp->prr_delivered = 0; @@ -2756,7 +2562,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)  	tcp_verify_left_out(tp); -	if (!tp->frto_counter && !tcp_any_retrans_done(sk)) +	if (!tcp_any_retrans_done(sk))  		tp->retrans_stamp = 0;  	if (flag & FLAG_ECE) @@ -2873,6 +2679,58 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)  	tcp_set_ca_state(sk, TCP_CA_Recovery);  } +/* Process an ACK in CA_Loss state. Move to CA_Open if lost data are + * recovered or spurious. Otherwise retransmits more on partial ACKs. 
+ */
+static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	bool recovered = !before(tp->snd_una, tp->high_seq);
+
+	if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
+		if (flag & FLAG_ORIG_SACK_ACKED) {
+			/* Step 3.b. A timeout is spurious if not all data are
+			 * lost, i.e., never-retransmitted data are (s)acked.
+			 */
+			tcp_try_undo_loss(sk, true);
+			return;
+		}
+		if (after(tp->snd_nxt, tp->high_seq) &&
+		    (flag & FLAG_DATA_SACKED || is_dupack)) {
+			tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
+		} else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
+			tp->high_seq = tp->snd_nxt;
+			__tcp_push_pending_frames(sk, tcp_current_mss(sk),
+						  TCP_NAGLE_OFF);
+			if (after(tp->snd_nxt, tp->high_seq))
+				return; /* Step 2.b */
+			tp->frto = 0;
+		}
+	}
+
+	if (recovered) {
+		/* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
+		icsk->icsk_retransmits = 0;
+		tcp_try_undo_recovery(sk);
+		return;
+	}
+	if (flag & FLAG_DATA_ACKED)
+		icsk->icsk_retransmits = 0;
+	if (tcp_is_reno(tp)) {
+		/* A Reno DUPACK means new data in F-RTO step 2.b above are
+		 * delivered. Lower inflight to clock out (re)transmissions.
+		 */
+		if (after(tp->snd_nxt, tp->high_seq) && is_dupack)
+			tcp_add_reno_sack(sk);
+		else if (flag & FLAG_SND_UNA_ADVANCED)
+			tcp_reset_reno_sack(tp);
+	}
+	if (tcp_try_undo_loss(sk, false))
+		return;
+	tcp_xmit_retransmit_queue(sk);
+}
+
 /* Process an event, which can update packets-in-flight not trivially.
  * Main goal of this function is to calculate new estimate for left_out,
  * taking into account both packets sitting in receiver's buffer and
@@ -2919,12 +2777,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 		tp->retrans_stamp = 0;
 	} else if (!before(tp->snd_una, tp->high_seq)) {
 		switch (icsk->icsk_ca_state) {
-		case TCP_CA_Loss:
-			icsk->icsk_retransmits = 0;
-			if (tcp_try_undo_recovery(sk))
-				return;
-			break;
-
 		case TCP_CA_CWR:
 			/* CWR is to be held something *above* high_seq
 			 * is ACKed for CWR bit to reach receiver. */
@@ -2955,18 +2807,10 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
 		newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
 		break;
 	case TCP_CA_Loss:
-		if (flag & FLAG_DATA_ACKED)
-			icsk->icsk_retransmits = 0;
-		if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
-			tcp_reset_reno_sack(tp);
-		if (!tcp_try_undo_loss(sk)) {
-			tcp_moderate_cwnd(tp);
-			tcp_xmit_retransmit_queue(sk);
-			return;
-		}
+		tcp_process_loss(sk, flag, is_dupack);
 		if (icsk->icsk_ca_state != TCP_CA_Open)
 			return;
-		/* Loss is undone; fall through to processing in Open state. */
+		/* Fall through to processing in Open state.
*/  	default:  		if (tcp_is_reno(tp)) {  			if (flag & FLAG_SND_UNA_ADVANCED) @@ -3079,6 +2923,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)   */  void tcp_rearm_rto(struct sock *sk)  { +	const struct inet_connection_sock *icsk = inet_csk(sk);  	struct tcp_sock *tp = tcp_sk(sk);  	/* If the retrans timer is currently being used by Fast Open @@ -3092,12 +2937,13 @@ void tcp_rearm_rto(struct sock *sk)  	} else {  		u32 rto = inet_csk(sk)->icsk_rto;  		/* Offset the time elapsed after installing regular RTO */ -		if (tp->early_retrans_delayed) { +		if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || +		    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {  			struct sk_buff *skb = tcp_write_queue_head(sk);  			const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto;  			s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);  			/* delta may not be positive if the socket is locked -			 * when the delayed ER timer fires and is rescheduled. +			 * when the retrans timer fires and is rescheduled.  			 */  			if (delta > 0)  				rto = delta; @@ -3105,7 +2951,6 @@ void tcp_rearm_rto(struct sock *sk)  		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,  					  TCP_RTO_MAX);  	} -	tp->early_retrans_delayed = 0;  }  /* This function is called when the delayed ER timer fires. TCP enters @@ -3193,8 +3038,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,  			flag |= FLAG_RETRANS_DATA_ACKED;  			ca_seq_rtt = -1;  			seq_rtt = -1; -			if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1)) -				flag |= FLAG_NONHEAD_RETRANS_ACKED;  		} else {  			ca_seq_rtt = now - scb->when;  			last_ackt = skb->tstamp; @@ -3203,6 +3046,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,  			}  			if (!(sacked & TCPCB_SACKED_ACKED))  				reord = min(pkts_acked, reord); +			if (!after(scb->end_seq, tp->high_seq)) +				flag |= FLAG_ORIG_SACK_ACKED;  		}  		if (sacked & TCPCB_SACKED_ACKED) @@ -3403,150 +3248,6 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32  	return flag;  } -/* A very conservative spurious RTO response algorithm: reduce cwnd and - * continue in congestion avoidance. - */ -static void tcp_conservative_spur_to_response(struct tcp_sock *tp) -{ -	tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); -	tp->snd_cwnd_cnt = 0; -	TCP_ECN_queue_cwr(tp); -	tcp_moderate_cwnd(tp); -} - -/* A conservative spurious RTO response algorithm: reduce cwnd using - * PRR and continue in congestion avoidance. - */ -static void tcp_cwr_spur_to_response(struct sock *sk) -{ -	tcp_enter_cwr(sk, 0); -} - -static void tcp_undo_spur_to_response(struct sock *sk, int flag) -{ -	if (flag & FLAG_ECE) -		tcp_cwr_spur_to_response(sk); -	else -		tcp_undo_cwr(sk, true); -} - -/* F-RTO spurious RTO detection algorithm (RFC4138) - * - * F-RTO affects during two new ACKs following RTO (well, almost, see inline - * comments). State (ACK number) is kept in frto_counter. When ACK advances - * window (but not to or beyond highest sequence sent before RTO): - *   On First ACK,  send two new segments out. - *   On Second ACK, RTO was likely spurious. Do spurious response (response - *                  algorithm is not part of the F-RTO detection algorithm - *                  given in RFC4138 but can be selected separately). - * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss - * and TCP falls back to conventional RTO recovery. 
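
Before the removed RFC4138 commentary continues below, it may help to restate what replaces it: the new tcp_process_loss() earlier in this file reduces the RFC5682 detection to three outcomes. A minimal standalone sketch, where plain integers stand in for the kernel's wrap-safe after() and an enum verdict for the undo/retransmit calls:

#include <stdbool.h>
#include <stdio.h>

#define FLAG_DATA_SACKED	0x20	/* values as defined earlier */
#define FLAG_ORIG_SACK_ACKED	0x200

enum rto_verdict { RTO_SPURIOUS, RTO_REAL, RTO_UNDECIDED };

/* Decide what a post-RTO ACK proves (F-RTO, RFC5682 sec 3.1). */
static enum rto_verdict frto_verdict(int flag, bool is_dupack,
				     unsigned int snd_nxt,
				     unsigned int high_seq)
{
	/* Step 3.b: never-retransmitted data survived, timeout spurious. */
	if (flag & FLAG_ORIG_SACK_ACKED)
		return RTO_SPURIOUS;
	/* Step 3.a: the new data sent in step 2.b was SACKed or duped,
	 * so the original flight really was lost. */
	if (snd_nxt > high_seq && ((flag & FLAG_DATA_SACKED) || is_dupack))
		return RTO_REAL;
	/* Step 2.b: transmit new data and let the next ACK decide. */
	return RTO_UNDECIDED;
}

int main(void)
{
	printf("%d %d\n",
	       frto_verdict(FLAG_ORIG_SACK_ACKED, false, 4000, 4000),
	       frto_verdict(FLAG_DATA_SACKED, true, 5000, 4000));
	return 0;
}

Note the asymmetry: a single never-retransmitted byte being (s)acked is enough to declare the RTO spurious, while declaring it real requires evidence against the freshly sent probe data.
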
F-RTO allows overriding - * of Nagle, this is done using frto_counter states 2 and 3, when a new data - * segment of any size sent during F-RTO, state 2 is upgraded to 3. - * - * Rationale: if the RTO was spurious, new ACKs should arrive from the - * original window even after we transmit two new data segments. - * - * SACK version: - *   on first step, wait until first cumulative ACK arrives, then move to - *   the second step. In second step, the next ACK decides. - * - * F-RTO is implemented (mainly) in four functions: - *   - tcp_use_frto() is used to determine if TCP is can use F-RTO - *   - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is - *     called when tcp_use_frto() showed green light - *   - tcp_process_frto() handles incoming ACKs during F-RTO algorithm - *   - tcp_enter_frto_loss() is called if there is not enough evidence - *     to prove that the RTO is indeed spurious. It transfers the control - *     from F-RTO to the conventional RTO recovery - */ -static bool tcp_process_frto(struct sock *sk, int flag) -{ -	struct tcp_sock *tp = tcp_sk(sk); - -	tcp_verify_left_out(tp); - -	/* Duplicate the behavior from Loss state (fastretrans_alert) */ -	if (flag & FLAG_DATA_ACKED) -		inet_csk(sk)->icsk_retransmits = 0; - -	if ((flag & FLAG_NONHEAD_RETRANS_ACKED) || -	    ((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED))) -		tp->undo_marker = 0; - -	if (!before(tp->snd_una, tp->frto_highmark)) { -		tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag); -		return true; -	} - -	if (!tcp_is_sackfrto(tp)) { -		/* RFC4138 shortcoming in step 2; should also have case c): -		 * ACK isn't duplicate nor advances window, e.g., opposite dir -		 * data, winupdate -		 */ -		if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP)) -			return true; - -		if (!(flag & FLAG_DATA_ACKED)) { -			tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3), -					    flag); -			return true; -		} -	} else { -		if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) { -			if (!tcp_packets_in_flight(tp)) { -				tcp_enter_frto_loss(sk, 2, flag); -				return true; -			} - -			/* Prevent sending of new data. */ -			tp->snd_cwnd = min(tp->snd_cwnd, -					   tcp_packets_in_flight(tp)); -			return true; -		} - -		if ((tp->frto_counter >= 2) && -		    (!(flag & FLAG_FORWARD_PROGRESS) || -		     ((flag & FLAG_DATA_SACKED) && -		      !(flag & FLAG_ONLY_ORIG_SACKED)))) { -			/* RFC4138 shortcoming (see comment above) */ -			if (!(flag & FLAG_FORWARD_PROGRESS) && -			    (flag & FLAG_NOT_DUP)) -				return true; - -			tcp_enter_frto_loss(sk, 3, flag); -			return true; -		} -	} - -	if (tp->frto_counter == 1) { -		/* tcp_may_send_now needs to see updated state */ -		tp->snd_cwnd = tcp_packets_in_flight(tp) + 2; -		tp->frto_counter = 2; - -		if (!tcp_may_send_now(sk)) -			tcp_enter_frto_loss(sk, 2, flag); - -		return true; -	} else { -		switch (sysctl_tcp_frto_response) { -		case 2: -			tcp_undo_spur_to_response(sk, flag); -			break; -		case 1: -			tcp_conservative_spur_to_response(tp); -			break; -		default: -			tcp_cwr_spur_to_response(sk); -			break; -		} -		tp->frto_counter = 0; -		tp->undo_marker = 0; -		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); -	} -	return false; -} -  /* RFC 5961 7 [ACK Throttling] */  static void tcp_send_challenge_ack(struct sock *sk)  { @@ -3586,6 +3287,38 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)  	}  } +/* This routine deals with acks during a TLP episode. 
+ * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. + */ +static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) +{ +	struct tcp_sock *tp = tcp_sk(sk); +	bool is_tlp_dupack = (ack == tp->tlp_high_seq) && +			     !(flag & (FLAG_SND_UNA_ADVANCED | +				       FLAG_NOT_DUP | FLAG_DATA_SACKED)); + +	/* Mark the end of TLP episode on receiving TLP dupack or when +	 * ack is after tlp_high_seq. +	 */ +	if (is_tlp_dupack) { +		tp->tlp_high_seq = 0; +		return; +	} + +	if (after(ack, tp->tlp_high_seq)) { +		tp->tlp_high_seq = 0; +		/* Don't reduce cwnd if DSACK arrives for TLP retrans. */ +		if (!(flag & FLAG_DSACKING_ACK)) { +			tcp_init_cwnd_reduction(sk, true); +			tcp_set_ca_state(sk, TCP_CA_CWR); +			tcp_end_cwnd_reduction(sk); +			tcp_set_ca_state(sk, TCP_CA_Open); +			NET_INC_STATS_BH(sock_net(sk), +					 LINUX_MIB_TCPLOSSPROBERECOVERY); +		} +	} +} +  /* This routine deals with incoming acks, but not outgoing ones. */  static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)  { @@ -3600,7 +3333,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)  	int prior_packets;  	int prior_sacked = tp->sacked_out;  	int pkts_acked = 0; -	bool frto_cwnd = false;  	/* If the ack is older than previous acks  	 * then we can probably ignore it. @@ -3620,7 +3352,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)  	if (after(ack, tp->snd_nxt))  		goto invalid_ack; -	if (tp->early_retrans_delayed) +	if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || +	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)  		tcp_rearm_rto(sk);  	if (after(ack, prior_snd_una)) @@ -3679,30 +3412,29 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)  	pkts_acked = prior_packets - tp->packets_out; -	if (tp->frto_counter) -		frto_cwnd = tcp_process_frto(sk, flag); -	/* Guarantee sacktag reordering detection against wrap-arounds */ -	if (before(tp->frto_highmark, tp->snd_una)) -		tp->frto_highmark = 0; -  	if (tcp_ack_is_dubious(sk, flag)) {  		/* Advance CWND, if state allows this. */ -		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && -		    tcp_may_raise_cwnd(sk, flag)) +		if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))  			tcp_cong_avoid(sk, ack, prior_in_flight);  		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));  		tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,  				      is_dupack, flag);  	} else { -		if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) +		if (flag & FLAG_DATA_ACKED)  			tcp_cong_avoid(sk, ack, prior_in_flight);  	} +	if (tp->tlp_high_seq) +		tcp_process_tlp_ack(sk, ack, flag); +  	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {  		struct dst_entry *dst = __sk_dst_get(sk);  		if (dst)  			dst_confirm(dst);  	} + +	if (icsk->icsk_pending == ICSK_TIME_RETRANS) +		tcp_schedule_loss_probe(sk);  	return 1;  no_queue: @@ -3716,6 +3448,9 @@ no_queue:  	 */  	if (tcp_send_head(sk))  		tcp_ack_probe(sk); + +	if (tp->tlp_high_seq) +		tcp_process_tlp_ack(sk, ack, flag);  	return 1;  invalid_ack: @@ -3740,8 +3475,8 @@ old_ack:   * But, this can also be called on packets in the established flow when   * the fast version below fails.   
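
Back in tcp_process_tlp_ack() above, a TLP episode ends either quietly or with a one-off cwnd reduction. A compact restatement with mocked flag bits (FLAG_NOT_DUP is collapsed into a single constant here rather than the composite define used earlier in this file):

#include <stdint.h>
#include <stdio.h>

#define FLAG_SND_UNA_ADVANCED	0x400
#define FLAG_NOT_DUP		0x07	/* simplified composite */
#define FLAG_DATA_SACKED	0x20
#define FLAG_DSACKING_ACK	0x800

/* 0 = episode still open, 1 = ends quietly (the probe itself was
 * acked, or its retransmission was DSACKed), 2 = ends with the
 * one-off CWR-style cwnd reduction. */
static int tlp_ack_effect(uint32_t ack, uint32_t tlp_high_seq, int flag)
{
	int dup = (ack == tlp_high_seq) &&
		  !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP |
			    FLAG_DATA_SACKED));

	if (dup)
		return 1;
	if ((int32_t)(ack - tlp_high_seq) > 0)
		return (flag & FLAG_DSACKING_ACK) ? 1 : 2;
	return 0;
}

int main(void)
{
	printf("%d\n", tlp_ack_effect(3000, 3000, 0));
	printf("%d\n", tlp_ack_effect(3001, 3000, FLAG_SND_UNA_ADVANCED));
	return 0;
}
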
*/ -void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, -		       const u8 **hvpp, int estab, +void tcp_parse_options(const struct sk_buff *skb, +		       struct tcp_options_received *opt_rx, int estab,  		       struct tcp_fastopen_cookie *foc)  {  	const unsigned char *ptr; @@ -3825,31 +3560,6 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o  				 */  				break;  #endif -			case TCPOPT_COOKIE: -				/* This option is variable length. -				 */ -				switch (opsize) { -				case TCPOLEN_COOKIE_BASE: -					/* not yet implemented */ -					break; -				case TCPOLEN_COOKIE_PAIR: -					/* not yet implemented */ -					break; -				case TCPOLEN_COOKIE_MIN+0: -				case TCPOLEN_COOKIE_MIN+2: -				case TCPOLEN_COOKIE_MIN+4: -				case TCPOLEN_COOKIE_MIN+6: -				case TCPOLEN_COOKIE_MAX: -					/* 16-bit multiple */ -					opt_rx->cookie_plus = opsize; -					*hvpp = ptr; -					break; -				default: -					/* ignore option */ -					break; -				} -				break; -  			case TCPOPT_EXP:  				/* Fast Open option shares code 254 using a  				 * 16 bits magic number. It's valid only in @@ -3895,8 +3605,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr   * If it is wrong it falls back on tcp_parse_options().   */  static bool tcp_fast_parse_options(const struct sk_buff *skb, -				   const struct tcphdr *th, -				   struct tcp_sock *tp, const u8 **hvpp) +				   const struct tcphdr *th, struct tcp_sock *tp)  {  	/* In the spirit of fast parsing, compare doff directly to constant  	 * values.  Because equality is used, short doff can be ignored here. @@ -3910,7 +3619,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb,  			return true;  	} -	tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL); +	tcp_parse_options(skb, &tp->rx_opt, 1, NULL);  	if (tp->rx_opt.saw_tstamp)  		tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5270,12 +4979,10 @@ out:  static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,  				  const struct tcphdr *th, int syn_inerr)  { -	const u8 *hash_location;  	struct tcp_sock *tp = tcp_sk(sk);  	/* RFC1323: H1. Apply PAWS check first. 
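
For context on the parser whose signature changes above: TCP options are kind/length records, with EOL and NOP the only single-byte kinds, and tcp_parse_options() walks them with essentially the bounds checks below. This is a simplified standalone walk, not the kernel routine; per-option handling is elided:

#include <stdio.h>

#define TCPOPT_EOL	0
#define TCPOPT_NOP	1

static void walk_options(const unsigned char *ptr, int length)
{
	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		if (opcode == TCPOPT_EOL)
			return;
		if (opcode == TCPOPT_NOP) {	/* padding, no length byte */
			length--;
			continue;
		}
		if (length < 2)
			return;
		opsize = *ptr++;
		if (opsize < 2 || opsize > length)
			return;			/* silly or truncated option */
		printf("kind %d len %d\n", opcode, opsize);
		ptr += opsize - 2;
		length -= opsize;
	}
}

int main(void)
{
	/* NOP, NOP, MSS (kind 2, len 4, value 0x05b4 = 1460) */
	unsigned char opts[] = { 1, 1, 2, 4, 0x05, 0xb4 };
	walk_options(opts, sizeof(opts));
	return 0;
}
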
*/ -	if (tcp_fast_parse_options(skb, th, tp, &hash_location) && -	    tp->rx_opt.saw_tstamp && +	if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&  	    tcp_paws_discard(sk, skb)) {  		if (!th->rst) {  			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); @@ -5566,6 +5273,7 @@ step5:  	return 0;  csum_error: +	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);  	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);  discard: @@ -5624,12 +5332,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,  	if (mss == tp->rx_opt.user_mss) {  		struct tcp_options_received opt; -		const u8 *hash_location;  		/* Get original SYNACK MSS value if user MSS sets mss_clamp */  		tcp_clear_options(&opt);  		opt.user_mss = opt.mss_clamp = 0; -		tcp_parse_options(synack, &opt, &hash_location, 0, NULL); +		tcp_parse_options(synack, &opt, 0, NULL);  		mss = opt.mss_clamp;  	} @@ -5660,14 +5367,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,  static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,  					 const struct tcphdr *th, unsigned int len)  { -	const u8 *hash_location;  	struct inet_connection_sock *icsk = inet_csk(sk);  	struct tcp_sock *tp = tcp_sk(sk); -	struct tcp_cookie_values *cvp = tp->cookie_values;  	struct tcp_fastopen_cookie foc = { .len = -1 };  	int saved_clamp = tp->rx_opt.mss_clamp; -	tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc); +	tcp_parse_options(skb, &tp->rx_opt, 0, &foc);  	if (tp->rx_opt.saw_tstamp)  		tp->rx_opt.rcv_tsecr -= tp->tsoffset; @@ -5764,30 +5469,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,  		 * is initialized. */  		tp->copied_seq = tp->rcv_nxt; -		if (cvp != NULL && -		    cvp->cookie_pair_size > 0 && -		    tp->rx_opt.cookie_plus > 0) { -			int cookie_size = tp->rx_opt.cookie_plus -					- TCPOLEN_COOKIE_BASE; -			int cookie_pair_size = cookie_size -					     + cvp->cookie_desired; - -			/* A cookie extension option was sent and returned. -			 * Note that each incoming SYNACK replaces the -			 * Responder cookie.  The initial exchange is most -			 * fragile, as protection against spoofing relies -			 * entirely upon the sequence and timestamp (above). -			 * This replacement strategy allows the correct pair to -			 * pass through, while any others will be filtered via -			 * Responder verification later. 
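
Alongside the cookie-pair removal quoted here, this file also gains TCP_MIB_CSUMERRORS at its csum_error labels (above, and again in tcp_ipv4.c below). The dedicated counter is bumped together with the legacy INERRS aggregate so existing tooling keeps seeing the same totals; a toy version of that double accounting:

#include <stdio.h>

enum { MIB_CSUMERRORS, MIB_INERRS, MIB_MAX };
static unsigned long mib[MIB_MAX];

static void count_csum_error(void)
{
	mib[MIB_CSUMERRORS]++;	/* new, checksum-specific counter */
	mib[MIB_INERRS]++;	/* legacy catch-all still ticks */
}

int main(void)
{
	count_csum_error();
	printf("csum=%lu inerrs=%lu\n", mib[MIB_CSUMERRORS], mib[MIB_INERRS]);
	return 0;
}
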
-			 */ -			if (sizeof(cvp->cookie_pair) >= cookie_pair_size) { -				memcpy(&cvp->cookie_pair[cvp->cookie_desired], -				       hash_location, cookie_size); -				cvp->cookie_pair_size = cookie_pair_size; -			} -		} -  		smp_mb();  		tcp_finish_connect(sk, skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d09203c6326..d979657b8a1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -838,7 +838,6 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,   */  static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,  			      struct request_sock *req, -			      struct request_values *rvp,  			      u16 queue_mapping,  			      bool nocache)  { @@ -851,7 +850,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,  	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)  		return -1; -	skb = tcp_make_synack(sk, dst, req, rvp, NULL); +	skb = tcp_make_synack(sk, dst, req, NULL);  	if (skb) {  		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); @@ -868,10 +867,9 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,  	return err;  } -static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, -			     struct request_values *rvp) +static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)  { -	int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false); +	int res = tcp_v4_send_synack(sk, NULL, req, 0, false);  	if (!res)  		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); @@ -1371,8 +1369,7 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,  static int tcp_v4_conn_req_fastopen(struct sock *sk,  				    struct sk_buff *skb,  				    struct sk_buff *skb_synack, -				    struct request_sock *req, -				    struct request_values *rvp) +				    struct request_sock *req)  {  	struct tcp_sock *tp = tcp_sk(sk);  	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; @@ -1467,9 +1464,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,  int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)  { -	struct tcp_extend_values tmp_ext;  	struct tcp_options_received tmp_opt; -	const u8 *hash_location;  	struct request_sock *req;  	struct inet_request_sock *ireq;  	struct tcp_sock *tp = tcp_sk(sk); @@ -1519,42 +1514,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)  	tcp_clear_options(&tmp_opt);  	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;  	tmp_opt.user_mss  = tp->rx_opt.user_mss; -	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, -	    want_cookie ? NULL : &foc); - -	if (tmp_opt.cookie_plus > 0 && -	    tmp_opt.saw_tstamp && -	    !tp->rx_opt.cookie_out_never && -	    (sysctl_tcp_cookie_size > 0 || -	     (tp->cookie_values != NULL && -	      tp->cookie_values->cookie_desired > 0))) { -		u8 *c; -		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; -		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; - -		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) -			goto drop_and_release; - -		/* Secret recipe starts with IP addresses */ -		*mess++ ^= (__force u32)daddr; -		*mess++ ^= (__force u32)saddr; - -		/* plus variable length Initiator Cookie */ -		c = (u8 *)mess; -		while (l-- > 0) -			*c++ ^= *hash_location++; - -		want_cookie = false;	/* not our kind of cookie */ -		tmp_ext.cookie_out_never = 0; /* false */ -		tmp_ext.cookie_plus = tmp_opt.cookie_plus; -	} else if (!tp->rx_opt.cookie_in_always) { -		/* redundant indications, but ensure initialization. 
*/ -		tmp_ext.cookie_out_never = 1; /* true */ -		tmp_ext.cookie_plus = 0; -	} else { -		goto drop_and_release; -	} -	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; +	tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);  	if (want_cookie && !tmp_opt.saw_tstamp)  		tcp_clear_options(&tmp_opt); @@ -1636,7 +1596,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)  	 * of tcp_v4_send_synack()->tcp_select_initial_window().  	 */  	skb_synack = tcp_make_synack(sk, dst, req, -	    (struct request_values *)&tmp_ext,  	    fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);  	if (skb_synack) { @@ -1660,8 +1619,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)  		if (fastopen_cookie_present(&foc) && foc.len != 0)  			NET_INC_STATS_BH(sock_net(sk),  			    LINUX_MIB_TCPFASTOPENPASSIVEFAIL); -	} else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req, -	    (struct request_values *)&tmp_ext)) +	} else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))  		goto drop_and_free;  	return 0; @@ -1908,6 +1866,7 @@ discard:  	return 0;  csum_err: +	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);  	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);  	goto discard;  } @@ -1950,6 +1909,51 @@ void tcp_v4_early_demux(struct sk_buff *skb)  	}  } +/* Packet is added to VJ-style prequeue for processing in process + * context, if a reader task is waiting. Apparently, this exciting + * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93) + * failed somewhere. Latency? Burstiness? Well, at least now we will + * see, why it failed. 8)8)				  --ANK + * + */ +bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) +{ +	struct tcp_sock *tp = tcp_sk(sk); + +	if (sysctl_tcp_low_latency || !tp->ucopy.task) +		return false; + +	if (skb->len <= tcp_hdrlen(skb) && +	    skb_queue_len(&tp->ucopy.prequeue) == 0) +		return false; + +	skb_dst_force(skb); +	__skb_queue_tail(&tp->ucopy.prequeue, skb); +	tp->ucopy.memory += skb->truesize; +	if (tp->ucopy.memory > sk->sk_rcvbuf) { +		struct sk_buff *skb1; + +		BUG_ON(sock_owned_by_user(sk)); + +		while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) { +			sk_backlog_rcv(sk, skb1); +			NET_INC_STATS_BH(sock_net(sk), +					 LINUX_MIB_TCPPREQUEUEDROPPED); +		} + +		tp->ucopy.memory = 0; +	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { +		wake_up_interruptible_sync_poll(sk_sleep(sk), +					   POLLIN | POLLRDNORM | POLLRDBAND); +		if (!inet_csk_ack_scheduled(sk)) +			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, +						  (3 * tcp_rto_min(sk)) / 4, +						  TCP_RTO_MAX); +	} +	return true; +} +EXPORT_SYMBOL(tcp_prequeue); +  /*   *	From tcp_input.c   */ @@ -1983,7 +1987,7 @@ int tcp_v4_rcv(struct sk_buff *skb)  	 * provided case of th->doff==0 is eliminated.  	 * So, we defer the checks. 
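
tcp_prequeue(), moved into this file above, admits a packet only when a reader is blocked in recvmsg; once the queued truesize would exceed sk_rcvbuf it drains everything through the normal receive path. A userspace mock of that policy, tracking byte counts only (no real skbs, wakeups, or locking):

#include <stdbool.h>
#include <stdio.h>

struct mock_sock {
	bool reader_waiting;
	int prequeue_len;
	int prequeue_mem;
	int rcvbuf;
};

static bool prequeue(struct mock_sock *sk, int truesize)
{
	if (!sk->reader_waiting)
		return false;		/* process in softirq as usual */
	sk->prequeue_len++;
	sk->prequeue_mem += truesize;
	if (sk->prequeue_mem > sk->rcvbuf) {
		printf("flush %d skbs via backlog\n", sk->prequeue_len);
		sk->prequeue_len = 0;
		sk->prequeue_mem = 0;
	} else if (sk->prequeue_len == 1) {
		printf("wake reader, stretch the delayed ACK\n");
	}
	return true;
}

int main(void)
{
	struct mock_sock sk = { .reader_waiting = true, .rcvbuf = 4096 };
	prequeue(&sk, 2000);	/* first skb: wake the reader */
	prequeue(&sk, 3000);	/* over rcvbuf: flush everything */
	return 0;
}
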
*/  	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb)) -		goto bad_packet; +		goto csum_error;  	th = tcp_hdr(skb);  	iph = ip_hdr(skb); @@ -2049,6 +2053,8 @@ no_tcp_socket:  		goto discard_it;  	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { +csum_error: +		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);  bad_packet:  		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);  	} else { @@ -2070,10 +2076,13 @@ do_time_wait:  		goto discard_it;  	} -	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { -		TCP_INC_STATS_BH(net, TCP_MIB_INERRS); +	if (skb->len < (th->doff << 2)) {  		inet_twsk_put(inet_twsk(sk)); -		goto discard_it; +		goto bad_packet; +	} +	if (tcp_checksum_complete(skb)) { +		inet_twsk_put(inet_twsk(sk)); +		goto csum_error;  	}  	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {  	case TCP_TW_SYN: { @@ -2197,12 +2206,6 @@ void tcp_v4_destroy_sock(struct sock *sk)  	if (inet_csk(sk)->icsk_bind_hash)  		inet_put_port(sk); -	/* TCP Cookie Transactions */ -	if (tp->cookie_values != NULL) { -		kref_put(&tp->cookie_values->kref, -			 tcp_cookie_values_release); -		tp->cookie_values = NULL; -	}  	BUG_ON(tp->fastopen_rsk != NULL);  	/* If socket is aborted during connect operation */ @@ -2659,7 +2662,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)  	__u16 srcp = ntohs(inet->inet_sport);  	int rx_queue; -	if (icsk->icsk_pending == ICSK_TIME_RETRANS) { +	if (icsk->icsk_pending == ICSK_TIME_RETRANS || +	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || +	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {  		timer_active	= 1;  		timer_expires	= icsk->icsk_timeout;  	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index b6f3583ddfe..da14436c173 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -64,7 +64,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)  {  	struct cg_proto *cg_proto;  	struct tcp_memcontrol *tcp; -	u64 val;  	cg_proto = tcp_prot.proto_cgroup(memcg);  	if (!cg_proto) @@ -72,8 +71,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)  	tcp = tcp_from_cgproto(cg_proto);  	percpu_counter_destroy(&tcp->tcp_sockets_allocated); - -	val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);  }  EXPORT_SYMBOL(tcp_destroy_cgroup); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b83a49cc381..0f017882725 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -93,13 +93,12 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,  			   const struct tcphdr *th)  {  	struct tcp_options_received tmp_opt; -	const u8 *hash_location;  	struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);  	bool paws_reject = false;  	tmp_opt.saw_tstamp = 0;  	if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { -		tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); +		tcp_parse_options(skb, &tmp_opt, 0, NULL);  		if (tmp_opt.saw_tstamp) {  			tmp_opt.rcv_tsecr	-= tcptw->tw_ts_offset; @@ -388,32 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,  		struct tcp_request_sock *treq = tcp_rsk(req);  		struct inet_connection_sock *newicsk = inet_csk(newsk);  		struct tcp_sock *newtp = tcp_sk(newsk); -		struct tcp_sock *oldtp = tcp_sk(sk); -		struct tcp_cookie_values *oldcvp = oldtp->cookie_values; - -		/* TCP Cookie Transactions require space for the cookie pair, -		 * as it differs for each connection.  
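
The get_tcp4_sock() hunk above makes /proc/net/tcp report a pending early-retransmit or loss-probe timer the same way as a plain retransmit timer. A sketch of the resulting mapping (the 1/4 codes follow the existing /proc convention; the enum values here are local to the sketch):

#include <stdio.h>

enum { T_NONE, T_RETRANS, T_PROBE0, T_EARLY_RETRANS, T_LOSS_PROBE };

/* 1 = retransmit-class timer pending (now including ER and TLP),
 * 4 = zero-window probe, 0 = nothing armed. */
static int timer_active(int pending)
{
	switch (pending) {
	case T_RETRANS:
	case T_EARLY_RETRANS:
	case T_LOSS_PROBE:
		return 1;
	case T_PROBE0:
		return 4;
	default:
		return 0;
	}
}

int main(void)
{
	printf("%d\n", timer_active(T_LOSS_PROBE));	/* shown as 1 */
	return 0;
}
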
There is no need to -		 * copy any s_data_payload stored at the original socket. -		 * Failure will prevent resuming the connection. -		 * -		 * Presumed copied, in order of appearance: -		 *	cookie_in_always, cookie_out_never -		 */ -		if (oldcvp != NULL) { -			struct tcp_cookie_values *newcvp = -				kzalloc(sizeof(*newtp->cookie_values), -					GFP_ATOMIC); - -			if (newcvp != NULL) { -				kref_init(&newcvp->kref); -				newcvp->cookie_desired = -						oldcvp->cookie_desired; -				newtp->cookie_values = newcvp; -			} else { -				/* Not Yet Implemented */ -				newtp->cookie_values = NULL; -			} -		}  		/* Now setup tcp_sock */  		newtp->pred_flags = 0; @@ -422,8 +395,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,  		newtp->rcv_nxt = treq->rcv_isn + 1;  		newtp->snd_sml = newtp->snd_una = -		newtp->snd_nxt = newtp->snd_up = -			treq->snt_isn + 1 + tcp_s_data_size(oldtp); +		newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;  		tcp_prequeue_init(newtp);  		INIT_LIST_HEAD(&newtp->tsq_node); @@ -440,6 +412,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,  		newtp->fackets_out = 0;  		newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;  		tcp_enable_early_retrans(newtp); +		newtp->tlp_high_seq = 0;  		/* So many TCP implementations out there (incorrectly) count the  		 * initial SYN frame in their delayed-ACK and congestion control @@ -449,9 +422,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,  		newtp->snd_cwnd = TCP_INIT_CWND;  		newtp->snd_cwnd_cnt = 0; -		newtp->frto_counter = 0; -		newtp->frto_highmark = 0; -  		if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops &&  		    !try_module_get(newicsk->icsk_ca_ops->owner))  			newicsk->icsk_ca_ops = &tcp_init_congestion_ops; @@ -459,8 +429,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,  		tcp_set_ca_state(newsk, TCP_CA_Open);  		tcp_init_xmit_timers(newsk);  		skb_queue_head_init(&newtp->out_of_order_queue); -		newtp->write_seq = newtp->pushed_seq = -			treq->snt_isn + 1 + tcp_s_data_size(oldtp); +		newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;  		newtp->rx_opt.saw_tstamp = 0; @@ -537,7 +506,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,  			   bool fastopen)  {  	struct tcp_options_received tmp_opt; -	const u8 *hash_location;  	struct sock *child;  	const struct tcphdr *th = tcp_hdr(skb);  	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); @@ -547,7 +515,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,  	tmp_opt.saw_tstamp = 0;  	if (th->doff > (sizeof(struct tcphdr)>>2)) { -		tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); +		tcp_parse_options(skb, &tmp_opt, 0, NULL);  		if (tmp_opt.saw_tstamp) {  			tmp_opt.ts_recent = req->ts_recent; @@ -583,8 +551,13 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,  		 *  		 * Note that even if there is new data in the SYN packet  		 * they will be thrown away too. +		 * +		 * Reset timer after retransmitting SYNACK, similar to +		 * the idea of fast retransmit in recovery.  		 
*/ -		inet_rtx_syn_ack(sk, req); +		if (!inet_rtx_syn_ack(sk, req)) +			req->expires = min(TCP_TIMEOUT_INIT << req->num_timeout, +					   TCP_RTO_MAX) + jiffies;  		return NULL;  	} @@ -647,7 +620,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,  	 */  	if ((flg & TCP_FLAG_ACK) && !fastopen &&  	    (TCP_SKB_CB(skb)->ack_seq != -	     tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) +	     tcp_rsk(req)->snt_isn + 1))  		return sk;  	/* Also, it would be not so bad idea to check rcv_tsecr, which diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 509912a5ff9..536d40929ba 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -65,28 +65,24 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;  /* By default, RFC2861 behavior.  */  int sysctl_tcp_slow_start_after_idle __read_mostly = 1; -int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */ -EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); -  static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,  			   int push_one, gfp_t gfp);  /* Account for new data that has been sent to the network. */  static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)  { +	struct inet_connection_sock *icsk = inet_csk(sk);  	struct tcp_sock *tp = tcp_sk(sk);  	unsigned int prior_packets = tp->packets_out;  	tcp_advance_send_head(sk, skb);  	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; -	/* Don't override Nagle indefinitely with F-RTO */ -	if (tp->frto_counter == 2) -		tp->frto_counter = 3; -  	tp->packets_out += tcp_skb_pcount(skb); -	if (!prior_packets || tp->early_retrans_delayed) +	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || +	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {  		tcp_rearm_rto(sk); +	}  }  /* SND.NXT, if window was not shrunk. @@ -384,7 +380,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)  #define OPTION_TS		(1 << 1)  #define OPTION_MD5		(1 << 2)  #define OPTION_WSCALE		(1 << 3) -#define OPTION_COOKIE_EXTENSION	(1 << 4)  #define OPTION_FAST_OPEN_COOKIE	(1 << 8)  struct tcp_out_options { @@ -398,36 +393,6 @@ struct tcp_out_options {  	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast open cookie */  }; -/* The sysctl int routines are generic, so check consistency here. - */ -static u8 tcp_cookie_size_check(u8 desired) -{ -	int cookie_size; - -	if (desired > 0) -		/* previously specified */ -		return desired; - -	cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size); -	if (cookie_size <= 0) -		/* no default specified */ -		return 0; - -	if (cookie_size <= TCP_COOKIE_MIN) -		/* value too small, specify minimum */ -		return TCP_COOKIE_MIN; - -	if (cookie_size >= TCP_COOKIE_MAX) -		/* value too large, specify maximum */ -		return TCP_COOKIE_MAX; - -	if (cookie_size & 1) -		/* 8-bit multiple, illegal, fix it */ -		cookie_size++; - -	return (u8)cookie_size; -} -  /* Write previously computed TCP options to the packet.   *   * Beware: Something in the Internet is very sensitive to the ordering of @@ -446,27 +411,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,  {  	u16 options = opts->options;	/* mungable copy */ -	/* Having both authentication and cookies for security is redundant, -	 * and there's certainly not enough room.  Instead, the cookie-less -	 * extension variant is proposed. -	 * -	 * Consider the pessimal case with authentication.  
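
Returning to the tcp_check_req() change above: after retransmitting a SYNACK in response to a retried SYN, the request timer is re-armed with the usual exponential backoff, clamped at TCP_RTO_MAX. The same arithmetic in seconds rather than jiffies (the constants are stand-ins):

#include <stdio.h>

#define TIMEOUT_INIT	1	/* s, stand-in for TCP_TIMEOUT_INIT */
#define RTO_MAX		120	/* s, stand-in for TCP_RTO_MAX */

static unsigned int synack_timeout(unsigned int num_timeout)
{
	unsigned int t = TIMEOUT_INIT << num_timeout;
	return t < RTO_MAX ? t : RTO_MAX;
}

int main(void)
{
	for (unsigned int n = 0; n <= 8; n++)
		printf("retry %u -> %us\n", n, synack_timeout(n));
	return 0;
}
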
The options -	 * could look like: -	 *   COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40 -	 */  	if (unlikely(OPTION_MD5 & options)) { -		if (unlikely(OPTION_COOKIE_EXTENSION & options)) { -			*ptr++ = htonl((TCPOPT_COOKIE << 24) | -				       (TCPOLEN_COOKIE_BASE << 16) | -				       (TCPOPT_MD5SIG << 8) | -				       TCPOLEN_MD5SIG); -		} else { -			*ptr++ = htonl((TCPOPT_NOP << 24) | -				       (TCPOPT_NOP << 16) | -				       (TCPOPT_MD5SIG << 8) | -				       TCPOLEN_MD5SIG); -		} -		options &= ~OPTION_COOKIE_EXTENSION; +		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | +			       (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);  		/* overload cookie hash location */  		opts->hash_location = (__u8 *)ptr;  		ptr += 4; @@ -495,44 +442,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,  		*ptr++ = htonl(opts->tsecr);  	} -	/* Specification requires after timestamp, so do it now. -	 * -	 * Consider the pessimal case without authentication.  The options -	 * could look like: -	 *   MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40 -	 */ -	if (unlikely(OPTION_COOKIE_EXTENSION & options)) { -		__u8 *cookie_copy = opts->hash_location; -		u8 cookie_size = opts->hash_size; - -		/* 8-bit multiple handled in tcp_cookie_size_check() above, -		 * and elsewhere. -		 */ -		if (0x2 & cookie_size) { -			__u8 *p = (__u8 *)ptr; - -			/* 16-bit multiple */ -			*p++ = TCPOPT_COOKIE; -			*p++ = TCPOLEN_COOKIE_BASE + cookie_size; -			*p++ = *cookie_copy++; -			*p++ = *cookie_copy++; -			ptr++; -			cookie_size -= 2; -		} else { -			/* 32-bit multiple */ -			*ptr++ = htonl(((TCPOPT_NOP << 24) | -					(TCPOPT_NOP << 16) | -					(TCPOPT_COOKIE << 8) | -					TCPOLEN_COOKIE_BASE) + -				       cookie_size); -		} - -		if (cookie_size > 0) { -			memcpy(ptr, cookie_copy, cookie_size); -			ptr += (cookie_size / 4); -		} -	} -  	if (unlikely(OPTION_SACK_ADVERTISE & options)) {  		*ptr++ = htonl((TCPOPT_NOP << 24) |  			       (TCPOPT_NOP << 16) | @@ -591,11 +500,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,  				struct tcp_md5sig_key **md5)  {  	struct tcp_sock *tp = tcp_sk(sk); -	struct tcp_cookie_values *cvp = tp->cookie_values;  	unsigned int remaining = MAX_TCP_OPTION_SPACE; -	u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? -			 tcp_cookie_size_check(cvp->cookie_desired) : -			 0;  	struct tcp_fastopen_request *fastopen = tp->fastopen_req;  #ifdef CONFIG_TCP_MD5SIG @@ -647,52 +552,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,  			tp->syn_fastopen = 1;  		}  	} -	/* Note that timestamps are required by the specification. -	 * -	 * Odd numbers of bytes are prohibited by the specification, ensuring -	 * that the cookie is 16-bit aligned, and the resulting cookie pair is -	 * 32-bit aligned. -	 */ -	if (*md5 == NULL && -	    (OPTION_TS & opts->options) && -	    cookie_size > 0) { -		int need = TCPOLEN_COOKIE_BASE + cookie_size; - -		if (0x2 & need) { -			/* 32-bit multiple */ -			need += 2; /* NOPs */ - -			if (need > remaining) { -				/* try shrinking cookie to fit */ -				cookie_size -= 2; -				need -= 4; -			} -		} -		while (need > remaining && TCP_COOKIE_MIN <= cookie_size) { -			cookie_size -= 4; -			need -= 4; -		} -		if (TCP_COOKIE_MIN <= cookie_size) { -			opts->options |= OPTION_COOKIE_EXTENSION; -			opts->hash_location = (__u8 *)&cvp->cookie_pair[0]; -			opts->hash_size = cookie_size; - -			/* Remember for future incarnations. 
*/ -			cvp->cookie_desired = cookie_size; -			if (cvp->cookie_desired != cvp->cookie_pair_size) { -				/* Currently use random bytes as a nonce, -				 * assuming these are completely unpredictable -				 * by hostile users of the same system. -				 */ -				get_random_bytes(&cvp->cookie_pair[0], -						 cookie_size); -				cvp->cookie_pair_size = cookie_size; -			} - -			remaining -= need; -		} -	}  	return MAX_TCP_OPTION_SPACE - remaining;  } @@ -702,14 +562,10 @@ static unsigned int tcp_synack_options(struct sock *sk,  				   unsigned int mss, struct sk_buff *skb,  				   struct tcp_out_options *opts,  				   struct tcp_md5sig_key **md5, -				   struct tcp_extend_values *xvp,  				   struct tcp_fastopen_cookie *foc)  {  	struct inet_request_sock *ireq = inet_rsk(req);  	unsigned int remaining = MAX_TCP_OPTION_SPACE; -	u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? -			 xvp->cookie_plus : -			 0;  #ifdef CONFIG_TCP_MD5SIG  	*md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req); @@ -757,28 +613,7 @@ static unsigned int tcp_synack_options(struct sock *sk,  			remaining -= need;  		}  	} -	/* Similar rationale to tcp_syn_options() applies here, too. -	 * If the <SYN> options fit, the same options should fit now! -	 */ -	if (*md5 == NULL && -	    ireq->tstamp_ok && -	    cookie_plus > TCPOLEN_COOKIE_BASE) { -		int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */ -		if (0x2 & need) { -			/* 32-bit multiple */ -			need += 2; /* NOPs */ -		} -		if (need <= remaining) { -			opts->options |= OPTION_COOKIE_EXTENSION; -			opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE; -			remaining -= need; -		} else { -			/* There's no error return, so flag it. */ -			xvp->cookie_out_never = 1; /* true */ -			opts->hash_size = 0; -		} -	}  	return MAX_TCP_OPTION_SPACE - remaining;  } @@ -953,7 +788,7 @@ void __init tcp_tasklet_init(void)   * We cant xmit new skbs from this context, as we might already   * hold qdisc lock.   */ -static void tcp_wfree(struct sk_buff *skb) +void tcp_wfree(struct sk_buff *skb)  {  	struct sock *sk = skb->sk;  	struct tcp_sock *tp = tcp_sk(sk); @@ -1012,6 +847,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,  		__net_timestamp(skb);  	if (likely(clone_it)) { +		const struct sk_buff *fclone = skb + 1; + +		if (unlikely(skb->fclone == SKB_FCLONE_ORIG && +			     fclone->fclone == SKB_FCLONE_CLONE)) +			NET_INC_STATS_BH(sock_net(sk), +					 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); +  		if (unlikely(skb_cloned(skb)))  			skb = pskb_copy(skb, gfp_mask);  		else @@ -1632,11 +1474,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf  	if (nonagle & TCP_NAGLE_PUSH)  		return true; -	/* Don't use the nagle rule for urgent data (or for the final FIN). -	 * Nagle can be ignored during F-RTO too (see RFC4138). -	 */ -	if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || -	    (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) +	/* Don't use the nagle rule for urgent data (or for the final FIN). */ +	if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))  		return true;  	if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) @@ -1961,6 +1800,9 @@ static int tcp_mtu_probe(struct sock *sk)   * snd_up-64k-mss .. snd_up cannot be large. However, taking into   * account rare use of URG, this is not a big flaw.   * + * Send at most one packet when push_one > 0. Temporarily ignore + * cwnd limit to force at most one packet out when push_one == 2. 
+   * Returns true, if no segments are in flight and we have queued segments,   * but cannot send anything now because of SWS or another problem.   */ @@ -1996,8 +1838,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,  			goto repair; /* Skip network transmission */  		cwnd_quota = tcp_cwnd_test(tp, skb); -		if (!cwnd_quota) -			break; +		if (!cwnd_quota) { +			if (push_one == 2) +				/* Force out a loss probe pkt. */ +				cwnd_quota = 1; +			else +				break; +		}  		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))  			break; @@ -2051,10 +1898,129 @@ repair:  	if (likely(sent_pkts)) {  		if (tcp_in_cwnd_reduction(sk))  			tp->prr_out += sent_pkts; + +		/* Send one loss probe per tail loss episode. */ +		if (push_one != 2) +			tcp_schedule_loss_probe(sk);  		tcp_cwnd_validate(sk);  		return false;  	} -	return !tp->packets_out && tcp_send_head(sk); +	return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk)); +} + +bool tcp_schedule_loss_probe(struct sock *sk) +{ +	struct inet_connection_sock *icsk = inet_csk(sk); +	struct tcp_sock *tp = tcp_sk(sk); +	u32 timeout, tlp_time_stamp, rto_time_stamp; +	u32 rtt = tp->srtt >> 3; + +	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS)) +		return false; +	/* No consecutive loss probes. */ +	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { +		tcp_rearm_rto(sk); +		return false; +	} +	/* Don't do any loss probe on a Fast Open connection before 3WHS +	 * finishes. +	 */ +	if (sk->sk_state == TCP_SYN_RECV) +		return false; + +	/* TLP is only scheduled when next timer event is RTO. */ +	if (icsk->icsk_pending != ICSK_TIME_RETRANS) +		return false; + +	/* Schedule a loss probe in 2*RTT for SACK capable connections +	 * in Open state, that are either limited by cwnd or application. +	 */ +	if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out || +	    !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open) +		return false; + +	if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && +	     tcp_send_head(sk)) +		return false; + +	/* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account +	 * for delayed ack when there's one outstanding packet. +	 */ +	timeout = rtt << 1; +	if (tp->packets_out == 1) +		timeout = max_t(u32, timeout, +				(rtt + (rtt >> 1) + TCP_DELACK_MAX)); +	timeout = max_t(u32, timeout, msecs_to_jiffies(10)); + +	/* If RTO is shorter, just schedule TLP in its place. */ +	tlp_time_stamp = tcp_time_stamp + timeout; +	rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout; +	if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) { +		s32 delta = rto_time_stamp - tcp_time_stamp; +		if (delta > 0) +			timeout = delta; +	} + +	inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, +				  TCP_RTO_MAX); +	return true; +} + +/* When probe timeout (PTO) fires, send a new segment if one exists, else + * retransmit the last segment. + */ +void tcp_send_loss_probe(struct sock *sk) +{ +	struct tcp_sock *tp = tcp_sk(sk); +	struct sk_buff *skb; +	int pcount; +	int mss = tcp_current_mss(sk); +	int err = -1; + +	if (tcp_send_head(sk) != NULL) { +		err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); +		goto rearm_timer; +	} + +	/* At most one outstanding TLP retransmission. */ +	if (tp->tlp_high_seq) +		goto rearm_timer; + +	/* Retransmit last segment. 
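
The probe timeout computed by tcp_schedule_loss_probe() above, redone in milliseconds: PTO = max(2*RTT, 1.5*RTT + delayed-ACK allowance) with a 10 ms floor, never scheduled past the pending RTO. The kernel works in jiffies from srtt>>3; TCP_DELACK_MAX is taken as 200 ms here for illustration:

#include <stdio.h>

#define DELACK_MAX_MS	200	/* stand-in for TCP_DELACK_MAX */

static unsigned int tlp_timeout_ms(unsigned int rtt_ms,
				   unsigned int packets_out,
				   unsigned int rto_remaining_ms)
{
	unsigned int timeout = rtt_ms << 1;		/* 2 * RTT */

	if (packets_out == 1) {
		/* one packet in flight: wait out a delayed ACK too */
		unsigned int one = rtt_ms + (rtt_ms >> 1) + DELACK_MAX_MS;
		if (one > timeout)
			timeout = one;
	}
	if (timeout < 10)
		timeout = 10;
	if (timeout > rto_remaining_ms)
		timeout = rto_remaining_ms;	/* RTO fires first: use it */
	return timeout;
}

int main(void)
{
	/* RTT 100ms, several packets out, RTO 400ms away -> PTO 200ms */
	printf("%ums\n", tlp_timeout_ms(100, 4, 400));
	/* single packet: 100 + 50 + 200 = 350ms beats 2*RTT */
	printf("%ums\n", tlp_timeout_ms(100, 1, 400));
	return 0;
}
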
*/ +	skb = tcp_write_queue_tail(sk); +	if (WARN_ON(!skb)) +		goto rearm_timer; + +	pcount = tcp_skb_pcount(skb); +	if (WARN_ON(!pcount)) +		goto rearm_timer; + +	if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { +		if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss))) +			goto rearm_timer; +		skb = tcp_write_queue_tail(sk); +	} + +	if (WARN_ON(!skb || !tcp_skb_pcount(skb))) +		goto rearm_timer; + +	/* Probe with zero data doesn't trigger fast recovery. */ +	if (skb->len > 0) +		err = __tcp_retransmit_skb(sk, skb); + +	/* Record snd_nxt for loss detection. */ +	if (likely(!err)) +		tp->tlp_high_seq = tp->snd_nxt; + +rearm_timer: +	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, +				  inet_csk(sk)->icsk_rto, +				  TCP_RTO_MAX); + +	if (likely(!err)) +		NET_INC_STATS_BH(sock_net(sk), +				 LINUX_MIB_TCPLOSSPROBES); +	return;  }  /* Push out any pending frames which were held back due to @@ -2679,32 +2645,24 @@ int tcp_send_synack(struct sock *sk)   * sk: listener socket   * dst: dst entry attached to the SYNACK   * req: request_sock pointer - * rvp: request_values pointer   *   * Allocate one skb and build a SYNACK packet.   * @dst is consumed : Caller should not use it again.   */  struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,  				struct request_sock *req, -				struct request_values *rvp,  				struct tcp_fastopen_cookie *foc)  {  	struct tcp_out_options opts; -	struct tcp_extend_values *xvp = tcp_xv(rvp);  	struct inet_request_sock *ireq = inet_rsk(req);  	struct tcp_sock *tp = tcp_sk(sk); -	const struct tcp_cookie_values *cvp = tp->cookie_values;  	struct tcphdr *th;  	struct sk_buff *skb;  	struct tcp_md5sig_key *md5;  	int tcp_header_size;  	int mss; -	int s_data_desired = 0; -	if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) -		s_data_desired = cvp->s_data_desired; -	skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, -			sk_gfp_atomic(sk, GFP_ATOMIC)); +	skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC));  	if (unlikely(!skb)) {  		dst_release(dst);  		return NULL; @@ -2747,9 +2705,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,  	else  #endif  	TCP_SKB_CB(skb)->when = tcp_time_stamp; -	tcp_header_size = tcp_synack_options(sk, req, mss, -					     skb, &opts, &md5, xvp, foc) -			+ sizeof(*th); +	tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5, +					     foc) + sizeof(*th);  	skb_push(skb, tcp_header_size);  	skb_reset_transport_header(skb); @@ -2767,40 +2724,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,  	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,  			     TCPHDR_SYN | TCPHDR_ACK); -	if (OPTION_COOKIE_EXTENSION & opts.options) { -		if (s_data_desired) { -			u8 *buf = skb_put(skb, s_data_desired); - -			/* copy data directly from the listening socket. */ -			memcpy(buf, cvp->s_data_payload, s_data_desired); -			TCP_SKB_CB(skb)->end_seq += s_data_desired; -		} - -		if (opts.hash_size > 0) { -			__u32 workspace[SHA_WORKSPACE_WORDS]; -			u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS]; -			u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1]; - -			/* Secret recipe depends on the Timestamp, (future) -			 * Sequence and Acknowledgment Numbers, Initiator -			 * Cookie, and others handled by IP variant caller. 
-			 */ -			*tail-- ^= opts.tsval; -			*tail-- ^= tcp_rsk(req)->rcv_isn + 1; -			*tail-- ^= TCP_SKB_CB(skb)->seq + 1; - -			/* recommended */ -			*tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source); -			*tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */ - -			sha_transform((__u32 *)&xvp->cookie_bakery[0], -				      (char *)mess, -				      &workspace[0]); -			opts.hash_location = -				(__u8 *)&xvp->cookie_bakery[0]; -		} -	} -  	th->seq = htonl(TCP_SKB_CB(skb)->seq);  	/* XXX data is queued and acked as is. No buffer/window check */  	th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b78aac30c49..4b85e6f636c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -342,10 +342,6 @@ void tcp_retransmit_timer(struct sock *sk)  	struct tcp_sock *tp = tcp_sk(sk);  	struct inet_connection_sock *icsk = inet_csk(sk); -	if (tp->early_retrans_delayed) { -		tcp_resume_early_retransmit(sk); -		return; -	}  	if (tp->fastopen_rsk) {  		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&  			     sk->sk_state != TCP_FIN_WAIT1); @@ -360,6 +356,8 @@ void tcp_retransmit_timer(struct sock *sk)  	WARN_ON(tcp_write_queue_empty(sk)); +	tp->tlp_high_seq = 0; +  	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&  	    !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {  		/* Receiver dastardly shrinks window. Our retransmits @@ -418,11 +416,7 @@ void tcp_retransmit_timer(struct sock *sk)  		NET_INC_STATS_BH(sock_net(sk), mib_idx);  	} -	if (tcp_use_frto(sk)) { -		tcp_enter_frto(sk); -	} else { -		tcp_enter_loss(sk, 0); -	} +	tcp_enter_loss(sk, 0);  	if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {  		/* Retransmission failed because of local congestion, @@ -495,13 +489,20 @@ void tcp_write_timer_handler(struct sock *sk)  	}  	event = icsk->icsk_pending; -	icsk->icsk_pending = 0;  	switch (event) { +	case ICSK_TIME_EARLY_RETRANS: +		tcp_resume_early_retransmit(sk); +		break; +	case ICSK_TIME_LOSS_PROBE: +		tcp_send_loss_probe(sk); +		break;  	case ICSK_TIME_RETRANS: +		icsk->icsk_pending = 0;  		tcp_retransmit_timer(sk);  		break;  	case ICSK_TIME_PROBE0: +		icsk->icsk_pending = 0;  		tcp_probe_timer(sk);  		break;  	} diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 1b91bf48e27..76a1e23259e 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -236,7 +236,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)  		tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);  		break; -	case CA_EVENT_FRTO: +	case CA_EVENT_LOSS:  		tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);  		/* Update RTT_min when next ack arrives */  		w->reset_rtt_min = 1; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0a073a26372..3159d16441d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -902,9 +902,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  	ipc.addr = inet->inet_saddr;  	ipc.oif = sk->sk_bound_dev_if; -	err = sock_tx_timestamp(sk, &ipc.tx_flags); -	if (err) -		return err; + +	sock_tx_timestamp(sk, &ipc.tx_flags); +  	if (msg->msg_controllen) {  		err = ip_cmsg_send(sock_net(sk), msg, &ipc);  		if (err) @@ -1131,6 +1131,8 @@ static unsigned int first_packet_length(struct sock *sk)  	spin_lock_bh(&rcvq->lock);  	while ((skb = skb_peek(rcvq)) != NULL &&  		udp_lib_checksum_complete(skb)) { +		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, +				 IS_UDPLITE(sk));  		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,  				 IS_UDPLITE(sk));  		
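/* [Editor's note -- annotation, not part of the patch.] Every UDP
 * checksum-failure path touched by this series gains a dedicated counter
 * next to the pre-existing generic one, so a bad checksum is deliberately
 * counted twice, e.g.:
 *
 *	UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, is_udplite);	// new
 *	UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, is_udplite);	// existing
 *
 * That lets userspace split checksum failures out of the aggregate
 * InErrors figure (exported as "InCsumErrors" alongside "InErrors" in the
 * SNMP MIB tables; the Udp6/UdpLite6/Icmp6 names are visible in the
 * net/ipv6/proc.c hunk further down -- the IPv4 naming is assumed here to
 * follow the same pattern).
 */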
atomic_inc(&sk->sk_drops); @@ -1286,8 +1288,10 @@ out:  csum_copy_err:  	slow = lock_sock_fast(sk); -	if (!skb_kill_datagram(sk, skb, flags)) +	if (!skb_kill_datagram(sk, skb, flags)) { +		UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);  		UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); +	}  	unlock_sock_fast(sk, slow);  	if (noblock) @@ -1513,7 +1517,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)  	if (rcu_access_pointer(sk->sk_filter) &&  	    udp_lib_checksum_complete(skb)) -		goto drop; +		goto csum_error;  	if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) @@ -1533,6 +1537,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)  	return rc; +csum_error: +	UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);  drop:  	UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);  	atomic_inc(&sk->sk_drops); @@ -1749,6 +1755,7 @@ csum_error:  		       proto == IPPROTO_UDPLITE ? "Lite" : "",  		       &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),  		       ulen); +	UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);  drop:  	UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);  	kfree_skb(skb); @@ -2279,31 +2286,88 @@ void __init udp_init(void)  int udp4_ufo_send_check(struct sk_buff *skb)  { -	const struct iphdr *iph; -	struct udphdr *uh; - -	if (!pskb_may_pull(skb, sizeof(*uh))) +	if (!pskb_may_pull(skb, sizeof(struct udphdr)))  		return -EINVAL; -	iph = ip_hdr(skb); -	uh = udp_hdr(skb); +	if (likely(!skb->encapsulation)) { +		const struct iphdr *iph; +		struct udphdr *uh; + +		iph = ip_hdr(skb); +		uh = udp_hdr(skb); -	uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, -				       IPPROTO_UDP, 0); -	skb->csum_start = skb_transport_header(skb) - skb->head; -	skb->csum_offset = offsetof(struct udphdr, check); -	skb->ip_summed = CHECKSUM_PARTIAL; +		uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, +				IPPROTO_UDP, 0); +		skb->csum_start = skb_transport_header(skb) - skb->head; +		skb->csum_offset = offsetof(struct udphdr, check); +		skb->ip_summed = CHECKSUM_PARTIAL; +	}  	return 0;  } +static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, +		netdev_features_t features) +{ +	struct sk_buff *segs = ERR_PTR(-EINVAL); +	int mac_len = skb->mac_len; +	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); +	int outer_hlen; +	netdev_features_t enc_features; + +	if (unlikely(!pskb_may_pull(skb, tnl_hlen))) +		goto out; + +	skb->encapsulation = 0; +	__skb_pull(skb, tnl_hlen); +	skb_reset_mac_header(skb); +	skb_set_network_header(skb, skb_inner_network_offset(skb)); +	skb->mac_len = skb_inner_network_offset(skb); + +	/* segment inner packet. */ +	enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); +	segs = skb_mac_gso_segment(skb, enc_features); +	if (!segs || IS_ERR(segs)) +		goto out; + +	outer_hlen = skb_tnl_header_len(skb); +	skb = segs; +	do { +		struct udphdr *uh; +		int udp_offset = outer_hlen - tnl_hlen; + +		skb->mac_len = mac_len; + +		skb_push(skb, outer_hlen); +		skb_reset_mac_header(skb); +		skb_set_network_header(skb, mac_len); +		skb_set_transport_header(skb, udp_offset); +		uh = udp_hdr(skb); +		uh->len = htons(skb->len - udp_offset); + +		/* csum segment if tunnel sets skb with csum. 
*/ +		if (unlikely(uh->check)) { +			struct iphdr *iph = ip_hdr(skb); + +			uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, +						       skb->len - udp_offset, +						       IPPROTO_UDP, 0); +			uh->check = csum_fold(skb_checksum(skb, udp_offset, +							   skb->len - udp_offset, 0)); +			if (uh->check == 0) +				uh->check = CSUM_MANGLED_0; + +		} +		skb->ip_summed = CHECKSUM_NONE; +	} while ((skb = skb->next)); +out: +	return segs; +} +  struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,  	netdev_features_t features)  {  	struct sk_buff *segs = ERR_PTR(-EINVAL);  	unsigned int mss; -	int offset; -	__wsum csum; -  	mss = skb_shinfo(skb)->gso_size;  	if (unlikely(skb->len <= mss))  		goto out; @@ -2313,6 +2377,7 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,  		int type = skb_shinfo(skb)->gso_type;  		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | +				      SKB_GSO_UDP_TUNNEL |  				      SKB_GSO_GRE) ||  			     !(type & (SKB_GSO_UDP))))  			goto out; @@ -2323,20 +2388,27 @@ struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,  		goto out;  	} -	/* Do software UFO. Complete and fill in the UDP checksum as HW cannot -	 * do checksum of UDP packets sent as multiple IP fragments. -	 */ -	offset = skb_checksum_start_offset(skb); -	csum = skb_checksum(skb, offset, skb->len - offset, 0); -	offset += skb->csum_offset; -	*(__sum16 *)(skb->data + offset) = csum_fold(csum); -	skb->ip_summed = CHECKSUM_NONE; -  	/* Fragment the skb. IP headers of the fragments are updated in  	 * inet_gso_segment()  	 */ -	segs = skb_segment(skb, features); +	if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) +		segs = skb_udp_tunnel_segment(skb, features); +	else { +		int offset; +		__wsum csum; + +		/* Do software UFO. Complete and fill in the UDP checksum as +		 * HW cannot do checksum of UDP packets sent as multiple +		 * IP fragments. 
+		 */ +		offset = skb_checksum_start_offset(skb); +		csum = skb_checksum(skb, offset, skb->len - offset, 0); +		offset += skb->csum_offset; +		*(__sum16 *)(skb->data + offset) = csum_fold(csum); +		skb->ip_summed = CHECKSUM_NONE; + +		segs = skb_segment(skb, features); +	}  out:  	return segs;  } - diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 505b30ad918..7927db0a927 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -25,7 +25,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,  		return 0;  	return inet_sk_diag_fill(sk, NULL, skb, req, -			sk_user_ns(NETLINK_CB(cb->skb).ssk), +			sk_user_ns(NETLINK_CB(cb->skb).sk),  			NETLINK_CB(cb->skb).portid,  			cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);  } @@ -64,14 +64,14 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,  		goto out;  	err = -ENOMEM; -	rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + -				     sizeof(struct inet_diag_meminfo) + -				     64)), GFP_KERNEL); +	rep = nlmsg_new(sizeof(struct inet_diag_msg) + +			sizeof(struct inet_diag_meminfo) + 64, +			GFP_KERNEL);  	if (!rep)  		goto out;  	err = inet_sk_diag_fill(sk, NULL, rep, req, -			   sk_user_ns(NETLINK_CB(in_skb).ssk), +			   sk_user_ns(NETLINK_CB(in_skb).sk),  			   NETLINK_CB(in_skb).portid,  			   nlh->nlmsg_seq, 0, nlh);  	if (err < 0) { diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index fe5189e2e11..eb1dd4d643f 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -103,8 +103,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)  	top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family); -	/* DS disclosed */ -	top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos, +	/* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */ +	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) +		top_iph->tos = 0; +	else +		top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos; +	top_iph->tos = INET_ECN_encapsulate(top_iph->tos,  					    XFRM_MODE_SKB_CB(skb)->tos);  	flags = x->props.flags; diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index ed0b9e2e797..11b13ea69db 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -156,6 +156,7 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION  config IPV6_SIT  	tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)"  	select INET_TUNNEL +	select NET_IP_TUNNEL  	select IPV6_NDISC_NODETYPE  	default y  	---help--- @@ -201,6 +202,7 @@ config IPV6_TUNNEL  config IPV6_GRE  	tristate "IPv6: GRE tunnel"  	select IPV6_TUNNEL +	select NET_IP_TUNNEL  	---help---  	  Tunneling means encapsulating data of one protocol type within  	  another protocol and sending it over a channel that understands the diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 309af19a0a0..9af088d2cda 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -40,7 +40,7 @@ obj-$(CONFIG_IPV6_SIT) += sit.o  obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o  obj-$(CONFIG_IPV6_GRE) += ip6_gre.o -obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o +obj-y += addrconf_core.o exthdrs_core.o ip6_checksum.o ip6_icmp.o  obj-$(CONFIG_INET) += output_core.o protocol.o $(ipv6-offload)  obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index dae802c0af7..d1ab6ab29a5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -70,6 +70,7 @@  #include <net/snmp.h>  #include <net/af_ieee802154.h> +#include <net/firewire.h>  #include <net/ipv6.h>  #include <net/protocol.h>  
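/* [Editor's note -- annotation, not part of the patch.] The addrconf.c
 * changes in this hunk add two related things: EUI-64 interface
 * identifiers for IEEE 1394 (FireWire) devices, and "tokenized" IIDs,
 * where userspace pins the low 64 bits of every autoconfigured address
 * and only the RA-supplied /64 prefix varies. For orientation, a minimal
 * sketch of the classic modified-EUI-64 construction from a 48-bit MAC
 * (RFC 4291, Appendix A); the helper name is hypothetical and purely
 * illustrative, not code from this patch:
 */
static inline void sketch_mac_to_modified_eui64(const unsigned char mac[6],
						unsigned char eui[8])
{
	eui[0] = mac[0] ^ 0x02;	/* flip the universal/local bit */
	eui[1] = mac[1];
	eui[2] = mac[2];
	eui[3] = 0xff;		/* fixed 0xFFFE filler in the middle */
	eui[4] = 0xfe;
	eui[5] = mac[3];
	eui[6] = mac[4];
	eui[7] = mac[5];
}
/* With a token set (see inet6_set_iftoken() below), addrconf_prefix_rcv()
 * skips this derivation entirely and copies token bytes 8..15 into the
 * new address instead, marking the result with ifp->tokenized so the
 * lifetimes of token-derived addresses can be revoked when the token
 * changes.
 */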
#include <net/ndisc.h> @@ -419,6 +420,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)  		ipv6_regen_rndid((unsigned long) ndev);  	}  #endif +	ndev->token = in6addr_any;  	if (netif_running(dev) && addrconf_qdisc_ok(dev))  		ndev->if_flags |= IF_READY; @@ -542,8 +544,7 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {  };  static int inet6_netconf_get_devconf(struct sk_buff *in_skb, -				     struct nlmsghdr *nlh, -				     void *arg) +				     struct nlmsghdr *nlh)  {  	struct net *net = sock_net(in_skb->sk);  	struct nlattr *tb[NETCONFA_MAX+1]; @@ -603,6 +604,77 @@ errout:  	return err;  } +static int inet6_netconf_dump_devconf(struct sk_buff *skb, +				      struct netlink_callback *cb) +{ +	struct net *net = sock_net(skb->sk); +	int h, s_h; +	int idx, s_idx; +	struct net_device *dev; +	struct inet6_dev *idev; +	struct hlist_head *head; + +	s_h = cb->args[0]; +	s_idx = idx = cb->args[1]; + +	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { +		idx = 0; +		head = &net->dev_index_head[h]; +		rcu_read_lock(); +		cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ +			  net->dev_base_seq; +		hlist_for_each_entry_rcu(dev, head, index_hlist) { +			if (idx < s_idx) +				goto cont; +			idev = __in6_dev_get(dev); +			if (!idev) +				goto cont; + +			if (inet6_netconf_fill_devconf(skb, dev->ifindex, +						       &idev->cnf, +						       NETLINK_CB(cb->skb).portid, +						       cb->nlh->nlmsg_seq, +						       RTM_NEWNETCONF, +						       NLM_F_MULTI, +						       -1) <= 0) { +				rcu_read_unlock(); +				goto done; +			} +			nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: +			idx++; +		} +		rcu_read_unlock(); +	} +	if (h == NETDEV_HASHENTRIES) { +		if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_ALL, +					       net->ipv6.devconf_all, +					       NETLINK_CB(cb->skb).portid, +					       cb->nlh->nlmsg_seq, +					       RTM_NEWNETCONF, NLM_F_MULTI, +					       -1) <= 0) +			goto done; +		else +			h++; +	} +	if (h == NETDEV_HASHENTRIES + 1) { +		if (inet6_netconf_fill_devconf(skb, NETCONFA_IFINDEX_DEFAULT, +					       net->ipv6.devconf_dflt, +					       NETLINK_CB(cb->skb).portid, +					       cb->nlh->nlmsg_seq, +					       RTM_NEWNETCONF, NLM_F_MULTI, +					       -1) <= 0) +			goto done; +		else +			h++; +	} +done: +	cb->args[0] = h; +	cb->args[1] = idx; + +	return skb->len; +} +  #ifdef CONFIG_SYSCTL  static void dev_forward_change(struct inet6_dev *idev)  { @@ -804,6 +876,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,  	ifa->prefix_len = pfxlen;  	ifa->flags = flags | IFA_F_TENTATIVE;  	ifa->cstamp = ifa->tstamp = jiffies; +	ifa->tokenized = false;  	ifa->rt = rt; @@ -1666,6 +1739,20 @@ static int addrconf_ifid_eui64(u8 *eui, struct net_device *dev)  	return 0;  } +static int addrconf_ifid_ieee1394(u8 *eui, struct net_device *dev) +{ +	union fwnet_hwaddr *ha; + +	if (dev->addr_len != FWNET_ALEN) +		return -1; + +	ha = (union fwnet_hwaddr *)dev->dev_addr; + +	memcpy(eui, &ha->uc.uniq_id, sizeof(ha->uc.uniq_id)); +	eui[0] ^= 2; +	return 0; +} +  static int addrconf_ifid_arcnet(u8 *eui, struct net_device *dev)  {  	/* XXX: inherit EUI-64 from other interface -- yoshfuji */ @@ -1730,6 +1817,8 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)  		return addrconf_ifid_gre(eui, dev);  	case ARPHRD_IEEE802154:  		return addrconf_ifid_eui64(eui, dev); +	case ARPHRD_IEEE1394: +		return addrconf_ifid_ieee1394(eui, dev);  	}  	return -1;  } @@ -2044,11 +2133,19 @@ void 
addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)  		struct inet6_ifaddr *ifp;  		struct in6_addr addr;  		int create = 0, update_lft = 0; +		bool tokenized = false;  		if (pinfo->prefix_len == 64) {  			memcpy(&addr, &pinfo->prefix, 8); -			if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && -			    ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { + +			if (!ipv6_addr_any(&in6_dev->token)) { +				read_lock_bh(&in6_dev->lock); +				memcpy(addr.s6_addr + 8, +				       in6_dev->token.s6_addr + 8, 8); +				read_unlock_bh(&in6_dev->lock); +				tokenized = true; +			} else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && +				   ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {  				in6_dev_put(in6_dev);  				return;  			} @@ -2089,6 +2186,7 @@ ok:  			update_lft = create = 1;  			ifp->cstamp = jiffies; +			ifp->tokenized = tokenized;  			addrconf_dad_start(ifp);  		} @@ -2598,7 +2696,8 @@ static void addrconf_dev_config(struct net_device *dev)  	    (dev->type != ARPHRD_FDDI) &&  	    (dev->type != ARPHRD_ARCNET) &&  	    (dev->type != ARPHRD_INFINIBAND) && -	    (dev->type != ARPHRD_IEEE802154)) { +	    (dev->type != ARPHRD_IEEE802154) && +	    (dev->type != ARPHRD_IEEE1394)) {  		/* Alas, we support only Ethernet autoconfiguration. */  		return;  	} @@ -3535,7 +3634,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {  };  static int -inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct ifaddrmsg *ifm; @@ -3601,7 +3700,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,  }  static int -inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct ifaddrmsg *ifm; @@ -3832,6 +3931,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,  						NLM_F_MULTI);  			if (err <= 0)  				break; +			nl_dump_check_consistent(cb, nlmsg_hdr(skb));  		}  		break;  	} @@ -3889,6 +3989,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,  	s_ip_idx = ip_idx = cb->args[2];  	rcu_read_lock(); +	cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^ net->dev_base_seq;  	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {  		idx = 0;  		head = &net->dev_index_head[h]; @@ -3940,8 +4041,7 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)  	return inet6_dump_addr(skb, cb, type);  } -static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, -			     void *arg) +static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(in_skb->sk);  	struct ifaddrmsg *ifm; @@ -4074,7 +4174,8 @@ static inline size_t inet6_ifla6_size(void)  	     + nla_total_size(sizeof(struct ifla_cacheinfo))  	     + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */  	     + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */ -	     + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */ +	     + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */ +	     + nla_total_size(sizeof(struct in6_addr)); /* IFLA_INET6_TOKEN */  }  static inline size_t inet6_if_nlmsg_size(void) @@ -4161,6 +4262,13 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)  		goto nla_put_failure;  	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, 
nla_len(nla)); +	nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr)); +	if (nla == NULL) +		goto nla_put_failure; +	read_lock_bh(&idev->lock); +	memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla)); +	read_unlock_bh(&idev->lock); +  	return 0;  nla_put_failure: @@ -4188,6 +4296,80 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)  	return 0;  } +static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) +{ +	struct inet6_ifaddr *ifp; +	struct net_device *dev = idev->dev; +	bool update_rs = false; + +	if (token == NULL) +		return -EINVAL; +	if (ipv6_addr_any(token)) +		return -EINVAL; +	if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) +		return -EINVAL; +	if (!ipv6_accept_ra(idev)) +		return -EINVAL; +	if (idev->cnf.rtr_solicits <= 0) +		return -EINVAL; + +	write_lock_bh(&idev->lock); + +	BUILD_BUG_ON(sizeof(token->s6_addr) != 16); +	memcpy(idev->token.s6_addr + 8, token->s6_addr + 8, 8); + +	write_unlock_bh(&idev->lock); + +	if (!idev->dead && (idev->if_flags & IF_READY)) { +		struct in6_addr ll_addr; + +		ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE | +				IFA_F_OPTIMISTIC); + +		/* If we're not ready, then normal ifup will take care +		 * of this. Otherwise, we need to request our rs here. +		 */ +		ndisc_send_rs(dev, &ll_addr, &in6addr_linklocal_allrouters); +		update_rs = true; +	} + +	write_lock_bh(&idev->lock); + +	if (update_rs) +		idev->if_flags |= IF_RS_SENT; + +	/* Well, that's kinda nasty ... */ +	list_for_each_entry(ifp, &idev->addr_list, if_list) { +		spin_lock(&ifp->lock); +		if (ifp->tokenized) { +			ifp->valid_lft = 0; +			ifp->prefered_lft = 0; +		} +		spin_unlock(&ifp->lock); +	} + +	write_unlock_bh(&idev->lock); +	return 0; +} + +static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) +{ +	int err = -EINVAL; +	struct inet6_dev *idev = __in6_dev_get(dev); +	struct nlattr *tb[IFLA_INET6_MAX + 1]; + +	if (!idev) +		return -EAFNOSUPPORT; + +	if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0) +		BUG(); + +	if (tb[IFLA_INET6_TOKEN]) +		err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN])); + +	return err; +} +  static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,  			     u32 portid, u32 seq, int event, unsigned int flags)  { @@ -4366,6 +4548,8 @@ errout:  static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)  { +	struct net *net = dev_net(ifp->idev->dev); +  	inet6_ifa_notify(event ? 
: RTM_NEWADDR, ifp);  	switch (event) { @@ -4391,6 +4575,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)  			dst_free(&ifp->rt->dst);  		break;  	} +	atomic_inc(&net->ipv6.dev_addr_genid);  }  static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) @@ -4871,6 +5056,7 @@ static struct rtnl_af_ops inet6_ops = {  	.family		  = AF_INET6,  	.fill_link_af	  = inet6_fill_link_af,  	.get_link_af_size = inet6_get_link_af_size, +	.set_link_af	  = inet6_set_link_af,  };  /* @@ -4943,7 +5129,7 @@ int __init addrconf_init(void)  	__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,  			inet6_dump_ifacaddr, NULL);  	__rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, -			NULL, NULL); +			inet6_netconf_dump_devconf, NULL);  	ipv6_addr_label_rtnl_register(); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index aad64352cb6..f083a583a05 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -414,8 +414,7 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = {  	[IFAL_LABEL]		= { .len = sizeof(u32), },  }; -static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, -			     void *arg) +static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct ifaddrlblmsg *ifal; @@ -436,10 +435,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,  	if (!tb[IFAL_ADDRESS])  		return -EINVAL; -  	pfx = nla_data(tb[IFAL_ADDRESS]); -	if (!pfx) -		return -EINVAL;  	if (!tb[IFAL_LABEL])  		return -EINVAL; @@ -533,8 +529,7 @@ static inline int ip6addrlbl_msgsize(void)  		+ nla_total_size(4);	/* IFAL_LABEL */  } -static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, -			  void *arg) +static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh)  {  	struct net *net = sock_net(in_skb->sk);  	struct ifaddrlblmsg *ifal; @@ -561,10 +556,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,  	if (!tb[IFAL_ADDRESS])  		return -EINVAL; -  	addr = nla_data(tb[IFAL_ADDRESS]); -	if (!addr) -		return -EINVAL;  	rcu_read_lock();  	p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 6b793bfc0e1..ab5c7ad482c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -49,7 +49,6 @@  #include <net/udp.h>  #include <net/udplite.h>  #include <net/tcp.h> -#include <net/ipip.h>  #include <net/protocol.h>  #include <net/inet_common.h>  #include <net/route.h> @@ -323,7 +322,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)  			struct net_device *dev = NULL;  			rcu_read_lock(); -			if (addr_type & IPV6_ADDR_LINKLOCAL) { +			if (__ipv6_addr_needs_scope_id(addr_type)) {  				if (addr_len >= sizeof(struct sockaddr_in6) &&  				    addr->sin6_scope_id) {  					/* Override any existing binding, if another one @@ -471,8 +470,8 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,  		sin->sin6_port = inet->inet_sport;  	} -	if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) -		sin->sin6_scope_id = sk->sk_bound_dev_if; +	sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, +						 sk->sk_bound_dev_if);  	*uaddr_len = sizeof(*sin);  	return 0;  } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index f5a54782a34..4b56cbbc789 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -124,7 +124,7 @@ ipv4_connected:  		goto out;  	} -	if (addr_type&IPV6_ADDR_LINKLOCAL) { +	if 
(__ipv6_addr_needs_scope_id(addr_type)) {  		if (addr_len >= sizeof(struct sockaddr_in6) &&  		    usin->sin6_scope_id) {  			if (sk->sk_bound_dev_if && @@ -355,18 +355,19 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)  		sin->sin6_family = AF_INET6;  		sin->sin6_flowinfo = 0;  		sin->sin6_port = serr->port; -		sin->sin6_scope_id = 0;  		if (skb->protocol == htons(ETH_P_IPV6)) {  			const struct ipv6hdr *ip6h = container_of((struct in6_addr *)(nh + serr->addr_offset),  								  struct ipv6hdr, daddr);  			sin->sin6_addr = ip6h->daddr;  			if (np->sndflow)  				sin->sin6_flowinfo = ip6_flowinfo(ip6h); -			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) -				sin->sin6_scope_id = IP6CB(skb)->iif; +			sin->sin6_scope_id = +				ipv6_iface_scope_id(&sin->sin6_addr, +						    IP6CB(skb)->iif);  		} else {  			ipv6_addr_set_v4mapped(*(__be32 *)(nh + serr->addr_offset),  					       &sin->sin6_addr); +			sin->sin6_scope_id = 0;  		}  	} @@ -376,18 +377,19 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)  	if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {  		sin->sin6_family = AF_INET6;  		sin->sin6_flowinfo = 0; -		sin->sin6_scope_id = 0;  		if (skb->protocol == htons(ETH_P_IPV6)) {  			sin->sin6_addr = ipv6_hdr(skb)->saddr;  			if (np->rxopt.all)  				ip6_datagram_recv_ctl(sk, msg, skb); -			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) -				sin->sin6_scope_id = IP6CB(skb)->iif; +			sin->sin6_scope_id = +				ipv6_iface_scope_id(&sin->sin6_addr, +						    IP6CB(skb)->iif);  		} else {  			struct inet_sock *inet = inet_sk(sk);  			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,  					       &sin->sin6_addr); +			sin->sin6_scope_id = 0;  			if (inet->cmsg_flags)  				ip_cmsg_recv(msg, skb);  		} @@ -592,7 +594,9 @@ int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,  			sin6.sin6_addr = ipv6_hdr(skb)->daddr;  			sin6.sin6_port = ports[1];  			sin6.sin6_flowinfo = 0; -			sin6.sin6_scope_id = 0; +			sin6.sin6_scope_id = +				ipv6_iface_scope_id(&ipv6_hdr(skb)->daddr, +						    opt->iif);  			put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);  		} diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index fff5bdd8b68..b4ff0a42b8c 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -124,15 +124,6 @@ static __inline__ void icmpv6_xmit_unlock(struct sock *sk)  }  /* - * Slightly more convenient version of icmpv6_send. - */ -void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) -{ -	icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos); -	kfree_skb(skb); -} - -/*   * Figure out, may we reply to this packet with icmp error.   *   * We do not reply, if: @@ -332,7 +323,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *sk  	 * anycast.  	 */  	if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) { -		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: acast source\n"); +		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: acast source\n");  		dst_release(dst);  		return ERR_PTR(-EINVAL);  	} @@ -381,7 +372,7 @@ relookup_failed:  /*   *	Send an ICMP message in response to a packet in error   */ -void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) +static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)  {  	struct net *net = dev_net(skb->dev);  	struct inet6_dev *idev = NULL; @@ -406,7 +397,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)  	/*  	 *	Make sure we respect the rules  	 *	i.e. 
RFC 1885 2.4(e) -	 *	Rule (e.1) is enforced by not using icmpv6_send +	 *	Rule (e.1) is enforced by not using icmp6_send  	 *	in any code that processes icmp errors.  	 */  	addr_type = ipv6_addr_type(&hdr->daddr); @@ -434,7 +425,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)  	 *	Source addr check  	 */ -	if (addr_type & IPV6_ADDR_LINKLOCAL) +	if (__ipv6_addr_needs_scope_id(addr_type))  		iif = skb->dev->ifindex;  	/* @@ -444,7 +435,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)  	 *	and anycast addresses will be checked later.  	 */  	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { -		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n"); +		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: addr_any/mcast source\n");  		return;  	} @@ -452,7 +443,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)  	 *	Never answer to a ICMP packet.  	 */  	if (is_ineligible(skb)) { -		LIMIT_NETDEBUG(KERN_DEBUG "icmpv6_send: no reply to icmp error\n"); +		LIMIT_NETDEBUG(KERN_DEBUG "icmp6_send: no reply to icmp error\n");  		return;  	} @@ -529,7 +520,14 @@ out_dst_release:  out:  	icmpv6_xmit_unlock(sk);  } -EXPORT_SYMBOL(icmpv6_send); + +/* Slightly more convenient version of icmp6_send. + */ +void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) +{ +	icmp6_send(skb, ICMPV6_PARAMPROB, code, pos); +	kfree_skb(skb); +}  static void icmpv6_echo_reply(struct sk_buff *skb)  { @@ -701,7 +699,7 @@ static int icmpv6_rcv(struct sk_buff *skb)  		if (__skb_checksum_complete(skb)) {  			LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",  				       saddr, daddr); -			goto discard_it; +			goto csum_error;  		}  	} @@ -787,6 +785,8 @@ static int icmpv6_rcv(struct sk_buff *skb)  	kfree_skb(skb);  	return 0; +csum_error: +	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);  discard_it:  	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);  drop_no_count: @@ -885,8 +885,14 @@ int __init icmpv6_init(void)  	err = -EAGAIN;  	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)  		goto fail; + +	err = inet6_register_icmp_sender(icmp6_send); +	if (err) +		goto sender_reg_err;  	return 0; +sender_reg_err: +	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);  fail:  	pr_err("Failed to register ICMP6 protocol\n");  	unregister_pernet_subsys(&icmpv6_sk_ops); @@ -895,6 +901,7 @@ fail:  void icmpv6_cleanup(void)  { +	inet6_unregister_icmp_sender(icmp6_send);  	unregister_pernet_subsys(&icmpv6_sk_ops);  	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);  } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 9bfab19ff3c..e4311cbc8b4 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -54,6 +54,10 @@ int inet6_csk_bind_conflict(const struct sock *sk,  				if (ipv6_rcv_saddr_equal(sk, sk2))  					break;  			} +			if (!relax && reuse && sk2->sk_reuse && +			    sk2->sk_state != TCP_LISTEN && +			    ipv6_rcv_saddr_equal(sk, sk2)) +				break;  		}  	} @@ -169,10 +173,8 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)  	sin6->sin6_port	= inet_sk(sk)->inet_dport;  	/* We do not store received flowlabel for TCP */  	sin6->sin6_flowinfo = 0; -	sin6->sin6_scope_id = 0; -	if (sk->sk_bound_dev_if && -	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) -		sin6->sin6_scope_id = sk->sk_bound_dev_if; +	sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, +						  
sk->sk_bound_dev_if);  }  EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b973ed3d06c..46e88433ec7 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -144,7 +144,9 @@ static void ip6_fl_gc(unsigned long dummy)  	spin_lock(&ip6_fl_lock);  	for (i=0; i<=FL_HASH_MASK; i++) { -		struct ip6_flowlabel *fl, **flp; +		struct ip6_flowlabel *fl; +		struct ip6_flowlabel __rcu **flp; +  		flp = &fl_ht[i];  		while ((fl = rcu_dereference_protected(*flp,  						       lockdep_is_held(&ip6_fl_lock))) != NULL) { @@ -179,7 +181,9 @@ static void __net_exit ip6_fl_purge(struct net *net)  	spin_lock(&ip6_fl_lock);  	for (i = 0; i <= FL_HASH_MASK; i++) { -		struct ip6_flowlabel *fl, **flp; +		struct ip6_flowlabel *fl; +		struct ip6_flowlabel __rcu **flp; +  		flp = &fl_ht[i];  		while ((fl = rcu_dereference_protected(*flp,  						       lockdep_is_held(&ip6_fl_lock))) != NULL) { @@ -506,7 +510,8 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)  	struct ipv6_pinfo *np = inet6_sk(sk);  	struct in6_flowlabel_req freq;  	struct ipv6_fl_socklist *sfl1=NULL; -	struct ipv6_fl_socklist *sfl, **sflp; +	struct ipv6_fl_socklist *sfl; +	struct ipv6_fl_socklist __rcu **sflp;  	struct ip6_flowlabel *fl, *fl1 = NULL; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index e4efffe2522..d3ddd840035 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -38,6 +38,7 @@  #include <net/sock.h>  #include <net/ip.h> +#include <net/ip_tunnels.h>  #include <net/icmp.h>  #include <net/protocol.h>  #include <net/addrconf.h> @@ -110,46 +111,6 @@ static u32 HASH_ADDR(const struct in6_addr *addr)  #define tunnels_l	tunnels[1]  #define tunnels_wc	tunnels[0] -static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev, -		struct rtnl_link_stats64 *tot) -{ -	int i; - -	for_each_possible_cpu(i) { -		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); -		u64 rx_packets, rx_bytes, tx_packets, tx_bytes; -		unsigned int start; - -		do { -			start = u64_stats_fetch_begin_bh(&tstats->syncp); -			rx_packets = tstats->rx_packets; -			tx_packets = tstats->tx_packets; -			rx_bytes = tstats->rx_bytes; -			tx_bytes = tstats->tx_bytes; -		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - -		tot->rx_packets += rx_packets; -		tot->tx_packets += tx_packets; -		tot->rx_bytes   += rx_bytes; -		tot->tx_bytes   += tx_bytes; -	} - -	tot->multicast = dev->stats.multicast; -	tot->rx_crc_errors = dev->stats.rx_crc_errors; -	tot->rx_fifo_errors = dev->stats.rx_fifo_errors; -	tot->rx_length_errors = dev->stats.rx_length_errors; -	tot->rx_frame_errors = dev->stats.rx_frame_errors; -	tot->rx_errors = dev->stats.rx_errors; - -	tot->tx_fifo_errors = dev->stats.tx_fifo_errors; -	tot->tx_carrier_errors = dev->stats.tx_carrier_errors; -	tot->tx_dropped = dev->stats.tx_dropped; -	tot->tx_aborted_errors = dev->stats.tx_aborted_errors; -	tot->tx_errors = dev->stats.tx_errors; - -	return tot; -} -  /* Given src, dst and key, find appropriate for input tunnel. 
*/  static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, @@ -667,7 +628,6 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,  	struct net_device_stats *stats = &tunnel->dev->stats;  	int err = -1;  	u8 proto; -	int pkt_len;  	struct sk_buff *new_skb;  	if (dev->type == ARPHRD_ETHER) @@ -801,23 +761,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,  		}  	} -	nf_reset(skb); -	pkt_len = skb->len; -	err = ip6_local_out(skb); - -	if (net_xmit_eval(err) == 0) { -		struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats); - -		tstats->tx_bytes += pkt_len; -		tstats->tx_packets++; -	} else { -		stats->tx_errors++; -		stats->tx_aborted_errors++; -	} - +	ip6tunnel_xmit(skb, dev);  	if (ndst)  		ip6_tnl_dst_store(tunnel, ndst); -  	return 0;  tx_err_link_failure:  	stats->tx_carrier_errors++; @@ -1271,7 +1217,7 @@ static const struct net_device_ops ip6gre_netdev_ops = {  	.ndo_start_xmit		= ip6gre_tunnel_xmit,  	.ndo_do_ioctl		= ip6gre_tunnel_ioctl,  	.ndo_change_mtu		= ip6gre_tunnel_change_mtu, -	.ndo_get_stats64	= ip6gre_get_stats64, +	.ndo_get_stats64	= ip_tunnel_get_stats64,  };  static void ip6gre_dev_free(struct net_device *dev) @@ -1520,7 +1466,7 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {  	.ndo_set_mac_address = eth_mac_addr,  	.ndo_validate_addr = eth_validate_addr,  	.ndo_change_mtu = ip6gre_tunnel_change_mtu, -	.ndo_get_stats64 = ip6gre_get_stats64, +	.ndo_get_stats64 = ip_tunnel_get_stats64,  };  static void ip6gre_tap_setup(struct net_device *dev) diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c new file mode 100644 index 00000000000..4578e23834f --- /dev/null +++ b/net/ipv6/ip6_icmp.c @@ -0,0 +1,47 @@ +#include <linux/export.h> +#include <linux/icmpv6.h> +#include <linux/mutex.h> +#include <linux/netdevice.h> +#include <linux/spinlock.h> + +#include <net/ipv6.h> + +#if IS_ENABLED(CONFIG_IPV6) + +static ip6_icmp_send_t __rcu *ip6_icmp_send; + +int inet6_register_icmp_sender(ip6_icmp_send_t *fn) +{ +	return (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, NULL, fn) == NULL) ? +	        0 : -EBUSY; +} +EXPORT_SYMBOL(inet6_register_icmp_sender); + +int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn) +{ +	int ret; + +	ret = (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, fn, NULL) == fn) ? 
+	      0 : -EINVAL; + +	synchronize_net(); + +	return ret; +} +EXPORT_SYMBOL(inet6_unregister_icmp_sender); + +void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) +{ +	ip6_icmp_send_t *send; + +	rcu_read_lock(); +	send = rcu_dereference(ip6_icmp_send); + +	if (!send) +		goto out; +	send(skb, type, code, info); +out: +	rcu_read_unlock(); +} +EXPORT_SYMBOL(icmpv6_send); +#endif diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 8234c1dcdf7..71b766ee821 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -92,14 +92,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,  	u8 *prevhdr;  	int offset = 0; -	if (!(features & NETIF_F_V6_CSUM)) -		features &= ~NETIF_F_SG; -  	if (unlikely(skb_shinfo(skb)->gso_type &  		     ~(SKB_GSO_UDP |  		       SKB_GSO_DODGY |  		       SKB_GSO_TCP_ECN |  		       SKB_GSO_GRE | +		       SKB_GSO_UDP_TUNNEL |  		       SKB_GSO_TCPV6 |  		       0)))  		goto out; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 155eccfa776..d2eedf19233 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1224,11 +1224,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,  	}  	/* For UDP, check if TX timestamp is enabled */ -	if (sk->sk_type == SOCK_DGRAM) { -		err = sock_tx_timestamp(sk, &tx_flags); -		if (err) -			goto error; -	} +	if (sk->sk_type == SOCK_DGRAM) +		sock_tx_timestamp(sk, &tx_flags);  	/*  	 * Let's try using as much space as possible. diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index fff83cbc197..1e55866cead 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -47,6 +47,7 @@  #include <net/icmp.h>  #include <net/ip.h> +#include <net/ip_tunnels.h>  #include <net/ipv6.h>  #include <net/ip6_route.h>  #include <net/addrconf.h> @@ -955,7 +956,6 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,  	unsigned int max_headroom = sizeof(struct ipv6hdr);  	u8 proto;  	int err = -1; -	int pkt_len;  	if (!fl6->flowi6_mark)  		dst = ip6_tnl_dst_check(t); @@ -1035,19 +1035,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,  	ipv6h->nexthdr = proto;  	ipv6h->saddr = fl6->saddr;  	ipv6h->daddr = fl6->daddr; -	nf_reset(skb); -	pkt_len = skb->len; -	err = ip6_local_out(skb); - -	if (net_xmit_eval(err) == 0) { -		struct pcpu_tstats *tstats = this_cpu_ptr(t->dev->tstats); - -		tstats->tx_bytes += pkt_len; -		tstats->tx_packets++; -	} else { -		stats->tx_errors++; -		stats->tx_aborted_errors++; -	} +	ip6tunnel_xmit(skb, dev);  	if (ndst)  		ip6_tnl_dst_store(t, ndst);  	return 0; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 96bfb4e4b82..241fb8ad9fc 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -842,9 +842,9 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)  		if (ipv6_hdr(skb)->version == 0) {  			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));  			nlh->nlmsg_type = NLMSG_ERROR; -			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); +			nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));  			skb_trim(skb, nlh->nlmsg_len); -			((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; +			((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;  			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);  		} else  			kfree_skb(skb); @@ -1100,13 +1100,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,  		if (ipv6_hdr(skb)->version == 0) {  			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr)); -			if 
(__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { +			if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {  				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;  			} else {  				nlh->nlmsg_type = NLMSG_ERROR; -				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); +				nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));  				skb_trim(skb, nlh->nlmsg_len); -				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; +				((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;  			}  			rtnl_unicast(skb, net, NETLINK_CB(skb).portid);  		} else diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 76ef4353d51..2712ab22a17 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -610,8 +610,6 @@ void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,  		}  	}  #endif -	if (!dev->addr_len) -		send_sllao = 0;  	if (send_sllao)  		optlen += ndisc_opt_addr_space(dev); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 429089cb073..72836f40b73 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -1,3 +1,9 @@ +/* + * IPv6 specific functions of netfilter core + * + * Rusty Russell (C) 2000 -- This code is GPL. + * Patrick McHardy (C) 2006-2012 + */  #include <linux/kernel.h>  #include <linux/init.h>  #include <linux/ipv6.h> @@ -29,7 +35,7 @@ int ip6_route_me_harder(struct sk_buff *skb)  		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);  		LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");  		dst_release(dst); -		return -EINVAL; +		return dst->error;  	}  	/* Drop old route. */ @@ -43,7 +49,7 @@ int ip6_route_me_harder(struct sk_buff *skb)  		skb_dst_set(skb, NULL);  		dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0);  		if (IS_ERR(dst)) -			return -1; +			return PTR_ERR(dst);  		skb_dst_set(skb, dst);  	}  #endif @@ -53,7 +59,7 @@ int ip6_route_me_harder(struct sk_buff *skb)  	if (skb_headroom(skb) < hh_len &&  	    pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),  			     0, GFP_ATOMIC)) -		return -1; +		return -ENOMEM;  	return 0;  } diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index c72532a60d8..4433ab40e7d 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -105,7 +105,7 @@ config IP6_NF_MATCH_MH  config IP6_NF_MATCH_RPFILTER  	tristate '"rpfilter" reverse path filter match support' -	depends on NETFILTER_ADVANCED +	depends on NETFILTER_ADVANCED && (IP6_NF_MANGLE || IP6_NF_RAW)  	---help---  	  This option allows you to match packets whose replies would  	  go out via the interface the packet came in. diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 341b54ade72..44400c216dc 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -3,6 +3,7 @@   *   * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling   * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org> + * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -284,6 +285,7 @@ static void trace_packet(const struct sk_buff *skb,  	const char *hookname, *chainname, *comment;  	const struct ip6t_entry *iter;  	unsigned int rulenum = 0; +	struct net *net = dev_net(in ? 
in : out);  	table_base = private->entries[smp_processor_id()];  	root = get_entry(table_base, private->hook_entry[hook]); @@ -296,7 +298,7 @@ static void trace_packet(const struct sk_buff *skb,  		    &chainname, &comment, &rulenum) != 0)  			break; -	nf_log_packet(AF_INET6, hook, skb, in, out, &trace_loginfo, +	nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo,  		      "TRACE: %s:%s:%s:%u ",  		      tablename, chainname, comment, rulenum);  } diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c index cb631143721..590f767db5d 100644 --- a/net/ipv6/netfilter/ip6t_NPT.c +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -18,9 +18,8 @@  static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)  {  	struct ip6t_npt_tginfo *npt = par->targinfo; -	__wsum src_sum = 0, dst_sum = 0;  	struct in6_addr pfx; -	unsigned int i; +	__wsum src_sum, dst_sum;  	if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)  		return -EINVAL; @@ -33,12 +32,8 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)  	if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6))  		return -EINVAL; -	for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) { -		src_sum = csum_add(src_sum, -				(__force __wsum)npt->src_pfx.in6.s6_addr16[i]); -		dst_sum = csum_add(dst_sum, -				(__force __wsum)npt->dst_pfx.in6.s6_addr16[i]); -	} +	src_sum = csum_partial(&npt->src_pfx.in6, sizeof(npt->src_pfx.in6), 0); +	dst_sum = csum_partial(&npt->dst_pfx.in6, sizeof(npt->dst_pfx.in6), 0);  	npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum));  	return 0; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index ed3b427b284..70f9abc0efe 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -7,6 +7,8 @@   * Authors:   *	Yasuyuki Kozakai	<yasuyuki.kozakai@toshiba.co.jp>   * + * Copyright (c) 2005-2007 Patrick McHardy <kaber@trash.net> + *   * Based on net/ipv4/netfilter/ipt_REJECT.c   *   * This program is free software; you can redistribute it and/or diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 6134a1ebfb1..e075399d8b7 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -38,7 +38,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)  	struct in6_addr saddr, daddr;  	u_int8_t hop_limit;  	u_int32_t flowlabel, mark; - +	int err;  #if 0  	/* root is playing with raw sockets. */  	if (skb->len < sizeof(struct iphdr) || @@ -65,8 +65,11 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)  	     !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) ||  	     skb->mark != mark ||  	     ipv6_hdr(skb)->hop_limit != hop_limit || -	     flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) -		return ip6_route_me_harder(skb) == 0 ? 
ret : NF_DROP; +	     flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { +		err = ip6_route_me_harder(skb); +		if (err < 0) +			ret = NF_DROP_ERR(err); +	}  	return ret;  } diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index e0e788d25b1..6383f90efda 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -179,6 +179,7 @@ nf_nat_ipv6_out(unsigned int hooknum,  #ifdef CONFIG_XFRM  	const struct nf_conn *ct;  	enum ip_conntrack_info ctinfo; +	int err;  #endif  	unsigned int ret; @@ -197,9 +198,11 @@ nf_nat_ipv6_out(unsigned int hooknum,  				      &ct->tuplehash[!dir].tuple.dst.u3) ||  		    (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&  		     ct->tuplehash[dir].tuple.src.u.all != -		     ct->tuplehash[!dir].tuple.dst.u.all)) -			if (nf_xfrm_me_harder(skb, AF_INET6) < 0) -				ret = NF_DROP; +		     ct->tuplehash[!dir].tuple.dst.u.all)) { +			err = nf_xfrm_me_harder(skb, AF_INET6); +			if (err < 0) +				ret = NF_DROP_ERR(err); +		}  	}  #endif  	return ret; @@ -215,6 +218,7 @@ nf_nat_ipv6_local_fn(unsigned int hooknum,  	const struct nf_conn *ct;  	enum ip_conntrack_info ctinfo;  	unsigned int ret; +	int err;  	/* root is playing with raw sockets. */  	if (skb->len < sizeof(struct ipv6hdr)) @@ -227,16 +231,19 @@ nf_nat_ipv6_local_fn(unsigned int hooknum,  		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,  				      &ct->tuplehash[!dir].tuple.src.u3)) { -			if (ip6_route_me_harder(skb)) -				ret = NF_DROP; +			err = ip6_route_me_harder(skb); +			if (err < 0) +				ret = NF_DROP_ERR(err);  		}  #ifdef CONFIG_XFRM  		else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&  			 ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&  			 ct->tuplehash[dir].tuple.dst.u.all != -			 ct->tuplehash[!dir].tuple.src.u.all) -			if (nf_xfrm_me_harder(skb, AF_INET6)) -				ret = NF_DROP; +			 ct->tuplehash[!dir].tuple.src.u.all) { +			err = nf_xfrm_me_harder(skb, AF_INET6); +			if (err < 0) +				ret = NF_DROP_ERR(err); +		}  #endif  	}  	return ret; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 2b6c226f519..97bcf2bae85 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -330,12 +330,8 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)  					sizeof(sin6.sin6_addr));  	nf_ct_put(ct); - -	if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) -		sin6.sin6_scope_id = sk->sk_bound_dev_if; -	else -		sin6.sin6_scope_id = 0; - +	sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, +						 sk->sk_bound_dev_if);  	return copy_to_user(user, &sin6, sizeof(sin6)) ? 
-EFAULT : 0;  } diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 24df3dde007..b3807c5cb88 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -131,7 +131,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,  			 type + 128);  		nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);  		if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6)) -			nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, +			nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL, +				      NULL, NULL,  				      "nf_ct_icmpv6: invalid new with type %d ",  				      type + 128);  		return false; @@ -203,7 +204,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,  	icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih);  	if (icmp6h == NULL) {  		if (LOG_INVALID(net, IPPROTO_ICMPV6)) -		nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,  			      "nf_ct_icmpv6: short packet ");  		return -NF_ACCEPT;  	} @@ -211,7 +212,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,  	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&  	    nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) {  		if (LOG_INVALID(net, IPPROTO_ICMPV6)) -			nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL,  				      "nf_ct_icmpv6: ICMPv6 checksum failed ");  		return -NF_ACCEPT;  	} diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6700069949d..dffdc1a389c 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -41,6 +41,7 @@  #include <net/rawv6.h>  #include <net/ndisc.h>  #include <net/addrconf.h> +#include <net/inet_ecn.h>  #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>  #include <linux/sysctl.h>  #include <linux/netfilter.h> @@ -138,6 +139,11 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)  }  #endif +static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) +{ +	return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); +} +  static unsigned int nf_hashfn(struct inet_frag_queue *q)  {  	const struct frag_queue *nq; @@ -166,7 +172,7 @@ static void nf_ct_frag6_expire(unsigned long data)  /* Creation primitives. 
*/  static inline struct frag_queue *fq_find(struct net *net, __be32 id,  					 u32 user, struct in6_addr *src, -					 struct in6_addr *dst) +					 struct in6_addr *dst, u8 ecn)  {  	struct inet_frag_queue *q;  	struct ip6_create_arg arg; @@ -176,6 +182,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,  	arg.user = user;  	arg.src = src;  	arg.dst = dst; +	arg.ecn = ecn;  	read_lock_bh(&nf_frags.lock);  	hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); @@ -196,6 +203,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,  	struct sk_buff *prev, *next;  	unsigned int payload_len;  	int offset, end; +	u8 ecn;  	if (fq->q.last_in & INET_FRAG_COMPLETE) {  		pr_debug("Already completed\n"); @@ -213,6 +221,8 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,  		return -1;  	} +	ecn = ip6_frag_ecn(ipv6_hdr(skb)); +  	if (skb->ip_summed == CHECKSUM_COMPLETE) {  		const unsigned char *nh = skb_network_header(skb);  		skb->csum = csum_sub(skb->csum, @@ -317,6 +327,7 @@ found:  	}  	fq->q.stamp = skb->tstamp;  	fq->q.meat += skb->len; +	fq->ecn |= ecn;  	if (payload_len > fq->q.max_size)  		fq->q.max_size = payload_len;  	add_frag_mem_limit(&fq->q, skb->truesize); @@ -352,12 +363,17 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)  {  	struct sk_buff *fp, *op, *head = fq->q.fragments;  	int    payload_len; +	u8 ecn;  	inet_frag_kill(&fq->q, &nf_frags);  	WARN_ON(head == NULL);  	WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); +	ecn = ip_frag_ecn_table[fq->ecn]; +	if (unlikely(ecn == 0xff)) +		goto out_fail; +  	/* Unfragmented part is taken from the first segment. */  	payload_len = ((head->data - skb_network_header(head)) -  		       sizeof(struct ipv6hdr) + fq->q.len - @@ -428,6 +444,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)  	head->dev = dev;  	head->tstamp = fq->q.stamp;  	ipv6_hdr(head)->payload_len = htons(payload_len); +	ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);  	IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;  	/* Yes, and fold redundant checksum back. 
8) */ @@ -572,7 +589,8 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)  	inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false);  	local_bh_enable(); -	fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr); +	fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, +		     ip6_frag_ecn(hdr));  	if (fq == NULL) {  		pr_debug("Can't find and can't create new queue\n");  		goto ret_orig; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index bbbe53a99b5..115cc58898f 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -90,6 +90,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = {  	SNMP_MIB_ITEM("Ip6OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),  	SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),  	SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS), +	SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS),  	SNMP_MIB_SENTINEL  }; @@ -99,6 +100,7 @@ static const struct snmp_mib snmp6_icmp6_list[] = {  	SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS),  	SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS),  	SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS), +	SNMP_MIB_ITEM("Icmp6InCsumErrors", ICMP6_MIB_CSUMERRORS),  	SNMP_MIB_SENTINEL  }; @@ -129,6 +131,7 @@ static const struct snmp_mib snmp6_udp6_list[] = {  	SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS),  	SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS),  	SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS), +	SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS),  	SNMP_MIB_SENTINEL  }; @@ -139,6 +142,7 @@ static const struct snmp_mib snmp6_udplite6_list[] = {  	SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS),  	SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS),  	SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS), +	SNMP_MIB_ITEM("UdpLite6InCsumErrors", UDP_MIB_CSUMERRORS),  	SNMP_MIB_SENTINEL  }; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 330b5e7b7df..eedff8ccded 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -263,7 +263,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)  	if (addr_type != IPV6_ADDR_ANY) {  		struct net_device *dev = NULL; -		if (addr_type & IPV6_ADDR_LINKLOCAL) { +		if (__ipv6_addr_needs_scope_id(addr_type)) {  			if (addr_len >= sizeof(struct sockaddr_in6) &&  			    addr->sin6_scope_id) {  				/* Override any existing binding, if another @@ -498,9 +498,8 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,  		sin6->sin6_port = 0;  		sin6->sin6_addr = ipv6_hdr(skb)->saddr;  		sin6->sin6_flowinfo = 0; -		sin6->sin6_scope_id = 0; -		if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) -			sin6->sin6_scope_id = IP6CB(skb)->iif; +		sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, +							  IP6CB(skb)->iif);  	}  	sock_recv_ts_and_drops(msg, sk, skb); @@ -802,7 +801,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,  		if (addr_len >= sizeof(struct sockaddr_in6) &&  		    sin6->sin6_scope_id && -		    ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) +		    __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))  			fl6.flowi6_oif = sin6->sin6_scope_id;  	} else {  		if (sk->sk_state != TCP_ESTABLISHED) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 0ba10e53a62..790d9f4b8b0 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -58,6 +58,7 @@  #include <net/ndisc.h>  #include <net/addrconf.h>  #include <net/inet_frag.h> +#include <net/inet_ecn.h>  struct ip6frag_skb_cb  { @@ -67,6 
+68,10 @@ struct ip6frag_skb_cb  #define FRAG6_CB(skb)	((struct ip6frag_skb_cb*)((skb)->cb)) +static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) +{ +	return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); +}  static struct inet_frags ip6_frags; @@ -119,6 +124,7 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a)  	fq->user = arg->user;  	fq->saddr = *arg->src;  	fq->daddr = *arg->dst; +	fq->ecn = arg->ecn;  }  EXPORT_SYMBOL(ip6_frag_init); @@ -173,7 +179,8 @@ static void ip6_frag_expire(unsigned long data)  }  static __inline__ struct frag_queue * -fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6_addr *dst) +fq_find(struct net *net, __be32 id, const struct in6_addr *src, +	const struct in6_addr *dst, u8 ecn)  {  	struct inet_frag_queue *q;  	struct ip6_create_arg arg; @@ -183,6 +190,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, const struct in6  	arg.user = IP6_DEFRAG_LOCAL_DELIVER;  	arg.src = src;  	arg.dst = dst; +	arg.ecn = ecn;  	read_lock(&ip6_frags.lock);  	hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); @@ -202,6 +210,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,  	struct net_device *dev;  	int offset, end;  	struct net *net = dev_net(skb_dst(skb)->dev); +	u8 ecn;  	if (fq->q.last_in & INET_FRAG_COMPLETE)  		goto err; @@ -219,6 +228,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,  		return -1;  	} +	ecn = ip6_frag_ecn(ipv6_hdr(skb)); +  	if (skb->ip_summed == CHECKSUM_COMPLETE) {  		const unsigned char *nh = skb_network_header(skb);  		skb->csum = csum_sub(skb->csum, @@ -319,6 +330,7 @@ found:  	}  	fq->q.stamp = skb->tstamp;  	fq->q.meat += skb->len; +	fq->ecn |= ecn;  	add_frag_mem_limit(&fq->q, skb->truesize);  	/* The first fragment. @@ -370,9 +382,14 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,  	int    payload_len;  	unsigned int nhoff;  	int sum_truesize; +	u8 ecn;  	inet_frag_kill(&fq->q, &ip6_frags); +	ecn = ip_frag_ecn_table[fq->ecn]; +	if (unlikely(ecn == 0xff)) +		goto out_fail; +  	/* Make the one we just received the head. */  	if (prev) {  		head = prev->next; @@ -471,6 +488,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,  	head->dev = dev;  	head->tstamp = fq->q.stamp;  	ipv6_hdr(head)->payload_len = htons(payload_len); +	ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);  	IP6CB(head)->nhoff = nhoff;  	/* Yes, and fold redundant checksum back. 
8) */ @@ -534,7 +552,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)  		IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),  				 IPSTATS_MIB_REASMFAILS, evicted); -	fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr); +	fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, +		     ip6_frag_ecn(hdr));  	if (fq != NULL) {  		int ret; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e5fe0041adf..ad0aa6b0b86 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2355,7 +2355,7 @@ beginning:  	return last_err;  } -static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)  {  	struct fib6_config cfg;  	int err; @@ -2370,7 +2370,7 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a  		return ip6_route_del(&cfg);  } -static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)  {  	struct fib6_config cfg;  	int err; @@ -2562,7 +2562,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)  		     prefix, 0, NLM_F_MULTI);  } -static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) +static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)  {  	struct net *net = sock_net(in_skb->sk);  	struct nlattr *tb[RTA_MAX+1]; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 02f96dcbcf0..335363478bb 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -49,7 +49,7 @@  #include <net/ip.h>  #include <net/udp.h>  #include <net/icmp.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h>  #include <net/inet_ecn.h>  #include <net/xfrm.h>  #include <net/dsfield.h> @@ -87,41 +87,6 @@ struct sit_net {  	struct net_device *fb_tunnel_dev;  }; -static struct rtnl_link_stats64 *ipip6_get_stats64(struct net_device *dev, -						   struct rtnl_link_stats64 *tot) -{ -	int i; - -	for_each_possible_cpu(i) { -		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); -		u64 rx_packets, rx_bytes, tx_packets, tx_bytes; -		unsigned int start; - -		do { -			start = u64_stats_fetch_begin_bh(&tstats->syncp); -			rx_packets = tstats->rx_packets; -			tx_packets = tstats->tx_packets; -			rx_bytes = tstats->rx_bytes; -			tx_bytes = tstats->tx_bytes; -		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); - -		tot->rx_packets += rx_packets; -		tot->tx_packets += tx_packets; -		tot->rx_bytes   += rx_bytes; -		tot->tx_bytes   += tx_bytes; -	} - -	tot->rx_errors = dev->stats.rx_errors; -	tot->rx_frame_errors = dev->stats.rx_frame_errors; -	tot->tx_fifo_errors = dev->stats.tx_fifo_errors; -	tot->tx_carrier_errors = dev->stats.tx_carrier_errors; -	tot->tx_dropped = dev->stats.tx_dropped; -	tot->tx_aborted_errors = dev->stats.tx_aborted_errors; -	tot->tx_errors = dev->stats.tx_errors; - -	return tot; -} -  /*   * Must be invoked with rcu_read_lock   */ @@ -899,6 +864,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,  	if ((iph->ttl = tiph->ttl) == 0)  		iph->ttl	=	iph6->hop_limit; +	skb->ip_summed = CHECKSUM_NONE; +	ip_select_ident(iph, skb_dst(skb), NULL);  	iptunnel_xmit(skb, dev);  	return NETDEV_TX_OK; @@ -1200,7 +1167,7 @@ static const struct net_device_ops ipip6_netdev_ops = {  	.ndo_start_xmit	= ipip6_tunnel_xmit,  	.ndo_do_ioctl	= ipip6_tunnel_ioctl,  	.ndo_change_mtu	= ipip6_tunnel_change_mtu, -	.ndo_get_stats64= ipip6_get_stats64, +	.ndo_get_stats64 = ip_tunnel_get_stats64,  };  static void 
ipip6_dev_free(struct net_device *dev) diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 8a0848b60b3..d5dda20bd71 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -149,7 +149,6 @@ static inline int cookie_check(const struct sk_buff *skb, __u32 cookie)  struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)  {  	struct tcp_options_received tcp_opt; -	const u8 *hash_location;  	struct inet_request_sock *ireq;  	struct inet6_request_sock *ireq6;  	struct tcp_request_sock *treq; @@ -177,7 +176,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)  	/* check for timestamp cookie support */  	memset(&tcp_opt, 0, sizeof(tcp_opt)); -	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL); +	tcp_parse_options(skb, &tcp_opt, 0, NULL);  	if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))  		goto out; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 46a5be85be8..71167069b39 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -462,7 +462,6 @@ out:  static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,  			      struct flowi6 *fl6,  			      struct request_sock *req, -			      struct request_values *rvp,  			      u16 queue_mapping)  {  	struct inet6_request_sock *treq = inet6_rsk(req); @@ -474,7 +473,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,  	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)  		goto done; -	skb = tcp_make_synack(sk, dst, req, rvp, NULL); +	skb = tcp_make_synack(sk, dst, req, NULL);  	if (skb) {  		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); @@ -489,13 +488,12 @@ done:  	return err;  } -static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, -			     struct request_values *rvp) +static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)  {  	struct flowi6 fl6;  	int res; -	res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0); +	res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0);  	if (!res)  		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);  	return res; @@ -948,9 +946,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)   */  static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)  { -	struct tcp_extend_values tmp_ext;  	struct tcp_options_received tmp_opt; -	const u8 *hash_location;  	struct request_sock *req;  	struct inet6_request_sock *treq;  	struct ipv6_pinfo *np = inet6_sk(sk); @@ -988,50 +984,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)  	tcp_clear_options(&tmp_opt);  	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);  	tmp_opt.user_mss = tp->rx_opt.user_mss; -	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); - -	if (tmp_opt.cookie_plus > 0 && -	    tmp_opt.saw_tstamp && -	    !tp->rx_opt.cookie_out_never && -	    (sysctl_tcp_cookie_size > 0 || -	     (tp->cookie_values != NULL && -	      tp->cookie_values->cookie_desired > 0))) { -		u8 *c; -		u32 *d; -		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS]; -		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE; - -		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0) -			goto drop_and_free; - -		/* Secret recipe starts with IP addresses */ -		d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0]; -		*mess++ ^= *d++; -		*mess++ ^= *d++; -		*mess++ ^= *d++; -		*mess++ ^= *d++; -		d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0]; -		*mess++ ^= *d++; -		*mess++ ^= *d++; -		*mess++ ^= *d++; -		*mess++ ^= 
*d++; - -		/* plus variable length Initiator Cookie */ -		c = (u8 *)mess; -		while (l-- > 0) -			*c++ ^= *hash_location++; - -		want_cookie = false;	/* not our kind of cookie */ -		tmp_ext.cookie_out_never = 0; /* false */ -		tmp_ext.cookie_plus = tmp_opt.cookie_plus; -	} else if (!tp->rx_opt.cookie_in_always) { -		/* redundant indications, but ensure initialization. */ -		tmp_ext.cookie_out_never = 1; /* true */ -		tmp_ext.cookie_plus = 0; -	} else { -		goto drop_and_free; -	} -	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always; +	tcp_parse_options(skb, &tmp_opt, 0, NULL);  	if (want_cookie && !tmp_opt.saw_tstamp)  		tcp_clear_options(&tmp_opt); @@ -1109,7 +1062,6 @@ have_isn:  		goto drop_and_release;  	if (tcp_v6_send_synack(sk, dst, &fl6, req, -			       (struct request_values *)&tmp_ext,  			       skb_get_queue_mapping(skb)) ||  	    want_cookie)  		goto drop_and_free; @@ -1453,6 +1405,7 @@ discard:  	kfree_skb(skb);  	return 0;  csum_err: +	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);  	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);  	goto discard; @@ -1514,7 +1467,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)  		goto discard_it;  	if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb)) -		goto bad_packet; +		goto csum_error;  	th = tcp_hdr(skb);  	hdr = ipv6_hdr(skb); @@ -1578,6 +1531,8 @@ no_tcp_socket:  		goto discard_it;  	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { +csum_error: +		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);  bad_packet:  		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);  	} else { @@ -1585,11 +1540,6 @@ bad_packet:  	}  discard_it: - -	/* -	 *	Discard frame -	 */ -  	kfree_skb(skb);  	return 0; @@ -1603,10 +1553,13 @@ do_time_wait:  		goto discard_it;  	} -	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { -		TCP_INC_STATS_BH(net, TCP_MIB_INERRS); +	if (skb->len < (th->doff<<2)) {  		inet_twsk_put(inet_twsk(sk)); -		goto discard_it; +		goto bad_packet; +	} +	if (tcp_checksum_complete(skb)) { +		inet_twsk_put(inet_twsk(sk)); +		goto csum_error;  	}  	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d8e5e852fc7..d4defdd4493 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -450,15 +450,16 @@ try_again:  		sin6->sin6_family = AF_INET6;  		sin6->sin6_port = udp_hdr(skb)->source;  		sin6->sin6_flowinfo = 0; -		sin6->sin6_scope_id = 0; -		if (is_udp4) +		if (is_udp4) {  			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,  					       &sin6->sin6_addr); -		else { +			sin6->sin6_scope_id = 0; +		} else {  			sin6->sin6_addr = ipv6_hdr(skb)->saddr; -			if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) -				sin6->sin6_scope_id = IP6CB(skb)->iif; +			sin6->sin6_scope_id = +				ipv6_iface_scope_id(&sin6->sin6_addr, +						    IP6CB(skb)->iif);  		}  	} @@ -482,12 +483,17 @@ out:  csum_copy_err:  	slow = lock_sock_fast(sk);  	if (!skb_kill_datagram(sk, skb, flags)) { -		if (is_udp4) +		if (is_udp4) { +			UDP_INC_STATS_USER(sock_net(sk), +					UDP_MIB_CSUMERRORS, is_udplite);  			UDP_INC_STATS_USER(sock_net(sk),  					UDP_MIB_INERRORS, is_udplite); -		else +		} else { +			UDP6_INC_STATS_USER(sock_net(sk), +					UDP_MIB_CSUMERRORS, is_udplite);  			UDP6_INC_STATS_USER(sock_net(sk),  					UDP_MIB_INERRORS, is_udplite); +		}  	}  	unlock_sock_fast(sk, slow); @@ -636,7 +642,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)  	if (rcu_access_pointer(sk->sk_filter)) {  		if (udp_lib_checksum_complete(skb)) -			goto drop; +			goto csum_error;  	}  	if 
(sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) @@ -655,6 +661,8 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)  	bh_unlock_sock(sk);  	return rc; +csum_error: +	UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);  drop:  	UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);  	atomic_inc(&sk->sk_drops); @@ -816,7 +824,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,  	}  	if (udp6_csum_init(skb, uh, proto)) -		goto discard; +		goto csum_error;  	/*  	 *	Multicast receive code @@ -849,7 +857,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,  		goto discard;  	if (udp_lib_checksum_complete(skb)) -		goto discard; +		goto csum_error;  	UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);  	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); @@ -866,7 +874,9 @@ short_packet:  		       skb->len,  		       daddr,  		       ntohs(uh->dest)); - +	goto discard; +csum_error: +	UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);  discard:  	UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);  	kfree_skb(skb); @@ -1118,7 +1128,7 @@ do_udp_sendmsg:  		if (addr_len >= sizeof(struct sockaddr_in6) &&  		    sin6->sin6_scope_id && -		    ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) +		    __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr)))  			fl6.flowi6_oif = sin6->sin6_scope_id;  	} else {  		if (sk->sk_state != TCP_ESTABLISHED) diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index cf05cf073c5..3bb3a891a42 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -21,6 +21,10 @@ static int udp6_ufo_send_check(struct sk_buff *skb)  	const struct ipv6hdr *ipv6h;  	struct udphdr *uh; +	/* UDP Tunnel offload on ipv6 is not yet supported. */ +	if (skb->encapsulation) +		return -EINVAL; +  	if (!pskb_may_pull(skb, sizeof(*uh)))  		return -EINVAL; @@ -56,7 +60,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,  		/* Packet is from an untrusted source, reset gso_segs. 
*/  		int type = skb_shinfo(skb)->gso_type; -		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | +		if (unlikely(type & ~(SKB_GSO_UDP | +				      SKB_GSO_DODGY | +				      SKB_GSO_UDP_TUNNEL |  				      SKB_GSO_GRE) ||  			     !(type & (SKB_GSO_UDP))))  			goto out; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 9bf6a74a71d..4770d515c2c 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -49,8 +49,11 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)  	       sizeof(top_iph->flow_lbl));  	top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family); -	dsfield = XFRM_MODE_SKB_CB(skb)->tos; -	dsfield = INET_ECN_encapsulate(dsfield, dsfield); +	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) +		dsfield = 0; +	else +		dsfield = XFRM_MODE_SKB_CB(skb)->tos; +	dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);  	if (x->props.flags & XFRM_STATE_NOECN)  		dsfield &= ~INET_ECN_MASK;  	ipv6_change_dsfield(top_iph, 0, dsfield); diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index e493b3397ae..0578d4fa00a 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -305,8 +305,7 @@ static void irda_connect_response(struct irda_sock *self)  	IRDA_DEBUG(2, "%s()\n", __func__); -	skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, -			GFP_ATOMIC); +	skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, GFP_KERNEL);  	if (skb == NULL) {  		IRDA_DEBUG(0, "%s() Unable to allocate sk_buff!\n",  			   __func__); @@ -1120,7 +1119,7 @@ static int irda_create(struct net *net, struct socket *sock, int protocol,  	}  	/* Allocate networking socket */ -	sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto); +	sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto);  	if (sk == NULL)  		return -ENOMEM; diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c index 52079f19bbb..b797daac063 100644 --- a/net/irda/ircomm/ircomm_core.c +++ b/net/irda/ircomm/ircomm_core.c @@ -117,7 +117,7 @@ struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line)  	IRDA_ASSERT(ircomm != NULL, return NULL;); -	self = kzalloc(sizeof(struct ircomm_cb), GFP_ATOMIC); +	self = kzalloc(sizeof(struct ircomm_cb), GFP_KERNEL);  	if (self == NULL)  		return NULL; diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 1064621da6f..98ad6ec4bd3 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -58,7 +58,7 @@ int  sysctl_discovery_slots   = 6; /* 6 slots by default */  int  sysctl_lap_keepalive_time = LM_IDLE_TIMEOUT * 1000 / HZ;  char sysctl_devname[65]; -const char *irlmp_reasons[] = { +static const char *irlmp_reasons[] = {  	"ERROR, NOT USED",  	"LM_USER_REQUEST",  	"LM_LAP_DISCONNECT", diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 206ce6db2c3..ae691651b72 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1461,7 +1461,8 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,  		return iucv_accept_poll(sk);  	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) -		mask |= POLLERR; +		mask |= POLLERR | +			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
POLLPRI : 0);  	if (sk->sk_shutdown & RCV_SHUTDOWN)  		mask |= POLLRDHUP; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 8aecf5df665..6984c3a353c 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1777,7 +1777,7 @@ int l2tp_session_delete(struct l2tp_session *session)  	if (session->session_close != NULL)  		(*session->session_close)(session);  	if (session->deref) -		(*session->ref)(session); +		(*session->deref)(session);  	l2tp_session_dec_refcount(session);  	return 0;  } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a6893602f87..1a89c80e640 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -175,7 +175,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,  		 *       add it to the device after the station.  		 */  		if (!sta || !test_sta_flag(sta, WLAN_STA_ASSOC)) { -			ieee80211_key_free(sdata->local, key); +			ieee80211_key_free_unused(key);  			err = -ENOENT;  			goto out_unlock;  		} @@ -214,8 +214,6 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,  	}  	err = ieee80211_key_link(key, sdata, sta); -	if (err) -		ieee80211_key_free(sdata->local, key);   out_unlock:  	mutex_unlock(&sdata->local->sta_mtx); @@ -254,7 +252,7 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,  		goto out_unlock;  	} -	__ieee80211_key_free(key); +	ieee80211_key_free(key, true);  	ret = 0;   out_unlock: @@ -445,12 +443,14 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)  	struct ieee80211_sub_if_data *sdata = sta->sdata;  	struct ieee80211_local *local = sdata->local;  	struct timespec uptime; +	u64 packets = 0; +	int ac;  	sinfo->generation = sdata->local->sta_generation;  	sinfo->filled = STATION_INFO_INACTIVE_TIME | -			STATION_INFO_RX_BYTES | -			STATION_INFO_TX_BYTES | +			STATION_INFO_RX_BYTES64 | +			STATION_INFO_TX_BYTES64 |  			STATION_INFO_RX_PACKETS |  			STATION_INFO_TX_PACKETS |  			STATION_INFO_TX_RETRIES | @@ -467,10 +467,14 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)  	sinfo->connected_time = uptime.tv_sec - sta->last_connected;  	sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); +	sinfo->tx_bytes = 0; +	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { +		sinfo->tx_bytes += sta->tx_bytes[ac]; +		packets += sta->tx_packets[ac]; +	} +	sinfo->tx_packets = packets;  	sinfo->rx_bytes = sta->rx_bytes; -	sinfo->tx_bytes = sta->tx_bytes;  	sinfo->rx_packets = sta->rx_packets; -	sinfo->tx_packets = sta->tx_packets;  	sinfo->tx_retries = sta->tx_retry_count;  	sinfo->tx_failed = sta->tx_retry_failed;  	sinfo->rx_dropped_misc = sta->rx_dropped; @@ -598,8 +602,8 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy,  		data[i++] += sta->rx_fragments;		\  		data[i++] += sta->rx_dropped;		\  							\ -		data[i++] += sta->tx_packets;		\ -		data[i++] += sta->tx_bytes;		\ +		data[i++] += sinfo.tx_packets;		\ +		data[i++] += sinfo.tx_bytes;		\  		data[i++] += sta->tx_fragments;		\  		data[i++] += sta->tx_filtered_count;	\  		data[i++] += sta->tx_retry_failed;	\ @@ -621,13 +625,14 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy,  		if (!(sta && !WARN_ON(sta->sdata->dev != dev)))  			goto do_survey; +		sinfo.filled = 0; +		sta_set_sinfo(sta, &sinfo); +  		i = 0;  		ADD_STA_STATS(sta);  		data[i++] = sta->sta_state; -		sinfo.filled = 0; -		sta_set_sinfo(sta, &sinfo);  		if (sinfo.filled & STATION_INFO_TX_BITRATE)  			data[i] = 100000 * @@ -800,8 +805,7 @@ static int 
ieee80211_set_monitor_channel(struct wiphy *wiphy,  					IEEE80211_CHANCTX_EXCLUSIVE);  		}  	} else if (local->open_count == local->monitors) { -		local->_oper_channel = chandef->chan; -		local->_oper_channel_type = cfg80211_get_chandef_type(chandef); +		local->_oper_chandef = *chandef;  		ieee80211_hw_config(local, 0);  	} @@ -960,8 +964,13 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,  	sdata->vif.bss_conf.hidden_ssid =  		(params->hidden_ssid != NL80211_HIDDEN_SSID_NOT_IN_USE); -	sdata->vif.bss_conf.p2p_ctwindow = params->p2p_ctwindow; -	sdata->vif.bss_conf.p2p_oppps = params->p2p_opp_ps; +	memset(&sdata->vif.bss_conf.p2p_noa_attr, 0, +	       sizeof(sdata->vif.bss_conf.p2p_noa_attr)); +	sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow = +		params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK; +	if (params->p2p_opp_ps) +		sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |= +					IEEE80211_P2P_OPPPS_ENABLE_BIT;  	err = ieee80211_assign_beacon(sdata, &params->beacon);  	if (err < 0) @@ -1034,12 +1043,17 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)  	list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)  		sta_info_flush_defer(vlan);  	sta_info_flush_defer(sdata); +	synchronize_net();  	rcu_barrier(); -	list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) +	list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) {  		sta_info_flush_cleanup(vlan); +		ieee80211_free_keys(vlan); +	}  	sta_info_flush_cleanup(sdata); +	ieee80211_free_keys(sdata);  	sdata->vif.bss_conf.enable_beacon = false; +	sdata->vif.bss_conf.ssid_len = 0;  	clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);  	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); @@ -1177,6 +1191,18 @@ static int sta_apply_parameters(struct ieee80211_local *local,  			mask |= BIT(NL80211_STA_FLAG_ASSOCIATED);  		if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED))  			set |= BIT(NL80211_STA_FLAG_ASSOCIATED); +	} else if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { +		/* +		 * TDLS -- everything follows authorized, but +		 * only becoming authorized is possible, not +		 * going back +		 */ +		if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) { +			set |= BIT(NL80211_STA_FLAG_AUTHENTICATED) | +			       BIT(NL80211_STA_FLAG_ASSOCIATED); +			mask |= BIT(NL80211_STA_FLAG_AUTHENTICATED) | +				BIT(NL80211_STA_FLAG_ASSOCIATED); +		}  	}  	ret = sta_apply_auth_flags(local, sta, mask, set); @@ -1261,7 +1287,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,  	if (ieee80211_vif_is_mesh(&sdata->vif)) {  #ifdef CONFIG_MAC80211_MESH  		u32 changed = 0; -		if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) { + +		if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) {  			switch (params->plink_state) {  			case NL80211_PLINK_ESTAB:  				if (sta->plink_state != NL80211_PLINK_ESTAB) @@ -1292,15 +1319,18 @@ static int sta_apply_parameters(struct ieee80211_local *local,  				/*  nothing  */  				break;  			} -		} else { -			switch (params->plink_action) { -			case PLINK_ACTION_OPEN: -				changed |= mesh_plink_open(sta); -				break; -			case PLINK_ACTION_BLOCK: -				changed |= mesh_plink_block(sta); -				break; -			} +		} + +		switch (params->plink_action) { +		case NL80211_PLINK_ACTION_NO_ACTION: +			/* nothing */ +			break; +		case NL80211_PLINK_ACTION_OPEN: +			changed |= mesh_plink_open(sta); +			break; +		case NL80211_PLINK_ACTION_BLOCK: +			changed |= mesh_plink_block(sta); +			break;  		}  		if (params->local_pm) @@ -1346,8
+1376,10 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,  	 * defaults -- if userspace wants something else we'll  	 * change it accordingly in sta_apply_parameters()  	 */ -	sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); -	sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); +	if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) { +		sta_info_pre_move_state(sta, IEEE80211_STA_AUTH); +		sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); +	}  	err = sta_apply_parameters(local, sta, params);  	if (err) { @@ -1356,8 +1388,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,  	}  	/* -	 * for TDLS, rate control should be initialized only when supported -	 * rates are known. +	 * for TDLS, rate control should be initialized only when +	 * rates are known and station is marked authorized  	 */  	if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER))  		rate_control_rate_init(sta); @@ -1394,50 +1426,67 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,  }  static int ieee80211_change_station(struct wiphy *wiphy, -				    struct net_device *dev, -				    u8 *mac, +				    struct net_device *dev, u8 *mac,  				    struct station_parameters *params)  {  	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);  	struct ieee80211_local *local = wiphy_priv(wiphy);  	struct sta_info *sta;  	struct ieee80211_sub_if_data *vlansdata; +	enum cfg80211_station_type statype;  	int err;  	mutex_lock(&local->sta_mtx);  	sta = sta_info_get_bss(sdata, mac);  	if (!sta) { -		mutex_unlock(&local->sta_mtx); -		return -ENOENT; +		err = -ENOENT; +		goto out_err;  	} -	/* in station mode, some updates are only valid with TDLS */ -	if (sdata->vif.type == NL80211_IFTYPE_STATION && -	    (params->supported_rates || params->ht_capa || params->vht_capa || -	     params->sta_modify_mask || -	     (params->sta_flags_mask & BIT(NL80211_STA_FLAG_WME))) && -	    !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { -		mutex_unlock(&local->sta_mtx); -		return -EINVAL; +	switch (sdata->vif.type) { +	case NL80211_IFTYPE_MESH_POINT: +		if (sdata->u.mesh.user_mpm) +			statype = CFG80211_STA_MESH_PEER_USER; +		else +			statype = CFG80211_STA_MESH_PEER_KERNEL; +		break; +	case NL80211_IFTYPE_ADHOC: +		statype = CFG80211_STA_IBSS; +		break; +	case NL80211_IFTYPE_STATION: +		if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { +			statype = CFG80211_STA_AP_STA; +			break; +		} +		if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) +			statype = CFG80211_STA_TDLS_PEER_ACTIVE; +		else +			statype = CFG80211_STA_TDLS_PEER_SETUP; +		break; +	case NL80211_IFTYPE_AP: +	case NL80211_IFTYPE_AP_VLAN: +		statype = CFG80211_STA_AP_CLIENT; +		break; +	default: +		err = -EOPNOTSUPP; +		goto out_err;  	} +	err = cfg80211_check_station_change(wiphy, params, statype); +	if (err) +		goto out_err; +  	if (params->vlan && params->vlan != sta->sdata->dev) {  		bool prev_4addr = false;  		bool new_4addr = false;  		vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); -		if (vlansdata->vif.type != NL80211_IFTYPE_AP_VLAN && -		    vlansdata->vif.type != NL80211_IFTYPE_AP) { -			mutex_unlock(&local->sta_mtx); -			return -EINVAL; -		} -  		if (params->vlan->ieee80211_ptr->use_4addr) {  			if (vlansdata->u.vlan.sta) { -				mutex_unlock(&local->sta_mtx); -				return -EBUSY; +				err = -EBUSY; +				goto out_err;  			}  			rcu_assign_pointer(vlansdata->u.vlan.sta, sta); @@ -1464,12 +1513,12 @@ static int ieee80211_change_station(struct wiphy *wiphy,  	}  	err = 
sta_apply_parameters(local, sta, params); -	if (err) { -		mutex_unlock(&local->sta_mtx); -		return err; -	} +	if (err) +		goto out_err; -	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && params->supported_rates) +	/* When peer becomes authorized, init rate control as well */ +	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && +	    test_sta_flag(sta, WLAN_STA_AUTHORIZED))  		rate_control_rate_init(sta);  	mutex_unlock(&local->sta_mtx); @@ -1479,7 +1528,11 @@ static int ieee80211_change_station(struct wiphy *wiphy,  		ieee80211_recalc_ps(local, -1);  		ieee80211_recalc_ps_vif(sdata);  	} +  	return 0; +out_err: +	mutex_unlock(&local->sta_mtx); +	return err;  }  #ifdef CONFIG_MAC80211_MESH @@ -1489,7 +1542,6 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,  	struct ieee80211_sub_if_data *sdata;  	struct mesh_path *mpath;  	struct sta_info *sta; -	int err;  	sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1500,17 +1552,12 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,  		return -ENOENT;  	} -	err = mesh_path_add(sdata, dst); -	if (err) { +	mpath = mesh_path_add(sdata, dst); +	if (IS_ERR(mpath)) {  		rcu_read_unlock(); -		return err; +		return PTR_ERR(mpath);  	} -	mpath = mesh_path_lookup(sdata, dst); -	if (!mpath) { -		rcu_read_unlock(); -		return -ENXIO; -	}  	mesh_path_fix_nexthop(mpath, sta);  	rcu_read_unlock(); @@ -1687,6 +1734,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,  	ifmsh->mesh_sp_id = setup->sync_method;  	ifmsh->mesh_pp_id = setup->path_sel_proto;  	ifmsh->mesh_pm_id = setup->path_metric; +	ifmsh->user_mpm = setup->user_mpm;  	ifmsh->security = IEEE80211_MESH_SEC_NONE;  	if (setup->is_authenticated)  		ifmsh->security |= IEEE80211_MESH_SEC_AUTHED; @@ -1730,8 +1778,11 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy,  		conf->dot11MeshTTL = nconf->dot11MeshTTL;  	if (_chg_mesh_attr(NL80211_MESHCONF_ELEMENT_TTL, mask))  		conf->element_ttl = nconf->element_ttl; -	if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask)) +	if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask)) { +		if (ifmsh->user_mpm) +			return -EBUSY;  		conf->auto_open_plinks = nconf->auto_open_plinks; +	}  	if (_chg_mesh_attr(NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, mask))  		conf->dot11MeshNbrOffsetMaxNeighbor =  			nconf->dot11MeshNbrOffsetMaxNeighbor; @@ -1910,12 +1961,20 @@ static int ieee80211_change_bss(struct wiphy *wiphy,  	}  	if (params->p2p_ctwindow >= 0) { -		sdata->vif.bss_conf.p2p_ctwindow = params->p2p_ctwindow; +		sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow &= +					~IEEE80211_P2P_OPPPS_CTWINDOW_MASK; +		sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |= +			params->p2p_ctwindow & IEEE80211_P2P_OPPPS_CTWINDOW_MASK;  		changed |= BSS_CHANGED_P2P_PS;  	} -	if (params->p2p_opp_ps >= 0) { -		sdata->vif.bss_conf.p2p_oppps = params->p2p_opp_ps; +	if (params->p2p_opp_ps > 0) { +		sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |= +					IEEE80211_P2P_OPPPS_ENABLE_BIT; +		changed |= BSS_CHANGED_P2P_PS; +	} else if (params->p2p_opp_ps == 0) { +		sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow &= +					~IEEE80211_P2P_OPPPS_ENABLE_BIT;  		changed |= BSS_CHANGED_P2P_PS;  	} @@ -2359,9 +2418,22 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,  	}  	for (i = 0; i < IEEE80211_NUM_BANDS; i++) { +		struct ieee80211_supported_band *sband = wiphy->bands[i]; +		int j; +  		sdata->rc_rateidx_mask[i] = mask->control[i].legacy;  		memcpy(sdata->rc_rateidx_mcs_mask[i], mask->control[i].mcs,  		       
sizeof(mask->control[i].mcs)); + +		sdata->rc_has_mcs_mask[i] = false; +		if (!sband) +			continue; + +		for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++) +			if (~sdata->rc_rateidx_mcs_mask[i][j]) { +				sdata->rc_has_mcs_mask[i] = true; +				break; +			}  	}  	return 0; @@ -2371,7 +2443,8 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,  				    struct ieee80211_sub_if_data *sdata,  				    struct ieee80211_channel *channel,  				    unsigned int duration, u64 *cookie, -				    struct sk_buff *txskb) +				    struct sk_buff *txskb, +				    enum ieee80211_roc_type type)  {  	struct ieee80211_roc_work *roc, *tmp;  	bool queued = false; @@ -2390,6 +2463,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,  	roc->duration = duration;  	roc->req_duration = duration;  	roc->frame = txskb; +	roc->type = type;  	roc->mgmt_tx_cookie = (unsigned long)txskb;  	roc->sdata = sdata;  	INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); @@ -2420,7 +2494,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,  	if (!duration)  		duration = 10; -	ret = drv_remain_on_channel(local, sdata, channel, duration); +	ret = drv_remain_on_channel(local, sdata, channel, duration, type);  	if (ret) {  		kfree(roc);  		return ret; @@ -2439,10 +2513,13 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,  		 *  		 * If it hasn't started yet, just increase the duration  		 * and add the new one to the list of dependents. +		 * If the type of the new ROC has higher priority, modify the +		 * type of the previous one to match that of the new one.  		 */  		if (!tmp->started) {  			list_add_tail(&roc->list, &tmp->dependents);  			tmp->duration = max(tmp->duration, roc->duration); +			tmp->type = max(tmp->type, roc->type);  			queued = true;  			break;  		} @@ -2454,16 +2531,18 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,  			/*  			 * In the offloaded ROC case, if it hasn't begun, add  			 * this new one to the dependent list to be handled -			 * when the the master one begins. If it has begun, +			 * when the master one begins. If it has begun,  			 * check that there's still a minimum time left and  			 * if so, start this one, transmitting the frame, but -			 * add it to the list directly after this one with a +			 * add it to the list directly after this one with  			 * a reduced time so we'll ask the driver to execute  			 * it right after finishing the previous one, in the  			 * hope that it'll also be executed right afterwards,  			 * effectively extending the old one.  			 * If there's no minimum time left, just add it to the  			 * normal list. +			 * TODO: the ROC type is ignored here, assuming that it +			 * is better to immediately use the current ROC.  			 
*/  			if (!tmp->hw_begun) {  				list_add_tail(&roc->list, &tmp->dependents); @@ -2557,7 +2636,8 @@ static int ieee80211_remain_on_channel(struct wiphy *wiphy,  	mutex_lock(&local->mtx);  	ret = ieee80211_start_roc_work(local, sdata, chan, -				       duration, cookie, NULL); +				       duration, cookie, NULL, +				       IEEE80211_ROC_TYPE_NORMAL);  	mutex_unlock(&local->mtx);  	return ret; @@ -2792,7 +2872,8 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,  	/* This will handle all kinds of coalescing and immediate TX */  	ret = ieee80211_start_roc_work(local, sdata, chan, -				       wait, cookie, skb); +				       wait, cookie, skb, +				       IEEE80211_ROC_TYPE_MGMT_TX);  	if (ret)  		kfree_skb(skb);   out_unlock: @@ -3302,9 +3383,7 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy,  		if (local->use_chanctx)  			*chandef = local->monitor_chandef;  		else -			cfg80211_chandef_create(chandef, -						local->_oper_channel, -						local->_oper_channel_type); +			*chandef = local->_oper_chandef;  		ret = 0;  	}  	rcu_read_unlock(); diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 931be419ab5..03e8d2e3270 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -22,7 +22,7 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local,  	drv_change_chanctx(local, ctx, IEEE80211_CHANCTX_CHANGE_WIDTH);  	if (!local->use_chanctx) { -		local->_oper_channel_type = cfg80211_get_chandef_type(chandef); +		local->_oper_chandef = *chandef;  		ieee80211_hw_config(local, 0);  	}  } @@ -57,6 +57,22 @@ ieee80211_find_chanctx(struct ieee80211_local *local,  	return NULL;  } +static bool ieee80211_is_radar_required(struct ieee80211_local *local) +{ +	struct ieee80211_sub_if_data *sdata; + +	rcu_read_lock(); +	list_for_each_entry_rcu(sdata, &local->interfaces, list) { +		if (sdata->radar_required) { +			rcu_read_unlock(); +			return true; +		} +	} +	rcu_read_unlock(); + +	return false; +} +  static struct ieee80211_chanctx *  ieee80211_new_chanctx(struct ieee80211_local *local,  		      const struct cfg80211_chan_def *chandef, @@ -76,6 +92,9 @@ ieee80211_new_chanctx(struct ieee80211_local *local,  	ctx->conf.rx_chains_static = 1;  	ctx->conf.rx_chains_dynamic = 1;  	ctx->mode = mode; +	ctx->conf.radar_enabled = ieee80211_is_radar_required(local); +	if (!local->use_chanctx) +		local->hw.conf.radar_enabled = ctx->conf.radar_enabled;  	/* acquire mutex to prevent idle from changing */  	mutex_lock(&local->mtx); @@ -85,9 +104,7 @@ ieee80211_new_chanctx(struct ieee80211_local *local,  		ieee80211_hw_config(local, changed);  	if (!local->use_chanctx) { -		local->_oper_channel_type = -			cfg80211_get_chandef_type(chandef); -		local->_oper_channel = chandef->chan; +		local->_oper_chandef = *chandef;  		ieee80211_hw_config(local, 0);  	} else {  		err = drv_add_chanctx(local, ctx); @@ -112,12 +129,24 @@ ieee80211_new_chanctx(struct ieee80211_local *local,  static void ieee80211_free_chanctx(struct ieee80211_local *local,  				   struct ieee80211_chanctx *ctx)  { +	bool check_single_channel = false;  	lockdep_assert_held(&local->chanctx_mtx);  	WARN_ON_ONCE(ctx->refcount != 0);  	if (!local->use_chanctx) { -		local->_oper_channel_type = NL80211_CHAN_NO_HT; +		struct cfg80211_chan_def *chandef = &local->_oper_chandef; +		chandef->width = NL80211_CHAN_WIDTH_20_NOHT; +		chandef->center_freq1 = chandef->chan->center_freq; +		chandef->center_freq2 = 0; + +		/* NOTE: Disabling radar is only valid here for +		 * single channel context. 
To be sure, check it ... +		 */ +		if (local->hw.conf.radar_enabled) +			check_single_channel = true; +		local->hw.conf.radar_enabled = false; +  		ieee80211_hw_config(local, 0);  	} else {  		drv_remove_chanctx(local, ctx); @@ -126,6 +155,9 @@ static void ieee80211_free_chanctx(struct ieee80211_local *local,  	list_del_rcu(&ctx->list);  	kfree_rcu(ctx, rcu_head); +	/* throw a warning if this wasn't the only channel context. */ +	WARN_ON(check_single_channel && !list_empty(&local->chanctx_list)); +  	mutex_lock(&local->mtx);  	ieee80211_recalc_idle(local);  	mutex_unlock(&local->mtx); @@ -237,19 +269,11 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata)  void ieee80211_recalc_radar_chanctx(struct ieee80211_local *local,  				    struct ieee80211_chanctx *chanctx)  { -	struct ieee80211_sub_if_data *sdata; -	bool radar_enabled = false; +	bool radar_enabled;  	lockdep_assert_held(&local->chanctx_mtx); -	rcu_read_lock(); -	list_for_each_entry_rcu(sdata, &local->interfaces, list) { -		if (sdata->radar_required) { -			radar_enabled = true; -			break; -		} -	} -	rcu_read_unlock(); +	radar_enabled = ieee80211_is_radar_required(local);  	if (radar_enabled == chanctx->conf.radar_enabled)  		return; diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index c3a3082b72e..1521cabad3d 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -295,7 +295,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)  	char buf[50];  	struct ieee80211_key *key; -	if (!sdata->debugfs.dir) +	if (!sdata->vif.debugfs_dir)  		return;  	lockdep_assert_held(&sdata->local->key_mtx); @@ -311,7 +311,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)  		sprintf(buf, "../keys/%d", key->debugfs.cnt);  		sdata->debugfs.default_unicast_key =  			debugfs_create_symlink("default_unicast_key", -					       sdata->debugfs.dir, buf); +					       sdata->vif.debugfs_dir, buf);  	}  	if (sdata->debugfs.default_multicast_key) { @@ -325,7 +325,7 @@ void ieee80211_debugfs_key_update_default(struct ieee80211_sub_if_data *sdata)  		sprintf(buf, "../keys/%d", key->debugfs.cnt);  		sdata->debugfs.default_multicast_key =  			debugfs_create_symlink("default_multicast_key", -					       sdata->debugfs.dir, buf); +					       sdata->vif.debugfs_dir, buf);  	}  } @@ -334,7 +334,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata)  	char buf[50];  	struct ieee80211_key *key; -	if (!sdata->debugfs.dir) +	if (!sdata->vif.debugfs_dir)  		return;  	key = key_mtx_dereference(sdata->local, @@ -343,7 +343,7 @@ void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata)  		sprintf(buf, "../keys/%d", key->debugfs.cnt);  		sdata->debugfs.default_mgmt_key =  			debugfs_create_symlink("default_mgmt_key", -					       sdata->debugfs.dir, buf); +					       sdata->vif.debugfs_dir, buf);  	} else  		ieee80211_debugfs_key_remove_mgmt_default(sdata);  } diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 059bbb82e84..14abcf44f97 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -124,6 +124,15 @@ static ssize_t ieee80211_if_fmt_##name(					\  	return scnprintf(buf, buflen, "%d\n", sdata->field / 16);	\  } +#define IEEE80211_IF_FMT_JIFFIES_TO_MS(name, field)			\ +static ssize_t ieee80211_if_fmt_##name(					\ +	const struct ieee80211_sub_if_data *sdata,			\ +	char *buf, int buflen)						\ +{									\ +	return scnprintf(buf, 
buflen, "%d\n",				\ +			 jiffies_to_msecs(sdata->field));		\ +} +  #define __IEEE80211_IF_FILE(name, _write)				\  static ssize_t ieee80211_if_read_##name(struct file *file,		\  					char __user *userbuf,		\ @@ -197,6 +206,7 @@ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC);  IEEE80211_IF_FILE(aid, u.mgd.aid, DEC);  IEEE80211_IF_FILE(last_beacon, u.mgd.last_beacon_signal, DEC);  IEEE80211_IF_FILE(ave_beacon, u.mgd.ave_beacon_signal, DEC_DIV_16); +IEEE80211_IF_FILE(beacon_timeout, u.mgd.beacon_timeout, JIFFIES_TO_MS);  static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata,  			      enum ieee80211_smps_mode smps_mode) @@ -521,7 +531,7 @@ IEEE80211_IF_FILE(dot11MeshAwakeWindowDuration,  #endif  #define DEBUGFS_ADD_MODE(name, mode) \ -	debugfs_create_file(#name, mode, sdata->debugfs.dir, \ +	debugfs_create_file(#name, mode, sdata->vif.debugfs_dir, \  			    sdata, &name##_ops);  #define DEBUGFS_ADD(name) DEBUGFS_ADD_MODE(name, 0400) @@ -542,6 +552,7 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)  	DEBUGFS_ADD(aid);  	DEBUGFS_ADD(last_beacon);  	DEBUGFS_ADD(ave_beacon); +	DEBUGFS_ADD(beacon_timeout);  	DEBUGFS_ADD_MODE(smps, 0600);  	DEBUGFS_ADD_MODE(tkip_mic_test, 0200);  	DEBUGFS_ADD_MODE(uapsd_queues, 0600); @@ -577,7 +588,7 @@ static void add_mesh_files(struct ieee80211_sub_if_data *sdata)  static void add_mesh_stats(struct ieee80211_sub_if_data *sdata)  {  	struct dentry *dir = debugfs_create_dir("mesh_stats", -						sdata->debugfs.dir); +						sdata->vif.debugfs_dir);  #define MESHSTATS_ADD(name)\  	debugfs_create_file(#name, 0400, dir, sdata, &name##_ops); @@ -594,7 +605,7 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata)  static void add_mesh_config(struct ieee80211_sub_if_data *sdata)  {  	struct dentry *dir = debugfs_create_dir("mesh_config", -						sdata->debugfs.dir); +						sdata->vif.debugfs_dir);  #define MESHPARAMS_ADD(name) \  	debugfs_create_file(#name, 0600, dir, sdata, &name##_ops); @@ -631,7 +642,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata)  static void add_files(struct ieee80211_sub_if_data *sdata)  { -	if (!sdata->debugfs.dir) +	if (!sdata->vif.debugfs_dir)  		return;  	DEBUGFS_ADD(flags); @@ -673,21 +684,21 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata)  	char buf[10+IFNAMSIZ];  	sprintf(buf, "netdev:%s", sdata->name); -	sdata->debugfs.dir = debugfs_create_dir(buf, +	sdata->vif.debugfs_dir = debugfs_create_dir(buf,  		sdata->local->hw.wiphy->debugfsdir); -	if (sdata->debugfs.dir) +	if (sdata->vif.debugfs_dir)  		sdata->debugfs.subdir_stations = debugfs_create_dir("stations", -			sdata->debugfs.dir); +			sdata->vif.debugfs_dir);  	add_files(sdata);  }  void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata)  { -	if (!sdata->debugfs.dir) +	if (!sdata->vif.debugfs_dir)  		return; -	debugfs_remove_recursive(sdata->debugfs.dir); -	sdata->debugfs.dir = NULL; +	debugfs_remove_recursive(sdata->vif.debugfs_dir); +	sdata->vif.debugfs_dir = NULL;  }  void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata) @@ -695,7 +706,7 @@ void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata)  	struct dentry *dir;  	char buf[10 + IFNAMSIZ]; -	dir = sdata->debugfs.dir; +	dir = sdata->vif.debugfs_dir;  	if (!dir)  		return; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index c7591f73dbc..44e201d60a1 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -54,6 +54,7 @@ STA_FILE(aid, sta.aid, D);  
STA_FILE(dev, sdata->name, S);  STA_FILE(last_signal, last_signal, D);  STA_FILE(last_ack_signal, last_ack_signal, D); +STA_FILE(beacon_loss_count, beacon_loss_count, D);  static ssize_t sta_flags_read(struct file *file, char __user *userbuf,  			      size_t count, loff_t *ppos) @@ -325,6 +326,36 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf,  }  STA_OPS(ht_capa); +static ssize_t sta_vht_capa_read(struct file *file, char __user *userbuf, +				 size_t count, loff_t *ppos) +{ +	char buf[128], *p = buf; +	struct sta_info *sta = file->private_data; +	struct ieee80211_sta_vht_cap *vhtc = &sta->sta.vht_cap; + +	p += scnprintf(p, sizeof(buf) + buf - p, "VHT %ssupported\n", +			vhtc->vht_supported ? "" : "not "); +	if (vhtc->vht_supported) { +		p += scnprintf(p, sizeof(buf)+buf-p, "cap: %#.8x\n", vhtc->cap); + +		p += scnprintf(p, sizeof(buf)+buf-p, "RX MCS: %.4x\n", +			       le16_to_cpu(vhtc->vht_mcs.rx_mcs_map)); +		if (vhtc->vht_mcs.rx_highest) +			p += scnprintf(p, sizeof(buf)+buf-p, +				       "MCS RX highest: %d Mbps\n", +				       le16_to_cpu(vhtc->vht_mcs.rx_highest)); +		p += scnprintf(p, sizeof(buf)+buf-p, "TX MCS: %.4x\n", +			       le16_to_cpu(vhtc->vht_mcs.tx_mcs_map)); +		if (vhtc->vht_mcs.tx_highest) +			p += scnprintf(p, sizeof(buf)+buf-p, +				       "MCS TX highest: %d Mbps\n", +				       le16_to_cpu(vhtc->vht_mcs.tx_highest)); +	} + +	return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); +} +STA_OPS(vht_capa); +  static ssize_t sta_current_tx_rate_read(struct file *file, char __user *userbuf,  					size_t count, loff_t *ppos)  { @@ -404,7 +435,9 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)  	DEBUGFS_ADD(agg_status);  	DEBUGFS_ADD(dev);  	DEBUGFS_ADD(last_signal); +	DEBUGFS_ADD(beacon_loss_count);  	DEBUGFS_ADD(ht_capa); +	DEBUGFS_ADD(vht_capa);  	DEBUGFS_ADD(last_ack_signal);  	DEBUGFS_ADD(current_tx_rate);  	DEBUGFS_ADD(last_rx_rate); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index ee56d0779d8..169664c122e 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -241,6 +241,22 @@ static inline u64 drv_prepare_multicast(struct ieee80211_local *local,  	return ret;  } +static inline void drv_set_multicast_list(struct ieee80211_local *local, +					  struct ieee80211_sub_if_data *sdata, +					  struct netdev_hw_addr_list *mc_list) +{ +	bool allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; + +	trace_drv_set_multicast_list(local, sdata, mc_list->count); + +	check_sdata_in_driver(sdata); + +	if (local->ops->set_multicast_list) +		local->ops->set_multicast_list(&local->hw, &sdata->vif, +					       allmulti, mc_list); +	trace_drv_return_void(local); +} +  static inline void drv_configure_filter(struct ieee80211_local *local,  					unsigned int changed_flags,  					unsigned int *total_flags, @@ -531,43 +547,6 @@ static inline void drv_sta_remove_debugfs(struct ieee80211_local *local,  		local->ops->sta_remove_debugfs(&local->hw, &sdata->vif,  					       sta, dir);  } - -static inline -void drv_add_interface_debugfs(struct ieee80211_local *local, -			       struct ieee80211_sub_if_data *sdata) -{ -	might_sleep(); - -	check_sdata_in_driver(sdata); - -	if (!local->ops->add_interface_debugfs) -		return; - -	local->ops->add_interface_debugfs(&local->hw, &sdata->vif, -					  sdata->debugfs.dir); -} - -static inline -void drv_remove_interface_debugfs(struct ieee80211_local *local, -				  struct ieee80211_sub_if_data *sdata) -{ -	might_sleep(); - -	check_sdata_in_driver(sdata); - -	if 
(!local->ops->remove_interface_debugfs) -		return; - -	local->ops->remove_interface_debugfs(&local->hw, &sdata->vif, -					     sdata->debugfs.dir); -} -#else -static inline -void drv_add_interface_debugfs(struct ieee80211_local *local, -			       struct ieee80211_sub_if_data *sdata) {} -static inline -void drv_remove_interface_debugfs(struct ieee80211_local *local, -				  struct ieee80211_sub_if_data *sdata) {}  #endif  static inline __must_check @@ -741,13 +720,14 @@ static inline void drv_rfkill_poll(struct ieee80211_local *local)  		local->ops->rfkill_poll(&local->hw);  } -static inline void drv_flush(struct ieee80211_local *local, bool drop) +static inline void drv_flush(struct ieee80211_local *local, +			     u32 queues, bool drop)  {  	might_sleep(); -	trace_drv_flush(local, drop); +	trace_drv_flush(local, queues, drop);  	if (local->ops->flush) -		local->ops->flush(&local->hw, drop); +		local->ops->flush(&local->hw, queues, drop);  	trace_drv_return_void(local);  } @@ -787,15 +767,16 @@ static inline int drv_get_antenna(struct ieee80211_local *local,  static inline int drv_remain_on_channel(struct ieee80211_local *local,  					struct ieee80211_sub_if_data *sdata,  					struct ieee80211_channel *chan, -					unsigned int duration) +					unsigned int duration, +					enum ieee80211_roc_type type)  {  	int ret;  	might_sleep(); -	trace_drv_remain_on_channel(local, sdata, chan, duration); +	trace_drv_remain_on_channel(local, sdata, chan, duration, type);  	ret = local->ops->remain_on_channel(&local->hw, &sdata->vif, -					    chan, duration); +					    chan, duration, type);  	trace_drv_return_int(local, ret);  	return ret; diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 0db25d4bb22..af8cee06e4f 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -40,13 +40,6 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata,  	if (!ht_cap->ht_supported)  		return; -	if (sdata->vif.type != NL80211_IFTYPE_STATION) { -		/* AP interfaces call this code when adding new stations, -		 * so just silently ignore non station interfaces. -		 */ -		return; -	} -  	/* NOTE:  If you add more over-rides here, update register_hw  	 * ht_capa_mod_msk logic in main.c as well.  	 * And, if this method can ever change ht_cap.ht_supported, fix @@ -97,7 +90,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,  				       const struct ieee80211_ht_cap *ht_cap_ie,  				       struct sta_info *sta)  { -	struct ieee80211_sta_ht_cap ht_cap; +	struct ieee80211_sta_ht_cap ht_cap, own_cap;  	u8 ampdu_info, tx_mcs_set_cap;  	int i, max_tx_streams;  	bool changed; @@ -111,6 +104,18 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,  	ht_cap.ht_supported = true; +	own_cap = sband->ht_cap; + +	/* +	 * If user has specified capability over-rides, take care +	 * of that if the station we're setting up is the AP that +	 * we advertised a restricted capability set to. Override +	 * our own capabilities and then use those below. +	 */ +	if (sdata->vif.type == NL80211_IFTYPE_STATION && +	    !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) +		ieee80211_apply_htcap_overrides(sdata, &own_cap); +  	/*  	 * The bits listed in this expression should be  	 * the same for the peer and us, if the station @@ -118,21 +123,20 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,  	 * we mask them out.  	 
*/  	ht_cap.cap = le16_to_cpu(ht_cap_ie->cap_info) & -		(sband->ht_cap.cap | -		 ~(IEEE80211_HT_CAP_LDPC_CODING | -		   IEEE80211_HT_CAP_SUP_WIDTH_20_40 | -		   IEEE80211_HT_CAP_GRN_FLD | -		   IEEE80211_HT_CAP_SGI_20 | -		   IEEE80211_HT_CAP_SGI_40 | -		   IEEE80211_HT_CAP_DSSSCCK40)); +		(own_cap.cap | ~(IEEE80211_HT_CAP_LDPC_CODING | +				 IEEE80211_HT_CAP_SUP_WIDTH_20_40 | +				 IEEE80211_HT_CAP_GRN_FLD | +				 IEEE80211_HT_CAP_SGI_20 | +				 IEEE80211_HT_CAP_SGI_40 | +				 IEEE80211_HT_CAP_DSSSCCK40));  	/*  	 * The STBC bits are asymmetric -- if we don't have  	 * TX then mask out the peer's RX and vice versa.  	 */ -	if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC)) +	if (!(own_cap.cap & IEEE80211_HT_CAP_TX_STBC))  		ht_cap.cap &= ~IEEE80211_HT_CAP_RX_STBC; -	if (!(sband->ht_cap.cap & IEEE80211_HT_CAP_RX_STBC)) +	if (!(own_cap.cap & IEEE80211_HT_CAP_RX_STBC))  		ht_cap.cap &= ~IEEE80211_HT_CAP_TX_STBC;  	ampdu_info = ht_cap_ie->ampdu_params_info; @@ -142,7 +146,7 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,  		(ampdu_info & IEEE80211_HT_AMPDU_PARM_DENSITY) >> 2;  	/* own MCS TX capabilities */ -	tx_mcs_set_cap = sband->ht_cap.mcs.tx_params; +	tx_mcs_set_cap = own_cap.mcs.tx_params;  	/* Copy peer MCS TX capabilities, the driver might need them. */  	ht_cap.mcs.tx_params = ht_cap_ie->mcs.tx_params; @@ -168,26 +172,20 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata,  	 */  	for (i = 0; i < max_tx_streams; i++)  		ht_cap.mcs.rx_mask[i] = -			sband->ht_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i]; +			own_cap.mcs.rx_mask[i] & ht_cap_ie->mcs.rx_mask[i];  	if (tx_mcs_set_cap & IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION)  		for (i = IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE;  		     i < IEEE80211_HT_MCS_MASK_LEN; i++)  			ht_cap.mcs.rx_mask[i] = -				sband->ht_cap.mcs.rx_mask[i] & +				own_cap.mcs.rx_mask[i] &  					ht_cap_ie->mcs.rx_mask[i];  	/* handle MCS rate 32 too */ -	if (sband->ht_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1) +	if (own_cap.mcs.rx_mask[32/8] & ht_cap_ie->mcs.rx_mask[32/8] & 1)  		ht_cap.mcs.rx_mask[32/8] |= 1;   apply: -	/* -	 * If user has specified capability over-rides, take care -	 * of that here. -	 */ -	ieee80211_apply_htcap_overrides(sdata, &ht_cap); -  	changed = memcmp(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap));  	memcpy(&sta->sta.ht_cap, &ht_cap, sizeof(ht_cap)); diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 40b71dfcc79..170f9a7fa31 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -44,7 +44,6 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,  	struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;  	struct ieee80211_local *local = sdata->local;  	int rates, i; -	struct sk_buff *skb;  	struct ieee80211_mgmt *mgmt;  	u8 *pos;  	struct ieee80211_supported_band *sband; @@ -52,20 +51,14 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,  	u32 bss_change;  	u8 supp_rates[IEEE80211_MAX_SUPP_RATES];  	struct cfg80211_chan_def chandef; +	struct beacon_data *presp; +	int frame_len;  	lockdep_assert_held(&ifibss->mtx);  	/* Reset own TSF to allow time synchronization work. 
*/  	drv_reset_tsf(local, sdata); -	skb = ifibss->skb; -	RCU_INIT_POINTER(ifibss->presp, NULL); -	synchronize_rcu(); -	skb->data = skb->head; -	skb->len = 0; -	skb_reset_tail_pointer(skb); -	skb_reserve(skb, sdata->local->hw.extra_tx_headroom); -  	if (!ether_addr_equal(ifibss->bssid, bssid))  		sta_info_flush(sdata); @@ -73,10 +66,19 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,  	if (sdata->vif.bss_conf.ibss_joined) {  		sdata->vif.bss_conf.ibss_joined = false;  		sdata->vif.bss_conf.ibss_creator = false; +		sdata->vif.bss_conf.enable_beacon = false;  		netif_carrier_off(sdata->dev); -		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IBSS); +		ieee80211_bss_info_change_notify(sdata, +						 BSS_CHANGED_IBSS | +						 BSS_CHANGED_BEACON_ENABLED);  	} +	presp = rcu_dereference_protected(ifibss->presp, +					  lockdep_is_held(&ifibss->mtx)); +	rcu_assign_pointer(ifibss->presp, NULL); +	if (presp) +		kfree_rcu(presp, rcu_head); +  	sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0;  	cfg80211_chandef_create(&chandef, chan, ifibss->channel_type); @@ -98,19 +100,24 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,  	sband = local->hw.wiphy->bands[chan->band]; -	/* build supported rates array */ -	pos = supp_rates; -	for (i = 0; i < sband->n_bitrates; i++) { -		int rate = sband->bitrates[i].bitrate; -		u8 basic = 0; -		if (basic_rates & BIT(i)) -			basic = 0x80; -		*pos++ = basic | (u8) (rate / 5); -	} -  	/* Build IBSS probe response */ -	mgmt = (void *) skb_put(skb, 24 + sizeof(mgmt->u.beacon)); -	memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon)); +	frame_len = sizeof(struct ieee80211_hdr_3addr) + +		    12 /* struct ieee80211_mgmt.u.beacon */ + +		    2 + IEEE80211_MAX_SSID_LEN /* max SSID */ + +		    2 + 8 /* max Supported Rates */ + +		    3 /* max DS params */ + +		    4 /* IBSS params */ + +		    2 + (IEEE80211_MAX_SUPP_RATES - 8) + +		    2 + sizeof(struct ieee80211_ht_cap) + +		    2 + sizeof(struct ieee80211_ht_operation) + +		    ifibss->ie_len; +	presp = kzalloc(sizeof(*presp) + frame_len, GFP_KERNEL); +	if (!presp) +		return; + +	presp->head = (void *)(presp + 1); + +	mgmt = (void *) presp->head;  	mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |  					  IEEE80211_STYPE_PROBE_RESP);  	eth_broadcast_addr(mgmt->da); @@ -120,27 +127,30 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,  	mgmt->u.beacon.timestamp = cpu_to_le64(tsf);  	mgmt->u.beacon.capab_info = cpu_to_le16(capability); -	pos = skb_put(skb, 2 + ifibss->ssid_len); +	pos = (u8 *)mgmt + offsetof(struct ieee80211_mgmt, u.beacon.variable); +  	*pos++ = WLAN_EID_SSID;  	*pos++ = ifibss->ssid_len;  	memcpy(pos, ifibss->ssid, ifibss->ssid_len); +	pos += ifibss->ssid_len; -	rates = sband->n_bitrates; -	if (rates > 8) -		rates = 8; -	pos = skb_put(skb, 2 + rates); +	rates = min_t(int, 8, sband->n_bitrates);  	*pos++ = WLAN_EID_SUPP_RATES;  	*pos++ = rates; -	memcpy(pos, supp_rates, rates); +	for (i = 0; i < rates; i++) { +		int rate = sband->bitrates[i].bitrate; +		u8 basic = 0; +		if (basic_rates & BIT(i)) +			basic = 0x80; +		*pos++ = basic | (u8) (rate / 5); +	}  	if (sband->band == IEEE80211_BAND_2GHZ) { -		pos = skb_put(skb, 2 + 1);  		*pos++ = WLAN_EID_DS_PARAMS;  		*pos++ = 1;  		*pos++ = ieee80211_frequency_to_channel(chan->center_freq);  	} -	pos = skb_put(skb, 2 + 2);  	*pos++ = WLAN_EID_IBSS_PARAMS;  	*pos++ = 2;  	/* FIX: set ATIM window based on scan results */ @@ -148,23 +158,25 @@ static void 
__ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,  	*pos++ = 0;  	if (sband->n_bitrates > 8) { -		rates = sband->n_bitrates - 8; -		pos = skb_put(skb, 2 + rates);  		*pos++ = WLAN_EID_EXT_SUPP_RATES; -		*pos++ = rates; -		memcpy(pos, &supp_rates[8], rates); +		*pos++ = sband->n_bitrates - 8; +		for (i = 8; i < sband->n_bitrates; i++) { +			int rate = sband->bitrates[i].bitrate; +			u8 basic = 0; +			if (basic_rates & BIT(i)) +				basic = 0x80; +			*pos++ = basic | (u8) (rate / 5); +		}  	} -	if (ifibss->ie_len) -		memcpy(skb_put(skb, ifibss->ie_len), -		       ifibss->ie, ifibss->ie_len); +	if (ifibss->ie_len) { +		memcpy(pos, ifibss->ie, ifibss->ie_len); +		pos += ifibss->ie_len; +	}  	/* add HT capability and information IEs */  	if (chandef.width != NL80211_CHAN_WIDTH_20_NOHT &&  	    sband->ht_cap.ht_supported) { -		pos = skb_put(skb, 4 + -				   sizeof(struct ieee80211_ht_cap) + -				   sizeof(struct ieee80211_ht_operation));  		pos = ieee80211_ie_build_ht_cap(pos, &sband->ht_cap,  						sband->ht_cap.cap);  		/* @@ -177,7 +189,6 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,  	}  	if (local->hw.queues >= IEEE80211_NUM_ACS) { -		pos = skb_put(skb, 9);  		*pos++ = WLAN_EID_VENDOR_SPECIFIC;  		*pos++ = 7; /* len */  		*pos++ = 0x00; /* Microsoft OUI 00:50:F2 */ @@ -189,11 +200,17 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,  		*pos++ = 0; /* U-APSD no in use */  	} -	rcu_assign_pointer(ifibss->presp, skb); +	presp->head_len = pos - presp->head; +	if (WARN_ON(presp->head_len > frame_len)) +		return; + +	rcu_assign_pointer(ifibss->presp, presp);  	sdata->vif.bss_conf.enable_beacon = true;  	sdata->vif.bss_conf.beacon_int = beacon_int;  	sdata->vif.bss_conf.basic_rates = basic_rates; +	sdata->vif.bss_conf.ssid_len = ifibss->ssid_len; +	memcpy(sdata->vif.bss_conf.ssid, ifibss->ssid, ifibss->ssid_len);  	bss_change = BSS_CHANGED_BEACON_INT;  	bss_change |= ieee80211_reset_erp_info(sdata);  	bss_change |= BSS_CHANGED_BSSID; @@ -202,6 +219,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,  	bss_change |= BSS_CHANGED_BASIC_RATES;  	bss_change |= BSS_CHANGED_HT;  	bss_change |= BSS_CHANGED_IBSS; +	bss_change |= BSS_CHANGED_SSID;  	/*  	 * In 5 GHz/802.11a, we can always use short slot time. 
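---- editor's note: RCU publish/retire sketch ----
The rework above stops keeping a half-built skb around and instead builds
the IBSS probe response into a freshly allocated beacon_data, publishes it
with rcu_assign_pointer() and retires the previous one with kfree_rcu().
A standalone rendition of that publish/retire pattern, assuming userspace
RCU (liburcu, build with -lurcu); synchronize_rcu() plus free() stands in
for the asynchronous kfree_rcu():

    #include <urcu.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct presp {
        size_t head_len;
        char head[64];
    };

    static struct presp *presp_ptr;  /* RCU-protected */

    static void publish(const char *frame)
    {
        struct presp *new = calloc(1, sizeof(*new));
        struct presp *old = presp_ptr;  /* writers are serialized */

        new->head_len = strlen(frame);
        memcpy(new->head, frame, new->head_len);
        rcu_assign_pointer(presp_ptr, new);  /* readers see it now */

        synchronize_rcu();  /* wait out readers of the old template */
        free(old);          /* kfree_rcu() defers this step instead */
    }

    int main(void)
    {
        rcu_register_thread();
        publish("probe-resp-v1");
        publish("probe-resp-v2");

        rcu_read_lock();
        printf("%zu bytes\n", rcu_dereference(presp_ptr)->head_len);
        rcu_read_unlock();

        rcu_unregister_thread();
        return 0;
    }
---- end note ----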
@@ -227,7 +245,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,  		  round_jiffies(jiffies + IEEE80211_IBSS_MERGE_INTERVAL));  	bss = cfg80211_inform_bss_frame(local->hw.wiphy, chan, -					mgmt, skb->len, 0, GFP_KERNEL); +					mgmt, presp->head_len, 0, GFP_KERNEL);  	cfg80211_put_bss(local->hw.wiphy, bss);  	netif_carrier_on(sdata->dev);  	cfg80211_ibss_joined(sdata->dev, ifibss->bssid, GFP_KERNEL); @@ -448,7 +466,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,  	struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];  	bool rates_updated = false; -	if (elems->ds_params && elems->ds_params_len == 1) +	if (elems->ds_params)  		freq = ieee80211_channel_to_frequency(elems->ds_params[0],  						      band);  	else @@ -822,8 +840,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,  	struct ieee80211_local *local = sdata->local;  	int tx_last_beacon, len = req->len;  	struct sk_buff *skb; -	struct ieee80211_mgmt *resp; -	struct sk_buff *presp; +	struct beacon_data *presp;  	u8 *pos, *end;  	lockdep_assert_held(&ifibss->mtx); @@ -864,13 +881,15 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,  	}  	/* Reply with ProbeResp */ -	skb = skb_copy(presp, GFP_KERNEL); +	skb = dev_alloc_skb(local->tx_headroom + presp->head_len);  	if (!skb)  		return; -	resp = (struct ieee80211_mgmt *) skb->data; -	memcpy(resp->da, mgmt->sa, ETH_ALEN); -	ibss_dbg(sdata, "Sending ProbeResp to %pM\n", resp->da); +	skb_reserve(skb, local->tx_headroom); +	memcpy(skb_put(skb, presp->head_len), presp->head, presp->head_len); + +	memcpy(((struct ieee80211_mgmt *) skb->data)->da, mgmt->sa, ETH_ALEN); +	ibss_dbg(sdata, "Sending ProbeResp to %pM\n", mgmt->sa);  	IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;  	ieee80211_tx_skb(sdata, skb);  } @@ -895,7 +914,7 @@ void ieee80211_rx_mgmt_probe_beacon(struct ieee80211_sub_if_data *sdata,  		return;  	ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, -				&elems); +			       false, &elems);  	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);  } @@ -985,36 +1004,9 @@ static void ieee80211_ibss_timer(unsigned long data)  {  	struct ieee80211_sub_if_data *sdata =  		(struct ieee80211_sub_if_data *) data; -	struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; -	struct ieee80211_local *local = sdata->local; - -	if (local->quiescing) { -		ifibss->timer_running = true; -		return; -	} - -	ieee80211_queue_work(&local->hw, &sdata->work); -} - -#ifdef CONFIG_PM -void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata) -{ -	struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; -	if (del_timer_sync(&ifibss->timer)) -		ifibss->timer_running = true; -} - -void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata) -{ -	struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; - -	if (ifibss->timer_running) { -		add_timer(&ifibss->timer); -		ifibss->timer_running = false; -	} +	ieee80211_queue_work(&sdata->local->hw, &sdata->work);  } -#endif  void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata)  { @@ -1047,23 +1039,8 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local)  int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,  			struct cfg80211_ibss_params *params)  { -	struct sk_buff *skb;  	u32 changed = 0; -	skb = dev_alloc_skb(sdata->local->hw.extra_tx_headroom + -			    sizeof(struct ieee80211_hdr_3addr) + -			    12 /* struct ieee80211_mgmt.u.beacon */ + -			    2 + 
IEEE80211_MAX_SSID_LEN /* max SSID */ + -			    2 + 8 /* max Supported Rates */ + -			    3 /* max DS params */ + -			    4 /* IBSS params */ + -			    2 + (IEEE80211_MAX_SUPP_RATES - 8) + -			    2 + sizeof(struct ieee80211_ht_cap) + -			    2 + sizeof(struct ieee80211_ht_operation) + -			    params->ie_len); -	if (!skb) -		return -ENOMEM; -  	mutex_lock(&sdata->u.ibss.mtx);  	if (params->bssid) { @@ -1092,7 +1069,6 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,  			sdata->u.ibss.ie_len = params->ie_len;  	} -	sdata->u.ibss.skb = skb;  	sdata->u.ibss.state = IEEE80211_IBSS_MLME_SEARCH;  	sdata->u.ibss.ibss_join_req = jiffies; @@ -1128,13 +1104,13 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,  int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)  { -	struct sk_buff *skb;  	struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;  	struct ieee80211_local *local = sdata->local;  	struct cfg80211_bss *cbss;  	u16 capability;  	int active_ibss;  	struct sta_info *sta; +	struct beacon_data *presp;  	mutex_lock(&sdata->u.ibss.mtx); @@ -1180,17 +1156,18 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)  	/* remove beacon */  	kfree(sdata->u.ibss.ie); -	skb = rcu_dereference_protected(sdata->u.ibss.presp, -					lockdep_is_held(&sdata->u.ibss.mtx)); +	presp = rcu_dereference_protected(ifibss->presp, +					  lockdep_is_held(&sdata->u.ibss.mtx));  	RCU_INIT_POINTER(sdata->u.ibss.presp, NULL);  	sdata->vif.bss_conf.ibss_joined = false;  	sdata->vif.bss_conf.ibss_creator = false;  	sdata->vif.bss_conf.enable_beacon = false; +	sdata->vif.bss_conf.ssid_len = 0;  	clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);  	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED |  						BSS_CHANGED_IBSS);  	synchronize_rcu(); -	kfree_skb(skb); +	kfree(presp);  	skb_queue_purge(&sdata->skb_queue); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 5672533a083..158e6eb188d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -156,6 +156,7 @@ struct ieee80211_tx_data {  	struct ieee80211_sub_if_data *sdata;  	struct sta_info *sta;  	struct ieee80211_key *key; +	struct ieee80211_tx_rate rate;  	unsigned int flags;  }; @@ -316,6 +317,7 @@ struct ieee80211_roc_work {  	u32 duration, req_duration;  	struct sk_buff *frame;  	u64 cookie, mgmt_tx_cookie; +	enum ieee80211_roc_type type;  };  /* flags used in struct ieee80211_if_managed.flags */ @@ -401,7 +403,6 @@ struct ieee80211_if_managed {  	u16 aid; -	unsigned long timers_running; /* used for quiesce/restart */  	bool powersave; /* powersave requested for this iface */  	bool broken_ap; /* AP is broken -- turn off powersave */  	u8 dtim_period; @@ -443,7 +444,7 @@ struct ieee80211_if_managed {  	u8 use_4addr; -	u8 p2p_noa_index; +	s16 p2p_noa_index;  	/* Signal strength from the last Beacon frame in the current BSS. 
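---- editor's note: why u8 -> s16 ----
Widening p2p_noa_index from u8 to s16 buys a sentinel: every u8 value in
0..255 is a valid NoA index, so "no attribute seen yet" has no room in the
old type, whereas s16 leaves -1 free for that purpose. A sketch of the
idea (noa_changed() is a hypothetical helper, not a mac80211 function):

    #include <stdint.h>
    #include <stdio.h>

    static int16_t noa_index = -1;  /* nothing received yet */

    static int noa_changed(uint8_t new_index)
    {
        if (noa_index >= 0 && (uint8_t)noa_index == new_index)
            return 0;       /* same NoA attribute as before */
        noa_index = new_index;
        return 1;           /* first sighting or a real change */
    }

    int main(void)
    {
        int a = noa_changed(0);  /* index 0 must not look "unset" */
        int b = noa_changed(0);
        int c = noa_changed(7);
        printf("%d %d %d\n", a, b, c);
        return 0;
    }
---- end note ----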
*/  	int last_beacon_signal; @@ -480,6 +481,8 @@ struct ieee80211_if_managed {  	struct ieee80211_ht_cap ht_capa; /* configured ht-cap over-rides */  	struct ieee80211_ht_cap ht_capa_mask; /* Valid parts of ht_capa */ +	struct ieee80211_vht_cap vht_capa; /* configured VHT overrides */ +	struct ieee80211_vht_cap vht_capa_mask; /* Valid parts of vht_capa */  };  struct ieee80211_if_ibss { @@ -491,8 +494,6 @@ struct ieee80211_if_ibss {  	u32 basic_rates; -	bool timer_running; -  	bool fixed_bssid;  	bool fixed_channel;  	bool privacy; @@ -509,8 +510,7 @@ struct ieee80211_if_ibss {  	unsigned long ibss_join_req;  	/* probe response/beacon for IBSS */ -	struct sk_buff __rcu *presp; -	struct sk_buff *skb; +	struct beacon_data __rcu *presp;  	spinlock_t incomplete_lock;  	struct list_head incomplete_stations; @@ -544,8 +544,6 @@ struct ieee80211_if_mesh {  	struct timer_list mesh_path_timer;  	struct timer_list mesh_path_root_timer; -	unsigned long timers_running; -  	unsigned long wrkq_flags;  	u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN]; @@ -591,6 +589,7 @@ struct ieee80211_if_mesh {  		IEEE80211_MESH_SEC_AUTHED = 0x1,  		IEEE80211_MESH_SEC_SECURED = 0x2,  	} security; +	bool user_mpm;  	/* Extensible Synchronization Framework */  	const struct ieee80211_mesh_sync_ops *sync_ops;  	s64 sync_offset_clockdrift_max; @@ -683,6 +682,8 @@ struct ieee80211_sub_if_data {  	/* count for keys needing tailroom space allocation */  	int crypto_tx_tailroom_needed_cnt; +	int crypto_tx_tailroom_pending_dec; +	struct delayed_work dec_tailroom_needed_wk;  	struct net_device *dev;  	struct ieee80211_local *local; @@ -740,6 +741,8 @@ struct ieee80211_sub_if_data {  	/* bitmap of allowed (non-MCS) rate indexes for rate control */  	u32 rc_rateidx_mask[IEEE80211_NUM_BANDS]; + +	bool rc_has_mcs_mask[IEEE80211_NUM_BANDS];  	u8  rc_rateidx_mcs_mask[IEEE80211_NUM_BANDS][IEEE80211_HT_MCS_MASK_LEN];  	union { @@ -758,7 +761,6 @@ struct ieee80211_sub_if_data {  #ifdef CONFIG_MAC80211_DEBUGFS  	struct { -		struct dentry *dir;  		struct dentry *subdir_stations;  		struct dentry *default_unicast_key;  		struct dentry *default_multicast_key; @@ -766,10 +768,6 @@ struct ieee80211_sub_if_data {  	} debugfs;  #endif -#ifdef CONFIG_PM -	struct ieee80211_bss_conf suspend_bss_conf; -#endif -  	/* must be last, dynamically sized area in this! 
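---- editor's note: trailing dynamically sized member ----
The "must be last, dynamically sized area" comment refers to the
driver-private tail behind struct ieee80211_vif: it is sized per driver at
allocation time, so no member may follow it. A standalone illustration
with a C flexible array member (mac80211 additionally nests the flexible
struct inside another one, which relies on a GCC extension):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct vif {
        int type;
        char drv_priv[];  /* must be last: sized at allocation time */
    };

    int main(void)
    {
        size_t priv_size = 128;  /* hypothetical driver's per-vif state */
        struct vif *vif = calloc(1, sizeof(*vif) + priv_size);

        vif->type = 2;  /* stand-in for an NL80211_IFTYPE_* value */
        memset(vif->drv_priv, 0xab, priv_size);
        printf("header %zu + priv %zu bytes, one allocation\n",
               sizeof(*vif), priv_size);
        free(vif);
        return 0;
    }
---- end note ----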
*/  	struct ieee80211_vif vif;  }; @@ -804,11 +802,6 @@ enum sdata_queue_type {  enum {  	IEEE80211_RX_MSG	= 1,  	IEEE80211_TX_STATUS_MSG	= 2, -	IEEE80211_EOSP_MSG	= 3, -}; - -struct skb_eosp_msg_data { -	u8 sta[ETH_ALEN], iface[ETH_ALEN];  };  enum queue_stop_reason { @@ -819,6 +812,7 @@ enum queue_stop_reason {  	IEEE80211_QUEUE_STOP_REASON_SUSPEND,  	IEEE80211_QUEUE_STOP_REASON_SKB_ADD,  	IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL, +	IEEE80211_QUEUE_STOP_REASON_FLUSH,  };  #ifdef CONFIG_MAC80211_LEDS @@ -1029,10 +1023,9 @@ struct ieee80211_local {  	enum mac80211_scan_state next_scan_state;  	struct delayed_work scan_work;  	struct ieee80211_sub_if_data __rcu *scan_sdata; -	struct ieee80211_channel *csa_channel; +	struct cfg80211_chan_def csa_chandef;  	/* For backward compatibility only -- do not use */ -	struct ieee80211_channel *_oper_channel; -	enum nl80211_channel_type _oper_channel_type; +	struct cfg80211_chan_def _oper_chandef;  	/* Temporary remain-on-channel for off-channel operations */  	struct ieee80211_channel *tmp_channel; @@ -1137,11 +1130,6 @@ struct ieee80211_local {  	struct ieee80211_sub_if_data __rcu *p2p_sdata; -	/* dummy netdev for use w/ NAPI */ -	struct net_device napi_dev; - -	struct napi_struct napi; -  	/* virtual monitor interface */  	struct ieee80211_sub_if_data __rcu *monitor_sdata;  	struct cfg80211_chan_def monitor_chandef; @@ -1173,11 +1161,8 @@ struct ieee802_11_elems {  	/* pointers to IEs */  	const u8 *ssid;  	const u8 *supp_rates; -	const u8 *fh_params;  	const u8 *ds_params; -	const u8 *cf_params;  	const struct ieee80211_tim_ie *tim; -	const u8 *ibss_params;  	const u8 *challenge;  	const u8 *rsn;  	const u8 *erp_info; @@ -1197,23 +1182,20 @@ struct ieee802_11_elems {  	const u8 *perr;  	const struct ieee80211_rann_ie *rann;  	const struct ieee80211_channel_sw_ie *ch_switch_ie; +	const struct ieee80211_ext_chansw_ie *ext_chansw_ie; +	const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie;  	const u8 *country_elem;  	const u8 *pwr_constr_elem; -	const u8 *quiet_elem;	/* first quite element */ -	const u8 *timeout_int; +	const struct ieee80211_timeout_interval_ie *timeout_int;  	const u8 *opmode_notif; +	const struct ieee80211_sec_chan_offs_ie *sec_chan_offs;  	/* length of them, respectively */  	u8 ssid_len;  	u8 supp_rates_len; -	u8 fh_params_len; -	u8 ds_params_len; -	u8 cf_params_len;  	u8 tim_len; -	u8 ibss_params_len;  	u8 challenge_len;  	u8 rsn_len; -	u8 erp_info_len;  	u8 ext_supp_rates_len;  	u8 wmm_info_len;  	u8 wmm_param_len; @@ -1223,9 +1205,6 @@ struct ieee802_11_elems {  	u8 prep_len;  	u8 perr_len;  	u8 country_elem_len; -	u8 quiet_elem_len; -	u8 num_of_quiet_elem;	/* can be more the one */ -	u8 timeout_int_len;  	/* whether a parse error occurred while retrieving these elements */  	bool parse_error; @@ -1280,12 +1259,6 @@ void ieee80211_recalc_ps_vif(struct ieee80211_sub_if_data *sdata);  int ieee80211_max_network_latency(struct notifier_block *nb,  				  unsigned long data, void *dummy);  int ieee80211_set_arp_filter(struct ieee80211_sub_if_data *sdata); -void -ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, -				 const struct ieee80211_channel_sw_ie *sw_elem, -				 struct ieee80211_bss *bss, u64 timestamp); -void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata); -void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata);  void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata);  void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,  				  struct sk_buff *skb); @@ 
-1303,8 +1276,6 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,  int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,  			struct cfg80211_ibss_params *params);  int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata); -void ieee80211_ibss_quiesce(struct ieee80211_sub_if_data *sdata); -void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata);  void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata);  void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,  				   struct sk_buff *skb); @@ -1347,7 +1318,8 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local);  void ieee80211_offchannel_return(struct ieee80211_local *local);  void ieee80211_roc_setup(struct ieee80211_local *local);  void ieee80211_start_next_roc(struct ieee80211_local *local); -void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata); +void ieee80211_roc_purge(struct ieee80211_local *local, +			 struct ieee80211_sub_if_data *sdata);  void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc, bool free);  void ieee80211_sw_roc_work(struct work_struct *work);  void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc); @@ -1368,6 +1340,8 @@ void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata,  				    const int offset);  int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up);  void ieee80211_sdata_stop(struct ieee80211_sub_if_data *sdata); +int ieee80211_add_virtual_monitor(struct ieee80211_local *local); +void ieee80211_del_virtual_monitor(struct ieee80211_local *local);  bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata);  void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); @@ -1443,6 +1417,8 @@ void ieee80211_sta_set_rx_nss(struct sta_info *sta);  void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,  				 struct sta_info *sta, u8 opmode,  				 enum ieee80211_band band, bool nss_only); +void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata, +				      struct ieee80211_sta_vht_cap *vht_cap);  /* Spectrum management */  void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, @@ -1520,11 +1496,15 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata,  	ieee80211_tx_skb_tid(sdata, skb, 7);  } -void ieee802_11_parse_elems(u8 *start, size_t len, -			    struct ieee802_11_elems *elems); -u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, +u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, bool action,  			       struct ieee802_11_elems *elems,  			       u64 filter, u32 crc); +static inline void ieee802_11_parse_elems(u8 *start, size_t len, bool action, +					  struct ieee802_11_elems *elems) +{ +	ieee802_11_parse_elems_crc(start, len, action, elems, 0, 0); +} +  u32 ieee80211_mandatory_rates(struct ieee80211_local *local,  			      enum ieee80211_band band); @@ -1540,8 +1520,10 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,  			     struct ieee80211_hdr *hdr, bool ack);  void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, +				     unsigned long queues,  				     enum queue_stop_reason reason);  void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, +				     unsigned long queues,  				     enum queue_stop_reason reason);  void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,  				    enum queue_stop_reason reason); @@ -1558,6 +1540,8 @@ static inline void ieee80211_add_pending_skbs(struct ieee80211_local *local,  {  	
ieee80211_add_pending_skbs_fn(local, skbs, NULL, NULL);  } +void ieee80211_flush_queues(struct ieee80211_local *local, +			    struct ieee80211_sub_if_data *sdata);  void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,  			 u16 transaction, u16 auth_alg, u16 status, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 9ed49ad0380..60f1ce5e5e5 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1,5 +1,5 @@  /* - * Interface handling (except master interface) + * Interface handling   *   * Copyright 2002-2005, Instant802 Networks, Inc.   * Copyright 2005-2006, Devicescape Software, Inc. @@ -92,7 +92,7 @@ static u32 __ieee80211_idle_on(struct ieee80211_local *local)  	if (local->hw.conf.flags & IEEE80211_CONF_IDLE)  		return 0; -	drv_flush(local, false); +	ieee80211_flush_queues(local, NULL);  	local->hw.conf.flags |= IEEE80211_CONF_IDLE;  	return IEEE80211_CONF_CHANGE_IDLE; @@ -357,7 +357,7 @@ static void ieee80211_set_default_queues(struct ieee80211_sub_if_data *sdata)  	sdata->vif.cab_queue = IEEE80211_INVAL_HW_QUEUE;  } -static int ieee80211_add_virtual_monitor(struct ieee80211_local *local) +int ieee80211_add_virtual_monitor(struct ieee80211_local *local)  {  	struct ieee80211_sub_if_data *sdata;  	int ret; @@ -410,7 +410,7 @@ static int ieee80211_add_virtual_monitor(struct ieee80211_local *local)  	return 0;  } -static void ieee80211_del_virtual_monitor(struct ieee80211_local *local) +void ieee80211_del_virtual_monitor(struct ieee80211_local *local)  {  	struct ieee80211_sub_if_data *sdata; @@ -499,8 +499,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)  		res = drv_start(local);  		if (res)  			goto err_del_bss; -		if (local->ops->napi_poll) -			napi_enable(&local->napi);  		/* we're brought up, everything changes */  		hw_reconf_flags = ~0;  		ieee80211_led_radio(local, true); @@ -573,8 +571,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)  				goto err_del_interface;  		} -		drv_add_interface_debugfs(local, sdata); -  		if (sdata->vif.type == NL80211_IFTYPE_AP) {  			local->fif_pspoll++;  			local->fif_probe_req++; @@ -599,7 +595,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)  		case NL80211_IFTYPE_P2P_DEVICE:  			break;  		default: -			netif_carrier_on(dev); +			/* not reached */ +			WARN_ON(1);  		}  		/* @@ -656,8 +653,28 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)  	ieee80211_recalc_ps(local, -1); -	if (dev) -		netif_tx_start_all_queues(dev); +	if (dev) { +		unsigned long flags; +		int n_acs = IEEE80211_NUM_ACS; +		int ac; + +		if (local->hw.queues < IEEE80211_NUM_ACS) +			n_acs = 1; + +		spin_lock_irqsave(&local->queue_stop_reason_lock, flags); +		if (sdata->vif.cab_queue == IEEE80211_INVAL_HW_QUEUE || +		    (local->queue_stop_reasons[sdata->vif.cab_queue] == 0 && +		     skb_queue_empty(&local->pending[sdata->vif.cab_queue]))) { +			for (ac = 0; ac < n_acs; ac++) { +				int ac_queue = sdata->vif.hw_queue[ac]; + +				if (local->queue_stop_reasons[ac_queue] == 0 && +				    skb_queue_empty(&local->pending[ac_queue])) +					netif_start_subqueue(dev, ac); +			} +		} +		spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); +	}  	return 0;   err_del_interface: @@ -711,7 +728,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,  	if (sdata->dev)  		netif_tx_stop_all_queues(sdata->dev); -	ieee80211_roc_purge(sdata); +	ieee80211_roc_purge(local, sdata);  	if (sdata->vif.type == NL80211_IFTYPE_STATION)  		ieee80211_mgd_stop(sdata); @@ 
-736,12 +753,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,  	WARN_ON_ONCE((sdata->vif.type != NL80211_IFTYPE_WDS && flushed > 0) ||  		     (sdata->vif.type == NL80211_IFTYPE_WDS && flushed != 1)); -	/* -	 * Don't count this interface for promisc/allmulti while it -	 * is down. dev_mc_unsync() will invoke set_multicast_list -	 * on the master interface which will sync these down to the -	 * hardware as filter flags. -	 */ +	/* don't count this interface for promisc/allmulti while it is down */  	if (sdata->flags & IEEE80211_SDATA_ALLMULTI)  		atomic_dec(&local->iff_allmultis); @@ -762,8 +774,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,  				 sdata->dev->addr_len);  		spin_unlock_bh(&local->filter_lock);  		netif_addr_unlock_bh(sdata->dev); - -		ieee80211_configure_filter(local);  	}  	del_timer_sync(&local->dynamic_ps_timer); @@ -774,6 +784,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,  	cancel_delayed_work_sync(&sdata->dfs_cac_timer_work);  	if (sdata->wdev.cac_started) { +		WARN_ON(local->suspended);  		mutex_lock(&local->iflist_mtx);  		ieee80211_vif_release_channel(sdata);  		mutex_unlock(&local->iflist_mtx); @@ -824,14 +835,9 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,  		if (local->monitors == 0) {  			local->hw.conf.flags &= ~IEEE80211_CONF_MONITOR;  			hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR; -			ieee80211_del_virtual_monitor(local);  		}  		ieee80211_adjust_monitor_flags(sdata, -1); -		ieee80211_configure_filter(local); -		mutex_lock(&local->mtx); -		ieee80211_recalc_idle(local); -		mutex_unlock(&local->mtx);  		break;  	case NL80211_IFTYPE_P2P_DEVICE:  		/* relies on synchronize_rcu() below */ @@ -844,46 +850,28 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,  		 *  		 * sta_info_flush_cleanup() requires rcu_barrier()  		 * first to wait for the station call_rcu() calls -		 * to complete, here we need at least sychronize_rcu() -		 * it to wait for the RX path in case it is using the +		 * to complete, and we also need synchronize_rcu() +		 * to wait for the RX path in case it is using the  		 * interface and enqueuing frames at this very time on  		 * another CPU.  		 */ +		synchronize_rcu();  		rcu_barrier();  		sta_info_flush_cleanup(sdata); -		skb_queue_purge(&sdata->skb_queue); -  		/*  		 * Free all remaining keys, there shouldn't be any, -		 * except maybe group keys in AP more or WDS? +		 * except maybe in WDS mode?  		 */  		ieee80211_free_keys(sdata); -		drv_remove_interface_debugfs(local, sdata); - -		if (going_down) -			drv_remove_interface(local, sdata); +		/* fall through */ +	case NL80211_IFTYPE_AP: +		skb_queue_purge(&sdata->skb_queue);  	}  	sdata->bss = NULL; -	ieee80211_recalc_ps(local, -1); - -	if (local->open_count == 0) { -		if (local->ops->napi_poll) -			napi_disable(&local->napi); -		ieee80211_clear_tx_pending(local); -		ieee80211_stop_device(local); - -		/* no reconfiguring after stop! 
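---- editor's note: stop-reason bitmap model ----
The queue handling throughout this series (including the new
IEEE80211_QUEUE_STOP_REASON_FLUSH and the extra "unsigned long queues"
map taken by ieee80211_stop/wake_queues_by_reason()) follows one rule: a
queue may run only while its per-queue reason bitmap is zero, so
independent subsystems can stop and wake queues without trampling each
other. A toy model of that rule:

    #include <stdio.h>

    #define NUM_QUEUES 4
    enum stop_reason { REASON_SUSPEND, REASON_FLUSH, REASON_OFFCHANNEL };

    static unsigned long stop_reasons[NUM_QUEUES];

    static void stop_queues(unsigned long queue_map, enum stop_reason r)
    {
        for (int q = 0; q < NUM_QUEUES; q++)
            if (queue_map & (1UL << q))
                stop_reasons[q] |= 1UL << r;
    }

    static void wake_queues(unsigned long queue_map, enum stop_reason r)
    {
        for (int q = 0; q < NUM_QUEUES; q++)
            if (queue_map & (1UL << q))
                stop_reasons[q] &= ~(1UL << r);
    }

    int main(void)
    {
        stop_queues(~0UL, REASON_FLUSH);        /* flush stops everything */
        stop_queues(1UL << 0, REASON_SUSPEND);  /* queue 0 also suspended */
        wake_queues(~0UL, REASON_FLUSH);        /* flush done */

        for (int q = 0; q < NUM_QUEUES; q++)
            printf("queue %d: %s\n", q,
                   stop_reasons[q] == 0 ? "running" : "stopped");
        return 0;
    }
---- end note ----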
*/ -		hw_reconf_flags = 0; -	} - -	/* do after stop to avoid reconfiguring when we stop anyway */ -	if (hw_reconf_flags) -		ieee80211_hw_config(local, hw_reconf_flags); -  	spin_lock_irqsave(&local->queue_stop_reason_lock, flags);  	for (i = 0; i < IEEE80211_MAX_QUEUES; i++) {  		skb_queue_walk_safe(&local->pending[i], skb, tmp) { @@ -896,7 +884,54 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,  	}  	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); -	if (local->monitors == local->open_count && local->monitors > 0) +	if (local->open_count == 0) +		ieee80211_clear_tx_pending(local); + +	/* +	 * If the interface goes down while suspended, presumably because +	 * the device was unplugged and that happens before our resume, +	 * then the driver is already unconfigured and the remainder of +	 * this function isn't needed. +	 * XXX: what about WoWLAN? If the device has software state, e.g. +	 *	memory allocated, it might expect teardown commands from +	 *	mac80211 here? +	 */ +	if (local->suspended) { +		WARN_ON(local->wowlan); +		WARN_ON(rtnl_dereference(local->monitor_sdata)); +		return; +	} + +	switch (sdata->vif.type) { +	case NL80211_IFTYPE_AP_VLAN: +		break; +	case NL80211_IFTYPE_MONITOR: +		if (local->monitors == 0) +			ieee80211_del_virtual_monitor(local); + +		mutex_lock(&local->mtx); +		ieee80211_recalc_idle(local); +		mutex_unlock(&local->mtx); +		break; +	default: +		if (going_down) +			drv_remove_interface(local, sdata); +	} + +	ieee80211_recalc_ps(local, -1); + +	if (local->open_count == 0) { +		ieee80211_stop_device(local); + +		/* no reconfiguring after stop! */ +		return; +	} + +	/* do after stop to avoid reconfiguring when we stop anyway */ +	ieee80211_configure_filter(local); +	ieee80211_hw_config(local, hw_reconf_flags); + +	if (local->monitors == local->open_count)  		ieee80211_add_virtual_monitor(local);  } @@ -935,6 +970,17 @@ static void ieee80211_set_multicast_list(struct net_device *dev)  			atomic_dec(&local->iff_promiscs);  		sdata->flags ^= IEEE80211_SDATA_PROMISC;  	} + +	/* +	 * TODO: If somebody needs this on AP interfaces, +	 *	 it can be enabled easily but multicast +	 *	 addresses from VLANs need to be synced. 
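---- editor's note: flag-transition refcount sketch ----
ieee80211_set_multicast_list() above keeps device-wide promisc/allmulti
counters in step with per-interface flags: only an actual flag transition
touches the counter, and the hardware state follows counter != 0. The
same pattern, standalone:

    #include <stdio.h>

    struct iface { unsigned flags; };
    #define FLAG_PROMISC 0x1u

    static int iff_promiscs;  /* device-wide user count */

    static void set_promisc(struct iface *i, int on)
    {
        unsigned want = on ? FLAG_PROMISC : 0;

        if ((i->flags ^ want) & FLAG_PROMISC) {  /* real transition only */
            iff_promiscs += on ? 1 : -1;
            i->flags ^= FLAG_PROMISC;
        }
        printf("hw promisc: %s\n", iff_promiscs ? "on" : "off");
    }

    int main(void)
    {
        struct iface a = {0}, b = {0};
        set_promisc(&a, 1);  /* on */
        set_promisc(&b, 1);  /* stays on */
        set_promisc(&a, 0);  /* still on: b holds it */
        set_promisc(&b, 0);  /* off */
        return 0;
    }
---- end note ----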
+	 */ +	if (sdata->vif.type != NL80211_IFTYPE_MONITOR && +	    sdata->vif.type != NL80211_IFTYPE_AP_VLAN && +	    sdata->vif.type != NL80211_IFTYPE_AP) +		drv_set_multicast_list(local, sdata, &dev->mc); +  	spin_lock_bh(&local->filter_lock);  	__hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len);  	spin_unlock_bh(&local->filter_lock); @@ -1561,6 +1607,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,  	INIT_WORK(&sdata->cleanup_stations_wk, ieee80211_cleanup_sdata_stas_wk);  	INIT_DELAYED_WORK(&sdata->dfs_cac_timer_work,  			  ieee80211_dfs_cac_timer_work); +	INIT_DELAYED_WORK(&sdata->dec_tailroom_needed_wk, +			  ieee80211_delayed_tailroom_dec);  	for (i = 0; i < IEEE80211_NUM_BANDS; i++) {  		struct ieee80211_supported_band *sband; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index ef252eb58c3..67059b88fea 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -248,11 +248,11 @@ void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,  } -static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, -				    struct sta_info *sta, -				    bool pairwise, -				    struct ieee80211_key *old, -				    struct ieee80211_key *new) +static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, +				  struct sta_info *sta, +				  bool pairwise, +				  struct ieee80211_key *old, +				  struct ieee80211_key *new)  {  	int idx;  	bool defunikey, defmultikey, defmgmtkey; @@ -397,7 +397,41 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,  	return key;  } -static void __ieee80211_key_destroy(struct ieee80211_key *key) +static void ieee80211_key_free_common(struct ieee80211_key *key) +{ +	if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP) +		ieee80211_aes_key_free(key->u.ccmp.tfm); +	if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC) +		ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); +	kfree(key); +} + +static void __ieee80211_key_destroy(struct ieee80211_key *key, +				    bool delay_tailroom) +{ +	if (key->local) +		ieee80211_key_disable_hw_accel(key); + +	if (key->local) { +		struct ieee80211_sub_if_data *sdata = key->sdata; + +		ieee80211_debugfs_key_remove(key); + +		if (delay_tailroom) { +			/* see ieee80211_delayed_tailroom_dec */ +			sdata->crypto_tx_tailroom_pending_dec++; +			schedule_delayed_work(&sdata->dec_tailroom_needed_wk, +					      HZ/2); +		} else { +			sdata->crypto_tx_tailroom_needed_cnt--; +		} +	} + +	ieee80211_key_free_common(key); +} + +static void ieee80211_key_destroy(struct ieee80211_key *key, +				  bool delay_tailroom)  {  	if (!key)  		return; @@ -408,19 +442,13 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)  	 */  	synchronize_net(); -	if (key->local) -		ieee80211_key_disable_hw_accel(key); - -	if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP) -		ieee80211_aes_key_free(key->u.ccmp.tfm); -	if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC) -		ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); -	if (key->local) { -		ieee80211_debugfs_key_remove(key); -		key->sdata->crypto_tx_tailroom_needed_cnt--; -	} +	__ieee80211_key_destroy(key, delay_tailroom); +} -	kfree(key); +void ieee80211_key_free_unused(struct ieee80211_key *key) +{ +	WARN_ON(key->sdata || key->local); +	ieee80211_key_free_common(key);  }  int ieee80211_key_link(struct ieee80211_key *key, @@ -440,32 +468,6 @@ int ieee80211_key_link(struct ieee80211_key *key,  	key->sdata = sdata;  	key->sta = sta; -	if (sta) { -		/* -		 * some hardware cannot handle TKIP with QoS, so -		 * we indicate 
whether QoS could be in use. -		 */ -		if (test_sta_flag(sta, WLAN_STA_WME)) -			key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; -	} else { -		if (sdata->vif.type == NL80211_IFTYPE_STATION) { -			struct sta_info *ap; - -			/* -			 * We're getting a sta pointer in, so must be under -			 * appropriate locking for sta_info_get(). -			 */ - -			/* same here, the AP could be using QoS */ -			ap = sta_info_get(key->sdata, key->sdata->u.mgd.bssid); -			if (ap) { -				if (test_sta_flag(ap, WLAN_STA_WME)) -					key->conf.flags |= -						IEEE80211_KEY_FLAG_WMM_STA; -			} -		} -	} -  	mutex_lock(&sdata->local->key_mtx);  	if (sta && pairwise) @@ -477,19 +479,22 @@ int ieee80211_key_link(struct ieee80211_key *key,  	increment_tailroom_need_count(sdata); -	__ieee80211_key_replace(sdata, sta, pairwise, old_key, key); -	__ieee80211_key_destroy(old_key); +	ieee80211_key_replace(sdata, sta, pairwise, old_key, key); +	ieee80211_key_destroy(old_key, true);  	ieee80211_debugfs_key_add(key);  	ret = ieee80211_key_enable_hw_accel(key); +	if (ret) +		ieee80211_key_free(key, true); +  	mutex_unlock(&sdata->local->key_mtx);  	return ret;  } -void __ieee80211_key_free(struct ieee80211_key *key) +void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom)  {  	if (!key)  		return; @@ -498,18 +503,10 @@ void __ieee80211_key_free(struct ieee80211_key *key)  	 * Replace key with nothingness if it was ever used.  	 */  	if (key->sdata) -		__ieee80211_key_replace(key->sdata, key->sta, +		ieee80211_key_replace(key->sdata, key->sta,  				key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,  				key, NULL); -	__ieee80211_key_destroy(key); -} - -void ieee80211_key_free(struct ieee80211_local *local, -			struct ieee80211_key *key) -{ -	mutex_lock(&local->key_mtx); -	__ieee80211_key_free(key); -	mutex_unlock(&local->key_mtx); +	ieee80211_key_destroy(key, delay_tailroom);  }  void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) @@ -566,36 +563,109 @@ void ieee80211_iter_keys(struct ieee80211_hw *hw,  }  EXPORT_SYMBOL(ieee80211_iter_keys); -void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata) +void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata)  { -	struct ieee80211_key *key; +	struct ieee80211_key *key, *tmp; +	LIST_HEAD(keys); -	ASSERT_RTNL(); +	cancel_delayed_work_sync(&sdata->dec_tailroom_needed_wk);  	mutex_lock(&sdata->local->key_mtx); -	list_for_each_entry(key, &sdata->key_list, list) -		ieee80211_key_disable_hw_accel(key); +	sdata->crypto_tx_tailroom_needed_cnt -= +		sdata->crypto_tx_tailroom_pending_dec; +	sdata->crypto_tx_tailroom_pending_dec = 0; + +	ieee80211_debugfs_key_remove_mgmt_default(sdata); + +	list_for_each_entry_safe(key, tmp, &sdata->key_list, list) { +		ieee80211_key_replace(key->sdata, key->sta, +				key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, +				key, NULL); +		list_add_tail(&key->list, &keys); +	} + +	ieee80211_debugfs_key_update_default(sdata); + +	if (!list_empty(&keys)) { +		synchronize_net(); +		list_for_each_entry_safe(key, tmp, &keys, list) +			__ieee80211_key_destroy(key, false); +	} + +	WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || +		     sdata->crypto_tx_tailroom_pending_dec);  	mutex_unlock(&sdata->local->key_mtx);  } -void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) +void ieee80211_free_sta_keys(struct ieee80211_local *local, +			     struct sta_info *sta)  {  	struct ieee80211_key *key, *tmp; +	LIST_HEAD(keys); +	int i; -	mutex_lock(&sdata->local->key_mtx); +	mutex_lock(&local->key_mtx); +	for (i = 0; i < NUM_DEFAULT_KEYS; i++) 
{ +		key = key_mtx_dereference(local, sta->gtk[i]); +		if (!key) +			continue; +		ieee80211_key_replace(key->sdata, key->sta, +				key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, +				key, NULL); +		list_add(&key->list, &keys); +	} -	ieee80211_debugfs_key_remove_mgmt_default(sdata); +	key = key_mtx_dereference(local, sta->ptk); +	if (key) { +		ieee80211_key_replace(key->sdata, key->sta, +				key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, +				key, NULL); +		list_add(&key->list, &keys); +	} -	list_for_each_entry_safe(key, tmp, &sdata->key_list, list) -		__ieee80211_key_free(key); +	/* +	 * NB: the station code relies on this being +	 * done even if there aren't any keys +	 */ +	synchronize_net(); -	ieee80211_debugfs_key_update_default(sdata); +	list_for_each_entry_safe(key, tmp, &keys, list) +		__ieee80211_key_destroy(key, true); -	mutex_unlock(&sdata->local->key_mtx); +	mutex_unlock(&local->key_mtx);  } +void ieee80211_delayed_tailroom_dec(struct work_struct *wk) +{ +	struct ieee80211_sub_if_data *sdata; + +	sdata = container_of(wk, struct ieee80211_sub_if_data, +			     dec_tailroom_needed_wk.work); + +	/* +	 * The reason for the delayed tailroom needed decrementing is to +	 * make roaming faster: during roaming, all keys are first deleted +	 * and then new keys are installed. The first new key causes the +	 * crypto_tx_tailroom_needed_cnt to go from 0 to 1, which invokes +	 * the cost of synchronize_net() (which can be slow). Avoid this +	 * by deferring the crypto_tx_tailroom_needed_cnt decrementing on +	 * key removal for a while, so if we roam the value is larger than +	 * zero and no 0->1 transition happens. +	 * +	 * The cost is that if the AP switching was from an AP with keys +	 * to one without, we still allocate tailroom while it would no +	 * longer be needed. However, in the typical (fast) roaming case +	 * within an ESS this usually won't happen. +	 */ + +	mutex_lock(&sdata->local->key_mtx); +	sdata->crypto_tx_tailroom_needed_cnt -= +		sdata->crypto_tx_tailroom_pending_dec; +	sdata->crypto_tx_tailroom_pending_dec = 0; +	mutex_unlock(&sdata->local->key_mtx); +}  void ieee80211_gtk_rekey_notify(struct ieee80211_vif *vif, const u8 *bssid,  				const u8 *replay_ctr, gfp_t gfp) diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 382dc44ed33..e8de3e6d780 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -129,23 +129,25 @@ struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,  					  size_t seq_len, const u8 *seq);  /*   * Insert a key into data structures (sdata, sta if necessary) - * to make it used, free old key. + * to make it used, free old key. On failure, also free the new key.   
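---- editor's note: deferred-decrement model ----
The comment in ieee80211_delayed_tailroom_dec() above describes the trick
precisely; in miniature it looks like this. Key removals only accumulate
in a pending counter that a delayed worker folds in later, so a roam
(delete all keys, then install new ones) never drives the live count
through the expensive 0 -> 1 transition (synchronize_net() in mac80211):

    #include <stdio.h>

    static int tailroom_needed_cnt;   /* consulted on every TX */
    static int tailroom_pending_dec;  /* folded in ~HZ/2 later */

    static void key_add(void)
    {
        if (tailroom_needed_cnt == 0)
            printf("0->1 transition: slow synchronize path taken\n");
        tailroom_needed_cnt++;
    }

    static void key_remove(void)
    {
        tailroom_pending_dec++;  /* deferred, count stays nonzero */
    }

    static void delayed_work_fires(void)
    {
        tailroom_needed_cnt -= tailroom_pending_dec;
        tailroom_pending_dec = 0;
    }

    int main(void)
    {
        key_add();             /* initial association: one slow transition */
        key_remove();          /* roam: old key only queues a decrement */
        key_add();             /* new key: 1 -> 2, slow path avoided */
        delayed_work_fires();  /* settle back to 1 */
        printf("cnt=%d pending=%d\n",
               tailroom_needed_cnt, tailroom_pending_dec);
        return 0;
    }
---- end note ----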
*/ -int __must_check ieee80211_key_link(struct ieee80211_key *key, -				    struct ieee80211_sub_if_data *sdata, -				    struct sta_info *sta); -void __ieee80211_key_free(struct ieee80211_key *key); -void ieee80211_key_free(struct ieee80211_local *local, -			struct ieee80211_key *key); +int ieee80211_key_link(struct ieee80211_key *key, +		       struct ieee80211_sub_if_data *sdata, +		       struct sta_info *sta); +void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom); +void ieee80211_key_free_unused(struct ieee80211_key *key);  void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx,  			       bool uni, bool multi);  void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,  				    int idx);  void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); +void ieee80211_free_sta_keys(struct ieee80211_local *local, +			     struct sta_info *sta);  void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); -void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata);  #define key_mtx_dereference(local, ref) \  	rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx))) +void ieee80211_delayed_tailroom_dec(struct work_struct *wk); +  #endif /* IEEE80211_KEY_H */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 1a8591b77a1..8a7bfc47d57 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -95,43 +95,47 @@ static void ieee80211_reconfig_filter(struct work_struct *work)  static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)  {  	struct ieee80211_sub_if_data *sdata; -	struct ieee80211_channel *chan; +	struct cfg80211_chan_def chandef = {};  	u32 changed = 0;  	int power; -	enum nl80211_channel_type channel_type;  	u32 offchannel_flag; -	bool scanning = false;  	offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; +  	if (local->scan_channel) { -		chan = local->scan_channel; +		chandef.chan = local->scan_channel;  		/* If scanning on oper channel, use whatever channel-type  		 * is currently in use.  		 
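---- editor's note: chan_def in miniature ----
The (channel, channel_type) to cfg80211_chan_def conversion running
through main.c is easier to follow with the destination shape in mind:
one struct now describes control frequency, width and center segment(s),
and "are we off-channel" becomes a structural comparison. An illustrative
model (field layout simplified, not the real cfg80211 struct):

    #include <stdio.h>
    #include <string.h>

    enum chan_width { W20_NOHT, W20, W40, W80 };

    struct chan_def {
        int control_freq;   /* MHz */
        enum chan_width width;
        int center_freq1;   /* MHz */
        int center_freq2;   /* second 80 MHz segment, else 0 */
    };

    static int chandef_identical(const struct chan_def *a,
                                 const struct chan_def *b)
    {
        return memcmp(a, b, sizeof(*a)) == 0;  /* no padding here */
    }

    int main(void)
    {
        struct chan_def oper = { 5180, W40, 5190, 0 };
        struct chan_def scan = { 5180, W20_NOHT, 5180, 0 };

        /* same control channel, different width => off-channel */
        printf("off-channel: %s\n",
               chandef_identical(&oper, &scan) ? "no" : "yes");
        return 0;
    }
---- end note ----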
*/ -		if (chan == local->_oper_channel) -			channel_type = local->_oper_channel_type; -		else -			channel_type = NL80211_CHAN_NO_HT; +		if (chandef.chan == local->_oper_chandef.chan) { +			chandef = local->_oper_chandef; +		} else { +			chandef.width = NL80211_CHAN_WIDTH_20_NOHT; +			chandef.center_freq1 = chandef.chan->center_freq; +		}  	} else if (local->tmp_channel) { -		chan = local->tmp_channel; -		channel_type = NL80211_CHAN_NO_HT; -	} else { -		chan = local->_oper_channel; -		channel_type = local->_oper_channel_type; -	} +		chandef.chan = local->tmp_channel; +		chandef.width = NL80211_CHAN_WIDTH_20_NOHT; +		chandef.center_freq1 = chandef.chan->center_freq; +	} else +		chandef = local->_oper_chandef; -	if (chan != local->_oper_channel || -	    channel_type != local->_oper_channel_type) +	WARN(!cfg80211_chandef_valid(&chandef), +	     "control:%d MHz width:%d center: %d/%d MHz", +	     chandef.chan->center_freq, chandef.width, +	     chandef.center_freq1, chandef.center_freq2); + +	if (!cfg80211_chandef_identical(&chandef, &local->_oper_chandef))  		local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;  	else  		local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL;  	offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL; -	if (offchannel_flag || chan != local->hw.conf.channel || -	    channel_type != local->hw.conf.channel_type) { -		local->hw.conf.channel = chan; -		local->hw.conf.channel_type = channel_type; +	if (offchannel_flag || +	    !cfg80211_chandef_identical(&local->hw.conf.chandef, +					&local->_oper_chandef)) { +		local->hw.conf.chandef = chandef;  		changed |= IEEE80211_CONF_CHANGE_CHANNEL;  	} @@ -147,10 +151,7 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)  		changed |= IEEE80211_CONF_CHANGE_SMPS;  	} -	scanning = test_bit(SCAN_SW_SCANNING, &local->scanning) || -		   test_bit(SCAN_ONCHANNEL_SCANNING, &local->scanning) || -		   test_bit(SCAN_HW_SCANNING, &local->scanning); -	power = chan->max_power; +	power = chandef.chan->max_power;  	rcu_read_lock();  	list_for_each_entry_rcu(sdata, &local->interfaces, list) { @@ -226,8 +227,6 @@ u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata)  static void ieee80211_tasklet_handler(unsigned long data)  {  	struct ieee80211_local *local = (struct ieee80211_local *) data; -	struct sta_info *sta, *tmp; -	struct skb_eosp_msg_data *eosp_data;  	struct sk_buff *skb;  	while ((skb = skb_dequeue(&local->skb_queue)) || @@ -243,18 +242,6 @@ static void ieee80211_tasklet_handler(unsigned long data)  			skb->pkt_type = 0;  			ieee80211_tx_status(&local->hw, skb);  			break; -		case IEEE80211_EOSP_MSG: -			eosp_data = (void *)skb->cb; -			for_each_sta_info(local, eosp_data->sta, sta, tmp) { -				/* skip wrong virtual interface */ -				if (memcmp(eosp_data->iface, -					   sta->sdata->vif.addr, ETH_ALEN)) -					continue; -				clear_sta_flag(sta, WLAN_STA_SP); -				break; -			} -			dev_kfree_skb(skb); -			break;  		default:  			WARN(1, "mac80211: Packet is of unknown type %d\n",  			     skb->pkt_type); @@ -295,8 +282,8 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw)  		   "Hardware restart was requested\n");  	/* use this reason, ieee80211_reconfig will unblock it */ -	ieee80211_stop_queues_by_reason(hw, -		IEEE80211_QUEUE_STOP_REASON_SUSPEND); +	ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, +					IEEE80211_QUEUE_STOP_REASON_SUSPEND);  	/*  	 * Stop all Rx during the reconfig. 
We don't want state changes @@ -399,30 +386,6 @@ static int ieee80211_ifa6_changed(struct notifier_block *nb,  }  #endif -static int ieee80211_napi_poll(struct napi_struct *napi, int budget) -{ -	struct ieee80211_local *local = -		container_of(napi, struct ieee80211_local, napi); - -	return local->ops->napi_poll(&local->hw, budget); -} - -void ieee80211_napi_schedule(struct ieee80211_hw *hw) -{ -	struct ieee80211_local *local = hw_to_local(hw); - -	napi_schedule(&local->napi); -} -EXPORT_SYMBOL(ieee80211_napi_schedule); - -void ieee80211_napi_complete(struct ieee80211_hw *hw) -{ -	struct ieee80211_local *local = hw_to_local(hw); - -	napi_complete(&local->napi); -} -EXPORT_SYMBOL(ieee80211_napi_complete); -  /* There isn't a lot of sense in it, but you can transmit anything you like */  static const struct ieee80211_txrx_stypes  ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = { @@ -501,6 +464,27 @@ static const struct ieee80211_ht_cap mac80211_ht_capa_mod_mask = {  	},  }; +static const struct ieee80211_vht_cap mac80211_vht_capa_mod_mask = { +	.vht_cap_info = +		cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC | +			    IEEE80211_VHT_CAP_SHORT_GI_80 | +			    IEEE80211_VHT_CAP_SHORT_GI_160 | +			    IEEE80211_VHT_CAP_RXSTBC_1 | +			    IEEE80211_VHT_CAP_RXSTBC_2 | +			    IEEE80211_VHT_CAP_RXSTBC_3 | +			    IEEE80211_VHT_CAP_RXSTBC_4 | +			    IEEE80211_VHT_CAP_TXSTBC | +			    IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | +			    IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | +			    IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN | +			    IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN | +			    IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK), +	.supp_mcs = { +		.rx_mcs_map = cpu_to_le16(~0), +		.tx_mcs_map = cpu_to_le16(~0), +	}, +}; +  static const u8 extended_capabilities[] = {  	0, 0, 0, 0, 0, 0, 0,  	WLAN_EXT_CAPA8_OPMODE_NOTIF, @@ -572,7 +556,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,  	wiphy->features |= NL80211_FEATURE_SK_TX_STATUS |  			   NL80211_FEATURE_SAE |  			   NL80211_FEATURE_HT_IBSS | -			   NL80211_FEATURE_VIF_TXPOWER; +			   NL80211_FEATURE_VIF_TXPOWER | +			   NL80211_FEATURE_USERSPACE_MPM;  	if (!ops->hw_scan)  		wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | @@ -607,8 +592,11 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,  					 IEEE80211_RADIOTAP_MCS_HAVE_BW;  	local->hw.radiotap_vht_details = IEEE80211_RADIOTAP_VHT_KNOWN_GI |  					 IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH; +	local->hw.uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; +	local->hw.uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN;  	local->user_power_level = IEEE80211_UNSET_POWER_LEVEL;  	wiphy->ht_capa_mod_mask = &mac80211_ht_capa_mod_mask; +	wiphy->vht_capa_mod_mask = &mac80211_vht_capa_mod_mask;  	INIT_LIST_HEAD(&local->interfaces); @@ -664,9 +652,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,  	skb_queue_head_init(&local->skb_queue);  	skb_queue_head_init(&local->skb_queue_unreliable); -	/* init dummy netdev for use w/ NAPI */ -	init_dummy_netdev(&local->napi_dev); -  	ieee80211_led_names(local);  	ieee80211_roc_setup(local); @@ -683,6 +668,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)  	int channels, max_bitrates;  	bool supp_ht, supp_vht;  	netdev_features_t feature_whitelist; +	struct cfg80211_chan_def dflt_chandef = {};  	static const u32 cipher_suites[] = {  		/* keep WEP first, it may be removed below */  		WLAN_CIPHER_SUITE_WEP40, @@ -760,15 +746,19 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)  		sband = local->hw.wiphy->bands[band];  		if 
(!sband)  			continue; -		if (!local->use_chanctx && !local->_oper_channel) { + +		if (!dflt_chandef.chan) { +			cfg80211_chandef_create(&dflt_chandef, +						&sband->channels[0], +						NL80211_CHAN_NO_HT);  			/* init channel we're on */ -			local->hw.conf.channel = -			local->_oper_channel = &sband->channels[0]; -			local->hw.conf.channel_type = NL80211_CHAN_NO_HT; +			if (!local->use_chanctx && !local->_oper_chandef.chan) { +				local->hw.conf.chandef = dflt_chandef; +				local->_oper_chandef = dflt_chandef; +			} +			local->monitor_chandef = dflt_chandef;  		} -		cfg80211_chandef_create(&local->monitor_chandef, -					&sband->channels[0], -					NL80211_CHAN_NO_HT); +  		channels += sband->n_channels;  		if (max_bitrates < sband->n_bitrates) @@ -851,22 +841,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)  	if (supp_ht)  		local->scan_ies_len += 2 + sizeof(struct ieee80211_ht_cap); -	if (supp_vht) { +	if (supp_vht)  		local->scan_ies_len +=  			2 + sizeof(struct ieee80211_vht_cap); -		/* -		 * (for now at least), drivers wanting to use VHT must -		 * support channel contexts, as they contain all the -		 * necessary VHT information and the global hw config -		 * doesn't (yet) -		 */ -		if (WARN_ON(!local->use_chanctx)) { -			result = -EINVAL; -			goto fail_wiphy_register; -		} -	} -  	if (!local->ops->hw_scan) {  		/* For hw_scan, driver needs to set these up. */  		local->hw.wiphy->max_scan_ssids = 4; @@ -1021,9 +999,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)  		goto fail_ifa6;  #endif -	netif_napi_add(&local->napi_dev, &local->napi, ieee80211_napi_poll, -			local->hw.napi_weight); -  	return 0;  #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 4749b385869..6952760881c 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -13,10 +13,6 @@  #include "ieee80211_i.h"  #include "mesh.h" -#define TMR_RUNNING_HK	0 -#define TMR_RUNNING_MP	1 -#define TMR_RUNNING_MPR	2 -  static int mesh_allocated;  static struct kmem_cache *rm_cache; @@ -50,11 +46,6 @@ static void ieee80211_mesh_housekeeping_timer(unsigned long data)  	set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags); -	if (local->quiescing) { -		set_bit(TMR_RUNNING_HK, &ifmsh->timers_running); -		return; -	} -  	ieee80211_queue_work(&local->hw, &sdata->work);  } @@ -165,7 +156,7 @@ void mesh_sta_cleanup(struct sta_info *sta)  	 * an update.  	 
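---- editor's note: override-mask consumption sketch ----
The mac80211_vht_capa_mod_mask added a little above mirrors the existing
HT one: it advertises which capability bits userspace may override. A
plausible way such a mask is consumed (clamp the requested mask, then
splice the override in), sketched with illustrative bit positions; the
actual nl80211 plumbing is outside this hunk:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t apply_override(uint32_t cap, uint32_t override,
                                   uint32_t mask, uint32_t mod_mask)
    {
        mask &= mod_mask;  /* clamp to the bits the driver allows */
        return (cap & ~mask) | (override & mask);
    }

    int main(void)
    {
        uint32_t cap = 0x00000f31;  /* illustrative capability word */

        /* ask to clear "bit 5"; honored because it is in mod_mask */
        printf("0x%08x -> 0x%08x\n", cap,
               apply_override(cap, 0, 1u << 5, 1u << 5));
        return 0;
    }
---- end note ----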
*/  	changed = mesh_accept_plinks_update(sdata); -	if (sdata->u.mesh.security == IEEE80211_MESH_SEC_NONE) { +	if (!sdata->u.mesh.user_mpm) {  		changed |= mesh_plink_deactivate(sta);  		del_timer_sync(&sta->plink_timer);  	} @@ -479,15 +470,8 @@ static void ieee80211_mesh_path_timer(unsigned long data)  {  	struct ieee80211_sub_if_data *sdata =  		(struct ieee80211_sub_if_data *) data; -	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; -	struct ieee80211_local *local = sdata->local; - -	if (local->quiescing) { -		set_bit(TMR_RUNNING_MP, &ifmsh->timers_running); -		return; -	} -	ieee80211_queue_work(&local->hw, &sdata->work); +	ieee80211_queue_work(&sdata->local->hw, &sdata->work);  }  static void ieee80211_mesh_path_root_timer(unsigned long data) @@ -495,16 +479,10 @@ static void ieee80211_mesh_path_root_timer(unsigned long data)  	struct ieee80211_sub_if_data *sdata =  		(struct ieee80211_sub_if_data *) data;  	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; -	struct ieee80211_local *local = sdata->local;  	set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags); -	if (local->quiescing) { -		set_bit(TMR_RUNNING_MPR, &ifmsh->timers_running); -		return; -	} - -	ieee80211_queue_work(&local->hw, &sdata->work); +	ieee80211_queue_work(&sdata->local->hw, &sdata->work);  }  void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh) @@ -622,35 +600,6 @@ static void ieee80211_mesh_rootpath(struct ieee80211_sub_if_data *sdata)  		  round_jiffies(TU_TO_EXP_TIME(interval)));  } -#ifdef CONFIG_PM -void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata) -{ -	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - -	/* use atomic bitops in case all timers fire at the same time */ - -	if (del_timer_sync(&ifmsh->housekeeping_timer)) -		set_bit(TMR_RUNNING_HK, &ifmsh->timers_running); -	if (del_timer_sync(&ifmsh->mesh_path_timer)) -		set_bit(TMR_RUNNING_MP, &ifmsh->timers_running); -	if (del_timer_sync(&ifmsh->mesh_path_root_timer)) -		set_bit(TMR_RUNNING_MPR, &ifmsh->timers_running); -} - -void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata) -{ -	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - -	if (test_and_clear_bit(TMR_RUNNING_HK, &ifmsh->timers_running)) -		add_timer(&ifmsh->housekeeping_timer); -	if (test_and_clear_bit(TMR_RUNNING_MP, &ifmsh->timers_running)) -		add_timer(&ifmsh->mesh_path_timer); -	if (test_and_clear_bit(TMR_RUNNING_MPR, &ifmsh->timers_running)) -		add_timer(&ifmsh->mesh_path_root_timer); -	ieee80211_mesh_root_setup(ifmsh); -} -#endif -  static int  ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)  { @@ -750,10 +699,8 @@ out_free:  static int  ieee80211_mesh_rebuild_beacon(struct ieee80211_if_mesh *ifmsh)  { -	struct ieee80211_sub_if_data *sdata;  	struct beacon_data *old_bcn;  	int ret; -	sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh);  	mutex_lock(&ifmsh->mtx); @@ -871,8 +818,6 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)  	local->fif_other_bss--;  	atomic_dec(&local->iff_allmultis);  	ieee80211_configure_filter(local); - -	sdata->u.mesh.timers_running = 0;  }  static void @@ -886,15 +831,14 @@ ieee80211_mesh_rx_probe_req(struct ieee80211_sub_if_data *sdata,  	struct ieee80211_mgmt *hdr;  	struct ieee802_11_elems elems;  	size_t baselen; -	u8 *pos, *end; +	u8 *pos; -	end = ((u8 *) mgmt) + len;  	pos = mgmt->u.probe_req.variable;  	baselen = (u8 *) pos - (u8 *) mgmt;  	if (baselen > len)  		return; -	ieee802_11_parse_elems(pos, len - baselen, &elems); +	ieee802_11_parse_elems(pos, len - baselen, false, &elems);  	/* 
802.11-2012 10.1.4.3.2 */  	if ((!ether_addr_equal(mgmt->da, sdata->vif.addr) && @@ -955,7 +899,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,  		return;  	ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, -			       &elems); +			       false, &elems);  	/* ignore non-mesh or secure / unsecure mismatch */  	if ((!elems.mesh_id || !elems.mesh_config) || @@ -963,7 +907,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,  	    (!elems.rsn && sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE))  		return; -	if (elems.ds_params && elems.ds_params_len == 1) +	if (elems.ds_params)  		freq = ieee80211_channel_to_frequency(elems.ds_params[0], band);  	else  		freq = rx_status->freq; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 336c88a1668..da158774eeb 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -275,7 +275,8 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop);  void mesh_path_expire(struct ieee80211_sub_if_data *sdata);  void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,  			    struct ieee80211_mgmt *mgmt, size_t len); -int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst); +struct mesh_path * +mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst);  int mesh_path_add_gate(struct mesh_path *mpath);  int mesh_path_send_to_gates(struct mesh_path *mpath); @@ -313,8 +314,6 @@ void mesh_path_timer(unsigned long data);  void mesh_path_flush_by_nexthop(struct sta_info *sta);  void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata,  			     struct sk_buff *skb); -void mesh_path_quiesce(struct ieee80211_sub_if_data *sdata); -void mesh_path_restart(struct ieee80211_sub_if_data *sdata);  void mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata);  bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt); @@ -359,22 +358,12 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)  void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local); -void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata); -void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata); -void mesh_plink_quiesce(struct sta_info *sta); -void mesh_plink_restart(struct sta_info *sta);  void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata);  void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata);  void ieee80211s_stop(void);  #else  static inline void  ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {} -static inline void ieee80211_mesh_quiesce(struct ieee80211_sub_if_data *sdata) -{} -static inline void ieee80211_mesh_restart(struct ieee80211_sub_if_data *sdata) -{} -static inline void mesh_plink_quiesce(struct sta_info *sta) {} -static inline void mesh_plink_restart(struct sta_info *sta) {}  static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)  { return false; }  static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index bdb8d3b1458..486819cd02c 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -144,7 +144,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,  		*pos++ = WLAN_EID_PREQ;  		break;  	case MPATH_PREP: -		mhwmp_dbg(sdata, "sending PREP to %pM\n", target); +		mhwmp_dbg(sdata, "sending PREP to %pM\n", orig_addr);  		ie_len = 31;  		pos = skb_put(skb, 2 + ie_len);  		*pos++ = 
WLAN_EID_PREP; @@ -445,9 +445,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,  				}  			}  		} else { -			mesh_path_add(sdata, orig_addr); -			mpath = mesh_path_lookup(sdata, orig_addr); -			if (!mpath) { +			mpath = mesh_path_add(sdata, orig_addr); +			if (IS_ERR(mpath)) {  				rcu_read_unlock();  				return 0;  			} @@ -486,9 +485,8 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,  					(last_hop_metric > mpath->metric)))  				fresh_info = false;  		} else { -			mesh_path_add(sdata, ta); -			mpath = mesh_path_lookup(sdata, ta); -			if (!mpath) { +			mpath = mesh_path_add(sdata, ta); +			if (IS_ERR(mpath)) {  				rcu_read_unlock();  				return 0;  			} @@ -661,7 +659,7 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,  	u32 target_sn, orig_sn, lifetime;  	mhwmp_dbg(sdata, "received PREP from %pM\n", -		  PREP_IE_ORIG_ADDR(prep_elem)); +		  PREP_IE_TARGET_ADDR(prep_elem));  	orig_addr = PREP_IE_ORIG_ADDR(prep_elem);  	if (ether_addr_equal(orig_addr, sdata->vif.addr)) @@ -804,9 +802,8 @@ static void hwmp_rann_frame_process(struct ieee80211_sub_if_data *sdata,  	mpath = mesh_path_lookup(sdata, orig_addr);  	if (!mpath) { -		mesh_path_add(sdata, orig_addr); -		mpath = mesh_path_lookup(sdata, orig_addr); -		if (!mpath) { +		mpath = mesh_path_add(sdata, orig_addr); +		if (IS_ERR(mpath)) {  			rcu_read_unlock();  			sdata->u.mesh.mshstats.dropped_frames_no_route++;  			return; @@ -883,7 +880,7 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,  	baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt;  	ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable, -			len - baselen, &elems); +			       len - baselen, false, &elems);  	if (elems.preq) {  		if (elems.preq_len != 37) @@ -1098,11 +1095,10 @@ int mesh_nexthop_resolve(struct ieee80211_sub_if_data *sdata,  	/* no nexthop found, start resolving */  	mpath = mesh_path_lookup(sdata, target_addr);  	if (!mpath) { -		mesh_path_add(sdata, target_addr); -		mpath = mesh_path_lookup(sdata, target_addr); -		if (!mpath) { +		mpath = mesh_path_add(sdata, target_addr); +		if (IS_ERR(mpath)) {  			mesh_path_discard_frame(sdata, skb); -			err = -ENOSPC; +			err = PTR_ERR(mpath);  			goto endlookup;  		}  	} diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index dc7c8df40c2..89aacfd2756 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -493,7 +493,8 @@ int mesh_gate_num(struct ieee80211_sub_if_data *sdata)   *   * State: the initial state of the new path is set to 0   */ -int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst) +struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata, +				const u8 *dst)  {  	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;  	struct ieee80211_local *local = sdata->local; @@ -502,18 +503,33 @@ int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst)  	struct mpath_node *node, *new_node;  	struct hlist_head *bucket;  	int grow = 0; -	int err = 0; +	int err;  	u32 hash_idx;  	if (ether_addr_equal(dst, sdata->vif.addr))  		/* never add ourselves as neighbours */ -		return -ENOTSUPP; +		return ERR_PTR(-ENOTSUPP);  	if (is_multicast_ether_addr(dst)) -		return -ENOTSUPP; +		return ERR_PTR(-ENOTSUPP);  	if (atomic_add_unless(&sdata->u.mesh.mpaths, 1, MESH_MAX_MPATHS) == 0) -		return -ENOSPC; +		return ERR_PTR(-ENOSPC); + +	read_lock_bh(&pathtbl_resize_lock); +	tbl = resize_dereference_mesh_paths(); + +	hash_idx = mesh_table_hash(dst, 
sdata, tbl); +	bucket = &tbl->hash_buckets[hash_idx]; + +	spin_lock(&tbl->hashwlock[hash_idx]); + +	hlist_for_each_entry(node, bucket, list) { +		mpath = node->mpath; +		if (mpath->sdata == sdata && +		    ether_addr_equal(dst, mpath->dst)) +			goto found; +	}  	err = -ENOMEM;  	new_mpath = kzalloc(sizeof(struct mesh_path), GFP_ATOMIC); @@ -524,7 +540,6 @@ int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst)  	if (!new_node)  		goto err_node_alloc; -	read_lock_bh(&pathtbl_resize_lock);  	memcpy(new_mpath->dst, dst, ETH_ALEN);  	eth_broadcast_addr(new_mpath->rann_snd_addr);  	new_mpath->is_root = false; @@ -538,21 +553,6 @@ int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst)  	spin_lock_init(&new_mpath->state_lock);  	init_timer(&new_mpath->timer); -	tbl = resize_dereference_mesh_paths(); - -	hash_idx = mesh_table_hash(dst, sdata, tbl); -	bucket = &tbl->hash_buckets[hash_idx]; - -	spin_lock(&tbl->hashwlock[hash_idx]); - -	err = -EEXIST; -	hlist_for_each_entry(node, bucket, list) { -		mpath = node->mpath; -		if (mpath->sdata == sdata && -		    ether_addr_equal(dst, mpath->dst)) -			goto err_exists; -	} -  	hlist_add_head_rcu(&new_node->list, bucket);  	if (atomic_inc_return(&tbl->entries) >=  	    tbl->mean_chain_len * (tbl->hash_mask + 1)) @@ -560,23 +560,23 @@ int mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst)  	mesh_paths_generation++; -	spin_unlock(&tbl->hashwlock[hash_idx]); -	read_unlock_bh(&pathtbl_resize_lock);  	if (grow) {  		set_bit(MESH_WORK_GROW_MPATH_TABLE,  &ifmsh->wrkq_flags);  		ieee80211_queue_work(&local->hw, &sdata->work);  	} -	return 0; - -err_exists: +	mpath = new_mpath; +found:  	spin_unlock(&tbl->hashwlock[hash_idx]);  	read_unlock_bh(&pathtbl_resize_lock); -	kfree(new_node); +	return mpath; +  err_node_alloc:  	kfree(new_mpath);  err_path_alloc:  	atomic_dec(&sdata->u.mesh.mpaths); -	return err; +	spin_unlock(&tbl->hashwlock[hash_idx]); +	read_unlock_bh(&pathtbl_resize_lock); +	return ERR_PTR(err);  }  static void mesh_table_free_rcu(struct rcu_head *rcu) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 07d396d5707..09bebed9941 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -420,7 +420,6 @@ __mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *hw_addr)  		return NULL;  	sta->plink_state = NL80211_PLINK_LISTEN; -	init_timer(&sta->plink_timer);  	sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);  	sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC); @@ -437,8 +436,9 @@ mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *addr,  {  	struct sta_info *sta = NULL; -	/* Userspace handles peer allocation when security is enabled */ -	if (sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED) +	/* Userspace handles station allocation */ +	if (sdata->u.mesh.user_mpm || +	    sdata->u.mesh.security & IEEE80211_MESH_SEC_AUTHED)  		cfg80211_notify_new_peer_candidate(sdata->dev, addr,  						   elems->ie_start,  						   elems->total_len, @@ -534,10 +534,8 @@ static void mesh_plink_timer(unsigned long data)  	 */  	sta = (struct sta_info *) data; -	if (sta->sdata->local->quiescing) { -		sta->plink_timer_was_running = true; +	if (sta->sdata->local->quiescing)  		return; -	}  	spin_lock_bh(&sta->lock);  	if (sta->ignore_plink_timer) { @@ -546,8 +544,8 @@ static void mesh_plink_timer(unsigned long data)  		return;  	}  	mpl_dbg(sta->sdata, -		"Mesh plink timer for %pM fired on state %d\n", -		sta->sta.addr, sta->plink_state); +		"Mesh plink timer for %pM fired on 
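
The mesh_path_add() rework above removes the racy add-then-lookup pattern in its callers: the function now returns either a valid struct mesh_path * or an errno encoded in the pointer itself, which callers test with IS_ERR()/PTR_ERR(). A minimal userspace sketch of that kernel idiom follows; the helper macros are simplified stand-ins for the real ones in <linux/err.h>, and the stub type, allocator, and error value are hypothetical.

#include <stdio.h>

/* Simplified stand-ins for the kernel's <linux/err.h> helpers, which
 * smuggle a small negative errno inside an invalid pointer value in
 * the top page of the address space. */
#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(long)(err))
#define PTR_ERR(ptr)	((long)(ptr))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

struct mesh_path_stub { int refcount; };

/* Hypothetical allocator modeled on the new mesh_path_add() contract:
 * a usable pointer on success, ERR_PTR(-errno) on failure. */
static struct mesh_path_stub *path_add(int fail)
{
	static struct mesh_path_stub path;

	if (fail)
		return ERR_PTR(-12);	/* -ENOMEM */
	return &path;
}

int main(void)
{
	struct mesh_path_stub *mpath = path_add(1);

	if (IS_ERR(mpath))
		printf("path_add failed: %ld\n", PTR_ERR(mpath));
	else
		printf("path added\n");
	return 0;
}

This is why hwmp_route_info_get(), hwmp_rann_frame_process() and mesh_nexthop_resolve() above now check IS_ERR(mpath) instead of doing a second mesh_path_lookup() after the add.
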
state %s\n", +		sta->sta.addr, mplstates[sta->plink_state]);  	reason = 0;  	llid = sta->llid;  	plid = sta->plid; @@ -598,29 +596,6 @@ static void mesh_plink_timer(unsigned long data)  	}  } -#ifdef CONFIG_PM -void mesh_plink_quiesce(struct sta_info *sta) -{ -	if (!ieee80211_vif_is_mesh(&sta->sdata->vif)) -		return; - -	/* no kernel mesh sta timers have been initialized */ -	if (sta->sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE) -		return; - -	if (del_timer_sync(&sta->plink_timer)) -		sta->plink_timer_was_running = true; -} - -void mesh_plink_restart(struct sta_info *sta) -{ -	if (sta->plink_timer_was_running) { -		add_timer(&sta->plink_timer); -		sta->plink_timer_was_running = false; -	} -} -#endif -  static inline void mesh_plink_timer_set(struct sta_info *sta, int timeout)  {  	sta->plink_timer.expires = jiffies + (HZ * timeout / 1000); @@ -695,6 +670,10 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,  	if (len < IEEE80211_MIN_ACTION_SIZE + 3)  		return; +	if (sdata->u.mesh.user_mpm) +		/* userspace must register for these */ +		return; +  	if (is_multicast_ether_addr(mgmt->da)) {  		mpl_dbg(sdata,  			"Mesh plink: ignore frame from multicast address\n"); @@ -708,7 +687,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,  		baseaddr += 4;  		baselen += 4;  	} -	ieee802_11_parse_elems(baseaddr, len - baselen, &elems); +	ieee802_11_parse_elems(baseaddr, len - baselen, true, &elems);  	if (!elems.peering) {  		mpl_dbg(sdata, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 346ad4cfb01..29620bfc7a6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -56,7 +56,10 @@ MODULE_PARM_DESC(max_probe_tries,   * probe on beacon miss before declaring the connection lost   * default to what we want.   */ -#define IEEE80211_BEACON_LOSS_COUNT	7 +static int beacon_loss_count = 7; +module_param(beacon_loss_count, int, 0644); +MODULE_PARM_DESC(beacon_loss_count, +		 "Number of beacon intervals before we decide beacon was lost.");  /*   * Time the connection can be idle before we probe @@ -87,9 +90,6 @@ MODULE_PARM_DESC(probe_wait_ms,   */  #define IEEE80211_SIGNAL_AVE_MIN_COUNT	4 -#define TMR_RUNNING_TIMER	0 -#define TMR_RUNNING_CHANSW	1 -  /*   * All cfg80211 functions have to be called outside a locked   * section so that they can acquire a lock themselves... 
This @@ -289,6 +289,8 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,  	} else {  		/* 40 MHz (and 80 MHz) must be supported for VHT */  		ret = IEEE80211_STA_DISABLE_VHT; +		/* also mark 40 MHz disabled */ +		ret |= IEEE80211_STA_DISABLE_40MHZ;  		goto out;  	} @@ -303,12 +305,6 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,  					       channel->band);  	vht_chandef.center_freq2 = 0; -	if (vht_oper->center_freq_seg2_idx) -		vht_chandef.center_freq2 = -			ieee80211_channel_to_frequency( -				vht_oper->center_freq_seg2_idx, -				channel->band); -  	switch (vht_oper->chan_width) {  	case IEEE80211_VHT_CHANWIDTH_USE_HT:  		vht_chandef.width = chandef->width; @@ -321,6 +317,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,  		break;  	case IEEE80211_VHT_CHANWIDTH_80P80MHZ:  		vht_chandef.width = NL80211_CHAN_WIDTH_80P80; +		vht_chandef.center_freq2 = +			ieee80211_channel_to_frequency( +				vht_oper->center_freq_seg2_idx, +				channel->band);  		break;  	default:  		if (verbose) @@ -604,11 +604,11 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,  	u8 *pos;  	u32 cap;  	struct ieee80211_sta_vht_cap vht_cap; -	int i;  	BUILD_BUG_ON(sizeof(vht_cap) != sizeof(sband->vht_cap));  	memcpy(&vht_cap, &sband->vht_cap, sizeof(vht_cap)); +	ieee80211_apply_vhtcap_overrides(sdata, &vht_cap);  	/* determine capability flags */  	cap = vht_cap.cap; @@ -631,37 +631,6 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,  			cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE)))  		cap &= ~IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE; -	if (!(ap_vht_cap->vht_cap_info & -			cpu_to_le32(IEEE80211_VHT_CAP_TXSTBC))) -		cap &= ~(IEEE80211_VHT_CAP_RXSTBC_1 | -			 IEEE80211_VHT_CAP_RXSTBC_3 | -			 IEEE80211_VHT_CAP_RXSTBC_4); - -	for (i = 0; i < 8; i++) { -		int shift = i * 2; -		u16 mask = IEEE80211_VHT_MCS_NOT_SUPPORTED << shift; -		u16 ap_mcs, our_mcs; - -		ap_mcs = (le16_to_cpu(ap_vht_cap->supp_mcs.tx_mcs_map) & -								mask) >> shift; -		our_mcs = (le16_to_cpu(vht_cap.vht_mcs.rx_mcs_map) & -								mask) >> shift; - -		if (our_mcs == IEEE80211_VHT_MCS_NOT_SUPPORTED) -			continue; - -		switch (ap_mcs) { -		default: -			if (our_mcs <= ap_mcs) -				break; -			/* fall through */ -		case IEEE80211_VHT_MCS_NOT_SUPPORTED: -			vht_cap.vht_mcs.rx_mcs_map &= cpu_to_le16(~mask); -			vht_cap.vht_mcs.rx_mcs_map |= -				cpu_to_le16(ap_mcs << shift); -		} -	} -  	/* reserve and fill IE */  	pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2);  	ieee80211_ie_build_vht_cap(pos, &vht_cap, cap); @@ -987,6 +956,7 @@ static void ieee80211_chswitch_work(struct work_struct *work)  {  	struct ieee80211_sub_if_data *sdata =  		container_of(work, struct ieee80211_sub_if_data, u.mgd.chswitch_work); +	struct ieee80211_local *local = sdata->local;  	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;  	if (!ieee80211_sdata_running(sdata)) @@ -996,21 +966,22 @@ static void ieee80211_chswitch_work(struct work_struct *work)  	if (!ifmgd->associated)  		goto out; -	sdata->local->_oper_channel = sdata->local->csa_channel; -	if (!sdata->local->ops->channel_switch) { +	local->_oper_chandef = local->csa_chandef; + +	if (!local->ops->channel_switch) {  		/* call "hw_config" only if doing sw channel switch */ -		ieee80211_hw_config(sdata->local, -			IEEE80211_CONF_CHANGE_CHANNEL); +		ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);  	} else {  		/* update the device channel directly */ -		sdata->local->hw.conf.channel = 
sdata->local->_oper_channel; +		local->hw.conf.chandef = local->_oper_chandef;  	}  	/* XXX: shouldn't really modify cfg80211-owned data! */ -	ifmgd->associated->channel = sdata->local->_oper_channel; +	ifmgd->associated->channel = local->_oper_chandef.chan;  	/* XXX: wait for a beacon first? */ -	ieee80211_wake_queues_by_reason(&sdata->local->hw, +	ieee80211_wake_queues_by_reason(&local->hw, +					IEEE80211_MAX_QUEUE_MAP,  					IEEE80211_QUEUE_STOP_REASON_CSA);   out:  	ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; @@ -1038,66 +1009,197 @@ static void ieee80211_chswitch_timer(unsigned long data)  {  	struct ieee80211_sub_if_data *sdata =  		(struct ieee80211_sub_if_data *) data; -	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; -	if (sdata->local->quiescing) { -		set_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running); -		return; -	} - -	ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); +	ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work);  } -void +static void  ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, -				 const struct ieee80211_channel_sw_ie *sw_elem, -				 struct ieee80211_bss *bss, u64 timestamp) +				 u64 timestamp, struct ieee802_11_elems *elems)  { -	struct cfg80211_bss *cbss = -		container_of((void *)bss, struct cfg80211_bss, priv); -	struct ieee80211_channel *new_ch; +	struct ieee80211_local *local = sdata->local;  	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; -	int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num, -						      cbss->channel->band); +	struct cfg80211_bss *cbss = ifmgd->associated; +	struct ieee80211_bss *bss;  	struct ieee80211_chanctx *chanctx; +	enum ieee80211_band new_band; +	int new_freq; +	u8 new_chan_no; +	u8 count; +	u8 mode; +	struct ieee80211_channel *new_chan; +	struct cfg80211_chan_def new_chandef = {}; +	struct cfg80211_chan_def new_vht_chandef = {}; +	const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; +	const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; +	int secondary_channel_offset = -1;  	ASSERT_MGD_MTX(ifmgd); -	if (!ifmgd->associated) +	if (!cbss)  		return; -	if (sdata->local->scanning) +	if (local->scanning)  		return; -	/* Disregard subsequent beacons if we are already running a timer -	   processing a CSA */ - +	/* disregard subsequent announcements if we are already processing */  	if (ifmgd->flags & IEEE80211_STA_CSA_RECEIVED)  		return; -	new_ch = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); -	if (!new_ch || new_ch->flags & IEEE80211_CHAN_DISABLED) { +	sec_chan_offs = elems->sec_chan_offs; +	wide_bw_chansw_ie = elems->wide_bw_chansw_ie; + +	if (ifmgd->flags & (IEEE80211_STA_DISABLE_HT | +			    IEEE80211_STA_DISABLE_40MHZ)) { +		sec_chan_offs = NULL; +		wide_bw_chansw_ie = NULL; +	} + +	if (ifmgd->flags & IEEE80211_STA_DISABLE_VHT) +		wide_bw_chansw_ie = NULL; + +	if (elems->ext_chansw_ie) { +		if (!ieee80211_operating_class_to_band( +				elems->ext_chansw_ie->new_operating_class, +				&new_band)) { +			sdata_info(sdata, +				   "cannot understand ECSA IE operating class %d, disconnecting\n", +				   elems->ext_chansw_ie->new_operating_class); +			ieee80211_queue_work(&local->hw, +					     &ifmgd->csa_connection_drop_work); +		} +		new_chan_no = elems->ext_chansw_ie->new_ch_num; +		count = elems->ext_chansw_ie->count; +		mode = elems->ext_chansw_ie->mode; +	} else if (elems->ch_switch_ie) { +		new_band = cbss->channel->band; +		new_chan_no = elems->ch_switch_ie->new_ch_num; +		count = elems->ch_switch_ie->count; +		mode = 
elems->ch_switch_ie->mode; +	} else { +		/* nothing here we understand */ +		return; +	} + +	bss = (void *)cbss->priv; + +	new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band); +	new_chan = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); +	if (!new_chan || new_chan->flags & IEEE80211_CHAN_DISABLED) {  		sdata_info(sdata,  			   "AP %pM switches to unsupported channel (%d MHz), disconnecting\n",  			   ifmgd->associated->bssid, new_freq); -		ieee80211_queue_work(&sdata->local->hw, +		ieee80211_queue_work(&local->hw, +				     &ifmgd->csa_connection_drop_work); +		return; +	} + +	if (sec_chan_offs) { +		secondary_channel_offset = sec_chan_offs->sec_chan_offs; +	} else if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) { +		/* if HT is enabled and the IE not present, it's still HT */ +		secondary_channel_offset = IEEE80211_HT_PARAM_CHA_SEC_NONE; +	} + +	switch (secondary_channel_offset) { +	default: +		/* secondary_channel_offset was present but is invalid */ +	case IEEE80211_HT_PARAM_CHA_SEC_NONE: +		cfg80211_chandef_create(&new_chandef, new_chan, +					NL80211_CHAN_HT20); +		break; +	case IEEE80211_HT_PARAM_CHA_SEC_ABOVE: +		cfg80211_chandef_create(&new_chandef, new_chan, +					NL80211_CHAN_HT40PLUS); +		break; +	case IEEE80211_HT_PARAM_CHA_SEC_BELOW: +		cfg80211_chandef_create(&new_chandef, new_chan, +					NL80211_CHAN_HT40MINUS); +		break; +	case -1: +		cfg80211_chandef_create(&new_chandef, new_chan, +					NL80211_CHAN_NO_HT); +		break; +	} + +	if (wide_bw_chansw_ie) { +		new_vht_chandef.chan = new_chan; +		new_vht_chandef.center_freq1 = +			ieee80211_channel_to_frequency( +				wide_bw_chansw_ie->new_center_freq_seg0, +				new_band); + +		switch (wide_bw_chansw_ie->new_channel_width) { +		default: +			/* hmmm, ignore VHT and use HT if present */ +		case IEEE80211_VHT_CHANWIDTH_USE_HT: +			new_vht_chandef.chan = NULL; +			break; +		case IEEE80211_VHT_CHANWIDTH_80MHZ: +			new_vht_chandef.width = NL80211_CHAN_WIDTH_80; +			break; +		case IEEE80211_VHT_CHANWIDTH_160MHZ: +			new_vht_chandef.width = NL80211_CHAN_WIDTH_160; +			break; +		case IEEE80211_VHT_CHANWIDTH_80P80MHZ: +			/* field is otherwise reserved */ +			new_vht_chandef.center_freq2 = +				ieee80211_channel_to_frequency( +					wide_bw_chansw_ie->new_center_freq_seg1, +					new_band); +			new_vht_chandef.width = NL80211_CHAN_WIDTH_80P80; +			break; +		} +		if (ifmgd->flags & IEEE80211_STA_DISABLE_80P80MHZ && +		    new_vht_chandef.width == NL80211_CHAN_WIDTH_80P80) +			chandef_downgrade(&new_vht_chandef); +		if (ifmgd->flags & IEEE80211_STA_DISABLE_160MHZ && +		    new_vht_chandef.width == NL80211_CHAN_WIDTH_160) +			chandef_downgrade(&new_vht_chandef); +		if (ifmgd->flags & IEEE80211_STA_DISABLE_40MHZ && +		    new_vht_chandef.width > NL80211_CHAN_WIDTH_20) +			chandef_downgrade(&new_vht_chandef); +	} + +	/* if VHT data is there validate & use it */ +	if (new_vht_chandef.chan) { +		if (!cfg80211_chandef_compatible(&new_vht_chandef, +						 &new_chandef)) { +			sdata_info(sdata, +				   "AP %pM CSA has inconsistent channel data, disconnecting\n", +				   ifmgd->associated->bssid); +			ieee80211_queue_work(&local->hw, +					     &ifmgd->csa_connection_drop_work); +			return; +		} +		new_chandef = new_vht_chandef; +	} + +	if (!cfg80211_chandef_usable(local->hw.wiphy, &new_chandef, +				     IEEE80211_CHAN_DISABLED)) { +		sdata_info(sdata, +			   "AP %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n", +			   ifmgd->associated->bssid, new_freq, +			   new_chandef.width, 
new_chandef.center_freq1, +			   new_chandef.center_freq2); +		ieee80211_queue_work(&local->hw,  				     &ifmgd->csa_connection_drop_work);  		return;  	}  	ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED; -	if (sdata->local->use_chanctx) { +	if (local->use_chanctx) {  		sdata_info(sdata,  			   "not handling channel switch with channel contexts\n"); -		ieee80211_queue_work(&sdata->local->hw, +		ieee80211_queue_work(&local->hw,  				     &ifmgd->csa_connection_drop_work);  		return;  	} -	mutex_lock(&sdata->local->chanctx_mtx); +	mutex_lock(&local->chanctx_mtx);  	if (WARN_ON(!rcu_access_pointer(sdata->vif.chanctx_conf))) { -		mutex_unlock(&sdata->local->chanctx_mtx); +		mutex_unlock(&local->chanctx_mtx);  		return;  	}  	chanctx = container_of(rcu_access_pointer(sdata->vif.chanctx_conf), @@ -1105,39 +1207,39 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,  	if (chanctx->refcount > 1) {  		sdata_info(sdata,  			   "channel switch with multiple interfaces on the same channel, disconnecting\n"); -		ieee80211_queue_work(&sdata->local->hw, +		ieee80211_queue_work(&local->hw,  				     &ifmgd->csa_connection_drop_work); -		mutex_unlock(&sdata->local->chanctx_mtx); +		mutex_unlock(&local->chanctx_mtx);  		return;  	} -	mutex_unlock(&sdata->local->chanctx_mtx); +	mutex_unlock(&local->chanctx_mtx); -	sdata->local->csa_channel = new_ch; +	local->csa_chandef = new_chandef; -	if (sw_elem->mode) -		ieee80211_stop_queues_by_reason(&sdata->local->hw, +	if (mode) +		ieee80211_stop_queues_by_reason(&local->hw, +				IEEE80211_MAX_QUEUE_MAP,  				IEEE80211_QUEUE_STOP_REASON_CSA); -	if (sdata->local->ops->channel_switch) { +	if (local->ops->channel_switch) {  		/* use driver's channel switch callback */  		struct ieee80211_channel_switch ch_switch = {  			.timestamp = timestamp, -			.block_tx = sw_elem->mode, -			.channel = new_ch, -			.count = sw_elem->count, +			.block_tx = mode, +			.chandef = new_chandef, +			.count = count,  		}; -		drv_channel_switch(sdata->local, &ch_switch); +		drv_channel_switch(local, &ch_switch);  		return;  	}  	/* channel switch handled in software */ -	if (sw_elem->count <= 1) -		ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); +	if (count <= 1) +		ieee80211_queue_work(&local->hw, &ifmgd->chswitch_work);  	else  		mod_timer(&ifmgd->chswitch_timer, -			  TU_TO_EXP_TIME(sw_elem->count * -					 cbss->beacon_interval)); +			  TU_TO_EXP_TIME(count * cbss->beacon_interval));  }  static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, @@ -1383,6 +1485,7 @@ void ieee80211_dynamic_ps_disable_work(struct work_struct *work)  	}  	ieee80211_wake_queues_by_reason(&local->hw, +					IEEE80211_MAX_QUEUE_MAP,  					IEEE80211_QUEUE_STOP_REASON_PS);  } @@ -1435,16 +1538,14 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)  	if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) &&  	    !(ifmgd->flags & IEEE80211_STA_NULLFUNC_ACKED)) { -		netif_tx_stop_all_queues(sdata->dev); - -		if (drv_tx_frames_pending(local)) +		if (drv_tx_frames_pending(local)) {  			mod_timer(&local->dynamic_ps_timer, jiffies +  				  msecs_to_jiffies(  				  local->hw.conf.dynamic_ps_timeout)); -		else { +		} else {  			ieee80211_send_nullfunc(local, sdata, 1);  			/* Flush to get the tx status of nullfunc frame */ -			drv_flush(local, false); +			ieee80211_flush_queues(local, sdata);  		}  	} @@ -1455,9 +1556,6 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)  		local->hw.conf.flags |= IEEE80211_CONF_PS;  		
ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);  	} - -	if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) -		netif_tx_wake_all_queues(sdata->dev);  }  void ieee80211_dynamic_ps_timer(unsigned long data) @@ -1563,6 +1661,7 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local,  		params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4);  		params.cw_min = ecw2cw(pos[1] & 0x0f);  		params.txop = get_unaligned_le16(pos + 2); +		params.acm = acm;  		params.uapsd = uapsd;  		mlme_dbg(sdata, @@ -1650,7 +1749,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,  		bss_conf->assoc_capability, bss->has_erp_value, bss->erp_value);  	sdata->u.mgd.beacon_timeout = usecs_to_jiffies(ieee80211_tu_to_usec( -		IEEE80211_BEACON_LOSS_COUNT * bss_conf->beacon_int)); +		beacon_loss_count * bss_conf->beacon_int));  	sdata->u.mgd.associated = cbss;  	memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN); @@ -1663,18 +1762,17 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,  		rcu_read_lock();  		ies = rcu_dereference(cbss->ies);  		if (ies) { -			u8 noa[2];  			int ret;  			ret = cfg80211_get_p2p_attr(  					ies->data, ies->len,  					IEEE80211_P2P_ATTR_ABSENCE_NOTICE, -					noa, sizeof(noa)); +					(u8 *) &bss_conf->p2p_noa_attr, +					sizeof(bss_conf->p2p_noa_attr));  			if (ret >= 2) { -				bss_conf->p2p_oppps = noa[1] & 0x80; -				bss_conf->p2p_ctwindow = noa[1] & 0x7f; +				sdata->u.mgd.p2p_noa_index = +					bss_conf->p2p_noa_attr.index;  				bss_info_changed |= BSS_CHANGED_P2P_PS; -				sdata->u.mgd.p2p_noa_index = noa[0];  			}  		}  		rcu_read_unlock(); @@ -1718,7 +1816,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,  	ieee80211_recalc_smps(sdata);  	ieee80211_recalc_ps_vif(sdata); -	netif_tx_start_all_queues(sdata->dev);  	netif_carrier_on(sdata->dev);  } @@ -1741,22 +1838,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,  	ieee80211_stop_poll(sdata);  	ifmgd->associated = NULL; - -	/* -	 * we need to commit the associated = NULL change because the -	 * scan code uses that to determine whether this iface should -	 * go to/wake up from powersave or not -- and could otherwise -	 * wake the queues erroneously. -	 */ -	smp_mb(); - -	/* -	 * Thus, we can only afterwards stop the queues -- to account -	 * for the case where another CPU is finishing a scan at this -	 * time -- we don't want the scan code to enable queues. -	 */ - -	netif_tx_stop_all_queues(sdata->dev);  	netif_carrier_off(sdata->dev);  	/* @@ -1775,7 +1856,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,  	/* flush out any pending frame (e.g. 
DELBA) before deauth/disassoc */  	if (tx) -		drv_flush(local, false); +		ieee80211_flush_queues(local, sdata);  	/* deauthenticate/disassociate now */  	if (tx || frame_buf) @@ -1784,7 +1865,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,  	/* flush out frame */  	if (tx) -		drv_flush(local, false); +		ieee80211_flush_queues(local, sdata);  	/* clear bssid only after building the needed mgmt frames */  	memset(ifmgd->bssid, 0, ETH_ALEN); @@ -1799,12 +1880,15 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,  	changed |= BSS_CHANGED_ASSOC;  	sdata->vif.bss_conf.assoc = false; -	sdata->vif.bss_conf.p2p_ctwindow = 0; -	sdata->vif.bss_conf.p2p_oppps = false; +	ifmgd->p2p_noa_index = -1; +	memset(&sdata->vif.bss_conf.p2p_noa_attr, 0, +	       sizeof(sdata->vif.bss_conf.p2p_noa_attr)); -	/* on the next assoc, re-program HT parameters */ +	/* on the next assoc, re-program HT/VHT parameters */  	memset(&ifmgd->ht_capa, 0, sizeof(ifmgd->ht_capa));  	memset(&ifmgd->ht_capa_mask, 0, sizeof(ifmgd->ht_capa_mask)); +	memset(&ifmgd->vht_capa, 0, sizeof(ifmgd->vht_capa)); +	memset(&ifmgd->vht_capa_mask, 0, sizeof(ifmgd->vht_capa_mask));  	sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL; @@ -1830,8 +1914,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,  	del_timer_sync(&sdata->u.mgd.timer);  	del_timer_sync(&sdata->u.mgd.chswitch_timer); -	sdata->u.mgd.timers_running = 0; -  	sdata->vif.bss_conf.dtim_period = 0;  	ifmgd->flags = 0; @@ -1956,7 +2038,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)  	ifmgd->probe_timeout = jiffies + msecs_to_jiffies(probe_wait_ms);  	run_again(ifmgd, ifmgd->probe_timeout);  	if (sdata->local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) -		drv_flush(sdata->local, false); +		ieee80211_flush_queues(sdata->local, sdata);  }  static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, @@ -1980,12 +2062,15 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,  		goto out;  	} -	if (beacon) +	if (beacon) {  		mlme_dbg_ratelimited(sdata, -				     "detected beacon loss from AP - probing\n"); +				     "detected beacon loss from AP (missed %d beacons) - probing\n", +				     beacon_loss_count); -	ieee80211_cqm_rssi_notify(&sdata->vif, -		NL80211_CQM_RSSI_BEACON_LOSS_EVENT, GFP_KERNEL); +		ieee80211_cqm_rssi_notify(&sdata->vif, +					  NL80211_CQM_RSSI_BEACON_LOSS_EVENT, +					  GFP_KERNEL); +	}  	/*  	 * The driver/our work has already reported this event or the @@ -2079,6 +2164,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)  			       true, frame_buf);  	ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;  	ieee80211_wake_queues_by_reason(&sdata->local->hw, +					IEEE80211_MAX_QUEUE_MAP,  					IEEE80211_QUEUE_STOP_REASON_CSA);  	mutex_unlock(&ifmgd->mtx); @@ -2130,7 +2216,6 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif)  	trace_api_beacon_loss(sdata); -	WARN_ON(hw->flags & IEEE80211_HW_CONNECTION_MONITOR);  	sdata->u.mgd.connection_loss = false;  	ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work);  } @@ -2180,7 +2265,7 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,  	u32 tx_flags = 0;  	pos = mgmt->u.auth.variable; -	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); +	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems);  	if (!elems.challenge)  		return;  	auth_data->expected_transaction = 4; @@ -2445,7 +2530,7 @@ 
static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,  	}  	pos = mgmt->u.assoc_resp.variable; -	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); +	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems);  	if (!elems.supp_rates) {  		sdata_info(sdata, "no SuppRates element in AssocResp\n"); @@ -2614,13 +2699,13 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,  		   capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14))));  	pos = mgmt->u.assoc_resp.variable; -	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); +	ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), false, &elems);  	if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY && -	    elems.timeout_int && elems.timeout_int_len == 5 && -	    elems.timeout_int[0] == WLAN_TIMEOUT_ASSOC_COMEBACK) { +	    elems.timeout_int && +	    elems.timeout_int->type == WLAN_TIMEOUT_ASSOC_COMEBACK) {  		u32 tu, ms; -		tu = get_unaligned_le32(elems.timeout_int + 1); +		tu = le32_to_cpu(elems.timeout_int->value);  		ms = tu * 1024 / 1000;  		sdata_info(sdata,  			   "%pM rejected association temporarily; comeback duration %u TU (%u ms)\n", @@ -2669,6 +2754,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,  	struct ieee80211_channel *channel;  	bool need_ps = false; +	lockdep_assert_held(&sdata->u.mgd.mtx); +  	if ((sdata->u.mgd.associated &&  	     ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid)) ||  	    (sdata->u.mgd.assoc_data && @@ -2683,7 +2770,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,  		}  	} -	if (elems->ds_params && elems->ds_params_len == 1) +	if (elems->ds_params)  		freq = ieee80211_channel_to_frequency(elems->ds_params[0],  						      rx_status->band);  	else @@ -2699,7 +2786,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,  	if (bss)  		ieee80211_rx_bss_put(local, bss); -	if (!sdata->u.mgd.associated) +	if (!sdata->u.mgd.associated || +	    !ether_addr_equal(mgmt->bssid, sdata->u.mgd.associated->bssid))  		return;  	if (need_ps) { @@ -2708,10 +2796,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,  		mutex_unlock(&local->iflist_mtx);  	} -	if (elems->ch_switch_ie && -	    memcmp(mgmt->bssid, sdata->u.mgd.associated->bssid, ETH_ALEN) == 0) -		ieee80211_sta_process_chanswitch(sdata, elems->ch_switch_ie, -						 bss, rx_status->mactime); +	ieee80211_sta_process_chanswitch(sdata, rx_status->mactime, elems); +  } @@ -2736,7 +2822,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,  		return;  	ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, -				&elems); +			       false, &elems);  	ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems); @@ -2819,7 +2905,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,  	if (ifmgd->assoc_data && ifmgd->assoc_data->need_beacon &&  	    ether_addr_equal(mgmt->bssid, ifmgd->assoc_data->bss->bssid)) {  		ieee802_11_parse_elems(mgmt->u.beacon.variable, -				       len - baselen, &elems); +				       len - baselen, false, &elems);  		ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems);  		ifmgd->assoc_data->have_beacon = true; @@ -2929,7 +3015,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,  	ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4);  	ncrc = ieee802_11_parse_elems_crc(mgmt->u.beacon.variable, -					  len - baselen, &elems, +					  len - baselen, false, &elems,  					  care_about_ies, 
ncrc);  	if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) { @@ -2961,22 +3047,30 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,  	}  	if (sdata->vif.p2p) { -		u8 noa[2]; +		struct ieee80211_p2p_noa_attr noa = {};  		int ret;  		ret = cfg80211_get_p2p_attr(mgmt->u.beacon.variable,  					    len - baselen,  					    IEEE80211_P2P_ATTR_ABSENCE_NOTICE, -					    noa, sizeof(noa)); -		if (ret >= 2 && sdata->u.mgd.p2p_noa_index != noa[0]) { -			bss_conf->p2p_oppps = noa[1] & 0x80; -			bss_conf->p2p_ctwindow = noa[1] & 0x7f; +					    (u8 *) &noa, sizeof(noa)); +		if (ret >= 2) { +			if (sdata->u.mgd.p2p_noa_index != noa.index) { +				/* valid noa_attr and index changed */ +				sdata->u.mgd.p2p_noa_index = noa.index; +				memcpy(&bss_conf->p2p_noa_attr, &noa, sizeof(noa)); +				changed |= BSS_CHANGED_P2P_PS; +				/* +				 * make sure we update all information, the CRC +				 * mechanism doesn't look at P2P attributes. +				 */ +				ifmgd->beacon_crc_valid = false; +			} +		} else if (sdata->u.mgd.p2p_noa_index != -1) { +			/* noa_attr not found and we had valid noa_attr before */ +			sdata->u.mgd.p2p_noa_index = -1; +			memset(&bss_conf->p2p_noa_attr, 0, sizeof(bss_conf->p2p_noa_attr));  			changed |= BSS_CHANGED_P2P_PS; -			sdata->u.mgd.p2p_noa_index = noa[0]; -			/* -			 * make sure we update all information, the CRC -			 * mechanism doesn't look at P2P attributes. -			 */  			ifmgd->beacon_crc_valid = false;  		}  	} @@ -3018,7 +3112,7 @@ ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,  		changed |= BSS_CHANGED_DTIM_PERIOD;  	} -	if (elems.erp_info && elems.erp_info_len >= 1) { +	if (elems.erp_info) {  		erp_valid = true;  		erp_value = elems.erp_info[0];  	} else { @@ -3068,6 +3162,8 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,  	enum rx_mgmt_action rma = RX_MGMT_NONE;  	u8 deauth_buf[IEEE80211_DEAUTH_FRAME_LEN];  	u16 fc; +	struct ieee802_11_elems elems; +	int ies_len;  	rx_status = (struct ieee80211_rx_status *) skb->cb;  	mgmt = (struct ieee80211_mgmt *) skb->data; @@ -3097,14 +3193,48 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,  		rma = ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len, &bss);  		break;  	case IEEE80211_STYPE_ACTION: -		switch (mgmt->u.action.category) { -		case WLAN_CATEGORY_SPECTRUM_MGMT: +		if (mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) { +			ies_len = skb->len - +				  offsetof(struct ieee80211_mgmt, +					   u.action.u.chan_switch.variable); + +			if (ies_len < 0) +				break; + +			ieee802_11_parse_elems( +				mgmt->u.action.u.chan_switch.variable, +				ies_len, true, &elems); + +			if (elems.parse_error) +				break; +  			ieee80211_sta_process_chanswitch(sdata, -					&mgmt->u.action.u.chan_switch.sw_elem, -					(void *)ifmgd->associated->priv, -					rx_status->mactime); -			break; +							 rx_status->mactime, +							 &elems); +		} else if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) { +			ies_len = skb->len - +				  offsetof(struct ieee80211_mgmt, +					   u.action.u.ext_chan_switch.variable); + +			if (ies_len < 0) +				break; + +			ieee802_11_parse_elems( +				mgmt->u.action.u.ext_chan_switch.variable, +				ies_len, true, &elems); + +			if (elems.parse_error) +				break; + +			/* for the handling code pretend this was also an IE */ +			elems.ext_chansw_ie = +				&mgmt->u.action.u.ext_chan_switch.data; + +			ieee80211_sta_process_chanswitch(sdata, +							 rx_status->mactime, +							 &elems);  		} +		break;  	}  	mutex_unlock(&ifmgd->mtx); @@ -3140,15 
+3270,8 @@ static void ieee80211_sta_timer(unsigned long data)  {  	struct ieee80211_sub_if_data *sdata =  		(struct ieee80211_sub_if_data *) data; -	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; -	struct ieee80211_local *local = sdata->local; -	if (local->quiescing) { -		set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); -		return; -	} - -	ieee80211_queue_work(&local->hw, &sdata->work); +	ieee80211_queue_work(&sdata->local->hw, &sdata->work);  }  static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata, @@ -3500,72 +3623,6 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)  	}  } -#ifdef CONFIG_PM -void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) -{ -	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - -	/* -	 * Stop timers before deleting work items, as timers -	 * could race and re-add the work-items. They will be -	 * re-established on connection. -	 */ -	del_timer_sync(&ifmgd->conn_mon_timer); -	del_timer_sync(&ifmgd->bcn_mon_timer); - -	/* -	 * we need to use atomic bitops for the running bits -	 * only because both timers might fire at the same -	 * time -- the code here is properly synchronised. -	 */ - -	cancel_work_sync(&ifmgd->request_smps_work); - -	cancel_work_sync(&ifmgd->monitor_work); -	cancel_work_sync(&ifmgd->beacon_connection_loss_work); -	cancel_work_sync(&ifmgd->csa_connection_drop_work); -	if (del_timer_sync(&ifmgd->timer)) -		set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); - -	if (del_timer_sync(&ifmgd->chswitch_timer)) -		set_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running); -	cancel_work_sync(&ifmgd->chswitch_work); -} - -void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata) -{ -	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - -	mutex_lock(&ifmgd->mtx); -	if (!ifmgd->associated) { -		mutex_unlock(&ifmgd->mtx); -		return; -	} - -	if (sdata->flags & IEEE80211_SDATA_DISCONNECT_RESUME) { -		sdata->flags &= ~IEEE80211_SDATA_DISCONNECT_RESUME; -		mlme_dbg(sdata, "driver requested disconnect after resume\n"); -		ieee80211_sta_connection_lost(sdata, -					      ifmgd->associated->bssid, -					      WLAN_REASON_UNSPECIFIED, -					      true); -		mutex_unlock(&ifmgd->mtx); -		return; -	} -	mutex_unlock(&ifmgd->mtx); - -	if (test_and_clear_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running)) -		add_timer(&ifmgd->timer); -	if (test_and_clear_bit(TMR_RUNNING_CHANSW, &ifmgd->timers_running)) -		add_timer(&ifmgd->chswitch_timer); -	ieee80211_sta_reset_beacon_monitor(sdata); - -	mutex_lock(&sdata->local->mtx); -	ieee80211_restart_sta_timer(sdata); -	mutex_unlock(&sdata->local->mtx); -} -#endif -  /* interface setup */  void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)  { @@ -3590,8 +3647,9 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)  	ifmgd->flags = 0;  	ifmgd->powersave = sdata->wdev.ps; -	ifmgd->uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; -	ifmgd->uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN; +	ifmgd->uapsd_queues = sdata->local->hw.uapsd_queues; +	ifmgd->uapsd_max_sp_len = sdata->local->hw.uapsd_max_sp_len; +	ifmgd->p2p_noa_index = -1;  	mutex_init(&ifmgd->mtx); @@ -4089,6 +4147,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,  		ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;  	} +	if (req->flags & ASSOC_REQ_DISABLE_VHT) +		ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; +  	/* Also disable HT if we don't support it or the AP doesn't use WMM */  	sband = local->hw.wiphy->bands[req->bss->channel->band];  	if 
(!sband->ht_cap.ht_supported || @@ -4112,6 +4173,10 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,  	memcpy(&ifmgd->ht_capa_mask, &req->ht_capa_mask,  	       sizeof(ifmgd->ht_capa_mask)); +	memcpy(&ifmgd->vht_capa, &req->vht_capa, sizeof(ifmgd->vht_capa)); +	memcpy(&ifmgd->vht_capa_mask, &req->vht_capa_mask, +	       sizeof(ifmgd->vht_capa_mask)); +  	if (req->ie && req->ie_len) {  		memcpy(assoc_data->ie, req->ie, req->ie_len);  		assoc_data->ie_len = req->ie_len; @@ -4149,7 +4214,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,  	rcu_read_unlock();  	if (bss->wmm_used && bss->uapsd_supported && -	    (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) { +	    (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) && +	    sdata->wmm_acm != 0xff) {  		assoc_data->uapsd = true;  		ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED;  	} else { diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 430bd254e49..acd1f71adc0 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -118,9 +118,9 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local)  	 * Stop queues and transmit all frames queued by the driver  	 * before sending nullfunc to enable powersave at the AP.  	 */ -	ieee80211_stop_queues_by_reason(&local->hw, +	ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,  					IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL); -	drv_flush(local, false); +	ieee80211_flush_queues(local, NULL);  	mutex_lock(&local->iflist_mtx);  	list_for_each_entry(sdata, &local->interfaces, list) { @@ -181,7 +181,7 @@ void ieee80211_offchannel_return(struct ieee80211_local *local)  	}  	mutex_unlock(&local->iflist_mtx); -	ieee80211_wake_queues_by_reason(&local->hw, +	ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP,  					IEEE80211_QUEUE_STOP_REASON_OFFCHANNEL);  } @@ -277,7 +277,7 @@ void ieee80211_start_next_roc(struct ieee80211_local *local)  			duration = 10;  		ret = drv_remain_on_channel(local, roc->sdata, roc->chan, -					    duration); +					    duration, roc->type);  		roc->started = true; @@ -382,7 +382,7 @@ void ieee80211_sw_roc_work(struct work_struct *work)  		ieee80211_roc_notify_destroy(roc, !roc->abort);  		if (started) { -			drv_flush(local, false); +			ieee80211_flush_queues(local, NULL);  			local->tmp_channel = NULL;  			ieee80211_hw_config(local, 0); @@ -445,15 +445,15 @@ void ieee80211_roc_setup(struct ieee80211_local *local)  	INIT_LIST_HEAD(&local->roc_list);  } -void ieee80211_roc_purge(struct ieee80211_sub_if_data *sdata) +void ieee80211_roc_purge(struct ieee80211_local *local, +			 struct ieee80211_sub_if_data *sdata)  { -	struct ieee80211_local *local = sdata->local;  	struct ieee80211_roc_work *roc, *tmp;  	LIST_HEAD(tmp_list);  	mutex_lock(&local->mtx);  	list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { -		if (roc->sdata != sdata) +		if (sdata && roc->sdata != sdata)  			continue;  		if (roc->started && local->ops->remain_on_channel) { diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index d0275f34bf7..7fc5d0d8149 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -6,32 +6,11 @@  #include "driver-ops.h"  #include "led.h" -/* return value indicates whether the driver should be further notified */ -static void ieee80211_quiesce(struct ieee80211_sub_if_data *sdata) -{ -	switch (sdata->vif.type) { -	case NL80211_IFTYPE_STATION: -		ieee80211_sta_quiesce(sdata); -		break; -	case NL80211_IFTYPE_ADHOC: -		ieee80211_ibss_quiesce(sdata); -		break; -	case 
NL80211_IFTYPE_MESH_POINT: -		ieee80211_mesh_quiesce(sdata); -		break; -	default: -		break; -	} - -	cancel_work_sync(&sdata->work); -} -  int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)  {  	struct ieee80211_local *local = hw_to_local(hw);  	struct ieee80211_sub_if_data *sdata;  	struct sta_info *sta; -	struct ieee80211_chanctx *ctx;  	if (!local->open_count)  		goto suspend; @@ -40,6 +19,10 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)  	ieee80211_dfs_cac_cancel(local); +	ieee80211_roc_purge(local, NULL); + +	ieee80211_del_virtual_monitor(local); +  	if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {  		mutex_lock(&local->sta_mtx);  		list_for_each_entry(sta, &local->sta_list, list) { @@ -51,12 +34,14 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)  	}  	ieee80211_stop_queues_by_reason(hw, -			IEEE80211_QUEUE_STOP_REASON_SUSPEND); +					IEEE80211_MAX_QUEUE_MAP, +					IEEE80211_QUEUE_STOP_REASON_SUSPEND); -	/* flush out all packets */ +	/* flush out all packets and station cleanup call_rcu()s */  	synchronize_net(); +	rcu_barrier(); -	drv_flush(local, false); +	ieee80211_flush_queues(local, NULL);  	local->quiescing = true;  	/* make quiescing visible to timers everywhere */ @@ -89,23 +74,17 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)  				mutex_unlock(&local->sta_mtx);  			}  			ieee80211_wake_queues_by_reason(hw, +					IEEE80211_MAX_QUEUE_MAP,  					IEEE80211_QUEUE_STOP_REASON_SUSPEND);  			return err;  		} else if (err > 0) {  			WARN_ON(err != 1); -			local->wowlan = false; +			return err;  		} else { -			list_for_each_entry(sdata, &local->interfaces, list) -				if (ieee80211_sdata_running(sdata)) -					ieee80211_quiesce(sdata);  			goto suspend;  		}  	} -	/* disable keys */ -	list_for_each_entry(sdata, &local->interfaces, list) -		ieee80211_disable_keys(sdata); -  	/* tear down aggregation sessions and remove STAs */  	mutex_lock(&local->sta_mtx);  	list_for_each_entry(sta, &local->sta_list, list) { @@ -117,100 +96,21 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)  				WARN_ON(drv_sta_state(local, sta->sdata, sta,  						      state, state - 1));  		} - -		mesh_plink_quiesce(sta);  	}  	mutex_unlock(&local->sta_mtx);  	/* remove all interfaces */  	list_for_each_entry(sdata, &local->interfaces, list) { -		static u8 zero_addr[ETH_ALEN] = {}; -		u32 changed = 0; -  		if (!ieee80211_sdata_running(sdata))  			continue; - -		switch (sdata->vif.type) { -		case NL80211_IFTYPE_AP_VLAN: -		case NL80211_IFTYPE_MONITOR: -			/* skip these */ -			continue; -		case NL80211_IFTYPE_STATION: -			if (sdata->vif.bss_conf.assoc) -				changed = BSS_CHANGED_ASSOC | -					  BSS_CHANGED_BSSID | -					  BSS_CHANGED_IDLE; -			break; -		case NL80211_IFTYPE_AP: -		case NL80211_IFTYPE_ADHOC: -		case NL80211_IFTYPE_MESH_POINT: -			if (sdata->vif.bss_conf.enable_beacon) -				changed = BSS_CHANGED_BEACON_ENABLED; -			break; -		default: -			break; -		} - -		ieee80211_quiesce(sdata); - -		sdata->suspend_bss_conf = sdata->vif.bss_conf; -		memset(&sdata->vif.bss_conf, 0, sizeof(sdata->vif.bss_conf)); -		sdata->vif.bss_conf.idle = true; -		if (sdata->suspend_bss_conf.bssid) -			sdata->vif.bss_conf.bssid = zero_addr; - -		/* disable beaconing or remove association */ -		ieee80211_bss_info_change_notify(sdata, changed); - -		if (sdata->vif.type == NL80211_IFTYPE_AP && -		    rcu_access_pointer(sdata->u.ap.beacon)) -			drv_stop_ap(local, 
sdata); - -		if (local->use_chanctx) { -			struct ieee80211_chanctx_conf *conf; - -			mutex_lock(&local->chanctx_mtx); -			conf = rcu_dereference_protected( -					sdata->vif.chanctx_conf, -					lockdep_is_held(&local->chanctx_mtx)); -			if (conf) { -				ctx = container_of(conf, -						   struct ieee80211_chanctx, -						   conf); -				drv_unassign_vif_chanctx(local, sdata, ctx); -			} - -			mutex_unlock(&local->chanctx_mtx); -		}  		drv_remove_interface(local, sdata);  	} -	sdata = rtnl_dereference(local->monitor_sdata); -	if (sdata) { -		if (local->use_chanctx) { -			struct ieee80211_chanctx_conf *conf; - -			mutex_lock(&local->chanctx_mtx); -			conf = rcu_dereference_protected( -					sdata->vif.chanctx_conf, -					lockdep_is_held(&local->chanctx_mtx)); -			if (conf) { -				ctx = container_of(conf, -						   struct ieee80211_chanctx, -						   conf); -				drv_unassign_vif_chanctx(local, sdata, ctx); -			} - -			mutex_unlock(&local->chanctx_mtx); -		} - -		drv_remove_interface(local, sdata); -	} - -	mutex_lock(&local->chanctx_mtx); -	list_for_each_entry(ctx, &local->chanctx_list, list) -		drv_remove_chanctx(local, ctx); -	mutex_unlock(&local->chanctx_mtx); +	/* +	 * We disconnected on all interfaces before suspend, all channel +	 * contexts should be released. +	 */ +	WARN_ON(!list_empty(&local->chanctx_list));  	/* stop hardware - this must stop RX */  	if (local->open_count) diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index dd88381c53b..0d51877efdb 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -252,6 +252,25 @@ rate_lowest_non_cck_index(struct ieee80211_supported_band *sband,  	return 0;  } +static void __rate_control_send_low(struct ieee80211_hw *hw, +				    struct ieee80211_supported_band *sband, +				    struct ieee80211_sta *sta, +				    struct ieee80211_tx_info *info) +{ +	if ((sband->band != IEEE80211_BAND_2GHZ) || +	    !(info->flags & IEEE80211_TX_CTL_NO_CCK_RATE)) +		info->control.rates[0].idx = rate_lowest_index(sband, sta); +	else +		info->control.rates[0].idx = +			rate_lowest_non_cck_index(sband, sta); + +	info->control.rates[0].count = +		(info->flags & IEEE80211_TX_CTL_NO_ACK) ? +		1 : hw->max_rate_tries; + +	info->control.skip_table = 1; +} +  bool rate_control_send_low(struct ieee80211_sta *sta,  			   void *priv_sta, @@ -262,16 +281,8 @@ bool rate_control_send_low(struct ieee80211_sta *sta,  	int mcast_rate;  	if (!sta || !priv_sta || rc_no_data_or_no_ack_use_min(txrc)) { -		if ((sband->band != IEEE80211_BAND_2GHZ) || -		    !(info->flags & IEEE80211_TX_CTL_NO_CCK_RATE)) -			info->control.rates[0].idx = -				rate_lowest_index(txrc->sband, sta); -		else -			info->control.rates[0].idx = -				rate_lowest_non_cck_index(txrc->sband, sta); -		info->control.rates[0].count = -			(info->flags & IEEE80211_TX_CTL_NO_ACK) ? 
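
The __rate_control_send_low() helper factored out here implements the last-resort rate choice: the lowest rate in the band, or the lowest non-CCK rate when IEEE80211_TX_CTL_NO_CCK_RATE is set on 2.4 GHz. A toy model of that selection, assuming a fabricated 2.4 GHz rate table (bitrates in 100 kbit/s units):

#include <stdbool.h>
#include <stdio.h>

/* Fabricated 2.4 GHz table: the four 802.11b CCK rates first, then a
 * few OFDM rates, mirroring how mac80211 orders its band tables. */
struct toy_rate {
	int bitrate;	/* units of 100 kbit/s */
	bool cck;
};

static const struct toy_rate rates_2ghz[] = {
	{ 10, true }, { 20, true }, { 55, true }, { 110, true },
	{ 60, false }, { 120, false }, { 240, false },
};

/* Same idea as the rate_lowest_index()/rate_lowest_non_cck_index()
 * pair used by __rate_control_send_low(): first usable entry wins. */
static int lowest_rate_index(bool no_cck)
{
	for (unsigned int i = 0; i < sizeof(rates_2ghz) / sizeof(rates_2ghz[0]); i++) {
		if (no_cck && rates_2ghz[i].cck)
			continue;
		return i;
	}
	return 0;
}

int main(void)
{
	printf("lowest: %d, lowest non-CCK: %d\n",
	       lowest_rate_index(false), lowest_rate_index(true));
	return 0;
}

Setting info->control.skip_table = 1 afterwards keeps the per-station rate table from overriding this explicit fallback in rate_control_fill_sta_table().
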
-			1 : txrc->hw->max_rate_tries; +		__rate_control_send_low(txrc->hw, sband, sta, info); +  		if (!sta && txrc->bss) {  			mcast_rate = txrc->bss_conf->mcast_rate[sband->band];  			if (mcast_rate > 0) { @@ -355,7 +366,8 @@ static bool rate_idx_match_mcs_mask(struct ieee80211_tx_rate *rate,   static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, -				struct ieee80211_tx_rate_control *txrc, +				struct ieee80211_supported_band *sband, +				enum nl80211_chan_width chan_width,  				u32 mask,  				u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN])  { @@ -375,27 +387,17 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate,  				  IEEE80211_TX_RC_USE_SHORT_PREAMBLE);  		alt_rate.count = rate->count;  		if (rate_idx_match_legacy_mask(&alt_rate, -					       txrc->sband->n_bitrates, -					       mask)) { +					       sband->n_bitrates, mask)) {  			*rate = alt_rate;  			return;  		}  	} else { -		struct sk_buff *skb = txrc->skb; -		struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; -		__le16 fc; -  		/* handle legacy rates */ -		if (rate_idx_match_legacy_mask(rate, txrc->sband->n_bitrates, -					       mask)) +		if (rate_idx_match_legacy_mask(rate, sband->n_bitrates, mask))  			return;  		/* if HT BSS, and we handle a data frame, also try HT rates */ -		if (txrc->bss_conf->chandef.width == NL80211_CHAN_WIDTH_20_NOHT) -			return; - -		fc = hdr->frame_control; -		if (!ieee80211_is_data(fc)) +		if (chan_width == NL80211_CHAN_WIDTH_20_NOHT)  			return;  		alt_rate.idx = 0; @@ -408,7 +410,7 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate,  		alt_rate.flags |= IEEE80211_TX_RC_MCS; -		if (txrc->bss_conf->chandef.width == NL80211_CHAN_WIDTH_40) +		if (chan_width == NL80211_CHAN_WIDTH_40)  			alt_rate.flags |= IEEE80211_TX_RC_40_MHZ_WIDTH;  		if (rate_idx_match_mcs_mask(&alt_rate, mcs_mask)) { @@ -426,6 +428,228 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate,  	 */  } +static void rate_fixup_ratelist(struct ieee80211_vif *vif, +				struct ieee80211_supported_band *sband, +				struct ieee80211_tx_info *info, +				struct ieee80211_tx_rate *rates, +				int max_rates) +{ +	struct ieee80211_rate *rate; +	bool inval = false; +	int i; + +	/* +	 * Set up the RTS/CTS rate as the fastest basic rate +	 * that is not faster than the data rate unless there +	 * is no basic rate slower than the data rate, in which +	 * case we pick the slowest basic rate +	 * +	 * XXX: Should this check all retry rates? +	 */ +	if (!(rates[0].flags & IEEE80211_TX_RC_MCS)) { +		u32 basic_rates = vif->bss_conf.basic_rates; +		s8 baserate = basic_rates ? ffs(basic_rates) - 1 : 0; + +		rate = &sband->bitrates[rates[0].idx]; + +		for (i = 0; i < sband->n_bitrates; i++) { +			/* must be a basic rate */ +			if (!(basic_rates & BIT(i))) +				continue; +			/* must not be faster than the data rate */ +			if (sband->bitrates[i].bitrate > rate->bitrate) +				continue; +			/* maximum */ +			if (sband->bitrates[baserate].bitrate < +			     sband->bitrates[i].bitrate) +				baserate = i; +		} + +		info->control.rts_cts_rate_idx = baserate; +	} + +	for (i = 0; i < max_rates; i++) { +		/* +		 * make sure there's no valid rate following +		 * an invalid one, just in case drivers don't +		 * take the API seriously to stop at -1. +		 */ +		if (inval) { +			rates[i].idx = -1; +			continue; +		} +		if (rates[i].idx < 0) { +			inval = true; +			continue; +		} + +		/* +		 * For now assume MCS is already set up correctly, this +		 * needs to be fixed. 
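
The loop that begins here in rate_fixup_ratelist() enforces a simple invariant: once one slot in the rate array is invalid, every later slot is forced to -1 as well, so drivers that do not stop at the first terminator cannot pick up stale entries. The invariant in isolation, with a hypothetical four-slot array:

#include <stdbool.h>
#include <stdio.h>

struct toy_tx_rate {
	signed char idx;		/* -1 marks an invalid slot */
	unsigned char count;
};

/* The invariant rate_fixup_ratelist() enforces: once a slot is invalid,
 * all following slots are forced to -1 too, in case a driver does not
 * stop at the first terminator. */
static void terminate_rate_list(struct toy_tx_rate *rates, int n)
{
	bool inval = false;

	for (int i = 0; i < n; i++) {
		if (inval) {
			rates[i].idx = -1;
			continue;
		}
		if (rates[i].idx < 0)
			inval = true;
	}
}

int main(void)
{
	struct toy_tx_rate r[4] = { { 5, 3 }, { -1, 0 }, { 2, 3 }, { 0, 3 } };

	terminate_rate_list(r, 4);
	for (int i = 0; i < 4; i++)
		printf("slot %d: idx %d\n", i, r[i].idx);
	return 0;
}

After the call, slots 2 and 3 read -1 even though they originally held plausible-looking rate indexes.
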
+		 */ +		if (rates[i].flags & IEEE80211_TX_RC_MCS) { +			WARN_ON(rates[i].idx > 76); + +			if (!(rates[i].flags & IEEE80211_TX_RC_USE_RTS_CTS) && +			    info->control.use_cts_prot) +				rates[i].flags |= +					IEEE80211_TX_RC_USE_CTS_PROTECT; +			continue; +		} + +		if (rates[i].flags & IEEE80211_TX_RC_VHT_MCS) { +			WARN_ON(ieee80211_rate_get_vht_mcs(&rates[i]) > 9); +			continue; +		} + +		/* set up RTS protection if desired */ +		if (info->control.use_rts) { +			rates[i].flags |= IEEE80211_TX_RC_USE_RTS_CTS; +			info->control.use_cts_prot = false; +		} + +		/* RC is busted */ +		if (WARN_ON_ONCE(rates[i].idx >= sband->n_bitrates)) { +			rates[i].idx = -1; +			continue; +		} + +		rate = &sband->bitrates[rates[i].idx]; + +		/* set up short preamble */ +		if (info->control.short_preamble && +		    rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) +			rates[i].flags |= IEEE80211_TX_RC_USE_SHORT_PREAMBLE; + +		/* set up G protection */ +		if (!(rates[i].flags & IEEE80211_TX_RC_USE_RTS_CTS) && +		    info->control.use_cts_prot && +		    rate->flags & IEEE80211_RATE_ERP_G) +			rates[i].flags |= IEEE80211_TX_RC_USE_CTS_PROTECT; +	} +} + + +static void rate_control_fill_sta_table(struct ieee80211_sta *sta, +					struct ieee80211_tx_info *info, +					struct ieee80211_tx_rate *rates, +					int max_rates) +{ +	struct ieee80211_sta_rates *ratetbl = NULL; +	int i; + +	if (sta && !info->control.skip_table) +		ratetbl = rcu_dereference(sta->rates); + +	/* Fill remaining rate slots with data from the sta rate table. */ +	max_rates = min_t(int, max_rates, IEEE80211_TX_RATE_TABLE_SIZE); +	for (i = 0; i < max_rates; i++) { +		if (i < ARRAY_SIZE(info->control.rates) && +		    info->control.rates[i].idx >= 0 && +		    info->control.rates[i].count) { +			if (rates != info->control.rates) +				rates[i] = info->control.rates[i]; +		} else if (ratetbl) { +			rates[i].idx = ratetbl->rate[i].idx; +			rates[i].flags = ratetbl->rate[i].flags; +			if (info->control.use_rts) +				rates[i].count = ratetbl->rate[i].count_rts; +			else if (info->control.use_cts_prot) +				rates[i].count = ratetbl->rate[i].count_cts; +			else +				rates[i].count = ratetbl->rate[i].count; +		} else { +			rates[i].idx = -1; +			rates[i].count = 0; +		} + +		if (rates[i].idx < 0 || !rates[i].count) +			break; +	} +} + +static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata, +				    struct ieee80211_sta *sta, +				    struct ieee80211_supported_band *sband, +				    struct ieee80211_tx_info *info, +				    struct ieee80211_tx_rate *rates, +				    int max_rates) +{ +	enum nl80211_chan_width chan_width; +	u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN]; +	bool has_mcs_mask; +	u32 mask; +	int i; + +	/* +	 * Try to enforce the rateidx mask the user wanted. skip this if the +	 * default mask (allow all rates) is used to save some processing for +	 * the common case. 
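
rate_control_apply_mask(), whose opening comment appears above, keeps the common case cheap: when the configured mask allows every legacy rate and no MCS mask is set, it returns before touching a single rate. A compact sketch of that gate; the function name and the 12-rate band are made up:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Returns true when per-rate mask enforcement is actually needed,
 * mirroring the early-return test in rate_control_apply_mask().
 * A real caller would additionally AND the mask with the station's
 * supported-rate bitmap before applying it. */
static bool mask_needs_work(uint32_t mask, int n_bitrates, bool has_mcs_mask)
{
	return mask != (1u << n_bitrates) - 1 || has_mcs_mask;
}

int main(void)
{
	int n = 12;			/* legacy bitrates in the band */
	uint32_t all = (1u << n) - 1;

	printf("default mask:  %d\n", mask_needs_work(all, n, false));
	printf("CCK masked:    %d\n", mask_needs_work(all & ~0xfu, n, false));
	return 0;
}
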
+	 */ +	mask = sdata->rc_rateidx_mask[info->band]; +	has_mcs_mask = sdata->rc_has_mcs_mask[info->band]; +	if (mask == (1 << sband->n_bitrates) - 1 && !has_mcs_mask) +		return; + +	if (has_mcs_mask) +		memcpy(mcs_mask, sdata->rc_rateidx_mcs_mask[info->band], +		       sizeof(mcs_mask)); +	else +		memset(mcs_mask, 0xff, sizeof(mcs_mask)); + +	if (sta) { +		/* Filter out rates that the STA does not support */ +		mask &= sta->supp_rates[info->band]; +		for (i = 0; i < sizeof(mcs_mask); i++) +			mcs_mask[i] &= sta->ht_cap.mcs.rx_mask[i]; +	} + +	/* +	 * Make sure the rate index selected for each TX rate is +	 * included in the configured mask and change the rate indexes +	 * if needed. +	 */ +	chan_width = sdata->vif.bss_conf.chandef.width; +	for (i = 0; i < max_rates; i++) { +		/* Skip invalid rates */ +		if (rates[i].idx < 0) +			break; + +		rate_idx_match_mask(&rates[i], sband, chan_width, mask, +				    mcs_mask); +	} +} + +void ieee80211_get_tx_rates(struct ieee80211_vif *vif, +			    struct ieee80211_sta *sta, +			    struct sk_buff *skb, +			    struct ieee80211_tx_rate *dest, +			    int max_rates) +{ +	struct ieee80211_sub_if_data *sdata; +	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; +	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); +	struct ieee80211_supported_band *sband; + +	rate_control_fill_sta_table(sta, info, dest, max_rates); + +	if (!vif) +		return; + +	sdata = vif_to_sdata(vif); +	sband = sdata->local->hw.wiphy->bands[info->band]; + +	if (ieee80211_is_data(hdr->frame_control)) +		rate_control_apply_mask(sdata, sta, sband, info, dest, max_rates); + +	if (dest[0].idx < 0) +		__rate_control_send_low(&sdata->local->hw, sband, sta, info); + +	if (sta) +		rate_fixup_ratelist(vif, sband, info, dest, max_rates); +} +EXPORT_SYMBOL(ieee80211_get_tx_rates); +  void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,  			   struct sta_info *sta,  			   struct ieee80211_tx_rate_control *txrc) @@ -435,8 +659,6 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,  	struct ieee80211_sta *ista = NULL;  	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb);  	int i; -	u32 mask; -	u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN];  	if (sta && test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) {  		ista = &sta->sta; @@ -454,37 +676,27 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,  	ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); -	/* -	 * Try to enforce the rateidx mask the user wanted. skip this if the -	 * default mask (allow all rates) is used to save some processing for -	 * the common case. -	 */ -	mask = sdata->rc_rateidx_mask[info->band]; -	memcpy(mcs_mask, sdata->rc_rateidx_mcs_mask[info->band], -	       sizeof(mcs_mask)); -	if (mask != (1 << txrc->sband->n_bitrates) - 1) { -		if (sta) { -			/* Filter out rates that the STA does not support */ -			mask &= sta->sta.supp_rates[info->band]; -			for (i = 0; i < sizeof(mcs_mask); i++) -				mcs_mask[i] &= sta->sta.ht_cap.mcs.rx_mask[i]; -		} -		/* -		 * Make sure the rate index selected for each TX rate is -		 * included in the configured mask and change the rate indexes -		 * if needed. 
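
The minstrel changes that follow replace the old percent-scaled probability bookkeeping (0..18000) with the MINSTREL_FRAC() fixed-point helpers and an exponentially weighted moving average. A self-contained sketch of that arithmetic; the scale and weight constants are illustrative stand-ins for the ones defined in rc80211_minstrel.h and may differ:

#include <stdio.h>

/* Illustrative fixed-point helpers in the style of MINSTREL_FRAC()
 * and minstrel_ewma(). */
#define SCALE		16
#define FRAC(val, div)	(((val) << SCALE) / (div))
#define TRUNC(val)	((val) >> SCALE)
#define EWMA_DIV	128
#define EWMA_LEVEL	96	/* weight kept by the old average */

static int ewma(int old, int cur, int weight)
{
	return (cur * (EWMA_DIV - weight) + old * weight) / EWMA_DIV;
}

int main(void)
{
	int prob = FRAC(50, 100);	/* start at 50% success */

	/* feed three perfect intervals: the average converges slowly,
	 * which is exactly the smoothing minstrel wants */
	for (int i = 0; i < 3; i++) {
		prob = ewma(prob, FRAC(100, 100), EWMA_LEVEL);
		printf("avg success: %d%%\n", TRUNC(prob * 100));
	}
	return 0;
}

With these constants the average climbs 50% -> 62% -> 71% -> 78%, never jumping straight to 100% after a single good interval.
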
-		 */ -		for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { -			/* Skip invalid rates */ -			if (info->control.rates[i].idx < 0) -				break; -			rate_idx_match_mask(&info->control.rates[i], txrc, -					    mask, mcs_mask); -		} -	} +	if (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_RC_TABLE) +		return; + +	ieee80211_get_tx_rates(&sdata->vif, ista, txrc->skb, +			       info->control.rates, +			       ARRAY_SIZE(info->control.rates)); +} -	BUG_ON(info->control.rates[0].idx < 0); +int rate_control_set_rates(struct ieee80211_hw *hw, +			   struct ieee80211_sta *pubsta, +			   struct ieee80211_sta_rates *rates) +{ +	struct ieee80211_sta_rates *old = rcu_dereference(pubsta->rates); + +	rcu_assign_pointer(pubsta->rates, rates); +	if (old) +		kfree_rcu(old, rcu_head); + +	return 0;  } +EXPORT_SYMBOL(rate_control_set_rates);  int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,  				 const char *name) diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index eea45a2c7c3..ac7ef5414bd 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -55,7 +55,6 @@  #include "rate.h"  #include "rc80211_minstrel.h" -#define SAMPLE_COLUMNS	10  #define SAMPLE_TBL(_mi, _idx, _col) \  		_mi->sample_table[(_idx * SAMPLE_COLUMNS) + _col] @@ -70,16 +69,75 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix)  	return i;  } +/* find & sort topmost throughput rates */ +static inline void +minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list) +{ +	int j = MAX_THR_RATES; + +	while (j > 0 && mi->r[i].cur_tp > mi->r[tp_list[j - 1]].cur_tp) +		j--; +	if (j < MAX_THR_RATES - 1) +		memmove(&tp_list[j + 1], &tp_list[j], MAX_THR_RATES - (j + 1)); +	if (j < MAX_THR_RATES) +		tp_list[j] = i; +} + +static void +minstrel_set_rate(struct minstrel_sta_info *mi, struct ieee80211_sta_rates *ratetbl, +		  int offset, int idx) +{ +	struct minstrel_rate *r = &mi->r[idx]; + +	ratetbl->rate[offset].idx = r->rix; +	ratetbl->rate[offset].count = r->adjusted_retry_count; +	ratetbl->rate[offset].count_cts = r->retry_count_cts; +	ratetbl->rate[offset].count_rts = r->retry_count_rtscts; +} + +static void +minstrel_update_rates(struct minstrel_priv *mp, struct minstrel_sta_info *mi) +{ +	struct ieee80211_sta_rates *ratetbl; +	int i = 0; + +	ratetbl = kzalloc(sizeof(*ratetbl), GFP_ATOMIC); +	if (!ratetbl) +		return; + +	/* Start with max_tp_rate */ +	minstrel_set_rate(mi, ratetbl, i++, mi->max_tp_rate[0]); + +	if (mp->hw->max_rates >= 3) { +		/* At least 3 tx rates supported, use max_tp_rate2 next */ +		minstrel_set_rate(mi, ratetbl, i++, mi->max_tp_rate[1]); +	} + +	if (mp->hw->max_rates >= 2) { +		/* At least 2 tx rates supported, use max_prob_rate next */ +		minstrel_set_rate(mi, ratetbl, i++, mi->max_prob_rate); +	} + +	/* Use lowest rate last */ +	ratetbl->rate[i].idx = mi->lowest_rix; +	ratetbl->rate[i].count = mp->max_retry; +	ratetbl->rate[i].count_cts = mp->max_retry; +	ratetbl->rate[i].count_rts = mp->max_retry; + +	rate_control_set_rates(mp->hw, mi->sta, ratetbl); +} +  static void  minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)  { -	u32 max_tp = 0, index_max_tp = 0, index_max_tp2 = 0; -	u32 max_prob = 0, index_max_prob = 0; +	u8 tmp_tp_rate[MAX_THR_RATES]; +	u8 tmp_prob_rate = 0;  	u32 usecs; -	u32 p;  	int i; -	mi->stats_update = jiffies; +	for (i=0; i < MAX_THR_RATES; i++) +	    tmp_tp_rate[i] = 0; +  	for (i = 0; i < mi->n_rates; i++) {  		struct minstrel_rate *mr = &mi->r[i]; @@ -87,27 +145,32 @@ 
minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
 		if (!usecs)
 			usecs = 1000000;
 
-		/* To avoid rounding issues, probabilities scale from 0 (0%)
-		 * to 18000 (100%) */
-		if (mr->attempts) {
-			p = (mr->success * 18000) / mr->attempts;
+		if (unlikely(mr->attempts > 0)) {
+			mr->sample_skipped = 0;
+			mr->cur_prob = MINSTREL_FRAC(mr->success, mr->attempts);
 			mr->succ_hist += mr->success;
 			mr->att_hist += mr->attempts;
-			mr->cur_prob = p;
-			p = ((p * (100 - mp->ewma_level)) + (mr->probability *
-				mp->ewma_level)) / 100;
-			mr->probability = p;
-			mr->cur_tp = p * (1000000 / usecs);
-		}
+			mr->probability = minstrel_ewma(mr->probability,
+							mr->cur_prob,
+							EWMA_LEVEL);
+		} else
+			mr->sample_skipped++;
 
 		mr->last_success = mr->success;
 		mr->last_attempts = mr->attempts;
 		mr->success = 0;
 		mr->attempts = 0;
 
+		/* Update throughput per rate; reset it below 10% success */
+		if (mr->probability < MINSTREL_FRAC(10, 100))
+			mr->cur_tp = 0;
+		else
+			mr->cur_tp = mr->probability * (1000000 / usecs);
+
 		/* Sample less often below the 10% chance of success.
 		 * Sample less often above the 95% chance of success. */
-		if ((mr->probability > 17100) || (mr->probability < 1800)) {
+		if (mr->probability > MINSTREL_FRAC(95, 100) ||
+		    mr->probability < MINSTREL_FRAC(10, 100)) {
 			mr->adjusted_retry_count = mr->retry_count >> 1;
 			if (mr->adjusted_retry_count > 2)
 				mr->adjusted_retry_count = 2;
@@ -118,35 +181,32 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi)
 		}
 		if (!mr->adjusted_retry_count)
 			mr->adjusted_retry_count = 2;
-	}
 
-	for (i = 0; i < mi->n_rates; i++) {
-		struct minstrel_rate *mr = &mi->r[i];
-		if (max_tp < mr->cur_tp) {
-			index_max_tp = i;
-			max_tp = mr->cur_tp;
-		}
-		if (max_prob < mr->probability) {
-			index_max_prob = i;
-			max_prob = mr->probability;
+		minstrel_sort_best_tp_rates(mi, i, tmp_tp_rate);
+
+		/* To determine the most robust rate (max_prob_rate) used at
+		 * the 3rd MRR stage we distinguish between two cases:
+		 * (1) if any success probability >= 95%, out of those rates
+		 * choose the maximum throughput rate as max_prob_rate
+		 * (2) if all success probabilities < 95%, the rate with the
+		 * highest success probability is chosen as max_prob_rate */
+		if (mr->probability >= MINSTREL_FRAC(95, 100)) {
+			if (mr->cur_tp >= mi->r[tmp_prob_rate].cur_tp)
+				tmp_prob_rate = i;
+		} else {
+			if (mr->probability >= mi->r[tmp_prob_rate].probability)
+				tmp_prob_rate = i;
 		}
 	}
 
-	max_tp = 0;
-	for (i = 0; i < mi->n_rates; i++) {
-		struct minstrel_rate *mr = &mi->r[i];
+	/* Assign the new rate set */
+	memcpy(mi->max_tp_rate, tmp_tp_rate, sizeof(mi->max_tp_rate));
+	mi->max_prob_rate = tmp_prob_rate;
 
-		if (i == index_max_tp)
-			continue;
+	/* Reset update timer */
+	mi->stats_update = jiffies;
 
-		if (max_tp < mr->cur_tp) {
-			index_max_tp2 = i;
-			max_tp = mr->cur_tp;
-		}
-	}
-	mi->max_tp_rate = index_max_tp;
-	mi->max_tp_rate2 = index_max_tp2;
-	mi->max_prob_rate = index_max_prob;
+	minstrel_update_rates(mp, mi);
 }
 
 static void
@@ -195,9 +255,9 @@ minstrel_get_retry_count(struct minstrel_rate *mr,
 {
 	unsigned int retry = mr->adjusted_retry_count;
 
-	if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_RTS_CTS)
+	if (info->control.use_rts)
 		retry = max(2U, min(mr->retry_count_rtscts, retry));
-	else if (info->control.rates[0].flags & IEEE80211_TX_RC_USE_CTS_PROTECT)
+	else if (info->control.use_cts_prot)
 		retry = max(2U,
min(mr->retry_count_cts, retry));  	return retry;  } @@ -207,10 +267,10 @@ static int  minstrel_get_next_sample(struct minstrel_sta_info *mi)  {  	unsigned int sample_ndx; -	sample_ndx = SAMPLE_TBL(mi, mi->sample_idx, mi->sample_column); -	mi->sample_idx++; -	if ((int) mi->sample_idx > (mi->n_rates - 2)) { -		mi->sample_idx = 0; +	sample_ndx = SAMPLE_TBL(mi, mi->sample_row, mi->sample_column); +	mi->sample_row++; +	if ((int) mi->sample_row >= mi->n_rates) { +		mi->sample_row = 0;  		mi->sample_column++;  		if (mi->sample_column >= SAMPLE_COLUMNS)  			mi->sample_column = 0; @@ -226,111 +286,96 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,  	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);  	struct minstrel_sta_info *mi = priv_sta;  	struct minstrel_priv *mp = priv; -	struct ieee80211_tx_rate *ar = info->control.rates; -	unsigned int ndx, sample_ndx = 0; -	bool mrr; -	bool sample_slower = false; -	bool sample = false; -	int i, delta; -	int mrr_ndx[3]; -	int sample_rate; +	struct ieee80211_tx_rate *rate = &info->control.rates[0]; +	struct minstrel_rate *msr, *mr; +	unsigned int ndx; +	bool mrr_capable; +	bool prev_sample = mi->prev_sample; +	int delta; +	int sampling_ratio; +	/* management/no-ack frames do not use rate control */  	if (rate_control_send_low(sta, priv_sta, txrc))  		return; -	mrr = mp->has_mrr && !txrc->rts && !txrc->bss_conf->use_cts_prot; - -	ndx = mi->max_tp_rate; - -	if (mrr) -		sample_rate = mp->lookaround_rate_mrr; +	/* check multi-rate-retry capabilities & adjust lookaround_rate */ +	mrr_capable = mp->has_mrr && +		      !txrc->rts && +		      !txrc->bss_conf->use_cts_prot; +	if (mrr_capable) +		sampling_ratio = mp->lookaround_rate_mrr;  	else -		sample_rate = mp->lookaround_rate; +		sampling_ratio = mp->lookaround_rate; +	/* increase sum packet counter */  	mi->packet_count++; -	delta = (mi->packet_count * sample_rate / 100) - + +	delta = (mi->packet_count * sampling_ratio / 100) -  			(mi->sample_count + mi->sample_deferred / 2); -	/* delta > 0: sampling required */ -	if ((delta > 0) && (mrr || !mi->prev_sample)) { -		struct minstrel_rate *msr; -		if (mi->packet_count >= 10000) { -			mi->sample_deferred = 0; -			mi->sample_count = 0; -			mi->packet_count = 0; -		} else if (delta > mi->n_rates * 2) { -			/* With multi-rate retry, not every planned sample -			 * attempt actually gets used, due to the way the retry -			 * chain is set up - [max_tp,sample,prob,lowest] for -			 * sample_rate < max_tp. -			 * -			 * If there's too much sampling backlog and the link -			 * starts getting worse, minstrel would start bursting -			 * out lots of sampling frames, which would result -			 * in a large throughput loss. */ -			mi->sample_count += (delta - mi->n_rates * 2); -		} +	/* delta < 0: no sampling required */ +	mi->prev_sample = false; +	if (delta < 0 || (!mrr_capable && prev_sample)) +		return; -		sample_ndx = minstrel_get_next_sample(mi); -		msr = &mi->r[sample_ndx]; -		sample = true; -		sample_slower = mrr && (msr->perfect_tx_time > -			mi->r[ndx].perfect_tx_time); +	if (mi->packet_count >= 10000) { +		mi->sample_deferred = 0; +		mi->sample_count = 0; +		mi->packet_count = 0; +	} else if (delta > mi->n_rates * 2) { +		/* With multi-rate retry, not every planned sample +		 * attempt actually gets used, due to the way the retry +		 * chain is set up - [max_tp,sample,prob,lowest] for +		 * sample_rate < max_tp. 
+		 *
+		 * If there's too much sampling backlog and the link
+		 * starts getting worse, minstrel would start bursting
+		 * out lots of sampling frames, which would result
+		 * in a large throughput loss. */
+		mi->sample_count += (delta - mi->n_rates * 2);
+	}
 
-		if (!sample_slower) {
-			if (msr->sample_limit != 0) {
-				ndx = sample_ndx;
-				mi->sample_count++;
-				if (msr->sample_limit > 0)
-					msr->sample_limit--;
-			} else {
-				sample = false;
-			}
-		} else {
-			/* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark
-			 * packets that have the sampling rate deferred to the
-			 * second MRR stage. Increase the sample counter only
-			 * if the deferred sample rate was actually used.
-			 * Use the sample_deferred counter to make sure that
-			 * the sampling is not done in large bursts */
-			info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
-			mi->sample_deferred++;
-		}
+	/* get next random rate sample */
+	ndx = minstrel_get_next_sample(mi);
+	msr = &mi->r[ndx];
+	mr = &mi->r[mi->max_tp_rate[0]];
+
+	/* Decide whether the direct (1st MRR stage) or the indirect
+	 * (2nd MRR stage) rate sampling method should be used.
+	 * Respect rates that have not been sampled for 20 iterations.
+	 */
+	if (mrr_capable &&
+	    msr->perfect_tx_time > mr->perfect_tx_time &&
+	    msr->sample_skipped < 20) {
+		/* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark
+		 * packets that have the sampling rate deferred to the
+		 * second MRR stage. Increase the sample counter only
+		 * if the deferred sample rate was actually used.
+		 * Use the sample_deferred counter to make sure that
+		 * the sampling is not done in large bursts */
+		info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
+		rate++;
+		mi->sample_deferred++;
+	} else {
+		if (!msr->sample_limit)
+			return;
+
+		mi->sample_count++;
+		if (msr->sample_limit > 0)
+			msr->sample_limit--;
 	}
-	mi->prev_sample = sample;
 
 	/* If we're not using MRR and the sampling rate already
 	 * has a probability of >95%, we shouldn't be attempting
 	 * to use it, as this only wastes precious airtime */
-	if (!mrr && sample && (mi->r[ndx].probability > 17100))
-		ndx = mi->max_tp_rate;
-
-	ar[0].idx = mi->r[ndx].rix;
-	ar[0].count = minstrel_get_retry_count(&mi->r[ndx], info);
-
-	if (!mrr) {
-		if (!sample)
-			ar[0].count = mp->max_retry;
-		ar[1].idx = mi->lowest_rix;
-		ar[1].count = mp->max_retry;
+	if (!mrr_capable &&
+	   (mi->r[ndx].probability > MINSTREL_FRAC(95, 100)))
 		return;
-	}
 
-	/* MRR setup */
-	if (sample) {
-		if (sample_slower)
-			mrr_ndx[0] = sample_ndx;
-		else
-			mrr_ndx[0] = mi->max_tp_rate;
-	} else {
-		mrr_ndx[0] = mi->max_tp_rate2;
-	}
-	mrr_ndx[1] = mi->max_prob_rate;
-	mrr_ndx[2] = 0;
-	for (i = 1; i < 4; i++) {
-		ar[i].idx = mi->r[mrr_ndx[i - 1]].rix;
-		ar[i].count = mi->r[mrr_ndx[i - 1]].adjusted_retry_count;
-	}
+	mi->prev_sample = true;
+
+	rate->idx = mi->r[ndx].rix;
+	rate->count = minstrel_get_retry_count(&mi->r[ndx], info);
 }
 
 @@ -351,26 +396,21 @@ static void
 init_sample_table(struct minstrel_sta_info *mi)
 {
 	unsigned int i, col, new_idx;
-	unsigned int n_srates = mi->n_rates - 1;
 	u8 rnd[8];
 
 	mi->sample_column = 0;
-	mi->sample_idx = 0;
-	memset(mi->sample_table, 0, SAMPLE_COLUMNS * mi->n_rates);
+	mi->sample_row = 0;
+	memset(mi->sample_table, 0xff, SAMPLE_COLUMNS * mi->n_rates);
 
 	for (col = 0; col < SAMPLE_COLUMNS; col++) {
-		for (i = 0; i < n_srates; i++) {
+		for (i = 0; i < mi->n_rates; i++) {
 			get_random_bytes(rnd, sizeof(rnd));
-			new_idx = (i + rnd[i & 7]) % n_srates;
+			new_idx = (i
+ rnd[i & 7]) % mi->n_rates;
 
-			while (SAMPLE_TBL(mi, new_idx, col) != 0)
-				new_idx = (new_idx + 1) % n_srates;
+			while (SAMPLE_TBL(mi, new_idx, col) != 0xff)
+				new_idx = (new_idx + 1) % mi->n_rates;
 
-			/* Don't sample the slowest rate (i.e. slowest base
-			 * rate). We must presume that the slowest rate works
-			 * fine, or else other management frames will also be
-			 * failing and the link will break */
-			SAMPLE_TBL(mi, new_idx, col) = i + 1;
+			SAMPLE_TBL(mi, new_idx, col) = i;
 		}
 	}
 }
@@ -385,12 +425,16 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
 	unsigned int i, n = 0;
 	unsigned int t_slot = 9; /* FIXME: get real slot time */
 
+	mi->sta = sta;
 	mi->lowest_rix = rate_lowest_index(sband, sta);
 	ctl_rate = &sband->bitrates[mi->lowest_rix];
 	mi->sp_ack_dur = ieee80211_frame_duration(sband->band, 10,
 				ctl_rate->bitrate,
 				!!(ctl_rate->flags & IEEE80211_RATE_ERP_G), 1);
+	memset(mi->max_tp_rate, 0, sizeof(mi->max_tp_rate));
+	mi->max_prob_rate = 0;
+
 	for (i = 0; i < sband->n_bitrates; i++) {
 		struct minstrel_rate *mr = &mi->r[n];
 		unsigned int tx_time = 0, tx_time_cts = 0, tx_time_rtscts = 0;
@@ -433,6 +477,8 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
 		} while ((tx_time < mp->segment_size) &&
 				(++mr->retry_count < mp->max_retry));
 		mr->adjusted_retry_count = mr->retry_count;
+		if (!(sband->bitrates[i].flags & IEEE80211_RATE_ERP_G))
+			mr->retry_count_cts = mr->retry_count;
 	}
 
 	for (i = n; i < sband->n_bitrates; i++) {
@@ -444,6 +490,7 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband,
 
 	mi->stats_update = jiffies;
 	init_sample_table(mi);
+	minstrel_update_rates(mp, mi);
 }
 
 static void *
@@ -542,9 +589,6 @@ minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir)
 	mp->lookaround_rate = 5;
 	mp->lookaround_rate_mrr = 10;
 
-	/* moving average weight for EWMA */
-	mp->ewma_level = 75;
-
 	/* maximum time that the hw is allowed to stay in one MRR segment */
 	mp->segment_size = 6000;
 
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index 5ecf757817f..f4301f4b2e4 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -9,6 +9,29 @@
 #ifndef __RC_MINSTREL_H
 #define __RC_MINSTREL_H
 
+#define EWMA_LEVEL	96	/* ewma weighting factor [/EWMA_DIV] */
+#define EWMA_DIV	128
+#define SAMPLE_COLUMNS	10	/* number of columns in sample table */
+
+
+/* scaled fraction values */
+#define MINSTREL_SCALE  16
+#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / (div))
+#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)
+
+/* number of highest throughput rates to consider */
+#define MAX_THR_RATES 4
+
+/*
+ * Perform EWMA (Exponentially Weighted Moving Average) calculation
+ */
+static inline int
+minstrel_ewma(int old, int new, int weight)
+{
+	return (new * (EWMA_DIV - weight) + old * weight) / EWMA_DIV;
+}
+
+
 struct minstrel_rate {
 	int bitrate;
 	int rix;
@@ -26,6 +49,7 @@ struct minstrel_rate {
 	u32 attempts;
 	u32 last_attempts;
 	u32 last_success;
+	u8 sample_skipped;
 
 	/* parts per thousand */
 	u32 cur_prob;
@@ -39,20 +63,21 @@ struct minstrel_rate {
 };
 
 struct minstrel_sta_info {
+	struct ieee80211_sta *sta;
+
 	unsigned long stats_update;
 	unsigned int sp_ack_dur;
 	unsigned int rate_avg;
 
 	unsigned int lowest_rix;
 
-	unsigned int max_tp_rate;
-	unsigned int max_tp_rate2;
-	unsigned int max_prob_rate;
+	u8 max_tp_rate[MAX_THR_RATES];
+	u8 max_prob_rate;
 	unsigned int packet_count;
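/*
 * Editor's aside: a standalone numeric check of the fixed-point helpers
 * introduced above (macros copied from the patch, with the divisor
 * parenthesized).  Probabilities live on a 2^MINSTREL_SCALE scale, and
 * the EWMA keeps EWMA_LEVEL/EWMA_DIV (96/128 = 75%) of the old value.
 */
#include <stdio.h>

#define EWMA_LEVEL	96
#define EWMA_DIV	128
#define MINSTREL_SCALE	16
#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / (div))
#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)

static int minstrel_ewma(int old, int new, int weight)
{
	return (new * (EWMA_DIV - weight) + old * weight) / EWMA_DIV;
}

int main(void)
{
	int old = MINSTREL_FRAC(50, 100);	/* 50% -> 32768 */
	int cur = MINSTREL_FRAC(95, 100);	/* 95% -> 62259 */
	int ewma = minstrel_ewma(old, cur, EWMA_LEVEL);

	/* 0.25 * 62259 + 0.75 * 32768 = 40140, i.e. ~61% */
	printf("ewma=%d (%d%%)\n", ewma, (ewma * 100) >> MINSTREL_SCALE);
	return 0;
}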
unsigned int sample_count;  	int sample_deferred; -	unsigned int sample_idx; +	unsigned int sample_row;  	unsigned int sample_column;  	int n_rates; @@ -73,7 +98,6 @@ struct minstrel_priv {  	unsigned int cw_min;  	unsigned int cw_max;  	unsigned int max_retry; -	unsigned int ewma_level;  	unsigned int segment_size;  	unsigned int update_interval;  	unsigned int lookaround_rate; diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c index d5a56226e67..fd0b9ca1570 100644 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -68,23 +68,25 @@ minstrel_stats_open(struct inode *inode, struct file *file)  	file->private_data = ms;  	p = ms->buf; -	p += sprintf(p, "rate     throughput  ewma prob   this prob  " +	p += sprintf(p, "rate      throughput  ewma prob  this prob  "  			"this succ/attempt   success    attempts\n");  	for (i = 0; i < mi->n_rates; i++) {  		struct minstrel_rate *mr = &mi->r[i]; -		*(p++) = (i == mi->max_tp_rate) ? 'T' : ' '; -		*(p++) = (i == mi->max_tp_rate2) ? 't' : ' '; +		*(p++) = (i == mi->max_tp_rate[0]) ? 'A' : ' '; +		*(p++) = (i == mi->max_tp_rate[1]) ? 'B' : ' '; +		*(p++) = (i == mi->max_tp_rate[2]) ? 'C' : ' '; +		*(p++) = (i == mi->max_tp_rate[3]) ? 'D' : ' ';  		*(p++) = (i == mi->max_prob_rate) ? 'P' : ' ';  		p += sprintf(p, "%3u%s", mr->bitrate / 2,  				(mr->bitrate & 1 ? ".5" : "  ")); -		tp = mr->cur_tp / ((18000 << 10) / 96); -		prob = mr->cur_prob / 18; -		eprob = mr->probability / 18; +		tp = MINSTREL_TRUNC(mr->cur_tp / 10); +		prob = MINSTREL_TRUNC(mr->cur_prob * 1000); +		eprob = MINSTREL_TRUNC(mr->probability * 1000);  		p += sprintf(p, "  %6u.%1u   %6u.%1u   %6u.%1u        " -				"%3u(%3u)   %8llu    %8llu\n", +				"   %3u(%3u)  %8llu    %8llu\n",  				tp / 10, tp % 10,  				eprob / 10, eprob % 10,  				prob / 10, prob % 10, diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 3af141c6971..5b2d3012b98 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -17,8 +17,6 @@  #include "rc80211_minstrel_ht.h"  #define AVG_PKT_SIZE	1200 -#define SAMPLE_COLUMNS	10 -#define EWMA_LEVEL		75  /* Number of bits for an average sized packet */  #define MCS_NBITS (AVG_PKT_SIZE << 3) @@ -26,11 +24,11 @@  /* Number of symbols for a packet with (bps) bits per symbol */  #define MCS_NSYMS(bps) ((MCS_NBITS + (bps) - 1) / (bps)) -/* Transmission time for a packet containing (syms) symbols */ +/* Transmission time (nanoseconds) for a packet containing (syms) symbols */  #define MCS_SYMBOL_TIME(sgi, syms)					\  	(sgi ?								\ -	  ((syms) * 18 + 4) / 5 :	/* syms * 3.6 us */		\ -	  (syms) << 2			/* syms * 4 us */		\ +	  ((syms) * 18000 + 4000) / 5 :	/* syms * 3.6 us */		\ +	  ((syms) * 1000) << 2		/* syms * 4 us */		\  	)  /* Transmit duration for the raw data part of an average sized packet */ @@ -64,9 +62,9 @@  }  #define CCK_DURATION(_bitrate, _short, _len)		\ -	(10 /* SIFS */ +				\ +	(1000 * (10 /* SIFS */ +			\  	 (_short ? 72 + 24 : 144 + 48 ) +		\ -	 (8 * (_len + 4) * 10) / (_bitrate)) +	 (8 * (_len + 4) * 10) / (_bitrate)))  #define CCK_ACK_DURATION(_bitrate, _short)			\  	(CCK_DURATION((_bitrate > 10 ? 
20 : 10), false, 60) +	\
@@ -128,14 +126,8 @@ const struct mcs_group minstrel_mcs_groups[] = {
 
 static u8 sample_table[SAMPLE_COLUMNS][MCS_GROUP_RATES];
 
-/*
- * Perform EWMA (Exponentially Weighted Moving Average) calculation
- */
-static int
-minstrel_ewma(int old, int new, int weight)
-{
-	return (new * (100 - weight) + old * weight) / 100;
-}
+static void
+minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi);
 
 /*
  * Look up an MCS group index based on mac80211 rate information
  */
@@ -211,20 +203,32 @@ static void
 minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate)
 {
 	struct minstrel_rate_stats *mr;
-	unsigned int usecs = 0;
+	unsigned int nsecs = 0;
+	unsigned int tp;
+	unsigned int prob;
 
 	mr = &mi->groups[group].rates[rate];
+	prob = mr->probability;
 
-	if (mr->probability < MINSTREL_FRAC(1, 10)) {
+	if (prob < MINSTREL_FRAC(1, 10)) {
 		mr->cur_tp = 0;
 		return;
 	}
 
+	/*
+	 * For the throughput calculation, limit the probability value to 90% to
+	 * account for collision related packet error rate fluctuation
+	 */
+	if (prob > MINSTREL_FRAC(9, 10))
+		prob = MINSTREL_FRAC(9, 10);
+
 	if (group != MINSTREL_CCK_GROUP)
-		usecs = mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len);
+		nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len);
 
-	usecs += minstrel_mcs_groups[group].duration[rate];
-	mr->cur_tp = MINSTREL_TRUNC((1000000 / usecs) * mr->probability);
+	nsecs += minstrel_mcs_groups[group].duration[rate];
+	tp = 1000000 * ((prob * 1000) / nsecs);
+
+	mr->cur_tp = MINSTREL_TRUNC(tp);
 }
 
 /*
@@ -243,6 +247,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
 	struct minstrel_rate_stats *mr;
 	int cur_prob, cur_prob_tp, cur_tp, cur_tp2;
 	int group, i, index;
+	bool mi_rates_valid = false;
 
 	if (mi->ampdu_packets > 0) {
 		mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len,
@@ -253,11 +258,10 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
 
 	mi->sample_slow = 0;
 	mi->sample_count = 0;
-	mi->max_tp_rate = 0;
-	mi->max_tp_rate2 = 0;
-	mi->max_prob_rate = 0;
 
 	for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
+		bool mg_rates_valid = false;
+
 		cur_prob = 0;
 		cur_prob_tp = 0;
 		cur_tp = 0;
@@ -267,15 +271,24 @@
 		if (!mg->supported)
 			continue;
 
-		mg->max_tp_rate = 0;
-		mg->max_tp_rate2 = 0;
-		mg->max_prob_rate = 0;
 		mi->sample_count++;
 
 		for (i = 0; i < MCS_GROUP_RATES; i++) {
 			if (!(mg->supported & BIT(i)))
 				continue;
 
+			/* initialize rates selections starting indexes */
+			if (!mg_rates_valid) {
+				mg->max_tp_rate = mg->max_tp_rate2 =
+					mg->max_prob_rate = i;
+				if (!mi_rates_valid) {
+					mi->max_tp_rate = mi->max_tp_rate2 =
+						mi->max_prob_rate = i;
+					mi_rates_valid = true;
+				}
+				mg_rates_valid = true;
+			}
+
 			mr = &mg->rates[i];
 			mr->retry_updated = false;
 			index = MCS_GROUP_RATES * group + i;
@@ -308,8 +321,8 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
 		}
 	}
 
-	/* try to sample up to half of the available rates during each interval */
-	mi->sample_count *= 4;
+	/* try to sample all available rates during each interval */
+	mi->sample_count *= 8;
 
 	cur_prob = 0;
 	cur_prob_tp = 0;
@@ -320,20 +333,13 @@
 	for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) {
 		mg = &mi->groups[group];
 		if (!mg->supported)
 			continue;
 
-		mr = minstrel_get_ratestats(mi,
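/*
 * Editor's aside: a standalone arithmetic sketch (invented airtime and
 * probability values) of the nanosecond-based throughput estimate in
 * minstrel_ht_calc_tp() above, using the MINSTREL_* macros from the patch.
 */
#include <stdio.h>

#define MINSTREL_SCALE	16
#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / (div))
#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE)

int main(void)
{
	unsigned int prob = MINSTREL_FRAC(9, 10);	/* capped at 90% */
	unsigned int nsecs = 54000;	/* assumed per-packet airtime, ns */
	unsigned int tp = 1000000 * ((prob * 1000) / nsecs);
	unsigned int cur_tp = MINSTREL_TRUNC(tp);
	unsigned int mbps10 = MINSTREL_TRUNC(tp / 10);	/* tenths of Mb/s */

	/* prints cur_tp=16662 ~ 166.6 Mb/s for these made-up inputs */
	printf("cur_tp=%u ~ %u.%u Mb/s\n", cur_tp, mbps10 / 10, mbps10 % 10);
	return 0;
}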
mg->max_prob_rate); -		if (cur_prob_tp < mr->cur_tp && -		    minstrel_mcs_groups[group].streams == 1) { -			mi->max_prob_rate = mg->max_prob_rate; -			cur_prob = mr->cur_prob; -			cur_prob_tp = mr->cur_tp; -		} -  		mr = minstrel_get_ratestats(mi, mg->max_tp_rate);  		if (cur_tp < mr->cur_tp) {  			mi->max_tp_rate2 = mi->max_tp_rate;  			cur_tp2 = cur_tp;  			mi->max_tp_rate = mg->max_tp_rate;  			cur_tp = mr->cur_tp; +			mi->max_prob_streams = minstrel_mcs_groups[group].streams - 1;  		}  		mr = minstrel_get_ratestats(mi, mg->max_tp_rate2); @@ -343,6 +349,23 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)  		}  	} +	if (mi->max_prob_streams < 1) +		mi->max_prob_streams = 1; + +	for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { +		mg = &mi->groups[group]; +		if (!mg->supported) +			continue; +		mr = minstrel_get_ratestats(mi, mg->max_prob_rate); +		if (cur_prob_tp < mr->cur_tp && +		    minstrel_mcs_groups[group].streams <= mi->max_prob_streams) { +			mi->max_prob_rate = mg->max_prob_rate; +			cur_prob = mr->cur_prob; +			cur_prob_tp = mr->cur_tp; +		} +	} + +  	mi->stats_update = jiffies;  } @@ -445,7 +468,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,  	struct ieee80211_tx_rate *ar = info->status.rates;  	struct minstrel_rate_stats *rate, *rate2;  	struct minstrel_priv *mp = priv; -	bool last; +	bool last, update = false;  	int i;  	if (!msp->is_ht) @@ -467,7 +490,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,  	if (!mi->sample_wait && !mi->sample_tries && mi->sample_count > 0) {  		mi->sample_wait = 16 + 2 * MINSTREL_TRUNC(mi->avg_ampdu_len); -		mi->sample_tries = 2; +		mi->sample_tries = 1;  		mi->sample_count--;  	} @@ -494,21 +517,29 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,  	rate = minstrel_get_ratestats(mi, mi->max_tp_rate);  	if (rate->attempts > 30 &&  	    MINSTREL_FRAC(rate->success, rate->attempts) < -	    MINSTREL_FRAC(20, 100)) +	    MINSTREL_FRAC(20, 100)) {  		minstrel_downgrade_rate(mi, &mi->max_tp_rate, true); +		update = true; +	}  	rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate2);  	if (rate2->attempts > 30 &&  	    MINSTREL_FRAC(rate2->success, rate2->attempts) < -	    MINSTREL_FRAC(20, 100)) +	    MINSTREL_FRAC(20, 100)) {  		minstrel_downgrade_rate(mi, &mi->max_tp_rate2, false); +		update = true; +	}  	if (time_after(jiffies, mi->stats_update + (mp->update_interval / 2 * HZ) / 1000)) { +		update = true;  		minstrel_ht_update_stats(mp, mi);  		if (!(info->flags & IEEE80211_TX_CTL_AMPDU) &&  		    mi->max_prob_rate / MCS_GROUP_RATES != MINSTREL_CCK_GROUP)  			minstrel_aggr_check(sta, skb);  	} + +	if (update) +		minstrel_ht_update_rates(mp, mi);  }  static void @@ -536,7 +567,7 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,  	mr->retry_updated = true;  	group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; -	tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len; +	tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len / 1000;  	/* Contention time for first 2 tries */  	ctime = (t_slot * cw) >> 1; @@ -572,36 +603,71 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,  static void  minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, -                     struct ieee80211_tx_rate *rate, int index, -                     bool sample, bool rtscts) +                     struct ieee80211_sta_rates *ratetbl, 
int offset, int index)
 {
 	const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES];
 	struct minstrel_rate_stats *mr;
+	u8 idx;
+	u16 flags;
 
 	mr = minstrel_get_ratestats(mi, index);
 	if (!mr->retry_updated)
 		minstrel_calc_retransmit(mp, mi, index);
 
-	if (sample)
-		rate->count = 1;
-	else if (mr->probability < MINSTREL_FRAC(20, 100))
-		rate->count = 2;
-	else if (rtscts)
-		rate->count = mr->retry_count_rtscts;
-	else
-		rate->count = mr->retry_count;
-
-	rate->flags = 0;
-	if (rtscts)
-		rate->flags |= IEEE80211_TX_RC_USE_RTS_CTS;
+	if (mr->probability < MINSTREL_FRAC(20, 100) || !mr->retry_count) {
+		ratetbl->rate[offset].count = 2;
+		ratetbl->rate[offset].count_rts = 2;
+		ratetbl->rate[offset].count_cts = 2;
+	} else {
+		ratetbl->rate[offset].count = mr->retry_count;
+		ratetbl->rate[offset].count_cts = mr->retry_count;
+		ratetbl->rate[offset].count_rts = mr->retry_count_rtscts;
+	}
 
 	if (index / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) {
-		rate->idx = mp->cck_rates[index % ARRAY_SIZE(mp->cck_rates)];
+		idx = mp->cck_rates[index % ARRAY_SIZE(mp->cck_rates)];
+		flags = 0;
+	} else {
+		idx = index % MCS_GROUP_RATES +
+		      (group->streams - 1) * MCS_GROUP_RATES;
+		flags = IEEE80211_TX_RC_MCS | group->flags;
+	}
+
+	if (offset > 0) {
+		ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts;
+		flags |= IEEE80211_TX_RC_USE_RTS_CTS;
+	}
+
+	ratetbl->rate[offset].idx = idx;
+	ratetbl->rate[offset].flags = flags;
+}
+
+static void
+minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
+{
+	struct ieee80211_sta_rates *rates;
+	int i = 0;
+
+	rates = kzalloc(sizeof(*rates), GFP_ATOMIC);
+	if (!rates)
 		return;
+
+	/* Start with max_tp_rate */
+	minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate);
+
+	if (mp->hw->max_rates >= 3) {
+		/* At least 3 tx rates supported, use max_tp_rate2 next */
+		minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate2);
 	}
 
-	rate->flags |= IEEE80211_TX_RC_MCS | group->flags;
-	rate->idx = index % MCS_GROUP_RATES + (group->streams - 1) * MCS_GROUP_RATES;
+	if (mp->hw->max_rates >= 2) {
+		/* At least 2 tx rates supported, use max_prob_rate next */
+		minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_prob_rate);
+	}
+
+	rates->rate[i].idx = -1;
+	rate_control_set_rates(mp->hw, mi->sta, rates);
 }
 
 static inline int
@@ -616,6 +682,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
 {
 	struct minstrel_rate_stats *mr;
 	struct minstrel_mcs_group_data *mg;
+	unsigned int sample_dur, sample_group;
 	int sample_idx = 0;
 
 	if (mi->sample_wait > 0) {
@@ -626,39 +693,46 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi)
 	if (!mi->sample_tries)
 		return -1;
 
-	mi->sample_tries--;
 	mg = &mi->groups[mi->sample_group];
 	sample_idx = sample_table[mg->column][mg->index];
 	mr = &mg->rates[sample_idx];
-	sample_idx += mi->sample_group * MCS_GROUP_RATES;
+	sample_group = mi->sample_group;
+	sample_idx += sample_group * MCS_GROUP_RATES;
 	minstrel_next_sample_idx(mi);
 
 	/*
 	 * Sampling might add some overhead (RTS, no aggregation)
 	 * to the frame. Hence, don't use sampling for the currently
-	 * used max TP rate.
+	 * used rates.
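/*
 * Editor's aside: a minimal, hypothetical sketch (plain C structs, not the
 * mac80211 API) of the rate table that minstrel_ht_update_rates() above
 * hands to rate_control_set_rates(): stage 0 carries max_tp_rate without
 * RTS, later stages fall back with RTS protection, and idx = -1 terminates.
 */
#include <stdio.h>

struct rate_entry {
	signed char idx;	/* -1 terminates the chain */
	unsigned char count;
	unsigned short flags;	/* e.g. an RTS/CTS bit on fallback stages */
};

#define RC_USE_RTS 0x01

int main(void)
{
	struct rate_entry tbl[4] = {
		{ 7, 4, 0 },		/* max_tp_rate, no RTS */
		{ 5, 4, RC_USE_RTS },	/* max_tp_rate2, RTS fallback */
		{ 0, 6, RC_USE_RTS },	/* max_prob_rate, RTS fallback */
		{ -1, 0, 0 },		/* terminator */
	};
	int i;

	for (i = 0; tbl[i].idx >= 0; i++)
		printf("stage %d: idx=%d count=%d rts=%d\n", i, tbl[i].idx,
		       tbl[i].count, !!(tbl[i].flags & RC_USE_RTS));
	return 0;
}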
*/ -	if (sample_idx == mi->max_tp_rate) +	if (sample_idx == mi->max_tp_rate || +	    sample_idx == mi->max_tp_rate2 || +	    sample_idx == mi->max_prob_rate)  		return -1; +  	/* -	 * When not using MRR, do not sample if the probability is already -	 * higher than 95% to avoid wasting airtime +	 * Do not sample if the probability is already higher than 95% +	 * to avoid wasting airtime.  	 */ -	if (!mp->has_mrr && (mr->probability > MINSTREL_FRAC(95, 100))) +	if (mr->probability > MINSTREL_FRAC(95, 100))  		return -1;  	/*  	 * Make sure that lower rates get sampled only occasionally,  	 * if the link is working perfectly.  	 */ -	if (minstrel_get_duration(sample_idx) > -	    minstrel_get_duration(mi->max_tp_rate)) { +	sample_dur = minstrel_get_duration(sample_idx); +	if (sample_dur >= minstrel_get_duration(mi->max_tp_rate2) && +	    (mi->max_prob_streams < +	     minstrel_mcs_groups[sample_group].streams || +	     sample_dur >= minstrel_get_duration(mi->max_prob_rate))) {  		if (mr->sample_skipped < 20)  			return -1;  		if (mi->sample_slow++ > 2)  			return -1;  	} +	mi->sample_tries--;  	return sample_idx;  } @@ -683,13 +757,13 @@ static void  minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,                       struct ieee80211_tx_rate_control *txrc)  { +	const struct mcs_group *sample_group;  	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); -	struct ieee80211_tx_rate *ar = info->status.rates; +	struct ieee80211_tx_rate *rate = &info->status.rates[0];  	struct minstrel_ht_sta_priv *msp = priv_sta;  	struct minstrel_ht_sta *mi = &msp->ht;  	struct minstrel_priv *mp = priv;  	int sample_idx; -	bool sample = false;  	if (rate_control_send_low(sta, priv_sta, txrc))  		return; @@ -717,51 +791,6 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,  	}  #endif -	if (sample_idx >= 0) { -		sample = true; -		minstrel_ht_set_rate(mp, mi, &ar[0], sample_idx, -			true, false); -		info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; -	} else { -		minstrel_ht_set_rate(mp, mi, &ar[0], mi->max_tp_rate, -			false, false); -	} - -	if (mp->hw->max_rates >= 3) { -		/* -		 * At least 3 tx rates supported, use -		 * sample_rate -> max_tp_rate -> max_prob_rate for sampling and -		 * max_tp_rate -> max_tp_rate2 -> max_prob_rate by default. -		 */ -		if (sample_idx >= 0) -			minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate, -				false, false); -		else -			minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_tp_rate2, -				false, true); - -		minstrel_ht_set_rate(mp, mi, &ar[2], mi->max_prob_rate, -				     false, !sample); - -		ar[3].count = 0; -		ar[3].idx = -1; -	} else if (mp->hw->max_rates == 2) { -		/* -		 * Only 2 tx rates supported, use -		 * sample_rate -> max_prob_rate for sampling and -		 * max_tp_rate -> max_prob_rate by default. 
-		 */ -		minstrel_ht_set_rate(mp, mi, &ar[1], mi->max_prob_rate, -				     false, !sample); - -		ar[2].count = 0; -		ar[2].idx = -1; -	} else { -		/* Not using MRR, only use the first rate */ -		ar[1].count = 0; -		ar[1].idx = -1; -	} -  	mi->total_packets++;  	/* wraparound */ @@ -769,6 +798,16 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,  		mi->total_packets = 0;  		mi->sample_packets = 0;  	} + +	if (sample_idx < 0) +		return; + +	sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES]; +	info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; +	rate->idx = sample_idx % MCS_GROUP_RATES + +		    (sample_group->streams - 1) * MCS_GROUP_RATES; +	rate->flags = IEEE80211_TX_RC_MCS | sample_group->flags; +	rate->count = 1;  }  static void @@ -818,6 +857,8 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,  	msp->is_ht = true;  	memset(mi, 0, sizeof(*mi)); + +	mi->sta = sta;  	mi->stats_update = jiffies;  	ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1); @@ -879,6 +920,10 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,  	if (!n_supported)  		goto use_legacy; +	/* create an initial rate table with the lowest supported rates */ +	minstrel_ht_update_stats(mp, mi); +	minstrel_ht_update_rates(mp, mi); +  	return;  use_legacy: diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h index 302dbd52180..d655586773a 100644 --- a/net/mac80211/rc80211_minstrel_ht.h +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -16,11 +16,6 @@  #define MINSTREL_MAX_STREAMS	3  #define MINSTREL_STREAM_GROUPS	4 -/* scaled fraction values */ -#define MINSTREL_SCALE	16 -#define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div) -#define MINSTREL_TRUNC(val) ((val) >> MINSTREL_SCALE) -  #define MCS_GROUP_RATES	8  struct mcs_group { @@ -70,6 +65,8 @@ struct minstrel_mcs_group_data {  };  struct minstrel_ht_sta { +	struct ieee80211_sta *sta; +  	/* ampdu length (average, per sampling interval) */  	unsigned int ampdu_len;  	unsigned int ampdu_packets; @@ -85,6 +82,7 @@ struct minstrel_ht_sta {  	/* best probability rate */  	unsigned int max_prob_rate; +	unsigned int max_prob_streams;  	/* time of last status update */  	unsigned long stats_update; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index c6844ad080b..c8447af76ea 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -648,24 +648,6 @@ static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)  	return RX_CONTINUE;  } -#define SEQ_MODULO 0x1000 -#define SEQ_MASK   0xfff - -static inline int seq_less(u16 sq1, u16 sq2) -{ -	return ((sq1 - sq2) & SEQ_MASK) > (SEQ_MODULO >> 1); -} - -static inline u16 seq_inc(u16 sq) -{ -	return (sq + 1) & SEQ_MASK; -} - -static inline u16 seq_sub(u16 sq1, u16 sq2) -{ -	return (sq1 - sq2) & SEQ_MASK; -} -  static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,  					    struct tid_ampdu_rx *tid_agg_rx,  					    int index, @@ -687,7 +669,7 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,  	__skb_queue_tail(frames, skb);  no_frame: -	tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); +	tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num);  }  static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata, @@ -699,8 +681,9 @@ static void ieee80211_release_reorder_frames(struct ieee80211_sub_if_data *sdata  	lockdep_assert_held(&tid_agg_rx->reorder_lock); -	while 
(seq_less(tid_agg_rx->head_seq_num, head_seq_num)) { -		index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % +	while (ieee80211_sn_less(tid_agg_rx->head_seq_num, head_seq_num)) { +		index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, +					 tid_agg_rx->ssn) %  							tid_agg_rx->buf_size;  		ieee80211_release_reorder_frame(sdata, tid_agg_rx, index,  						frames); @@ -727,8 +710,8 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,  	lockdep_assert_held(&tid_agg_rx->reorder_lock);  	/* release the buffer until next missing frame */ -	index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % -						tid_agg_rx->buf_size; +	index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, +				 tid_agg_rx->ssn) % tid_agg_rx->buf_size;  	if (!tid_agg_rx->reorder_buf[index] &&  	    tid_agg_rx->stored_mpdu_num) {  		/* @@ -756,19 +739,22 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,  			 * Increment the head seq# also for the skipped slots.  			 */  			tid_agg_rx->head_seq_num = -				(tid_agg_rx->head_seq_num + skipped) & SEQ_MASK; +				(tid_agg_rx->head_seq_num + +				 skipped) & IEEE80211_SN_MASK;  			skipped = 0;  		}  	} else while (tid_agg_rx->reorder_buf[index]) {  		ieee80211_release_reorder_frame(sdata, tid_agg_rx, index,  						frames); -		index =	seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) % +		index =	ieee80211_sn_sub(tid_agg_rx->head_seq_num, +					 tid_agg_rx->ssn) %  							tid_agg_rx->buf_size;  	}  	if (tid_agg_rx->stored_mpdu_num) { -		j = index = seq_sub(tid_agg_rx->head_seq_num, -				    tid_agg_rx->ssn) % tid_agg_rx->buf_size; +		j = index = ieee80211_sn_sub(tid_agg_rx->head_seq_num, +					     tid_agg_rx->ssn) % +							tid_agg_rx->buf_size;  		for (; j != (index - 1) % tid_agg_rx->buf_size;  		     j = (j + 1) % tid_agg_rx->buf_size) { @@ -809,7 +795,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata  	head_seq_num = tid_agg_rx->head_seq_num;  	/* frame with out of date sequence number */ -	if (seq_less(mpdu_seq_num, head_seq_num)) { +	if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) {  		dev_kfree_skb(skb);  		goto out;  	} @@ -818,8 +804,9 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata  	 * If frame the sequence number exceeds our buffering window  	 * size release some previous frames to make room for this one.  	 
*/ -	if (!seq_less(mpdu_seq_num, head_seq_num + buf_size)) { -		head_seq_num = seq_inc(seq_sub(mpdu_seq_num, buf_size)); +	if (!ieee80211_sn_less(mpdu_seq_num, head_seq_num + buf_size)) { +		head_seq_num = ieee80211_sn_inc( +				ieee80211_sn_sub(mpdu_seq_num, buf_size));  		/* release stored frames up to new head to stack */  		ieee80211_release_reorder_frames(sdata, tid_agg_rx,  						 head_seq_num, frames); @@ -827,7 +814,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata  	/* Now the new frame is always in the range of the reordering buffer */ -	index = seq_sub(mpdu_seq_num, tid_agg_rx->ssn) % tid_agg_rx->buf_size; +	index = ieee80211_sn_sub(mpdu_seq_num, +				 tid_agg_rx->ssn) % tid_agg_rx->buf_size;  	/* check if we already stored this frame */  	if (tid_agg_rx->reorder_buf[index]) { @@ -843,7 +831,8 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata  	 */  	if (mpdu_seq_num == tid_agg_rx->head_seq_num &&  	    tid_agg_rx->stored_mpdu_num == 0) { -		tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); +		tid_agg_rx->head_seq_num = +			ieee80211_sn_inc(tid_agg_rx->head_seq_num);  		ret = false;  		goto out;  	} @@ -1894,8 +1883,10 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)  		 * 'align' will only take the values 0 or 2 here  		 * since all frames are required to be aligned  		 * to 2-byte boundaries when being passed to -		 * mac80211. That also explains the __skb_push() -		 * below. +		 * mac80211; the code here works just as well if +		 * that isn't true, but mac80211 assumes it can +		 * access fields as 2-byte aligned (e.g. for +		 * compare_ether_addr)  		 */  		align = ((unsigned long)(skb->data + sizeof(struct ethhdr))) & 3;  		if (align) { @@ -2094,6 +2085,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)  	}  	fwd_hdr =  (struct ieee80211_hdr *) fwd_skb->data; +	fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY);  	info = IEEE80211_SKB_CB(fwd_skb);  	memset(info, 0, sizeof(*info));  	info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; @@ -2432,6 +2424,22 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)  		}  		break; +	case WLAN_CATEGORY_PUBLIC: +		if (len < IEEE80211_MIN_ACTION_SIZE + 1) +			goto invalid; +		if (sdata->vif.type != NL80211_IFTYPE_STATION) +			break; +		if (!rx->sta) +			break; +		if (!ether_addr_equal(mgmt->bssid, sdata->u.mgd.bssid)) +			break; +		if (mgmt->u.action.u.ext_chan_switch.action_code != +				WLAN_PUB_ACTION_EXT_CHANSW_ANN) +			break; +		if (len < offsetof(struct ieee80211_mgmt, +				   u.action.u.ext_chan_switch.variable)) +			goto invalid; +		goto queue;  	case WLAN_CATEGORY_VHT:  		if (sdata->vif.type != NL80211_IFTYPE_STATION &&  		    sdata->vif.type != NL80211_IFTYPE_MESH_POINT && @@ -2515,10 +2523,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)  			ieee80211_process_measurement_req(sdata, mgmt, len);  			goto handled;  		case WLAN_ACTION_SPCT_CHL_SWITCH: -			if (len < (IEEE80211_MIN_ACTION_SIZE + -				   sizeof(mgmt->u.action.u.chan_switch))) -				break; -  			if (sdata->vif.type != NL80211_IFTYPE_STATION)  				break; @@ -2552,7 +2556,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)  		case WLAN_SP_MESH_PEERING_CONFIRM:  			if (!ieee80211_vif_is_mesh(&sdata->vif))  				goto invalid; -			if (sdata->u.mesh.security != IEEE80211_MESH_SEC_NONE) +			if (sdata->u.mesh.user_mpm)  				/* userspace handles this frame */  				break;  			goto queue; @@ -3051,7 +3055,8 @@ static int prepare_for_handlers(struct 
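/*
 * Editor's aside: a standalone demo of the modulo-4096 sequence-number
 * arithmetic implemented by the ieee80211_sn_*() helpers that replace the
 * local seq_*() ones above.  The mask and modulo follow the 12-bit 802.11
 * sequence-number space.
 */
#include <stdio.h>
#include <stdint.h>

#define SN_MODULO 0x1000
#define SN_MASK   0xfff

static uint16_t sn_sub(uint16_t a, uint16_t b) { return (a - b) & SN_MASK; }
static uint16_t sn_inc(uint16_t a) { return (a + 1) & SN_MASK; }
static int sn_less(uint16_t a, uint16_t b)
{
	/* "a before b" iff the wrapped distance b -> a is in the upper half */
	return sn_sub(a, b) > (SN_MODULO >> 1);
}

int main(void)
{
	/* wraparound: 4095 + 1 -> 0, sub(3, 4090) -> 9, and 4090 < 5 */
	printf("inc(4095)=%u sub(3,4090)=%u less(4090,5)=%d\n",
	       sn_inc(4095), sn_sub(3, 4090), sn_less(4090, 5));
	return 0;
}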
ieee80211_rx_data *rx,  		    !ieee80211_is_probe_resp(hdr->frame_control) &&  		    !ieee80211_is_beacon(hdr->frame_control))  			return 0; -		if (!ether_addr_equal(sdata->vif.addr, hdr->addr1)) +		if (!ether_addr_equal(sdata->vif.addr, hdr->addr1) && +		    !multicast)  			status->rx_flags &= ~IEEE80211_RX_RA_MATCH;  		break;  	default: diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 43a45cf00e0..99b103921a4 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -98,9 +98,8 @@ ieee80211_bss_info_update(struct ieee80211_local *local,  	}  	/* save the ERP value so that it is available at association time */ -	if (elems->erp_info && elems->erp_info_len >= 1 && -			(!elems->parse_error || -			 !(bss->valid_data & IEEE80211_BSS_VALID_ERP))) { +	if (elems->erp_info && (!elems->parse_error || +				!(bss->valid_data & IEEE80211_BSS_VALID_ERP))) {  		bss->erp_value = elems->erp_info[0];  		bss->has_erp_value = true;  		if (!elems->parse_error) @@ -153,7 +152,6 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)  	u8 *elements;  	struct ieee80211_channel *channel;  	size_t baselen; -	bool beacon;  	struct ieee802_11_elems elems;  	if (skb->len < 24 || @@ -175,17 +173,15 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)  		elements = mgmt->u.probe_resp.variable;  		baselen = offsetof(struct ieee80211_mgmt, u.probe_resp.variable); -		beacon = false;  	} else {  		baselen = offsetof(struct ieee80211_mgmt, u.beacon.variable);  		elements = mgmt->u.beacon.variable; -		beacon = true;  	}  	if (baselen > skb->len)  		return; -	ieee802_11_parse_elems(elements, skb->len - baselen, &elems); +	ieee802_11_parse_elems(elements, skb->len - baselen, false, &elems);  	channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq); @@ -335,7 +331,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local)  	ieee80211_offchannel_stop_vifs(local);  	/* ensure nullfunc is transmitted before leaving operating channel */ -	drv_flush(local, false); +	ieee80211_flush_queues(local, NULL);  	ieee80211_configure_filter(local); @@ -387,7 +383,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,  {  	int i;  	struct ieee80211_sub_if_data *sdata; -	enum ieee80211_band band = local->hw.conf.channel->band; +	enum ieee80211_band band = local->hw.conf.chandef.chan->band;  	u32 tx_flags;  	tx_flags = IEEE80211_TX_INTFL_OFFCHAN_TX_OK; @@ -404,7 +400,7 @@ static void ieee80211_scan_state_send_probe(struct ieee80211_local *local,  			local->scan_req->ssids[i].ssid_len,  			local->scan_req->ie, local->scan_req->ie_len,  			local->scan_req->rates[band], false, -			tx_flags, local->hw.conf.channel, true); +			tx_flags, local->hw.conf.chandef.chan, true);  	/*  	 * After sending probe requests, wait for probe responses @@ -470,7 +466,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,  	if (local->ops->hw_scan) {  		__set_bit(SCAN_HW_SCANNING, &local->scanning);  	} else if ((req->n_channels == 1) && -		   (req->channels[0] == local->_oper_channel)) { +		   (req->channels[0] == local->_oper_chandef.chan)) {  		/*  		 * If we are scanning only on the operating channel  		 * then we do not need to stop normal activities @@ -671,7 +667,7 @@ static void ieee80211_scan_state_resume(struct ieee80211_local *local,  	ieee80211_offchannel_stop_vifs(local);  	if (local->ops->flush) { -		drv_flush(local, false); +		ieee80211_flush_queues(local, NULL);  		*next_delay = 0;  	} else  		*next_delay = HZ / 10; diff 
--git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 238a0cca320..11216bc13b2 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -342,6 +342,11 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,  	INIT_WORK(&sta->drv_unblock_wk, sta_unblock);  	INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work);  	mutex_init(&sta->ampdu_mlme.mtx); +#ifdef CONFIG_MAC80211_MESH +	if (ieee80211_vif_is_mesh(&sdata->vif) && +	    !sdata->u.mesh.user_mpm) +		init_timer(&sta->plink_timer); +#endif  	memcpy(sta->sta.addr, addr, ETH_ALEN);  	sta->local = local; @@ -551,6 +556,15 @@ static inline void __bss_tim_clear(u8 *tim, u16 id)  	tim[id / 8] &= ~(1 << (id % 8));  } +static inline bool __bss_tim_get(u8 *tim, u16 id) +{ +	/* +	 * This format has been mandated by the IEEE specifications, +	 * so this line may not be changed to use the test_bit() format. +	 */ +	return tim[id / 8] & (1 << (id % 8)); +} +  static unsigned long ieee80211_tids_for_ac(int ac)  {  	/* If we ever support TIDs > 7, this obviously needs to be adjusted */ @@ -631,6 +645,9 @@ void sta_info_recalc_tim(struct sta_info *sta)   done:  	spin_lock_bh(&local->tim_lock); +	if (indicate_tim == __bss_tim_get(ps->tim, id)) +		goto out_unlock; +  	if (indicate_tim)  		__bss_tim_set(ps->tim, id);  	else @@ -642,6 +659,7 @@ void sta_info_recalc_tim(struct sta_info *sta)  		local->tim_in_locked_section = false;  	} +out_unlock:  	spin_unlock_bh(&local->tim_lock);  } @@ -765,8 +783,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta)  {  	struct ieee80211_local *local;  	struct ieee80211_sub_if_data *sdata; -	int ret, i; -	bool have_key = false; +	int ret;  	might_sleep(); @@ -793,19 +810,8 @@ int __must_check __sta_info_destroy(struct sta_info *sta)  	list_del_rcu(&sta->list); -	mutex_lock(&local->key_mtx); -	for (i = 0; i < NUM_DEFAULT_KEYS; i++) { -		__ieee80211_key_free(key_mtx_dereference(local, sta->gtk[i])); -		have_key = true; -	} -	if (sta->ptk) { -		__ieee80211_key_free(key_mtx_dereference(local, sta->ptk)); -		have_key = true; -	} -	mutex_unlock(&local->key_mtx); - -	if (!have_key) -		synchronize_net(); +	/* this always calls synchronize_net() */ +	ieee80211_free_sta_keys(local, sta);  	sta->dead = true; @@ -1391,30 +1397,16 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw,  }  EXPORT_SYMBOL(ieee80211_sta_block_awake); -void ieee80211_sta_eosp_irqsafe(struct ieee80211_sta *pubsta) +void ieee80211_sta_eosp(struct ieee80211_sta *pubsta)  {  	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);  	struct ieee80211_local *local = sta->local; -	struct sk_buff *skb; -	struct skb_eosp_msg_data *data;  	trace_api_eosp(local, pubsta); -	skb = alloc_skb(0, GFP_ATOMIC); -	if (!skb) { -		/* too bad ... 
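/*
 * Editor's aside: a standalone sketch of the partial-virtual-bitmap layout
 * used by the __bss_tim_set/clear/get helpers above.  AID N lives in octet
 * N/8, bit N%8, as mandated by the IEEE 802.11 TIM element encoding, which
 * is why the kernel code must not switch to test_bit().
 */
#include <stdio.h>
#include <stdint.h>

static void tim_set(uint8_t *tim, uint16_t id)   { tim[id / 8] |= 1 << (id % 8); }
static void tim_clear(uint8_t *tim, uint16_t id) { tim[id / 8] &= ~(1 << (id % 8)); }
static int  tim_get(const uint8_t *tim, uint16_t id)
{
	return !!(tim[id / 8] & (1 << (id % 8)));
}

int main(void)
{
	uint8_t tim[32] = { 0 };

	tim_set(tim, 13);		/* AID 13: octet 1, bit 5 */
	if (tim_get(tim, 13))		/* the new early-exit check */
		printf("octet1=0x%02x\n", tim[1]);	/* prints 0x20 */
	tim_clear(tim, 13);
	return 0;
}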
but race is better than loss */ -		clear_sta_flag(sta, WLAN_STA_SP); -		return; -	} - -	data = (void *)skb->cb; -	memcpy(data->sta, pubsta->addr, ETH_ALEN); -	memcpy(data->iface, sta->sdata->vif.addr, ETH_ALEN); -	skb->pkt_type = IEEE80211_EOSP_MSG; -	skb_queue_tail(&local->skb_queue, skb); -	tasklet_schedule(&local->tasklet); +	clear_sta_flag(sta, WLAN_STA_SP);  } -EXPORT_SYMBOL(ieee80211_sta_eosp_irqsafe); +EXPORT_SYMBOL(ieee80211_sta_eosp);  void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,  				u8 tid, bool buffered) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 4947341a2a8..adc30045f99 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -281,7 +281,6 @@ struct sta_ampdu_mlme {   * @plink_state: peer link state   * @plink_timeout: timeout of peer link   * @plink_timer: peer link watch timer - * @plink_timer_was_running: used by suspend/resume to restore timers   * @t_offset: timing offset relative to this host   * @t_offset_setpoint: reference timing offset of this sta to be used when   * 	calculating clockdrift @@ -334,7 +333,8 @@ struct sta_info {  	unsigned long driver_buffered_tids;  	/* Updated from RX path only, no locking requirements */ -	unsigned long rx_packets, rx_bytes; +	unsigned long rx_packets; +	u64 rx_bytes;  	unsigned long wep_weak_iv_count;  	unsigned long last_rx;  	long last_connected; @@ -354,9 +354,9 @@ struct sta_info {  	unsigned int fail_avg;  	/* Updated from TX path only, no locking requirements */ -	unsigned long tx_packets; -	unsigned long tx_bytes; -	unsigned long tx_fragments; +	u32 tx_fragments; +	u64 tx_packets[IEEE80211_NUM_ACS]; +	u64 tx_bytes[IEEE80211_NUM_ACS];  	struct ieee80211_tx_rate last_tx_rate;  	int last_rx_rate_idx;  	u32 last_rx_rate_flag; @@ -379,7 +379,6 @@ struct sta_info {  	__le16 reason;  	u8 plink_retries;  	bool ignore_plink_timer; -	bool plink_timer_was_running;  	enum nl80211_plink_state plink_state;  	u32 plink_timeout;  	struct timer_list plink_timer; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 3d7cd2a0582..c215fafd7a2 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -28,27 +28,27 @@  #define VIF_PR_FMT	" vif:%s(%d%s)"  #define VIF_PR_ARG	__get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" -#define CHANDEF_ENTRY	__field(u32, control_freq)				\ -			__field(u32, chan_width)				\ -			__field(u32, center_freq1)				\ +#define CHANDEF_ENTRY	__field(u32, control_freq)					\ +			__field(u32, chan_width)					\ +			__field(u32, center_freq1)					\  			__field(u32, center_freq2) -#define CHANDEF_ASSIGN(c)							\ -			__entry->control_freq = (c)->chan->center_freq;		\ -			__entry->chan_width = (c)->width;			\ -			__entry->center_freq1 = (c)->center_freq1;		\ +#define CHANDEF_ASSIGN(c)								\ +			__entry->control_freq = (c)->chan ? 
(c)->chan->center_freq : 0;	\ +			__entry->chan_width = (c)->width;				\ +			__entry->center_freq1 = (c)->center_freq1;			\  			__entry->center_freq2 = (c)->center_freq2;  #define CHANDEF_PR_FMT	" control:%d MHz width:%d center: %d/%d MHz" -#define CHANDEF_PR_ARG	__entry->control_freq, __entry->chan_width,		\ +#define CHANDEF_PR_ARG	__entry->control_freq, __entry->chan_width,			\  			__entry->center_freq1, __entry->center_freq2 -#define CHANCTX_ENTRY	CHANDEF_ENTRY						\ -			__field(u8, rx_chains_static)				\ +#define CHANCTX_ENTRY	CHANDEF_ENTRY							\ +			__field(u8, rx_chains_static)					\  			__field(u8, rx_chains_dynamic) -#define CHANCTX_ASSIGN	CHANDEF_ASSIGN(&ctx->conf.def)				\ -			__entry->rx_chains_static = ctx->conf.rx_chains_static;	\ +#define CHANCTX_ASSIGN	CHANDEF_ASSIGN(&ctx->conf.def)					\ +			__entry->rx_chains_static = ctx->conf.rx_chains_static;		\  			__entry->rx_chains_dynamic = ctx->conf.rx_chains_dynamic  #define CHANCTX_PR_FMT	CHANDEF_PR_FMT " chains:%d/%d" -#define CHANCTX_PR_ARG	CHANDEF_PR_ARG,						\ +#define CHANCTX_PR_ARG	CHANDEF_PR_ARG,							\  			__entry->rx_chains_static, __entry->rx_chains_dynamic @@ -286,8 +286,7 @@ TRACE_EVENT(drv_config,  		__field(u16, listen_interval)  		__field(u8, long_frame_max_tx_count)  		__field(u8, short_frame_max_tx_count) -		__field(int, center_freq) -		__field(int, channel_type) +		CHANDEF_ENTRY  		__field(int, smps)  	), @@ -303,15 +302,13 @@ TRACE_EVENT(drv_config,  			local->hw.conf.long_frame_max_tx_count;  		__entry->short_frame_max_tx_count =  			local->hw.conf.short_frame_max_tx_count; -		__entry->center_freq = local->hw.conf.channel ? -					local->hw.conf.channel->center_freq : 0; -		__entry->channel_type = local->hw.conf.channel_type; +		CHANDEF_ASSIGN(&local->hw.conf.chandef)  		__entry->smps = local->hw.conf.smps_mode;  	),  	TP_printk( -		LOCAL_PR_FMT " ch:%#x freq:%d", -		LOCAL_PR_ARG, __entry->changed, __entry->center_freq +		LOCAL_PR_FMT " ch:%#x" CHANDEF_PR_FMT, +		LOCAL_PR_ARG, __entry->changed, CHANDEF_PR_ARG  	)  ); @@ -359,8 +356,7 @@ TRACE_EVENT(drv_bss_info_changed,  		__dynamic_array(u8, ssid, info->ssid_len);  		__field(bool, hidden_ssid);  		__field(int, txpower) -		__field(u8, p2p_ctwindow) -		__field(bool, p2p_oppps) +		__field(u8, p2p_oppps_ctwindow)  	),  	TP_fast_assign( @@ -400,8 +396,7 @@ TRACE_EVENT(drv_bss_info_changed,  		memcpy(__get_dynamic_array(ssid), info->ssid, info->ssid_len);  		__entry->hidden_ssid = info->hidden_ssid;  		__entry->txpower = info->txpower; -		__entry->p2p_ctwindow = info->p2p_ctwindow; -		__entry->p2p_oppps = info->p2p_oppps; +		__entry->p2p_oppps_ctwindow = info->p2p_noa_attr.oppps_ctwindow;  	),  	TP_printk( @@ -431,6 +426,30 @@ TRACE_EVENT(drv_prepare_multicast,  	)  ); +TRACE_EVENT(drv_set_multicast_list, +	TP_PROTO(struct ieee80211_local *local, +		 struct ieee80211_sub_if_data *sdata, int mc_count), + +	TP_ARGS(local, sdata, mc_count), + +	TP_STRUCT__entry( +		LOCAL_ENTRY +		__field(bool, allmulti) +		__field(int, mc_count) +	), + +	TP_fast_assign( +		LOCAL_ASSIGN; +		__entry->allmulti = sdata->flags & IEEE80211_SDATA_ALLMULTI; +		__entry->mc_count = mc_count; +	), + +	TP_printk( +		LOCAL_PR_FMT " configure mc filter, count=%d, allmulti=%d", +		LOCAL_PR_ARG, __entry->mc_count, __entry->allmulti +	) +); +  TRACE_EVENT(drv_configure_filter,  	TP_PROTO(struct ieee80211_local *local,  		 unsigned int changed_flags, @@ -940,23 +959,26 @@ TRACE_EVENT(drv_get_survey,  );  TRACE_EVENT(drv_flush, -	TP_PROTO(struct ieee80211_local *local, bool drop), +	TP_PROTO(struct 
ieee80211_local *local, +		 u32 queues, bool drop), -	TP_ARGS(local, drop), +	TP_ARGS(local, queues, drop),  	TP_STRUCT__entry(  		LOCAL_ENTRY  		__field(bool, drop) +		__field(u32, queues)  	),  	TP_fast_assign(  		LOCAL_ASSIGN;  		__entry->drop = drop; +		__entry->queues = queues;  	),  	TP_printk( -		LOCAL_PR_FMT " drop:%d", -		LOCAL_PR_ARG, __entry->drop +		LOCAL_PR_FMT " queues:0x%x drop:%d", +		LOCAL_PR_ARG, __entry->queues, __entry->drop  	)  ); @@ -968,23 +990,23 @@ TRACE_EVENT(drv_channel_switch,  	TP_STRUCT__entry(  		LOCAL_ENTRY +		CHANDEF_ENTRY  		__field(u64, timestamp)  		__field(bool, block_tx) -		__field(u16, freq)  		__field(u8, count)  	),  	TP_fast_assign(  		LOCAL_ASSIGN; +		CHANDEF_ASSIGN(&ch_switch->chandef)  		__entry->timestamp = ch_switch->timestamp;  		__entry->block_tx = ch_switch->block_tx; -		__entry->freq = ch_switch->channel->center_freq;  		__entry->count = ch_switch->count;  	),  	TP_printk( -		LOCAL_PR_FMT " new freq:%u count:%d", -		LOCAL_PR_ARG, __entry->freq, __entry->count +		LOCAL_PR_FMT " new " CHANDEF_PR_FMT " count:%d", +		LOCAL_PR_ARG, CHANDEF_PR_ARG, __entry->count  	)  ); @@ -1042,15 +1064,17 @@ TRACE_EVENT(drv_remain_on_channel,  	TP_PROTO(struct ieee80211_local *local,  		 struct ieee80211_sub_if_data *sdata,  		 struct ieee80211_channel *chan, -		 unsigned int duration), +		 unsigned int duration, +		 enum ieee80211_roc_type type), -	TP_ARGS(local, sdata, chan, duration), +	TP_ARGS(local, sdata, chan, duration, type),  	TP_STRUCT__entry(  		LOCAL_ENTRY  		VIF_ENTRY  		__field(int, center_freq)  		__field(unsigned int, duration) +		__field(u32, type)  	),  	TP_fast_assign( @@ -1058,12 +1082,13 @@ TRACE_EVENT(drv_remain_on_channel,  		VIF_ASSIGN;  		__entry->center_freq = chan->center_freq;  		__entry->duration = duration; +		__entry->type = type;  	),  	TP_printk( -		LOCAL_PR_FMT  VIF_PR_FMT " freq:%dMHz duration:%dms", +		LOCAL_PR_FMT  VIF_PR_FMT " freq:%dMHz duration:%dms type=%d",  		LOCAL_PR_ARG, VIF_PR_ARG, -		__entry->center_freq, __entry->duration +		__entry->center_freq, __entry->duration, __entry->type  	)  ); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8914d2d2881..9972e07a2f9 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -48,15 +48,15 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx,  	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);  	/* assume HW handles this */ -	if (info->control.rates[0].flags & IEEE80211_TX_RC_MCS) +	if (tx->rate.flags & IEEE80211_TX_RC_MCS)  		return 0;  	/* uh huh? 
*/ -	if (WARN_ON_ONCE(info->control.rates[0].idx < 0)) +	if (WARN_ON_ONCE(tx->rate.idx < 0))  		return 0;  	sband = local->hw.wiphy->bands[info->band]; -	txrate = &sband->bitrates[info->control.rates[0].idx]; +	txrate = &sband->bitrates[tx->rate.idx];  	erp = txrate->flags & IEEE80211_RATE_ERP_G; @@ -233,6 +233,7 @@ ieee80211_tx_h_dynamic_ps(struct ieee80211_tx_data *tx)  	if (local->hw.conf.flags & IEEE80211_CONF_PS) {  		ieee80211_stop_queues_by_reason(&local->hw, +						IEEE80211_MAX_QUEUE_MAP,  						IEEE80211_QUEUE_STOP_REASON_PS);  		ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED;  		ieee80211_queue_work(&local->hw, @@ -616,11 +617,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)  	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);  	struct ieee80211_hdr *hdr = (void *)tx->skb->data;  	struct ieee80211_supported_band *sband; -	struct ieee80211_rate *rate; -	int i;  	u32 len; -	bool inval = false, rts = false, short_preamble = false;  	struct ieee80211_tx_rate_control txrc; +	struct ieee80211_sta_rates *ratetbl = NULL;  	bool assoc = false;  	memset(&txrc, 0, sizeof(txrc)); @@ -641,18 +640,23 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)  		txrc.max_rate_idx = -1;  	else  		txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; -	memcpy(txrc.rate_idx_mcs_mask, -	       tx->sdata->rc_rateidx_mcs_mask[info->band], -	       sizeof(txrc.rate_idx_mcs_mask)); + +	if (tx->sdata->rc_has_mcs_mask[info->band]) +		txrc.rate_idx_mcs_mask = +			tx->sdata->rc_rateidx_mcs_mask[info->band]; +  	txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP ||  		    tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||  		    tx->sdata->vif.type == NL80211_IFTYPE_ADHOC);  	/* set up RTS protection if desired */  	if (len > tx->local->hw.wiphy->rts_threshold) { -		txrc.rts = rts = true; +		txrc.rts = true;  	} +	info->control.use_rts = txrc.rts; +	info->control.use_cts_prot = tx->sdata->vif.bss_conf.use_cts_prot; +  	/*  	 * Use short preamble if the BSS can handle it, but not for  	 * management frames unless we know the receiver can handle @@ -662,7 +666,9 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)  	if (tx->sdata->vif.bss_conf.use_short_preamble &&  	    (ieee80211_is_data(hdr->frame_control) ||  	     (tx->sta && test_sta_flag(tx->sta, WLAN_STA_SHORT_PREAMBLE)))) -		txrc.short_preamble = short_preamble = true; +		txrc.short_preamble = true; + +	info->control.short_preamble = txrc.short_preamble;  	if (tx->sta)  		assoc = test_sta_flag(tx->sta, WLAN_STA_ASSOC); @@ -686,16 +692,38 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)  	 */  	rate_control_get_rate(tx->sdata, tx->sta, &txrc); -	if (unlikely(info->control.rates[0].idx < 0)) -		return TX_DROP; +	if (tx->sta && !info->control.skip_table) +		ratetbl = rcu_dereference(tx->sta->sta.rates); + +	if (unlikely(info->control.rates[0].idx < 0)) { +		if (ratetbl) { +			struct ieee80211_tx_rate rate = { +				.idx = ratetbl->rate[0].idx, +				.flags = ratetbl->rate[0].flags, +				.count = ratetbl->rate[0].count +			}; + +			if (ratetbl->rate[0].idx < 0) +				return TX_DROP; + +			tx->rate = rate; +		} else { +			return TX_DROP; +		} +	} else { +		tx->rate = info->control.rates[0]; +	}  	if (txrc.reported_rate.idx < 0) { -		txrc.reported_rate = info->control.rates[0]; +		txrc.reported_rate = tx->rate;  		if (tx->sta && ieee80211_is_data(hdr->frame_control))  			tx->sta->last_tx_rate = txrc.reported_rate;  	} else if (tx->sta)  		tx->sta->last_tx_rate = txrc.reported_rate; +	if (ratetbl) +		return TX_CONTINUE; +  	if 
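/*
 * Editor's aside: a userspace approximation (C11 atomics, NOT the kernel's
 * RCU primitives) of the publish/consume pattern behind the new per-sta
 * rate table: rate_control_set_rates() publishes a complete table, and the
 * TX path above picks up a consistent snapshot via rcu_dereference().  The
 * kernel defers freeing the old table with kfree_rcu() until after a grace
 * period; this sketch frees it immediately, which real RCU must not do.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct sta_rates { int rate[4]; };

static _Atomic(struct sta_rates *) sta_tbl;

static void set_rates(struct sta_rates *new_tbl)
{
	struct sta_rates *old = atomic_exchange_explicit(&sta_tbl, new_tbl,
							 memory_order_release);
	free(old);	/* kernel: kfree_rcu(old, rcu_head) instead */
}

int main(void)
{
	struct sta_rates *t = calloc(1, sizeof(*t));
	struct sta_rates *cur;

	t->rate[0] = 7;
	set_rates(t);				/* writer publishes */

	cur = atomic_load_explicit(&sta_tbl,	/* reader snapshot */
				   memory_order_acquire);
	printf("first-stage rate idx: %d\n", cur->rate[0]);
	set_rates(NULL);
	return 0;
}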
(unlikely(!info->control.rates[0].count))  		info->control.rates[0].count = 1; @@ -703,91 +731,6 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)  			 (info->flags & IEEE80211_TX_CTL_NO_ACK)))  		info->control.rates[0].count = 1; -	if (is_multicast_ether_addr(hdr->addr1)) { -		/* -		 * XXX: verify the rate is in the basic rateset -		 */ -		return TX_CONTINUE; -	} - -	/* -	 * set up the RTS/CTS rate as the fastest basic rate -	 * that is not faster than the data rate -	 * -	 * XXX: Should this check all retry rates? -	 */ -	if (!(info->control.rates[0].flags & IEEE80211_TX_RC_MCS)) { -		s8 baserate = 0; - -		rate = &sband->bitrates[info->control.rates[0].idx]; - -		for (i = 0; i < sband->n_bitrates; i++) { -			/* must be a basic rate */ -			if (!(tx->sdata->vif.bss_conf.basic_rates & BIT(i))) -				continue; -			/* must not be faster than the data rate */ -			if (sband->bitrates[i].bitrate > rate->bitrate) -				continue; -			/* maximum */ -			if (sband->bitrates[baserate].bitrate < -			     sband->bitrates[i].bitrate) -				baserate = i; -		} - -		info->control.rts_cts_rate_idx = baserate; -	} - -	for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { -		/* -		 * make sure there's no valid rate following -		 * an invalid one, just in case drivers don't -		 * take the API seriously to stop at -1. -		 */ -		if (inval) { -			info->control.rates[i].idx = -1; -			continue; -		} -		if (info->control.rates[i].idx < 0) { -			inval = true; -			continue; -		} - -		/* -		 * For now assume MCS is already set up correctly, this -		 * needs to be fixed. -		 */ -		if (info->control.rates[i].flags & IEEE80211_TX_RC_MCS) { -			WARN_ON(info->control.rates[i].idx > 76); -			continue; -		} - -		/* set up RTS protection if desired */ -		if (rts) -			info->control.rates[i].flags |= -				IEEE80211_TX_RC_USE_RTS_CTS; - -		/* RC is busted */ -		if (WARN_ON_ONCE(info->control.rates[i].idx >= -				 sband->n_bitrates)) { -			info->control.rates[i].idx = -1; -			continue; -		} - -		rate = &sband->bitrates[info->control.rates[i].idx]; - -		/* set up short preamble */ -		if (short_preamble && -		    rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) -			info->control.rates[i].flags |= -				IEEE80211_TX_RC_USE_SHORT_PREAMBLE; - -		/* set up G protection */ -		if (!rts && tx->sdata->vif.bss_conf.use_cts_prot && -		    rate->flags & IEEE80211_RATE_ERP_G) -			info->control.rates[i].flags |= -				IEEE80211_TX_RC_USE_CTS_PROTECT; -	} -  	return TX_CONTINUE;  } @@ -991,15 +934,18 @@ static ieee80211_tx_result debug_noinline  ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)  {  	struct sk_buff *skb; +	int ac = -1;  	if (!tx->sta)  		return TX_CONTINUE; -	tx->sta->tx_packets++;  	skb_queue_walk(&tx->skbs, skb) { +		ac = skb_get_queue_mapping(skb);  		tx->sta->tx_fragments++; -		tx->sta->tx_bytes += skb->len; +		tx->sta->tx_bytes[ac] += skb->len;  	} +	if (ac >= 0) +		tx->sta->tx_packets[ac]++;  	return TX_CONTINUE;  } @@ -1705,7 +1651,7 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,  	if (chanctx_conf)  		chan = chanctx_conf->def.chan;  	else if (!local->use_chanctx) -		chan = local->_oper_channel; +		chan = local->_oper_chandef.chan;  	else  		goto fail_rcu; @@ -1839,7 +1785,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,  		 * This is the exception! 
WDS style interfaces are prohibited  		 * when channel contexts are in used so this must be valid  		 */ -		band = local->hw.conf.channel->band; +		band = local->hw.conf.chandef.chan->band;  		break;  #ifdef CONFIG_MAC80211_MESH  	case NL80211_IFTYPE_MESH_POINT: @@ -2085,7 +2031,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,  		encaps_data = bridge_tunnel_header;  		encaps_len = sizeof(bridge_tunnel_header);  		skip_header_bytes -= 2; -	} else if (ethertype >= 0x600) { +	} else if (ethertype >= ETH_P_802_3_MIN) {  		encaps_data = rfc1042_header;  		encaps_len = sizeof(rfc1042_header);  		skip_header_bytes -= 2; @@ -2438,14 +2384,17 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,  	} else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {  		struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;  		struct ieee80211_hdr *hdr; -		struct sk_buff *presp = rcu_dereference(ifibss->presp); +		struct beacon_data *presp = rcu_dereference(ifibss->presp);  		if (!presp)  			goto out; -		skb = skb_copy(presp, GFP_ATOMIC); +		skb = dev_alloc_skb(local->tx_headroom + presp->head_len);  		if (!skb)  			goto out; +		skb_reserve(skb, local->tx_headroom); +		memcpy(skb_put(skb, presp->head_len), presp->head, +		       presp->head_len);  		hdr = (struct ieee80211_hdr *) skb->data;  		hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | @@ -2495,8 +2444,6 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,  		txrc.max_rate_idx = -1;  	else  		txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; -	memcpy(txrc.rate_idx_mcs_mask, sdata->rc_rateidx_mcs_mask[band], -	       sizeof(txrc.rate_idx_mcs_mask));  	txrc.bss = true;  	rate_control_get_rate(sdata, NULL, &txrc); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 0f38f43ac62..3f87fa468b1 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -453,7 +453,8 @@ void ieee80211_add_pending_skbs_fn(struct ieee80211_local *local,  }  void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, -				    enum queue_stop_reason reason) +				     unsigned long queues, +				     enum queue_stop_reason reason)  {  	struct ieee80211_local *local = hw_to_local(hw);  	unsigned long flags; @@ -461,7 +462,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,  	spin_lock_irqsave(&local->queue_stop_reason_lock, flags); -	for (i = 0; i < hw->queues; i++) +	for_each_set_bit(i, &queues, hw->queues)  		__ieee80211_stop_queue(hw, i, reason);  	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); @@ -469,7 +470,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,  void ieee80211_stop_queues(struct ieee80211_hw *hw)  { -	ieee80211_stop_queues_by_reason(hw, +	ieee80211_stop_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,  					IEEE80211_QUEUE_STOP_REASON_DRIVER);  }  EXPORT_SYMBOL(ieee80211_stop_queues); @@ -484,13 +485,15 @@ int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue)  		return true;  	spin_lock_irqsave(&local->queue_stop_reason_lock, flags); -	ret = !!local->queue_stop_reasons[queue]; +	ret = test_bit(IEEE80211_QUEUE_STOP_REASON_DRIVER, +		       &local->queue_stop_reasons[queue]);  	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);  	return ret;  }  EXPORT_SYMBOL(ieee80211_queue_stopped);  void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, +				     unsigned long queues,  				     enum queue_stop_reason reason)  {  	struct ieee80211_local *local = hw_to_local(hw); @@ -499,7 +502,7 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw 
*hw,  	spin_lock_irqsave(&local->queue_stop_reason_lock, flags); -	for (i = 0; i < hw->queues; i++) +	for_each_set_bit(i, &queues, hw->queues)  		__ieee80211_wake_queue(hw, i, reason);  	spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); @@ -507,10 +510,42 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,  void ieee80211_wake_queues(struct ieee80211_hw *hw)  { -	ieee80211_wake_queues_by_reason(hw, IEEE80211_QUEUE_STOP_REASON_DRIVER); +	ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, +					IEEE80211_QUEUE_STOP_REASON_DRIVER);  }  EXPORT_SYMBOL(ieee80211_wake_queues); +void ieee80211_flush_queues(struct ieee80211_local *local, +			    struct ieee80211_sub_if_data *sdata) +{ +	u32 queues; + +	if (!local->ops->flush) +		return; + +	if (sdata && local->hw.flags & IEEE80211_HW_QUEUE_CONTROL) { +		int ac; + +		queues = 0; + +		for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) +			queues |= BIT(sdata->vif.hw_queue[ac]); +		if (sdata->vif.cab_queue != IEEE80211_INVAL_HW_QUEUE) +			queues |= BIT(sdata->vif.cab_queue); +	} else { +		/* all queues */ +		queues = BIT(local->hw.queues) - 1; +	} + +	ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, +					IEEE80211_QUEUE_STOP_REASON_FLUSH); + +	drv_flush(local, queues, false); + +	ieee80211_wake_queues_by_reason(&local->hw, IEEE80211_MAX_QUEUE_MAP, +					IEEE80211_QUEUE_STOP_REASON_FLUSH); +} +  void ieee80211_iterate_active_interfaces(  	struct ieee80211_hw *hw, u32 iter_flags,  	void (*iterator)(void *data, u8 *mac, @@ -626,7 +661,7 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw,  }  EXPORT_SYMBOL(ieee80211_queue_delayed_work); -u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, +u32 ieee802_11_parse_elems_crc(u8 *start, size_t len, bool action,  			       struct ieee802_11_elems *elems,  			       u64 filter, u32 crc)  { @@ -634,6 +669,7 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,  	u8 *pos = start;  	bool calc_crc = filter != 0;  	DECLARE_BITMAP(seen_elems, 256); +	const u8 *ie;  	bitmap_zero(seen_elems, 256);  	memset(elems, 0, sizeof(*elems)); @@ -681,6 +717,12 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,  		case WLAN_EID_COUNTRY:  		case WLAN_EID_PWR_CONSTRAINT:  		case WLAN_EID_TIMEOUT_INTERVAL: +		case WLAN_EID_SECONDARY_CHANNEL_OFFSET: +		case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: +		/* +		 * not listing WLAN_EID_CHANNEL_SWITCH_WRAPPER -- it seems possible +		 * that if the content gets bigger it might be needed more than once +		 */  			if (test_bit(id, seen_elems)) {  				elems->parse_error = true;  				left -= elen; @@ -704,17 +746,11 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,  			elems->supp_rates = pos;  			elems->supp_rates_len = elen;  			break; -		case WLAN_EID_FH_PARAMS: -			elems->fh_params = pos; -			elems->fh_params_len = elen; -			break;  		case WLAN_EID_DS_PARAMS: -			elems->ds_params = pos; -			elems->ds_params_len = elen; -			break; -		case WLAN_EID_CF_PARAMS: -			elems->cf_params = pos; -			elems->cf_params_len = elen; +			if (elen >= 1) +				elems->ds_params = pos; +			else +				elem_parse_failed = true;  			break;  		case WLAN_EID_TIM:  			if (elen >= sizeof(struct ieee80211_tim_ie)) { @@ -723,10 +759,6 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,  			} else  				elem_parse_failed = true;  			break; -		case WLAN_EID_IBSS_PARAMS: -			elems->ibss_params = pos; -			elems->ibss_params_len = elen; -			break;  		case WLAN_EID_CHALLENGE:  			elems->challenge = pos;  			elems->challenge_len = elen; @@ -756,8 +788,10 @@ 
u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,  			elems->rsn_len = elen;  			break;  		case WLAN_EID_ERP_INFO: -			elems->erp_info = pos; -			elems->erp_info_len = elen; +			if (elen >= 1) +				elems->erp_info = pos; +			else +				elem_parse_failed = true;  			break;  		case WLAN_EID_EXT_SUPP_RATES:  			elems->ext_supp_rates = pos; @@ -836,12 +870,47 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,  			}  			elems->ch_switch_ie = (void *)pos;  			break; -		case WLAN_EID_QUIET: -			if (!elems->quiet_elem) { -				elems->quiet_elem = pos; -				elems->quiet_elem_len = elen; +		case WLAN_EID_EXT_CHANSWITCH_ANN: +			if (elen != sizeof(struct ieee80211_ext_chansw_ie)) { +				elem_parse_failed = true; +				break; +			} +			elems->ext_chansw_ie = (void *)pos; +			break; +		case WLAN_EID_SECONDARY_CHANNEL_OFFSET: +			if (elen != sizeof(struct ieee80211_sec_chan_offs_ie)) { +				elem_parse_failed = true; +				break; +			} +			elems->sec_chan_offs = (void *)pos; +			break; +		case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: +			if (!action || +			    elen != sizeof(*elems->wide_bw_chansw_ie)) { +				elem_parse_failed = true; +				break; +			} +			elems->wide_bw_chansw_ie = (void *)pos; +			break; +		case WLAN_EID_CHANNEL_SWITCH_WRAPPER: +			if (action) { +				elem_parse_failed = true; +				break; +			} +			/* +			 * This is a bit tricky, but as we only care about +			 * the wide bandwidth channel switch element, so +			 * just parse it out manually. +			 */ +			ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH, +					      pos, elen); +			if (ie) { +				if (ie[1] == sizeof(*elems->wide_bw_chansw_ie)) +					elems->wide_bw_chansw_ie = +						(void *)(ie + 2); +				else +					elem_parse_failed = true;  			} -			elems->num_of_quiet_elem++;  			break;  		case WLAN_EID_COUNTRY:  			elems->country_elem = pos; @@ -855,8 +924,10 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,  			elems->pwr_constr_elem = pos;  			break;  		case WLAN_EID_TIMEOUT_INTERVAL: -			elems->timeout_int = pos; -			elems->timeout_int_len = elen; +			if (elen >= sizeof(struct ieee80211_timeout_interval_ie)) +				elems->timeout_int = (void *)pos; +			else +				elem_parse_failed = true;  			break;  		default:  			break; @@ -877,12 +948,6 @@ u32 ieee802_11_parse_elems_crc(u8 *start, size_t len,  	return crc;  } -void ieee802_11_parse_elems(u8 *start, size_t len, -			    struct ieee802_11_elems *elems) -{ -	ieee802_11_parse_elems_crc(start, len, elems, 0, 0); -} -  void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata,  			       bool bss_notify)  { @@ -1357,6 +1422,25 @@ void ieee80211_stop_device(struct ieee80211_local *local)  	drv_stop(local);  } +static void ieee80211_assign_chanctx(struct ieee80211_local *local, +				     struct ieee80211_sub_if_data *sdata) +{ +	struct ieee80211_chanctx_conf *conf; +	struct ieee80211_chanctx *ctx; + +	if (!local->use_chanctx) +		return; + +	mutex_lock(&local->chanctx_mtx); +	conf = rcu_dereference_protected(sdata->vif.chanctx_conf, +					 lockdep_is_held(&local->chanctx_mtx)); +	if (conf) { +		ctx = container_of(conf, struct ieee80211_chanctx, conf); +		drv_assign_vif_chanctx(local, sdata, ctx); +	} +	mutex_unlock(&local->chanctx_mtx); +} +  int ieee80211_reconfig(struct ieee80211_local *local)  {  	struct ieee80211_hw *hw = &local->hw; @@ -1421,6 +1505,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)  	/* add interfaces */  	sdata = rtnl_dereference(local->monitor_sdata);  	if (sdata) { +		/* in HW restart it exists already */ +		WARN_ON(local->resuming);  		
res = drv_add_interface(local, sdata);  		if (WARN_ON(res)) {  			rcu_assign_pointer(local->monitor_sdata, NULL); @@ -1445,36 +1531,14 @@ int ieee80211_reconfig(struct ieee80211_local *local)  	}  	list_for_each_entry(sdata, &local->interfaces, list) { -		struct ieee80211_chanctx_conf *ctx_conf; -  		if (!ieee80211_sdata_running(sdata))  			continue; - -		mutex_lock(&local->chanctx_mtx); -		ctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf, -				lockdep_is_held(&local->chanctx_mtx)); -		if (ctx_conf) { -			ctx = container_of(ctx_conf, struct ieee80211_chanctx, -					   conf); -			drv_assign_vif_chanctx(local, sdata, ctx); -		} -		mutex_unlock(&local->chanctx_mtx); +		ieee80211_assign_chanctx(local, sdata);  	}  	sdata = rtnl_dereference(local->monitor_sdata); -	if (sdata && local->use_chanctx && ieee80211_sdata_running(sdata)) { -		struct ieee80211_chanctx_conf *ctx_conf; - -		mutex_lock(&local->chanctx_mtx); -		ctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf, -				lockdep_is_held(&local->chanctx_mtx)); -		if (ctx_conf) { -			ctx = container_of(ctx_conf, struct ieee80211_chanctx, -					   conf); -			drv_assign_vif_chanctx(local, sdata, ctx); -		} -		mutex_unlock(&local->chanctx_mtx); -	} +	if (sdata && ieee80211_sdata_running(sdata)) +		ieee80211_assign_chanctx(local, sdata);  	/* add STAs back */  	mutex_lock(&local->sta_mtx); @@ -1534,11 +1598,6 @@ int ieee80211_reconfig(struct ieee80211_local *local)  			  BSS_CHANGED_IDLE |  			  BSS_CHANGED_TXPOWER; -#ifdef CONFIG_PM -		if (local->resuming && !reconfig_due_to_wowlan) -			sdata->vif.bss_conf = sdata->suspend_bss_conf; -#endif -  		switch (sdata->vif.type) {  		case NL80211_IFTYPE_STATION:  			changed |= BSS_CHANGED_ASSOC | @@ -1637,6 +1696,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)  	local->in_reconfig = false;  	barrier(); +	if (local->monitors == local->open_count && local->monitors > 0) +		ieee80211_add_virtual_monitor(local); +  	/*  	 * Clear the WLAN_STA_BLOCK_BA flag so new aggregation  	 * sessions can be established after a resume. @@ -1659,8 +1721,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)  		mutex_unlock(&local->sta_mtx);  	} -	ieee80211_wake_queues_by_reason(hw, -			IEEE80211_QUEUE_STOP_REASON_SUSPEND); +	ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP, +					IEEE80211_QUEUE_STOP_REASON_SUSPEND);  	/*  	 * If this is for hw restart things are still running. 
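The queue rework threaded through this diff replaces whole-device stops with a per-queue bitmap: ieee80211_stop_queues_by_reason() and ieee80211_wake_queues_by_reason() now take an unsigned long queues mask walked with for_each_set_bit(), IEEE80211_MAX_QUEUE_MAP selects every queue, and the new ieee80211_flush_queues() composes a vif-specific mask from each AC's hw_queue plus the CAB queue when IEEE80211_HW_QUEUE_CONTROL is set. A minimal sketch of that mask composition (plain userspace C, not part of the diff; NUM_ACS, INVAL_HW_QUEUE and struct vif below are simplified stand-ins for IEEE80211_NUM_ACS, IEEE80211_INVAL_HW_QUEUE and the mac80211 vif structure):

	/*
	 * Illustrative only -- models the per-vif branch of
	 * ieee80211_flush_queues() with invented constants.
	 */
	#include <stdio.h>

	#define NUM_ACS		4	/* stand-in for IEEE80211_NUM_ACS */
	#define INVAL_HW_QUEUE	0xff	/* stand-in for IEEE80211_INVAL_HW_QUEUE */

	struct vif {
		unsigned char hw_queue[NUM_ACS];	/* per-AC hardware queue */
		unsigned char cab_queue;		/* content-after-beacon queue */
	};

	/* Compose the queue bitmap the way the flush helper above does. */
	static unsigned long vif_queue_mask(const struct vif *vif)
	{
		unsigned long queues = 0;
		int ac;

		for (ac = 0; ac < NUM_ACS; ac++)
			queues |= 1UL << vif->hw_queue[ac];
		if (vif->cab_queue != INVAL_HW_QUEUE)
			queues |= 1UL << vif->cab_queue;
		return queues;
	}

	int main(void)
	{
		/* hypothetical AC-to-queue mapping for demonstration */
		struct vif v = { .hw_queue = { 0, 1, 2, 3 }, .cab_queue = 8 };
		unsigned long queues = vif_queue_mask(&v);
		int i;

		/* same walk as for_each_set_bit(i, &queues, hw->queues) */
		for (i = 0; i < 16; i++)
			if (queues & (1UL << i))
				printf("flush hw queue %d\n", i);
		return 0;
	}

Without per-vif queue control the helper falls back to BIT(local->hw.queues) - 1, i.e. every hardware queue, as the ieee80211_flush_queues() hunk earlier in this diff shows.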
@@ -1678,28 +1740,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)  	mb();  	local->resuming = false; -	list_for_each_entry(sdata, &local->interfaces, list) { -		switch(sdata->vif.type) { -		case NL80211_IFTYPE_STATION: -			ieee80211_sta_restart(sdata); -			break; -		case NL80211_IFTYPE_ADHOC: -			ieee80211_ibss_restart(sdata); -			break; -		case NL80211_IFTYPE_MESH_POINT: -			ieee80211_mesh_restart(sdata); -			break; -		default: -			break; -		} -	} -  	mod_timer(&local->sta_cleanup, jiffies + 1); - -	mutex_lock(&local->sta_mtx); -	list_for_each_entry(sta, &local->sta_list, list) -		mesh_plink_restart(sta); -	mutex_unlock(&local->sta_mtx);  #else  	WARN_ON(1);  #endif @@ -2051,7 +2092,7 @@ int ieee80211_ave_rssi(struct ieee80211_vif *vif)  		/* non-managed type inferfaces */  		return 0;  	} -	return ifmgd->ave_beacon_signal; +	return ifmgd->ave_beacon_signal / 16;  }  EXPORT_SYMBOL_GPL(ieee80211_ave_rssi); @@ -2166,8 +2207,7 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work)  		/* currently not handled */  		WARN_ON(1);  	else { -		cfg80211_chandef_create(&chandef, local->hw.conf.channel, -					local->hw.conf.channel_type); +		chandef = local->hw.conf.chandef;  		cfg80211_radar_event(local->hw.wiphy, &chandef, GFP_KERNEL);  	}  } diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index a2c2258bc84..171344d4eb7 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -13,6 +13,104 @@  #include "rate.h" +static void __check_vhtcap_disable(struct ieee80211_sub_if_data *sdata, +				   struct ieee80211_sta_vht_cap *vht_cap, +				   u32 flag) +{ +	__le32 le_flag = cpu_to_le32(flag); + +	if (sdata->u.mgd.vht_capa_mask.vht_cap_info & le_flag && +	    !(sdata->u.mgd.vht_capa.vht_cap_info & le_flag)) +		vht_cap->cap &= ~flag; +} + +void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata, +				      struct ieee80211_sta_vht_cap *vht_cap) +{ +	int i; +	u16 rxmcs_mask, rxmcs_cap, rxmcs_n, txmcs_mask, txmcs_cap, txmcs_n; + +	if (!vht_cap->vht_supported) +		return; + +	if (sdata->vif.type != NL80211_IFTYPE_STATION) +		return; + +	__check_vhtcap_disable(sdata, vht_cap, +			       IEEE80211_VHT_CAP_RXLDPC); +	__check_vhtcap_disable(sdata, vht_cap, +			       IEEE80211_VHT_CAP_SHORT_GI_80); +	__check_vhtcap_disable(sdata, vht_cap, +			       IEEE80211_VHT_CAP_SHORT_GI_160); +	__check_vhtcap_disable(sdata, vht_cap, +			       IEEE80211_VHT_CAP_TXSTBC); +	__check_vhtcap_disable(sdata, vht_cap, +			       IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE); +	__check_vhtcap_disable(sdata, vht_cap, +			       IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE); +	__check_vhtcap_disable(sdata, vht_cap, +			       IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN); +	__check_vhtcap_disable(sdata, vht_cap, +			       IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN); + +	/* Allow user to decrease AMPDU length exponent */ +	if (sdata->u.mgd.vht_capa_mask.vht_cap_info & +	    cpu_to_le32(IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK)) { +		u32 cap, n; + +		n = le32_to_cpu(sdata->u.mgd.vht_capa.vht_cap_info) & +			IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; +		n >>= IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT; +		cap = vht_cap->cap & IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; +		cap >>= IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT; + +		if (n < cap) { +			vht_cap->cap &= +				~IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; +			vht_cap->cap |= +				n << IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT; +		} +	} + +	/* Allow the user to decrease MCSes */ +	rxmcs_mask = +		
le16_to_cpu(sdata->u.mgd.vht_capa_mask.supp_mcs.rx_mcs_map); +	rxmcs_n = le16_to_cpu(sdata->u.mgd.vht_capa.supp_mcs.rx_mcs_map); +	rxmcs_n &= rxmcs_mask; +	rxmcs_cap = le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map); + +	txmcs_mask = +		le16_to_cpu(sdata->u.mgd.vht_capa_mask.supp_mcs.tx_mcs_map); +	txmcs_n = le16_to_cpu(sdata->u.mgd.vht_capa.supp_mcs.tx_mcs_map); +	txmcs_n &= txmcs_mask; +	txmcs_cap = le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map); +	for (i = 0; i < 8; i++) { +		u8 m, n, c; + +		m = (rxmcs_mask >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; +		n = (rxmcs_n >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; +		c = (rxmcs_cap >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + +		if (m && ((c != IEEE80211_VHT_MCS_NOT_SUPPORTED && n < c) || +			  n == IEEE80211_VHT_MCS_NOT_SUPPORTED)) { +			rxmcs_cap &= ~(3 << 2*i); +			rxmcs_cap |= (rxmcs_n & (3 << 2*i)); +		} + +		m = (txmcs_mask >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; +		n = (txmcs_n >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; +		c = (txmcs_cap >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + +		if (m && ((c != IEEE80211_VHT_MCS_NOT_SUPPORTED && n < c) || +			  n == IEEE80211_VHT_MCS_NOT_SUPPORTED)) { +			txmcs_cap &= ~(3 << 2*i); +			txmcs_cap |= (txmcs_n & (3 << 2*i)); +		} +	} +	vht_cap->vht_mcs.rx_mcs_map = cpu_to_le16(rxmcs_cap); +	vht_cap->vht_mcs.tx_mcs_map = cpu_to_le16(txmcs_cap); +} +  void  ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,  				    struct ieee80211_supported_band *sband, @@ -20,6 +118,8 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,  				    struct sta_info *sta)  {  	struct ieee80211_sta_vht_cap *vht_cap = &sta->sta.vht_cap; +	struct ieee80211_sta_vht_cap own_cap; +	u32 cap_info, i;  	memset(vht_cap, 0, sizeof(*vht_cap)); @@ -35,12 +135,122 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata,  	vht_cap->vht_supported = true; -	vht_cap->cap = le32_to_cpu(vht_cap_ie->vht_cap_info); +	own_cap = sband->vht_cap; +	/* +	 * If user has specified capability overrides, take care +	 * of that if the station we're setting up is the AP that +	 * we advertised a restricted capability set to. Override +	 * our own capabilities and then use those below. 
+	 */ +	if (sdata->vif.type == NL80211_IFTYPE_STATION && +	    !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) +		ieee80211_apply_vhtcap_overrides(sdata, &own_cap); + +	/* take some capabilities as-is */ +	cap_info = le32_to_cpu(vht_cap_ie->vht_cap_info); +	vht_cap->cap = cap_info; +	vht_cap->cap &= IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 | +			IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 | +			IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | +			IEEE80211_VHT_CAP_RXLDPC | +			IEEE80211_VHT_CAP_VHT_TXOP_PS | +			IEEE80211_VHT_CAP_HTC_VHT | +			IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK | +			IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_UNSOL_MFB | +			IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB | +			IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN | +			IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN; + +	/* and some based on our own capabilities */ +	switch (own_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { +	case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: +		vht_cap->cap |= cap_info & +				IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ; +		break; +	case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: +		vht_cap->cap |= cap_info & +				IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; +		break; +	default: +		/* nothing */ +		break; +	} + +	/* symmetric capabilities */ +	vht_cap->cap |= cap_info & own_cap.cap & +			(IEEE80211_VHT_CAP_SHORT_GI_80 | +			 IEEE80211_VHT_CAP_SHORT_GI_160); + +	/* remaining ones */ +	if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) { +		vht_cap->cap |= cap_info & +				(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | +				 IEEE80211_VHT_CAP_BEAMFORMER_ANTENNAS_MAX | +				 IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MAX); +	} + +	if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE) +		vht_cap->cap |= cap_info & +				IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE; + +	if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE) +		vht_cap->cap |= cap_info & +				IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE; + +	if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE) +		vht_cap->cap |= cap_info & +				IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE; + +	if (own_cap.cap & IEEE80211_VHT_CAP_TXSTBC) +		vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_RXSTBC_MASK; + +	if (own_cap.cap & IEEE80211_VHT_CAP_RXSTBC_MASK) +		vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_TXSTBC;  	/* Copy peer MCS info, the driver might need them. 
*/  	memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs,  	       sizeof(struct ieee80211_vht_mcs_info)); +	/* but also restrict MCSes */ +	for (i = 0; i < 8; i++) { +		u16 own_rx, own_tx, peer_rx, peer_tx; + +		own_rx = le16_to_cpu(own_cap.vht_mcs.rx_mcs_map); +		own_rx = (own_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + +		own_tx = le16_to_cpu(own_cap.vht_mcs.tx_mcs_map); +		own_tx = (own_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + +		peer_rx = le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map); +		peer_rx = (peer_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + +		peer_tx = le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map); +		peer_tx = (peer_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; + +		if (peer_tx != IEEE80211_VHT_MCS_NOT_SUPPORTED) { +			if (own_rx == IEEE80211_VHT_MCS_NOT_SUPPORTED) +				peer_tx = IEEE80211_VHT_MCS_NOT_SUPPORTED; +			else if (own_rx < peer_tx) +				peer_tx = own_rx; +		} + +		if (peer_rx != IEEE80211_VHT_MCS_NOT_SUPPORTED) { +			if (own_tx == IEEE80211_VHT_MCS_NOT_SUPPORTED) +				peer_rx = IEEE80211_VHT_MCS_NOT_SUPPORTED; +			else if (own_tx < peer_rx) +				peer_rx = own_tx; +		} + +		vht_cap->vht_mcs.rx_mcs_map &= +			~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2); +		vht_cap->vht_mcs.rx_mcs_map |= cpu_to_le16(peer_rx << i * 2); + +		vht_cap->vht_mcs.tx_mcs_map &= +			~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2); +		vht_cap->vht_mcs.tx_mcs_map |= cpu_to_le16(peer_tx << i * 2); +	} + +	/* finally set up the bandwidth */  	switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) {  	case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ:  	case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: diff --git a/net/mac802154/mac802154.h b/net/mac802154/mac802154.h index a4dcaf1dd4b..d48422e2711 100644 --- a/net/mac802154/mac802154.h +++ b/net/mac802154/mac802154.h @@ -88,9 +88,7 @@ struct mac802154_sub_if_data {  #define mac802154_to_priv(_hw)	container_of(_hw, struct mac802154_priv, hw) -#define MAC802154_MAX_XMIT_ATTEMPTS	3 - -#define MAC802154_CHAN_NONE		(~(u8)0) /* No channel is assigned */ +#define MAC802154_CHAN_NONE		0xff /* No channel is assigned */  extern struct ieee802154_reduced_mlme_ops mac802154_mlme_reduced;  extern struct ieee802154_mlme_ops mac802154_mlme_wpan; @@ -114,5 +112,6 @@ void mac802154_dev_set_ieee_addr(struct net_device *dev);  u16 mac802154_dev_get_pan_id(const struct net_device *dev);  void mac802154_dev_set_pan_id(struct net_device *dev, u16 val);  void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan); +u8 mac802154_dev_get_dsn(const struct net_device *dev);  #endif /* MAC802154_H */ diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index d8d27700608..a99910d4d52 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -73,4 +73,5 @@ struct ieee802154_mlme_ops mac802154_mlme_wpan = {  	.start_req = mac802154_mlme_start_req,  	.get_pan_id = mac802154_dev_get_pan_id,  	.get_short_addr = mac802154_dev_get_short_addr, +	.get_dsn = mac802154_dev_get_dsn,  }; diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c index f47781ab0cc..8ded97cf1c3 100644 --- a/net/mac802154/mib.c +++ b/net/mac802154/mib.c @@ -159,6 +159,15 @@ void mac802154_dev_set_pan_id(struct net_device *dev, u16 val)  	}  } +u8 mac802154_dev_get_dsn(const struct net_device *dev) +{ +	struct mac802154_sub_if_data *priv = netdev_priv(dev); + +	BUG_ON(dev->type != ARPHRD_IEEE802154); + +	return priv->dsn++; +} +  static void phy_chan_notify(struct work_struct *work)  {  	struct phy_chan_notify_work *nw = 
container_of(work, @@ -167,9 +176,15 @@ static void phy_chan_notify(struct work_struct *work)  	struct mac802154_sub_if_data *priv = netdev_priv(nw->dev);  	int res; +	mutex_lock(&priv->hw->phy->pib_lock);  	res = hw->ops->set_channel(&hw->hw, priv->page, priv->chan);  	if (res)  		pr_debug("set_channel failed\n"); +	else { +		priv->hw->phy->current_channel = priv->chan; +		priv->hw->phy->current_page = priv->page; +	} +	mutex_unlock(&priv->hw->phy->pib_lock);  	kfree(nw);  } @@ -186,8 +201,11 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan)  	priv->chan = chan;  	spin_unlock_bh(&priv->mib_lock); +	mutex_lock(&priv->hw->phy->pib_lock);  	if (priv->hw->phy->current_channel != priv->chan ||  	    priv->hw->phy->current_page != priv->page) { +		mutex_unlock(&priv->hw->phy->pib_lock); +  		work = kzalloc(sizeof(*work), GFP_ATOMIC);  		if (!work)  			return; @@ -195,5 +213,6 @@ void mac802154_dev_set_page_channel(struct net_device *dev, u8 page, u8 chan)  		INIT_WORK(&work->work, phy_chan_notify);  		work->dev = dev;  		queue_work(priv->hw->dev_workqueue, &work->work); -	} +	} else +		mutex_unlock(&priv->hw->phy->pib_lock);  } diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index 4e09d070995..6d1647399d4 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -25,6 +25,7 @@  #include <linux/if_arp.h>  #include <linux/crc-ccitt.h> +#include <net/ieee802154_netdev.h>  #include <net/mac802154.h>  #include <net/wpan-phy.h> @@ -39,12 +40,12 @@ struct xmit_work {  	struct mac802154_priv *priv;  	u8 chan;  	u8 page; -	u8 xmit_attempts;  };  static void mac802154_xmit_worker(struct work_struct *work)  {  	struct xmit_work *xw = container_of(work, struct xmit_work, work); +	struct mac802154_sub_if_data *sdata;  	int res;  	mutex_lock(&xw->priv->phy->pib_lock); @@ -57,21 +58,23 @@ static void mac802154_xmit_worker(struct work_struct *work)  			pr_debug("set_channel failed\n");  			goto out;  		} + +		xw->priv->phy->current_channel = xw->chan; +		xw->priv->phy->current_page = xw->page;  	}  	res = xw->priv->ops->xmit(&xw->priv->hw, xw->skb); +	if (res) +		pr_debug("transmission failed\n");  out:  	mutex_unlock(&xw->priv->phy->pib_lock); -	if (res) { -		if (xw->xmit_attempts++ < MAC802154_MAX_XMIT_ATTEMPTS) { -			queue_work(xw->priv->dev_workqueue, &xw->work); -			return; -		} else -			pr_debug("transmission failed for %d times", -				 MAC802154_MAX_XMIT_ATTEMPTS); -	} +	/* Restart the netif queue on each sub_if_data object. */ +	rcu_read_lock(); +	list_for_each_entry_rcu(sdata, &xw->priv->slaves, list) +		netif_wake_queue(sdata->dev); +	rcu_read_unlock();  	dev_kfree_skb(xw->skb); @@ -82,6 +85,7 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,  			 u8 page, u8 chan)  {  	struct xmit_work *work; +	struct mac802154_sub_if_data *sdata;  	if (!(priv->phy->channels_supported[page] & (1 << chan))) {  		WARN_ON(1); @@ -109,12 +113,17 @@ netdev_tx_t mac802154_tx(struct mac802154_priv *priv, struct sk_buff *skb,  		return NETDEV_TX_BUSY;  	} +	/* Stop the netif queue on each sub_if_data object. 
*/ +	rcu_read_lock(); +	list_for_each_entry_rcu(sdata, &priv->slaves, list) +		netif_stop_queue(sdata->dev); +	rcu_read_unlock(); +  	INIT_WORK(&work->work, mac802154_xmit_worker);  	work->skb = skb;  	work->priv = priv;  	work->page = page;  	work->chan = chan; -	work->xmit_attempts = 0;  	queue_work(priv->dev_workqueue, &work->work); diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c index d20c6d3c247..2ca2f4dceab 100644 --- a/net/mac802154/wpan.c +++ b/net/mac802154/wpan.c @@ -145,6 +145,8 @@ static int mac802154_header_create(struct sk_buff *skb,  	head[pos++] = mac_cb(skb)->seq; /* DSN/BSN */  	fc = mac_cb_type(skb); +	if (mac_cb_is_ackreq(skb)) +		fc |= IEEE802154_FC_ACK_REQ;  	if (!saddr) {  		spin_lock_bh(&priv->mib_lock); @@ -358,7 +360,7 @@ void mac802154_wpan_setup(struct net_device *dev)  	dev->header_ops		= &mac802154_header_ops;  	dev->needed_tailroom	= 2; /* FCS */  	dev->mtu		= IEEE802154_MTU; -	dev->tx_queue_len	= 10; +	dev->tx_queue_len	= 300;  	dev->type		= ARPHRD_IEEE802154;  	dev->flags		= IFF_NOARP | IFF_BROADCAST;  	dev->watchdog_timeo	= 0; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a9c488b6c50..07c865a31a3 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -5,6 +5,7 @@   * way.   *   * Rusty Russell (C)2000 -- This code is GPL. + * Patrick McHardy (c) 2006-2012   */  #include <linux/kernel.h>  #include <linux/netfilter.h> @@ -276,10 +277,30 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);  EXPORT_SYMBOL(nf_nat_decode_session_hook);  #endif +static int __net_init netfilter_net_init(struct net *net) +{  #ifdef CONFIG_PROC_FS -struct proc_dir_entry *proc_net_netfilter; -EXPORT_SYMBOL(proc_net_netfilter); +	net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", +						net->proc_net); +	if (!net->nf.proc_netfilter) { +		if (!net_eq(net, &init_net)) +			pr_err("cannot create netfilter proc entry"); + +		return -ENOMEM; +	}  #endif +	return 0; +} + +static void __net_exit netfilter_net_exit(struct net *net) +{ +	remove_proc_entry("netfilter", net->proc_net); +} + +static struct pernet_operations netfilter_net_ops = { +	.init = netfilter_net_init, +	.exit = netfilter_net_exit, +};  void __init netfilter_init(void)  { @@ -289,11 +310,8 @@ void __init netfilter_init(void)  			INIT_LIST_HEAD(&nf_hooks[i][h]);  	} -#ifdef CONFIG_PROC_FS -	proc_net_netfilter = proc_mkdir("netfilter", init_net.proc_net); -	if (!proc_net_netfilter) +	if (register_pernet_subsys(&netfilter_net_ops) < 0)  		panic("cannot create netfilter proc entry"); -#endif  	if (netfilter_log_init() < 0)  		panic("cannot initialize nf_log"); diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h new file mode 100644 index 00000000000..25243379b88 --- /dev/null +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -0,0 +1,277 @@ +/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __IP_SET_BITMAP_IP_GEN_H +#define __IP_SET_BITMAP_IP_GEN_H + +#define CONCAT(a, b)		a##b +#define TOKEN(a,b)		CONCAT(a, b) + +#define mtype_do_test		TOKEN(MTYPE, _do_test) +#define mtype_gc_test		TOKEN(MTYPE, _gc_test) +#define mtype_is_filled		TOKEN(MTYPE, _is_filled) +#define mtype_do_add		TOKEN(MTYPE, _do_add) +#define mtype_do_del		TOKEN(MTYPE, _do_del) +#define mtype_do_list		TOKEN(MTYPE, _do_list) +#define mtype_do_head		TOKEN(MTYPE, _do_head) +#define mtype_adt_elem		TOKEN(MTYPE, _adt_elem) +#define mtype_add_timeout	TOKEN(MTYPE, _add_timeout) +#define mtype_gc_init		TOKEN(MTYPE, _gc_init) +#define mtype_kadt		TOKEN(MTYPE, _kadt) +#define mtype_uadt		TOKEN(MTYPE, _uadt) +#define mtype_destroy		TOKEN(MTYPE, _destroy) +#define mtype_flush		TOKEN(MTYPE, _flush) +#define mtype_head		TOKEN(MTYPE, _head) +#define mtype_same_set		TOKEN(MTYPE, _same_set) +#define mtype_elem		TOKEN(MTYPE, _elem) +#define mtype_test		TOKEN(MTYPE, _test) +#define mtype_add		TOKEN(MTYPE, _add) +#define mtype_del		TOKEN(MTYPE, _del) +#define mtype_list		TOKEN(MTYPE, _list) +#define mtype_gc		TOKEN(MTYPE, _gc) +#define mtype			MTYPE + +#define ext_timeout(e, m)	\ +	(unsigned long *)((e) + (m)->offset[IPSET_OFFSET_TIMEOUT]) +#define ext_counter(e, m)	\ +	(struct ip_set_counter *)((e) + (m)->offset[IPSET_OFFSET_COUNTER]) +#define get_ext(map, id)	((map)->extensions + (map)->dsize * (id)) + +static void +mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) +{ +	struct mtype *map = set->data; + +	init_timer(&map->gc); +	map->gc.data = (unsigned long) set; +	map->gc.function = gc; +	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; +	add_timer(&map->gc); +} + +static void +mtype_destroy(struct ip_set *set) +{ +	struct mtype *map = set->data; + +	if (SET_WITH_TIMEOUT(set)) +		del_timer_sync(&map->gc); + +	ip_set_free(map->members); +	if (map->dsize) +		ip_set_free(map->extensions); +	kfree(map); + +	set->data = NULL; +} + +static void +mtype_flush(struct ip_set *set) +{ +	struct mtype *map = set->data; + +	memset(map->members, 0, map->memsize); +} + +static int +mtype_head(struct ip_set *set, struct sk_buff *skb) +{ +	const struct mtype *map = set->data; +	struct nlattr *nested; + +	nested = ipset_nest_start(skb, IPSET_ATTR_DATA); +	if (!nested) +		goto nla_put_failure; +	if (mtype_do_head(skb, map) || +	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || +	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, +			  htonl(sizeof(*map) + +				map->memsize + +				map->dsize * map->elements)) || +	    (SET_WITH_TIMEOUT(set) && +	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) || +	    (SET_WITH_COUNTER(set) && +	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, +			   htonl(IPSET_FLAG_WITH_COUNTERS)))) +		goto nla_put_failure; +	ipset_nest_end(skb, nested); + +	return 0; +nla_put_failure: +	return -EMSGSIZE; +} + +static int +mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, +	   struct ip_set_ext *mext, u32 flags) +{ +	struct mtype *map = set->data; +	const struct mtype_adt_elem *e = value; +	void *x = get_ext(map, e->id); +	int ret = mtype_do_test(e, map); + +	if (ret <= 0) +		return ret; +	if (SET_WITH_TIMEOUT(set) && +	    ip_set_timeout_expired(ext_timeout(x, map))) +		return 0; +	if (SET_WITH_COUNTER(set)) +		ip_set_update_counter(ext_counter(x, map), ext, mext, flags); +	return 1; +} + +static int +mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, +	  struct ip_set_ext *mext, u32 flags) +{ +	struct 
mtype *map = set->data; +	const struct mtype_adt_elem *e = value; +	void *x = get_ext(map, e->id); +	int ret = mtype_do_add(e, map, flags); + +	if (ret == IPSET_ADD_FAILED) { +		if (SET_WITH_TIMEOUT(set) && +		    ip_set_timeout_expired(ext_timeout(x, map))) +			ret = 0; +		else if (!(flags & IPSET_FLAG_EXIST)) +			return -IPSET_ERR_EXIST; +	} + +	if (SET_WITH_TIMEOUT(set)) +#ifdef IP_SET_BITMAP_STORED_TIMEOUT +		mtype_add_timeout(ext_timeout(x, map), e, ext, map, ret); +#else +		ip_set_timeout_set(ext_timeout(x, map), ext->timeout); +#endif + +	if (SET_WITH_COUNTER(set)) +		ip_set_init_counter(ext_counter(x, map), ext); +	return 0; +} + +static int +mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, +	  struct ip_set_ext *mext, u32 flags) +{ +	struct mtype *map = set->data; +	const struct mtype_adt_elem *e = value; +	const void *x = get_ext(map, e->id); + +	if (mtype_do_del(e, map) || +	    (SET_WITH_TIMEOUT(set) && +	     ip_set_timeout_expired(ext_timeout(x, map)))) +		return -IPSET_ERR_EXIST; + +	return 0; +} + +static int +mtype_list(const struct ip_set *set, +	   struct sk_buff *skb, struct netlink_callback *cb) +{ +	struct mtype *map = set->data; +	struct nlattr *adt, *nested; +	void *x; +	u32 id, first = cb->args[2]; + +	adt = ipset_nest_start(skb, IPSET_ATTR_ADT); +	if (!adt) +		return -EMSGSIZE; +	for (; cb->args[2] < map->elements; cb->args[2]++) { +		id = cb->args[2]; +		x = get_ext(map, id); +		if (!test_bit(id, map->members) || +		    (SET_WITH_TIMEOUT(set) && +#ifdef IP_SET_BITMAP_STORED_TIMEOUT +		     mtype_is_filled((const struct mtype_elem *) x) && +#endif +		     ip_set_timeout_expired(ext_timeout(x, map)))) +			continue; +		nested = ipset_nest_start(skb, IPSET_ATTR_DATA); +		if (!nested) { +			if (id == first) { +				nla_nest_cancel(skb, adt); +				return -EMSGSIZE; +			} else +				goto nla_put_failure; +		} +		if (mtype_do_list(skb, map, id)) +			goto nla_put_failure; +		if (SET_WITH_TIMEOUT(set)) { +#ifdef IP_SET_BITMAP_STORED_TIMEOUT +			if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, +					  htonl(ip_set_timeout_stored(map, id, +							ext_timeout(x, map))))) +				goto nla_put_failure; +#else +			if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, +					  htonl(ip_set_timeout_get( +							ext_timeout(x, map))))) +				goto nla_put_failure; +#endif +		} +		if (SET_WITH_COUNTER(set) && +		    ip_set_put_counter(skb, ext_counter(x, map))) +			goto nla_put_failure; +		ipset_nest_end(skb, nested); +	} +	ipset_nest_end(skb, adt); + +	/* Set listing finished */ +	cb->args[2] = 0; + +	return 0; + +nla_put_failure: +	nla_nest_cancel(skb, nested); +	ipset_nest_end(skb, adt); +	if (unlikely(id == first)) { +		cb->args[2] = 0; +		return -EMSGSIZE; +	} +	return 0; +} + +static void +mtype_gc(unsigned long ul_set) +{ +	struct ip_set *set = (struct ip_set *) ul_set; +	struct mtype *map = set->data; +	const void *x; +	u32 id; + +	/* We run parallel with other readers (test element) +	 * but adding/deleting new entries is locked out */ +	read_lock_bh(&set->lock); +	for (id = 0; id < map->elements; id++) +		if (mtype_gc_test(id, map)) { +			x = get_ext(map, id); +			if (ip_set_timeout_expired(ext_timeout(x, map))) +				clear_bit(id, map->members); +		} +	read_unlock_bh(&set->lock); + +	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; +	add_timer(&map->gc); +} + +static const struct ip_set_type_variant mtype = { +	.kadt	= mtype_kadt, +	.uadt	= mtype_uadt, +	.adt	= { +		[IPSET_ADD] = mtype_add, +		[IPSET_DEL] = mtype_del, +		[IPSET_TEST] = mtype_test, +	}, +	
.destroy = mtype_destroy, +	.flush	= mtype_flush, +	.head	= mtype_head, +	.list	= mtype_list, +	.same_set = mtype_same_set, +}; + +#endif /* __IP_SET_BITMAP_IP_GEN_H */ diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 4a92fd47bd4..f1a8128bef0 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -1,6 +1,6 @@  /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>   *                         Patrick Schaaf <bof@bof.de> - * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -24,31 +24,37 @@  #include <linux/netfilter/ipset/pfxlen.h>  #include <linux/netfilter/ipset/ip_set.h>  #include <linux/netfilter/ipset/ip_set_bitmap.h> -#define IP_SET_BITMAP_TIMEOUT -#include <linux/netfilter/ipset/ip_set_timeout.h>  #define REVISION_MIN	0 -#define REVISION_MAX	0 +#define REVISION_MAX	1	/* Counter support added */  MODULE_LICENSE("GPL");  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");  IP_SET_MODULE_DESC("bitmap:ip", REVISION_MIN, REVISION_MAX);  MODULE_ALIAS("ip_set_bitmap:ip"); +#define MTYPE		bitmap_ip +  /* Type structure */  struct bitmap_ip {  	void *members;		/* the set members */ +	void *extensions;	/* data extensions */  	u32 first_ip;		/* host byte order, included in range */  	u32 last_ip;		/* host byte order, included in range */  	u32 elements;		/* number of max elements in the set */  	u32 hosts;		/* number of hosts in a subnet */  	size_t memsize;		/* members size */ +	size_t dsize;		/* extensions struct size */ +	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */  	u8 netmask;		/* subnet netmask */  	u32 timeout;		/* timeout parameter */  	struct timer_list gc;	/* garbage collection */  }; -/* Base variant */ +/* ADT structure for generic function args */ +struct bitmap_ip_adt_elem { +	u16 id; +};  static inline u32  ip_to_id(const struct bitmap_ip *m, u32 ip) @@ -56,188 +62,67 @@ ip_to_id(const struct bitmap_ip *m, u32 ip)  	return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip)/m->hosts;  } -static int -bitmap_ip_test(struct ip_set *set, void *value, u32 timeout, u32 flags) -{ -	const struct bitmap_ip *map = set->data; -	u16 id = *(u16 *)value; - -	return !!test_bit(id, map->members); -} +/* Common functions */ -static int -bitmap_ip_add(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)  { -	struct bitmap_ip *map = set->data; -	u16 id = *(u16 *)value; - -	if (test_and_set_bit(id, map->members)) -		return -IPSET_ERR_EXIST; - -	return 0; +	return !!test_bit(e->id, map->members);  } -static int -bitmap_ip_del(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map)  { -	struct bitmap_ip *map = set->data; -	u16 id = *(u16 *)value; - -	if (!test_and_clear_bit(id, map->members)) -		return -IPSET_ERR_EXIST; - -	return 0; +	return !!test_bit(id, map->members);  } -static int -bitmap_ip_list(const struct ip_set *set, -	       struct sk_buff *skb, struct netlink_callback *cb) +static inline int +bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map, +		 u32 flags)  { -	const struct bitmap_ip *map = set->data; -	struct nlattr *atd, 
*nested; -	u32 id, first = cb->args[2]; - -	atd = ipset_nest_start(skb, IPSET_ATTR_ADT); -	if (!atd) -		return -EMSGSIZE; -	for (; cb->args[2] < map->elements; cb->args[2]++) { -		id = cb->args[2]; -		if (!test_bit(id, map->members)) -			continue; -		nested = ipset_nest_start(skb, IPSET_ATTR_DATA); -		if (!nested) { -			if (id == first) { -				nla_nest_cancel(skb, atd); -				return -EMSGSIZE; -			} else -				goto nla_put_failure; -		} -		if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, -				    htonl(map->first_ip + id * map->hosts))) -			goto nla_put_failure; -		ipset_nest_end(skb, nested); -	} -	ipset_nest_end(skb, atd); -	/* Set listing finished */ -	cb->args[2] = 0; -	return 0; - -nla_put_failure: -	nla_nest_cancel(skb, nested); -	ipset_nest_end(skb, atd); -	if (unlikely(id == first)) { -		cb->args[2] = 0; -		return -EMSGSIZE; -	} -	return 0; +	return !!test_and_set_bit(e->id, map->members);  } -/* Timeout variant */ - -static int -bitmap_ip_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)  { -	const struct bitmap_ip *map = set->data; -	const unsigned long *members = map->members; -	u16 id = *(u16 *)value; - -	return ip_set_timeout_test(members[id]); +	return !test_and_clear_bit(e->id, map->members);  } -static int -bitmap_ip_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id)  { -	struct bitmap_ip *map = set->data; -	unsigned long *members = map->members; -	u16 id = *(u16 *)value; - -	if (ip_set_timeout_test(members[id]) && !(flags & IPSET_FLAG_EXIST)) -		return -IPSET_ERR_EXIST; - -	members[id] = ip_set_timeout_set(timeout); - -	return 0; +	return nla_put_ipaddr4(skb, IPSET_ATTR_IP, +			htonl(map->first_ip + id * map->hosts));  } -static int -bitmap_ip_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_ip_do_head(struct sk_buff *skb, const struct bitmap_ip *map)  { -	struct bitmap_ip *map = set->data; -	unsigned long *members = map->members; -	u16 id = *(u16 *)value; -	int ret = -IPSET_ERR_EXIST; - -	if (ip_set_timeout_test(members[id])) -		ret = 0; - -	members[id] = IPSET_ELEM_UNSET; -	return ret; -} - -static int -bitmap_ip_tlist(const struct ip_set *set, -		struct sk_buff *skb, struct netlink_callback *cb) -{ -	const struct bitmap_ip *map = set->data; -	struct nlattr *adt, *nested; -	u32 id, first = cb->args[2]; -	const unsigned long *members = map->members; - -	adt = ipset_nest_start(skb, IPSET_ATTR_ADT); -	if (!adt) -		return -EMSGSIZE; -	for (; cb->args[2] < map->elements; cb->args[2]++) { -		id = cb->args[2]; -		if (!ip_set_timeout_test(members[id])) -			continue; -		nested = ipset_nest_start(skb, IPSET_ATTR_DATA); -		if (!nested) { -			if (id == first) { -				nla_nest_cancel(skb, adt); -				return -EMSGSIZE; -			} else -				goto nla_put_failure; -		} -		if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, -				    htonl(map->first_ip + id * map->hosts)) || -		    nla_put_net32(skb, IPSET_ATTR_TIMEOUT, -				  htonl(ip_set_timeout_get(members[id])))) -			goto nla_put_failure; -		ipset_nest_end(skb, nested); -	} -	ipset_nest_end(skb, adt); - -	/* Set listing finished */ -	cb->args[2] = 0; - -	return 0; - -nla_put_failure: -	nla_nest_cancel(skb, nested); -	ipset_nest_end(skb, adt); -	if (unlikely(id == first)) { -		cb->args[2] = 0; -		return -EMSGSIZE; -	} -	return 0; +	return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) || +	       
nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)) || +	       (map->netmask != 32 && +		nla_put_u8(skb, IPSET_ATTR_NETMASK, map->netmask));  }  static int  bitmap_ip_kadt(struct ip_set *set, const struct sk_buff *skb,  	       const struct xt_action_param *par, -	       enum ipset_adt adt, const struct ip_set_adt_opt *opt) +	       enum ipset_adt adt, struct ip_set_adt_opt *opt)  {  	struct bitmap_ip *map = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; +	struct bitmap_ip_adt_elem e = { }; +	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);  	u32 ip;  	ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC));  	if (ip < map->first_ip || ip > map->last_ip)  		return -IPSET_ERR_BITMAP_RANGE; -	ip = ip_to_id(map, ip); +	e.id = ip_to_id(map, ip); -	return adtfn(set, &ip, opt_timeout(opt, map), opt->cmdflags); +	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);  }  static int @@ -246,33 +131,31 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],  {  	struct bitmap_ip *map = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	u32 timeout = map->timeout; -	u32 ip, ip_to, id; +	u32 ip, ip_to; +	struct bitmap_ip_adt_elem e = { }; +	struct ip_set_ext ext = IP_SET_INIT_UEXT(map);  	int ret = 0;  	if (unlikely(!tb[IPSET_ATTR_IP] || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_LINENO])  		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); -	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); +	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || +	      ip_set_get_extensions(set, tb, &ext);  	if (ret)  		return ret;  	if (ip < map->first_ip || ip > map->last_ip)  		return -IPSET_ERR_BITMAP_RANGE; -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!with_timeout(map->timeout)) -			return -IPSET_ERR_TIMEOUT; -		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -	} -  	if (adt == IPSET_TEST) { -		id = ip_to_id(map, ip); -		return adtfn(set, &id, timeout, flags); +		e.id = ip_to_id(map, ip); +		return adtfn(set, &e, &ext, &ext, flags);  	}  	if (tb[IPSET_ATTR_IP_TO]) { @@ -297,8 +180,8 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],  		return -IPSET_ERR_BITMAP_RANGE;  	for (; !before(ip_to, ip); ip += map->hosts) { -		id = ip_to_id(map, ip); -		ret = adtfn(set, &id, timeout, flags); +		e.id = ip_to_id(map, ip); +		ret = adtfn(set, &e, &ext, &ext, flags);  		if (ret && !ip_set_eexist(ret, flags))  			return ret; @@ -308,54 +191,6 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],  	return ret;  } -static void -bitmap_ip_destroy(struct ip_set *set) -{ -	struct bitmap_ip *map = set->data; - -	if (with_timeout(map->timeout)) -		del_timer_sync(&map->gc); - -	ip_set_free(map->members); -	kfree(map); - -	set->data = NULL; -} - -static void -bitmap_ip_flush(struct ip_set *set) -{ -	struct bitmap_ip *map = set->data; - -	memset(map->members, 0, map->memsize); -} - -static int -bitmap_ip_head(struct ip_set *set, struct sk_buff *skb) -{ -	const struct bitmap_ip *map = set->data; -	struct nlattr *nested; - -	nested = ipset_nest_start(skb, IPSET_ATTR_DATA); -	if (!nested) -		goto nla_put_failure; -	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) || -	    nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)) || -	    (map->netmask != 32 && -	     nla_put_u8(skb, IPSET_ATTR_NETMASK, map->netmask)) || -	    
nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || -	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, -			  htonl(sizeof(*map) + map->memsize)) || -	    (with_timeout(map->timeout) && -	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)))) -		goto nla_put_failure; -	ipset_nest_end(skb, nested); - -	return 0; -nla_put_failure: -	return -EMSGSIZE; -} -  static bool  bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)  { @@ -365,70 +200,35 @@ bitmap_ip_same_set(const struct ip_set *a, const struct ip_set *b)  	return x->first_ip == y->first_ip &&  	       x->last_ip == y->last_ip &&  	       x->netmask == y->netmask && -	       x->timeout == y->timeout; +	       x->timeout == y->timeout && +	       a->extensions == b->extensions;  } -static const struct ip_set_type_variant bitmap_ip = { -	.kadt	= bitmap_ip_kadt, -	.uadt	= bitmap_ip_uadt, -	.adt	= { -		[IPSET_ADD] = bitmap_ip_add, -		[IPSET_DEL] = bitmap_ip_del, -		[IPSET_TEST] = bitmap_ip_test, -	}, -	.destroy = bitmap_ip_destroy, -	.flush	= bitmap_ip_flush, -	.head	= bitmap_ip_head, -	.list	= bitmap_ip_list, -	.same_set = bitmap_ip_same_set, +/* Plain variant */ + +struct bitmap_ip_elem {  }; -static const struct ip_set_type_variant bitmap_tip = { -	.kadt	= bitmap_ip_kadt, -	.uadt	= bitmap_ip_uadt, -	.adt	= { -		[IPSET_ADD] = bitmap_ip_tadd, -		[IPSET_DEL] = bitmap_ip_tdel, -		[IPSET_TEST] = bitmap_ip_ttest, -	}, -	.destroy = bitmap_ip_destroy, -	.flush	= bitmap_ip_flush, -	.head	= bitmap_ip_head, -	.list	= bitmap_ip_tlist, -	.same_set = bitmap_ip_same_set, +/* Timeout variant */ + +struct bitmap_ipt_elem { +	unsigned long timeout;  }; -static void -bitmap_ip_gc(unsigned long ul_set) -{ -	struct ip_set *set = (struct ip_set *) ul_set; -	struct bitmap_ip *map = set->data; -	unsigned long *table = map->members; -	u32 id; +/* Plain variant with counter */ -	/* We run parallel with other readers (test element) -	 * but adding/deleting new entries is locked out */ -	read_lock_bh(&set->lock); -	for (id = 0; id < map->elements; id++) -		if (ip_set_timeout_expired(table[id])) -			table[id] = IPSET_ELEM_UNSET; -	read_unlock_bh(&set->lock); +struct bitmap_ipc_elem { +	struct ip_set_counter counter; +}; -	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; -	add_timer(&map->gc); -} +/* Timeout variant with counter */ -static void -bitmap_ip_gc_init(struct ip_set *set) -{ -	struct bitmap_ip *map = set->data; +struct bitmap_ipct_elem { +	unsigned long timeout; +	struct ip_set_counter counter; +}; -	init_timer(&map->gc); -	map->gc.data = (unsigned long) set; -	map->gc.function = bitmap_ip_gc; -	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; -	add_timer(&map->gc); -} +#include "ip_set_bitmap_gen.h"  /* Create bitmap:ip type of sets */ @@ -440,6 +240,13 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,  	map->members = ip_set_alloc(map->memsize);  	if (!map->members)  		return false; +	if (map->dsize) { +		map->extensions = ip_set_alloc(map->dsize * elements); +		if (!map->extensions) { +			kfree(map->members); +			return false; +		} +	}  	map->first_ip = first_ip;  	map->last_ip = last_ip;  	map->elements = elements; @@ -457,13 +264,14 @@ static int  bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)  {  	struct bitmap_ip *map; -	u32 first_ip, last_ip, hosts; +	u32 first_ip, last_ip, hosts, cadt_flags = 0;  	u64 elements;  	u8 netmask = 32;  	int ret;  	if (unlikely(!tb[IPSET_ATTR_IP] || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) +		     
!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))  		return -IPSET_ERR_PROTOCOL;  	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip); @@ -526,8 +334,45 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)  	if (!map)  		return -ENOMEM; -	if (tb[IPSET_ATTR_TIMEOUT]) { -		map->memsize = elements * sizeof(unsigned long); +	map->memsize = bitmap_bytes(0, elements - 1); +	set->variant = &bitmap_ip; +	if (tb[IPSET_ATTR_CADT_FLAGS]) +		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); +	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { +		set->extensions |= IPSET_EXT_COUNTER; +		if (tb[IPSET_ATTR_TIMEOUT]) { +			map->dsize = sizeof(struct bitmap_ipct_elem); +			map->offset[IPSET_OFFSET_TIMEOUT] = +				offsetof(struct bitmap_ipct_elem, timeout); +			map->offset[IPSET_OFFSET_COUNTER] = +				offsetof(struct bitmap_ipct_elem, counter); + +			if (!init_map_ip(set, map, first_ip, last_ip, +					 elements, hosts, netmask)) { +				kfree(map); +				return -ENOMEM; +			} + +			map->timeout = ip_set_timeout_uget( +				tb[IPSET_ATTR_TIMEOUT]); +			set->extensions |= IPSET_EXT_TIMEOUT; + +			bitmap_ip_gc_init(set, bitmap_ip_gc); +		} else { +			map->dsize = sizeof(struct bitmap_ipc_elem); +			map->offset[IPSET_OFFSET_COUNTER] = +				offsetof(struct bitmap_ipc_elem, counter); + +			if (!init_map_ip(set, map, first_ip, last_ip, +					 elements, hosts, netmask)) { +				kfree(map); +				return -ENOMEM; +			} +		} +	} else if (tb[IPSET_ATTR_TIMEOUT]) { +		map->dsize = sizeof(struct bitmap_ipt_elem); +		map->offset[IPSET_OFFSET_TIMEOUT] = +			offsetof(struct bitmap_ipt_elem, timeout);  		if (!init_map_ip(set, map, first_ip, last_ip,  				 elements, hosts, netmask)) { @@ -536,19 +381,16 @@ bitmap_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags)  		}  		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -		set->variant = &bitmap_tip; +		set->extensions |= IPSET_EXT_TIMEOUT; -		bitmap_ip_gc_init(set); +		bitmap_ip_gc_init(set, bitmap_ip_gc);  	} else { -		map->memsize = bitmap_bytes(0, elements - 1); - +		map->dsize = 0;  		if (!init_map_ip(set, map, first_ip, last_ip,  				 elements, hosts, netmask)) {  			kfree(map);  			return -ENOMEM;  		} - -		set->variant = &bitmap_ip;  	}  	return 0;  } @@ -568,6 +410,7 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {  		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },  		[IPSET_ATTR_NETMASK]	= { .type = NLA_U8  },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 }, +		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },  	},  	.adt_policy	= {  		[IPSET_ATTR_IP]		= { .type = NLA_NESTED }, @@ -575,6 +418,8 @@ static struct ip_set_type bitmap_ip_type __read_mostly = {  		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },  		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 }, +		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 }, +		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },  	},  	.me		= THIS_MODULE,  }; diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index d7df6ac2c6f..3b30e0bef89 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -1,7 +1,7 @@  /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>   *                         Patrick Schaaf <bof@bof.de>   *			   Martin Josefsson <gandalf@wlug.westbo.se> - * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>   
*   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -23,344 +23,208 @@  #include <linux/netfilter/ipset/pfxlen.h>  #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h>  #include <linux/netfilter/ipset/ip_set_bitmap.h>  #define REVISION_MIN	0 -#define REVISION_MAX	0 +#define REVISION_MAX	1	/* Counter support added */  MODULE_LICENSE("GPL");  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");  IP_SET_MODULE_DESC("bitmap:ip,mac", REVISION_MIN, REVISION_MAX);  MODULE_ALIAS("ip_set_bitmap:ip,mac"); +#define MTYPE		bitmap_ipmac +#define IP_SET_BITMAP_STORED_TIMEOUT +  enum { -	MAC_EMPTY,		/* element is not set */ -	MAC_FILLED,		/* element is set with MAC */  	MAC_UNSET,		/* element is set, without MAC */ +	MAC_FILLED,		/* element is set with MAC */  };  /* Type structure */  struct bitmap_ipmac {  	void *members;		/* the set members */ +	void *extensions;	/* MAC + data extensions */  	u32 first_ip;		/* host byte order, included in range */  	u32 last_ip;		/* host byte order, included in range */ +	u32 elements;		/* number of max elements in the set */  	u32 timeout;		/* timeout value */  	struct timer_list gc;	/* garbage collector */ +	size_t memsize;		/* members size */  	size_t dsize;		/* size of element */ +	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */  };  /* ADT structure for generic function args */ -struct ipmac { -	u32 id;			/* id in array */ -	unsigned char *ether;	/* ethernet address */ +struct bitmap_ipmac_adt_elem { +	u16 id; +	unsigned char *ether;  }; -/* Member element without and with timeout */ - -struct ipmac_elem { +struct bitmap_ipmac_elem {  	unsigned char ether[ETH_ALEN]; -	unsigned char match; +	unsigned char filled;  } __attribute__ ((aligned)); -struct ipmac_telem { -	unsigned char ether[ETH_ALEN]; -	unsigned char match; -	unsigned long timeout; -} __attribute__ ((aligned)); - -static inline void * -bitmap_ipmac_elem(const struct bitmap_ipmac *map, u32 id) +static inline u32 +ip_to_id(const struct bitmap_ipmac *m, u32 ip)  { -	return (void *)((char *)map->members + id * map->dsize); +	return ip - m->first_ip;  } -static inline bool -bitmap_timeout(const struct bitmap_ipmac *map, u32 id) +static inline struct bitmap_ipmac_elem * +get_elem(void *extensions, u16 id, size_t dsize)  { -	const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id); - -	return ip_set_timeout_test(elem->timeout); +	return (struct bitmap_ipmac_elem *)(extensions + id * dsize);  } -static inline bool -bitmap_expired(const struct bitmap_ipmac *map, u32 id) -{ -	const struct ipmac_telem *elem = bitmap_ipmac_elem(map, id); - -	return ip_set_timeout_expired(elem->timeout); -} +/* Common functions */  static inline int -bitmap_ipmac_exist(const struct ipmac_telem *elem) +bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e, +		     const struct bitmap_ipmac *map)  { -	return elem->match == MAC_UNSET || -	       (elem->match == MAC_FILLED && -		!ip_set_timeout_expired(elem->timeout)); -} - -/* Base variant */ - -static int -bitmap_ipmac_test(struct ip_set *set, void *value, u32 timeout, u32 flags) -{ -	const struct bitmap_ipmac *map = set->data; -	const struct ipmac *data = value; -	const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id); +	const struct bitmap_ipmac_elem *elem; -	switch (elem->match) { -	case MAC_UNSET: -		/* Trigger kernel to fill out the ethernet address */ -		return -EAGAIN; -	case MAC_FILLED: -		return 
data->ether == NULL || -		       ether_addr_equal(data->ether, elem->ether); -	} -	return 0; +	if (!test_bit(e->id, map->members)) +		return 0; +	elem = get_elem(map->extensions, e->id, map->dsize); +	if (elem->filled == MAC_FILLED) +		return e->ether == NULL || +		       ether_addr_equal(e->ether, elem->ether); +	/* Trigger kernel to fill out the ethernet address */ +	return -EAGAIN;  } -static int -bitmap_ipmac_add(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map)  { -	struct bitmap_ipmac *map = set->data; -	const struct ipmac *data = value; -	struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id); - -	switch (elem->match) { -	case MAC_UNSET: -		if (!data->ether) -			/* Already added without ethernet address */ -			return -IPSET_ERR_EXIST; -		/* Fill the MAC address */ -		memcpy(elem->ether, data->ether, ETH_ALEN); -		elem->match = MAC_FILLED; -		break; -	case MAC_FILLED: -		return -IPSET_ERR_EXIST; -	case MAC_EMPTY: -		if (data->ether) { -			memcpy(elem->ether, data->ether, ETH_ALEN); -			elem->match = MAC_FILLED; -		} else -			elem->match = MAC_UNSET; -	} +	const struct bitmap_ipmac_elem *elem; -	return 0; +	if (!test_bit(id, map->members)) +		return 0; +	elem = get_elem(map->extensions, id, map->dsize); +	/* Timer not started for the incomplete elements */ +	return elem->filled == MAC_FILLED;  } -static int -bitmap_ipmac_del(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem)  { -	struct bitmap_ipmac *map = set->data; -	const struct ipmac *data = value; -	struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id); - -	if (elem->match == MAC_EMPTY) -		return -IPSET_ERR_EXIST; - -	elem->match = MAC_EMPTY; - -	return 0; +	return elem->filled == MAC_FILLED;  } -static int -bitmap_ipmac_list(const struct ip_set *set, -		  struct sk_buff *skb, struct netlink_callback *cb) +static inline int +bitmap_ipmac_add_timeout(unsigned long *timeout, +			 const struct bitmap_ipmac_adt_elem *e, +			 const struct ip_set_ext *ext, +			 struct bitmap_ipmac *map, int mode)  { -	const struct bitmap_ipmac *map = set->data; -	const struct ipmac_elem *elem; -	struct nlattr *atd, *nested; -	u32 id, first = cb->args[2]; -	u32 last = map->last_ip - map->first_ip; - -	atd = ipset_nest_start(skb, IPSET_ATTR_ADT); -	if (!atd) -		return -EMSGSIZE; -	for (; cb->args[2] <= last; cb->args[2]++) { -		id = cb->args[2]; -		elem = bitmap_ipmac_elem(map, id); -		if (elem->match == MAC_EMPTY) -			continue; -		nested = ipset_nest_start(skb, IPSET_ATTR_DATA); -		if (!nested) { -			if (id == first) { -				nla_nest_cancel(skb, atd); -				return -EMSGSIZE; -			} else -				goto nla_put_failure; -		} -		if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, -				    htonl(map->first_ip + id)) || -		    (elem->match == MAC_FILLED && -		     nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, -			     elem->ether))) -			goto nla_put_failure; -		ipset_nest_end(skb, nested); -	} -	ipset_nest_end(skb, atd); -	/* Set listing finished */ -	cb->args[2] = 0; - -	return 0; +	u32 t = ext->timeout; -nla_put_failure: -	nla_nest_cancel(skb, nested); -	ipset_nest_end(skb, atd); -	if (unlikely(id == first)) { -		cb->args[2] = 0; -		return -EMSGSIZE; +	if (mode == IPSET_ADD_START_STORED_TIMEOUT) { +		if (t == map->timeout) +			/* Timeout was not specified, get stored one */ +			t = *timeout; +		ip_set_timeout_set(timeout, t); +	} else { +		/* If MAC is unset yet, we store plain timeout value +		 * 
because the timer is not activated yet +		 * and we can reuse it later when MAC is filled out, +		 * possibly by the kernel */ +		if (e->ether) +			ip_set_timeout_set(timeout, t); +		else +			*timeout = t;  	}  	return 0;  } -/* Timeout variant */ - -static int -bitmap_ipmac_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e, +		    struct bitmap_ipmac *map, u32 flags)  { -	const struct bitmap_ipmac *map = set->data; -	const struct ipmac *data = value; -	const struct ipmac_elem *elem = bitmap_ipmac_elem(map, data->id); +	struct bitmap_ipmac_elem *elem; -	switch (elem->match) { -	case MAC_UNSET: -		/* Trigger kernel to fill out the ethernet address */ -		return -EAGAIN; -	case MAC_FILLED: -		return (data->ether == NULL || -			ether_addr_equal(data->ether, elem->ether)) && -		       !bitmap_expired(map, data->id); +	elem = get_elem(map->extensions, e->id, map->dsize); +	if (test_and_set_bit(e->id, map->members)) { +		if (elem->filled == MAC_FILLED) { +			if (e->ether && (flags & IPSET_FLAG_EXIST)) +				memcpy(elem->ether, e->ether, ETH_ALEN); +			return IPSET_ADD_FAILED; +		} else if (!e->ether) +			/* Already added without ethernet address */ +			return IPSET_ADD_FAILED; +		/* Fill the MAC address and trigger the timer activation */ +		memcpy(elem->ether, e->ether, ETH_ALEN); +		elem->filled = MAC_FILLED; +		return IPSET_ADD_START_STORED_TIMEOUT; +	} else if (e->ether) { +		/* We can store MAC too */ +		memcpy(elem->ether, e->ether, ETH_ALEN); +		elem->filled = MAC_FILLED; +		return 0; +	} else { +		elem->filled = MAC_UNSET; +		/* MAC is not stored yet, don't start timer */ +		return IPSET_ADD_STORE_PLAIN_TIMEOUT;  	} -	return 0;  } -static int -bitmap_ipmac_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e, +		    struct bitmap_ipmac *map)  { -	struct bitmap_ipmac *map = set->data; -	const struct ipmac *data = value; -	struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id); -	bool flag_exist = flags & IPSET_FLAG_EXIST; - -	switch (elem->match) { -	case MAC_UNSET: -		if (!(data->ether || flag_exist)) -			/* Already added without ethernet address */ -			return -IPSET_ERR_EXIST; -		/* Fill the MAC address and activate the timer */ -		memcpy(elem->ether, data->ether, ETH_ALEN); -		elem->match = MAC_FILLED; -		if (timeout == map->timeout) -			/* Timeout was not specified, get stored one */ -			timeout = elem->timeout; -		elem->timeout = ip_set_timeout_set(timeout); -		break; -	case MAC_FILLED: -		if (!(bitmap_expired(map, data->id) || flag_exist)) -			return -IPSET_ERR_EXIST; -		/* Fall through */ -	case MAC_EMPTY: -		if (data->ether) { -			memcpy(elem->ether, data->ether, ETH_ALEN); -			elem->match = MAC_FILLED; -		} else -			elem->match = MAC_UNSET; -		/* If MAC is unset yet, we store plain timeout value -		 * because the timer is not activated yet -		 * and we can reuse it later when MAC is filled out, -		 * possibly by the kernel */ -		elem->timeout = data->ether ? 
ip_set_timeout_set(timeout) -					    : timeout; -		break; -	} - -	return 0; +	return !test_and_clear_bit(e->id, map->members);  } -static int -bitmap_ipmac_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline unsigned long +ip_set_timeout_stored(struct bitmap_ipmac *map, u32 id, unsigned long *timeout)  { -	struct bitmap_ipmac *map = set->data; -	const struct ipmac *data = value; -	struct ipmac_telem *elem = bitmap_ipmac_elem(map, data->id); +	const struct bitmap_ipmac_elem *elem = +		get_elem(map->extensions, id, map->dsize); -	if (elem->match == MAC_EMPTY || bitmap_expired(map, data->id)) -		return -IPSET_ERR_EXIST; - -	elem->match = MAC_EMPTY; - -	return 0; +	return elem->filled == MAC_FILLED ? ip_set_timeout_get(timeout) : +					    *timeout;  } -static int -bitmap_ipmac_tlist(const struct ip_set *set, -		   struct sk_buff *skb, struct netlink_callback *cb) +static inline int +bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map, +		     u32 id)  { -	const struct bitmap_ipmac *map = set->data; -	const struct ipmac_telem *elem; -	struct nlattr *atd, *nested; -	u32 id, first = cb->args[2]; -	u32 timeout, last = map->last_ip - map->first_ip; - -	atd = ipset_nest_start(skb, IPSET_ATTR_ADT); -	if (!atd) -		return -EMSGSIZE; -	for (; cb->args[2] <= last; cb->args[2]++) { -		id = cb->args[2]; -		elem = bitmap_ipmac_elem(map, id); -		if (!bitmap_ipmac_exist(elem)) -			continue; -		nested = ipset_nest_start(skb, IPSET_ATTR_DATA); -		if (!nested) { -			if (id == first) { -				nla_nest_cancel(skb, atd); -				return -EMSGSIZE; -			} else -				goto nla_put_failure; -		} -		if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, -				    htonl(map->first_ip + id)) || -		    (elem->match == MAC_FILLED && -		     nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, -			     elem->ether))) -			goto nla_put_failure; -		timeout = elem->match == MAC_UNSET ? 
elem->timeout -				: ip_set_timeout_get(elem->timeout); -		if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(timeout))) -			goto nla_put_failure; -		ipset_nest_end(skb, nested); -	} -	ipset_nest_end(skb, atd); -	/* Set listing finished */ -	cb->args[2] = 0; +	const struct bitmap_ipmac_elem *elem = +		get_elem(map->extensions, id, map->dsize); -	return 0; +	return nla_put_ipaddr4(skb, IPSET_ATTR_IP, +			       htonl(map->first_ip + id)) || +	       (elem->filled == MAC_FILLED && +		nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, elem->ether)); +} -nla_put_failure: -	nla_nest_cancel(skb, nested); -	ipset_nest_end(skb, atd); -	if (unlikely(id == first)) { -		cb->args[2] = 0; -		return -EMSGSIZE; -	} -	return 0; +static inline int +bitmap_ipmac_do_head(struct sk_buff *skb, const struct bitmap_ipmac *map) +{ +	return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) || +	       nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip));  }  static int  bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,  		  const struct xt_action_param *par, -		  enum ipset_adt adt, const struct ip_set_adt_opt *opt) +		  enum ipset_adt adt, struct ip_set_adt_opt *opt)  {  	struct bitmap_ipmac *map = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct ipmac data; +	struct bitmap_ipmac_adt_elem e = {}; +	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); +	u32 ip;  	/* MAC can be src only */  	if (!(opt->flags & IPSET_DIM_TWO_SRC))  		return 0; -	data.id = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); -	if (data.id < map->first_ip || data.id > map->last_ip) +	ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC)); +	if (ip < map->first_ip || ip > map->last_ip)  		return -IPSET_ERR_BITMAP_RANGE;  	/* Backward compatibility: we don't check the second flag */ @@ -368,10 +232,10 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,  	    (skb_mac_header(skb) + ETH_HLEN) > skb->data)  		return -EINVAL; -	data.id -= map->first_ip; -	data.ether = eth_hdr(skb)->h_source; +	e.id = ip_to_id(map, ip); +	e.ether = eth_hdr(skb)->h_source; -	return adtfn(set, &data, opt_timeout(opt, map), opt->cmdflags); +	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);  }  static int @@ -380,91 +244,39 @@ bitmap_ipmac_uadt(struct ip_set *set, struct nlattr *tb[],  {  	const struct bitmap_ipmac *map = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct ipmac data; -	u32 timeout = map->timeout; +	struct bitmap_ipmac_adt_elem e = {}; +	struct ip_set_ext ext = IP_SET_INIT_UEXT(map); +	u32 ip;  	int ret = 0;  	if (unlikely(!tb[IPSET_ATTR_IP] || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_LINENO])  		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); -	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &data.id); +	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || +	      ip_set_get_extensions(set, tb, &ext);  	if (ret)  		return ret; -	if (data.id < map->first_ip || data.id > map->last_ip) +	if (ip < map->first_ip || ip > map->last_ip)  		return -IPSET_ERR_BITMAP_RANGE; +	e.id = ip_to_id(map, ip);  	if (tb[IPSET_ATTR_ETHER]) -		data.ether = nla_data(tb[IPSET_ATTR_ETHER]); +		e.ether = nla_data(tb[IPSET_ATTR_ETHER]);  	else -		data.ether = NULL; - -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!with_timeout(map->timeout)) -			return 
-IPSET_ERR_TIMEOUT; -		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -	} +		e.ether = NULL; -	data.id -= map->first_ip; - -	ret = adtfn(set, &data, timeout, flags); +	ret = adtfn(set, &e, &ext, &ext, flags);  	return ip_set_eexist(ret, flags) ? 0 : ret;  } -static void -bitmap_ipmac_destroy(struct ip_set *set) -{ -	struct bitmap_ipmac *map = set->data; - -	if (with_timeout(map->timeout)) -		del_timer_sync(&map->gc); - -	ip_set_free(map->members); -	kfree(map); - -	set->data = NULL; -} - -static void -bitmap_ipmac_flush(struct ip_set *set) -{ -	struct bitmap_ipmac *map = set->data; - -	memset(map->members, 0, -	       (map->last_ip - map->first_ip + 1) * map->dsize); -} - -static int -bitmap_ipmac_head(struct ip_set *set, struct sk_buff *skb) -{ -	const struct bitmap_ipmac *map = set->data; -	struct nlattr *nested; - -	nested = ipset_nest_start(skb, IPSET_ATTR_DATA); -	if (!nested) -		goto nla_put_failure; -	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) || -	    nla_put_ipaddr4(skb, IPSET_ATTR_IP_TO, htonl(map->last_ip)) || -	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || -	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, -			  htonl(sizeof(*map) + -				((map->last_ip - map->first_ip + 1) * -				 map->dsize))) || -	    (with_timeout(map->timeout) && -	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)))) -		goto nla_put_failure; -	ipset_nest_end(skb, nested); - -	return 0; -nla_put_failure: -	return -EMSGSIZE; -} -  static bool  bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b)  { @@ -473,85 +285,64 @@ bitmap_ipmac_same_set(const struct ip_set *a, const struct ip_set *b)  	return x->first_ip == y->first_ip &&  	       x->last_ip == y->last_ip && -	       x->timeout == y->timeout; +	       x->timeout == y->timeout && +	       a->extensions == b->extensions;  } -static const struct ip_set_type_variant bitmap_ipmac = { -	.kadt	= bitmap_ipmac_kadt, -	.uadt	= bitmap_ipmac_uadt, -	.adt	= { -		[IPSET_ADD] = bitmap_ipmac_add, -		[IPSET_DEL] = bitmap_ipmac_del, -		[IPSET_TEST] = bitmap_ipmac_test, -	}, -	.destroy = bitmap_ipmac_destroy, -	.flush	= bitmap_ipmac_flush, -	.head	= bitmap_ipmac_head, -	.list	= bitmap_ipmac_list, -	.same_set = bitmap_ipmac_same_set, -}; +/* Plain variant */ -static const struct ip_set_type_variant bitmap_tipmac = { -	.kadt	= bitmap_ipmac_kadt, -	.uadt	= bitmap_ipmac_uadt, -	.adt	= { -		[IPSET_ADD] = bitmap_ipmac_tadd, -		[IPSET_DEL] = bitmap_ipmac_tdel, -		[IPSET_TEST] = bitmap_ipmac_ttest, -	}, -	.destroy = bitmap_ipmac_destroy, -	.flush	= bitmap_ipmac_flush, -	.head	= bitmap_ipmac_head, -	.list	= bitmap_ipmac_tlist, -	.same_set = bitmap_ipmac_same_set, +/* Timeout variant */ + +struct bitmap_ipmact_elem { +	struct { +		unsigned char ether[ETH_ALEN]; +		unsigned char filled; +	} __attribute__ ((aligned)); +	unsigned long timeout;  }; -static void -bitmap_ipmac_gc(unsigned long ul_set) -{ -	struct ip_set *set = (struct ip_set *) ul_set; -	struct bitmap_ipmac *map = set->data; -	struct ipmac_telem *elem; -	u32 id, last = map->last_ip - map->first_ip; +/* Plain variant with counter */ -	/* We run parallel with other readers (test element) -	 * but adding/deleting new entries is locked out */ -	read_lock_bh(&set->lock); -	for (id = 0; id <= last; id++) { -		elem = bitmap_ipmac_elem(map, id); -		if (elem->match == MAC_FILLED && -		    ip_set_timeout_expired(elem->timeout)) -			elem->match = MAC_EMPTY; -	} -	read_unlock_bh(&set->lock); +struct bitmap_ipmacc_elem { +	struct { +		unsigned char 
ether[ETH_ALEN]; +		unsigned char filled; +	} __attribute__ ((aligned)); +	struct ip_set_counter counter; +}; -	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; -	add_timer(&map->gc); -} +/* Timeout variant with counter */ -static void -bitmap_ipmac_gc_init(struct ip_set *set) -{ -	struct bitmap_ipmac *map = set->data; +struct bitmap_ipmacct_elem { +	struct { +		unsigned char ether[ETH_ALEN]; +		unsigned char filled; +	} __attribute__ ((aligned)); +	unsigned long timeout; +	struct ip_set_counter counter; +}; -	init_timer(&map->gc); -	map->gc.data = (unsigned long) set; -	map->gc.function = bitmap_ipmac_gc; -	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; -	add_timer(&map->gc); -} +#include "ip_set_bitmap_gen.h"  /* Create bitmap:ip,mac type of sets */  static bool  init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, -	       u32 first_ip, u32 last_ip) +	       u32 first_ip, u32 last_ip, u32 elements)  {  	map->members = ip_set_alloc((last_ip - first_ip + 1) * map->dsize);  	if (!map->members)  		return false; +	if (map->dsize) { +		map->extensions = ip_set_alloc(map->dsize * elements); +		if (!map->extensions) { +			kfree(map->members); +			return false; +		} +	}  	map->first_ip = first_ip;  	map->last_ip = last_ip; +	map->elements = elements;  	map->timeout = IPSET_NO_TIMEOUT;  	set->data = map; @@ -564,13 +355,14 @@ static int  bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],  		    u32 flags)  { -	u32 first_ip, last_ip; +	u32 first_ip, last_ip, cadt_flags = 0;  	u64 elements;  	struct bitmap_ipmac *map;  	int ret;  	if (unlikely(!tb[IPSET_ATTR_IP] || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))  		return -IPSET_ERR_PROTOCOL;  	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &first_ip); @@ -605,28 +397,59 @@ bitmap_ipmac_create(struct ip_set *set, struct nlattr *tb[],  	if (!map)  		return -ENOMEM; -	if (tb[IPSET_ATTR_TIMEOUT]) { -		map->dsize = sizeof(struct ipmac_telem); +	map->memsize = bitmap_bytes(0, elements - 1); +	set->variant = &bitmap_ipmac; +	if (tb[IPSET_ATTR_CADT_FLAGS]) +		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); +	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { +		set->extensions |= IPSET_EXT_COUNTER; +		if (tb[IPSET_ATTR_TIMEOUT]) { +			map->dsize = sizeof(struct bitmap_ipmacct_elem); +			map->offset[IPSET_OFFSET_TIMEOUT] = +				offsetof(struct bitmap_ipmacct_elem, timeout); +			map->offset[IPSET_OFFSET_COUNTER] = +				offsetof(struct bitmap_ipmacct_elem, counter); + +			if (!init_map_ipmac(set, map, first_ip, last_ip, +					    elements)) { +				kfree(map); +				return -ENOMEM; +			} +			map->timeout = ip_set_timeout_uget( +				tb[IPSET_ATTR_TIMEOUT]); +			set->extensions |= IPSET_EXT_TIMEOUT; +			bitmap_ipmac_gc_init(set, bitmap_ipmac_gc); +		} else { +			map->dsize = sizeof(struct bitmap_ipmacc_elem); +			map->offset[IPSET_OFFSET_COUNTER] = +				offsetof(struct bitmap_ipmacc_elem, counter); + +			if (!init_map_ipmac(set, map, first_ip, last_ip, +					    elements)) { +				kfree(map); +				return -ENOMEM; +			} +		} +	} else if (tb[IPSET_ATTR_TIMEOUT]) { +		map->dsize = sizeof(struct bitmap_ipmact_elem); +		map->offset[IPSET_OFFSET_TIMEOUT] = +			offsetof(struct bitmap_ipmact_elem, timeout); -		if (!init_map_ipmac(set, map, first_ip, last_ip)) { +		if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {  			kfree(map);  			return -ENOMEM;  		} -  		map->timeout = 
ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - -		set->variant = &bitmap_tipmac; - -		bitmap_ipmac_gc_init(set); +		set->extensions |= IPSET_EXT_TIMEOUT; +		bitmap_ipmac_gc_init(set, bitmap_ipmac_gc);  	} else { -		map->dsize = sizeof(struct ipmac_elem); +		map->dsize = sizeof(struct bitmap_ipmac_elem); -		if (!init_map_ipmac(set, map, first_ip, last_ip)) { +		if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) {  			kfree(map);  			return -ENOMEM;  		}  		set->variant = &bitmap_ipmac; -  	}  	return 0;  } @@ -645,6 +468,7 @@ static struct ip_set_type bitmap_ipmac_type = {  		[IPSET_ATTR_IP_TO]	= { .type = NLA_NESTED },  		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 }, +		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },  	},  	.adt_policy	= {  		[IPSET_ATTR_IP]		= { .type = NLA_NESTED }, @@ -652,6 +476,8 @@ static struct ip_set_type bitmap_ipmac_type = {  					    .len  = ETH_ALEN },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },  		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 }, +		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 }, +		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },  	},  	.me		= THIS_MODULE,  }; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index e6b2db76f4c..8207d1fda52 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -19,205 +19,94 @@  #include <linux/netfilter/ipset/ip_set.h>  #include <linux/netfilter/ipset/ip_set_bitmap.h>  #include <linux/netfilter/ipset/ip_set_getport.h> -#define IP_SET_BITMAP_TIMEOUT -#include <linux/netfilter/ipset/ip_set_timeout.h>  #define REVISION_MIN	0 -#define REVISION_MAX	0 +#define REVISION_MAX	1	/* Counter support added */  MODULE_LICENSE("GPL");  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");  IP_SET_MODULE_DESC("bitmap:port", REVISION_MIN, REVISION_MAX);  MODULE_ALIAS("ip_set_bitmap:port"); +#define MTYPE		bitmap_port +  /* Type structure */  struct bitmap_port {  	void *members;		/* the set members */ +	void *extensions;	/* data extensions */  	u16 first_port;		/* host byte order, included in range */  	u16 last_port;		/* host byte order, included in range */ +	u32 elements;		/* number of max elements in the set */  	size_t memsize;		/* members size */ +	size_t dsize;		/* extensions struct size */ +	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */  	u32 timeout;		/* timeout parameter */  	struct timer_list gc;	/* garbage collection */  }; -/* Base variant */ +/* ADT structure for generic function args */ +struct bitmap_port_adt_elem { +	u16 id; +}; -static int -bitmap_port_test(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline u16 +port_to_id(const struct bitmap_port *m, u16 port)  { -	const struct bitmap_port *map = set->data; -	u16 id = *(u16 *)value; - -	return !!test_bit(id, map->members); +	return port - m->first_port;  } -static int -bitmap_port_add(struct ip_set *set, void *value, u32 timeout, u32 flags) -{ -	struct bitmap_port *map = set->data; -	u16 id = *(u16 *)value; - -	if (test_and_set_bit(id, map->members)) -		return -IPSET_ERR_EXIST; - -	return 0; -} +/* Common functions */ -static int -bitmap_port_del(struct ip_set *set, void *value, u32 
timeout, u32 flags) +static inline int +bitmap_port_do_test(const struct bitmap_port_adt_elem *e, +		    const struct bitmap_port *map)  { -	struct bitmap_port *map = set->data; -	u16 id = *(u16 *)value; - -	if (!test_and_clear_bit(id, map->members)) -		return -IPSET_ERR_EXIST; - -	return 0; +	return !!test_bit(e->id, map->members);  } -static int -bitmap_port_list(const struct ip_set *set, -		 struct sk_buff *skb, struct netlink_callback *cb) +static inline int +bitmap_port_gc_test(u16 id, const struct bitmap_port *map)  { -	const struct bitmap_port *map = set->data; -	struct nlattr *atd, *nested; -	u16 id, first = cb->args[2]; -	u16 last = map->last_port - map->first_port; - -	atd = ipset_nest_start(skb, IPSET_ATTR_ADT); -	if (!atd) -		return -EMSGSIZE; -	for (; cb->args[2] <= last; cb->args[2]++) { -		id = cb->args[2]; -		if (!test_bit(id, map->members)) -			continue; -		nested = ipset_nest_start(skb, IPSET_ATTR_DATA); -		if (!nested) { -			if (id == first) { -				nla_nest_cancel(skb, atd); -				return -EMSGSIZE; -			} else -				goto nla_put_failure; -		} -		if (nla_put_net16(skb, IPSET_ATTR_PORT, -				  htons(map->first_port + id))) -			goto nla_put_failure; -		ipset_nest_end(skb, nested); -	} -	ipset_nest_end(skb, atd); -	/* Set listing finished */ -	cb->args[2] = 0; - -	return 0; - -nla_put_failure: -	nla_nest_cancel(skb, nested); -	ipset_nest_end(skb, atd); -	if (unlikely(id == first)) { -		cb->args[2] = 0; -		return -EMSGSIZE; -	} -	return 0; +	return !!test_bit(id, map->members);  } -/* Timeout variant */ - -static int -bitmap_port_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_port_do_add(const struct bitmap_port_adt_elem *e, +		   struct bitmap_port *map, u32 flags)  { -	const struct bitmap_port *map = set->data; -	const unsigned long *members = map->members; -	u16 id = *(u16 *)value; - -	return ip_set_timeout_test(members[id]); +	return !!test_and_set_bit(e->id, map->members);  } -static int -bitmap_port_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_port_do_del(const struct bitmap_port_adt_elem *e, +		   struct bitmap_port *map)  { -	struct bitmap_port *map = set->data; -	unsigned long *members = map->members; -	u16 id = *(u16 *)value; - -	if (ip_set_timeout_test(members[id]) && !(flags & IPSET_FLAG_EXIST)) -		return -IPSET_ERR_EXIST; - -	members[id] = ip_set_timeout_set(timeout); - -	return 0; +	return !test_and_clear_bit(e->id, map->members);  } -static int -bitmap_port_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags) +static inline int +bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id)  { -	struct bitmap_port *map = set->data; -	unsigned long *members = map->members; -	u16 id = *(u16 *)value; -	int ret = -IPSET_ERR_EXIST; - -	if (ip_set_timeout_test(members[id])) -		ret = 0; - -	members[id] = IPSET_ELEM_UNSET; -	return ret; +	return nla_put_net16(skb, IPSET_ATTR_PORT, +			     htons(map->first_port + id));  } -static int -bitmap_port_tlist(const struct ip_set *set, -		  struct sk_buff *skb, struct netlink_callback *cb) +static inline int +bitmap_port_do_head(struct sk_buff *skb, const struct bitmap_port *map)  { -	const struct bitmap_port *map = set->data; -	struct nlattr *adt, *nested; -	u16 id, first = cb->args[2]; -	u16 last = map->last_port - map->first_port; -	const unsigned long *members = map->members; - -	adt = ipset_nest_start(skb, IPSET_ATTR_ADT); -	if (!adt) -		return -EMSGSIZE; -	for (; cb->args[2] <= last; cb->args[2]++) { -		id = 
cb->args[2]; -		if (!ip_set_timeout_test(members[id])) -			continue; -		nested = ipset_nest_start(skb, IPSET_ATTR_DATA); -		if (!nested) { -			if (id == first) { -				nla_nest_cancel(skb, adt); -				return -EMSGSIZE; -			} else -				goto nla_put_failure; -		} -		if (nla_put_net16(skb, IPSET_ATTR_PORT, -				  htons(map->first_port + id)) || -		    nla_put_net32(skb, IPSET_ATTR_TIMEOUT, -				  htonl(ip_set_timeout_get(members[id])))) -			goto nla_put_failure; -		ipset_nest_end(skb, nested); -	} -	ipset_nest_end(skb, adt); - -	/* Set listing finished */ -	cb->args[2] = 0; - -	return 0; - -nla_put_failure: -	nla_nest_cancel(skb, nested); -	ipset_nest_end(skb, adt); -	if (unlikely(id == first)) { -		cb->args[2] = 0; -		return -EMSGSIZE; -	} -	return 0; +	return nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port)) || +	       nla_put_net16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port));  }  static int  bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,  		 const struct xt_action_param *par, -		 enum ipset_adt adt, const struct ip_set_adt_opt *opt) +		 enum ipset_adt adt, struct ip_set_adt_opt *opt)  {  	struct bitmap_port *map = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; +	struct bitmap_port_adt_elem e = {}; +	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map);  	__be16 __port;  	u16 port = 0; @@ -230,9 +119,9 @@ bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,  	if (port < map->first_port || port > map->last_port)  		return -IPSET_ERR_BITMAP_RANGE; -	port -= map->first_port; +	e.id = port_to_id(map, port); -	return adtfn(set, &port, opt_timeout(opt, map), opt->cmdflags); +	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);  }  static int @@ -241,14 +130,17 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],  {  	struct bitmap_port *map = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	u32 timeout = map->timeout; +	struct bitmap_port_adt_elem e = {}; +	struct ip_set_ext ext = IP_SET_INIT_UEXT(map);  	u32 port;	/* wraparound */ -	u16 id, port_to; +	u16 port_to;  	int ret = 0;  	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_LINENO]) @@ -257,16 +149,13 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],  	port = ip_set_get_h16(tb[IPSET_ATTR_PORT]);  	if (port < map->first_port || port > map->last_port)  		return -IPSET_ERR_BITMAP_RANGE; - -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!with_timeout(map->timeout)) -			return -IPSET_ERR_TIMEOUT; -		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -	} +	ret = ip_set_get_extensions(set, tb, &ext); +	if (ret) +		return ret;  	if (adt == IPSET_TEST) { -		id = port - map->first_port; -		return adtfn(set, &id, timeout, flags); +		e.id = port_to_id(map, port); +		return adtfn(set, &e, &ext, &ext, flags);  	}  	if (tb[IPSET_ATTR_PORT_TO]) { @@ -283,8 +172,8 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],  		return -IPSET_ERR_BITMAP_RANGE;  	for (; port <= port_to; port++) { -		id = port - map->first_port; -		ret = adtfn(set, &id, timeout, flags); +		e.id = port_to_id(map, port); +		ret = adtfn(set, &e, &ext, &ext, flags);  		if (ret && !ip_set_eexist(ret, flags))  			return ret; @@ -294,52 +183,6 @@ 
bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],  	return ret;  } -static void -bitmap_port_destroy(struct ip_set *set) -{ -	struct bitmap_port *map = set->data; - -	if (with_timeout(map->timeout)) -		del_timer_sync(&map->gc); - -	ip_set_free(map->members); -	kfree(map); - -	set->data = NULL; -} - -static void -bitmap_port_flush(struct ip_set *set) -{ -	struct bitmap_port *map = set->data; - -	memset(map->members, 0, map->memsize); -} - -static int -bitmap_port_head(struct ip_set *set, struct sk_buff *skb) -{ -	const struct bitmap_port *map = set->data; -	struct nlattr *nested; - -	nested = ipset_nest_start(skb, IPSET_ATTR_DATA); -	if (!nested) -		goto nla_put_failure; -	if (nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port)) || -	    nla_put_net16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port)) || -	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || -	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, -			  htonl(sizeof(*map) + map->memsize)) || -	    (with_timeout(map->timeout) && -	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout)))) -		goto nla_put_failure; -	ipset_nest_end(skb, nested); - -	return 0; -nla_put_failure: -	return -EMSGSIZE; -} -  static bool  bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b)  { @@ -348,71 +191,35 @@ bitmap_port_same_set(const struct ip_set *a, const struct ip_set *b)  	return x->first_port == y->first_port &&  	       x->last_port == y->last_port && -	       x->timeout == y->timeout; +	       x->timeout == y->timeout && +	       a->extensions == b->extensions;  } -static const struct ip_set_type_variant bitmap_port = { -	.kadt	= bitmap_port_kadt, -	.uadt	= bitmap_port_uadt, -	.adt	= { -		[IPSET_ADD] = bitmap_port_add, -		[IPSET_DEL] = bitmap_port_del, -		[IPSET_TEST] = bitmap_port_test, -	}, -	.destroy = bitmap_port_destroy, -	.flush	= bitmap_port_flush, -	.head	= bitmap_port_head, -	.list	= bitmap_port_list, -	.same_set = bitmap_port_same_set, +/* Plain variant */ + +struct bitmap_port_elem {  }; -static const struct ip_set_type_variant bitmap_tport = { -	.kadt	= bitmap_port_kadt, -	.uadt	= bitmap_port_uadt, -	.adt	= { -		[IPSET_ADD] = bitmap_port_tadd, -		[IPSET_DEL] = bitmap_port_tdel, -		[IPSET_TEST] = bitmap_port_ttest, -	}, -	.destroy = bitmap_port_destroy, -	.flush	= bitmap_port_flush, -	.head	= bitmap_port_head, -	.list	= bitmap_port_tlist, -	.same_set = bitmap_port_same_set, +/* Timeout variant */ + +struct bitmap_portt_elem { +	unsigned long timeout;  }; -static void -bitmap_port_gc(unsigned long ul_set) -{ -	struct ip_set *set = (struct ip_set *) ul_set; -	struct bitmap_port *map = set->data; -	unsigned long *table = map->members; -	u32 id;	/* wraparound */ -	u16 last = map->last_port - map->first_port; +/* Plain variant with counter */ -	/* We run parallel with other readers (test element) -	 * but adding/deleting new entries is locked out */ -	read_lock_bh(&set->lock); -	for (id = 0; id <= last; id++) -		if (ip_set_timeout_expired(table[id])) -			table[id] = IPSET_ELEM_UNSET; -	read_unlock_bh(&set->lock); +struct bitmap_portc_elem { +	struct ip_set_counter counter; +}; -	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; -	add_timer(&map->gc); -} +/* Timeout variant with counter */ -static void -bitmap_port_gc_init(struct ip_set *set) -{ -	struct bitmap_port *map = set->data; +struct bitmap_portct_elem { +	unsigned long timeout; +	struct ip_set_counter counter; +}; -	init_timer(&map->gc); -	map->gc.data = (unsigned long) set; -	map->gc.function = bitmap_port_gc; -	
map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; -	add_timer(&map->gc); -} +#include "ip_set_bitmap_gen.h"  /* Create bitmap:ip type of sets */ @@ -423,6 +230,13 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,  	map->members = ip_set_alloc(map->memsize);  	if (!map->members)  		return false; +	if (map->dsize) { +		map->extensions = ip_set_alloc(map->dsize * map->elements); +		if (!map->extensions) { +			kfree(map->members); +			return false; +		} +	}  	map->first_port = first_port;  	map->last_port = last_port;  	map->timeout = IPSET_NO_TIMEOUT; @@ -434,15 +248,16 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,  }  static int -bitmap_port_create(struct ip_set *set, struct nlattr *tb[], -		 u32 flags) +bitmap_port_create(struct ip_set *set, struct nlattr *tb[], u32 flags)  {  	struct bitmap_port *map;  	u16 first_port, last_port; +	u32 cadt_flags = 0;  	if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||  		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT_TO) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))  		return -IPSET_ERR_PROTOCOL;  	first_port = ip_set_get_h16(tb[IPSET_ATTR_PORT]); @@ -458,28 +273,56 @@ bitmap_port_create(struct ip_set *set, struct nlattr *tb[],  	if (!map)  		return -ENOMEM; -	if (tb[IPSET_ATTR_TIMEOUT]) { -		map->memsize = (last_port - first_port + 1) -			       * sizeof(unsigned long); +	map->elements = last_port - first_port + 1; +	map->memsize = map->elements * sizeof(unsigned long); +	set->variant = &bitmap_port; +	if (tb[IPSET_ATTR_CADT_FLAGS]) +		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); +	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { +		set->extensions |= IPSET_EXT_COUNTER; +		if (tb[IPSET_ATTR_TIMEOUT]) { +			map->dsize = sizeof(struct bitmap_portct_elem); +			map->offset[IPSET_OFFSET_TIMEOUT] = +				offsetof(struct bitmap_portct_elem, timeout); +			map->offset[IPSET_OFFSET_COUNTER] = +				offsetof(struct bitmap_portct_elem, counter); +			if (!init_map_port(set, map, first_port, last_port)) { +				kfree(map); +				return -ENOMEM; +			} +			map->timeout = +				ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); +			set->extensions |= IPSET_EXT_TIMEOUT; +			bitmap_port_gc_init(set, bitmap_port_gc); +		} else { +			map->dsize = sizeof(struct bitmap_portc_elem); +			map->offset[IPSET_OFFSET_COUNTER] = +				offsetof(struct bitmap_portc_elem, counter); +			if (!init_map_port(set, map, first_port, last_port)) { +				kfree(map); +				return -ENOMEM; +			} +		} +	} else if (tb[IPSET_ATTR_TIMEOUT]) { +		map->dsize = sizeof(struct bitmap_portt_elem); +		map->offset[IPSET_OFFSET_TIMEOUT] = +			offsetof(struct bitmap_portt_elem, timeout);  		if (!init_map_port(set, map, first_port, last_port)) {  			kfree(map);  			return -ENOMEM;  		}  		map->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -		set->variant = &bitmap_tport; - -		bitmap_port_gc_init(set); +		set->extensions |= IPSET_EXT_TIMEOUT; +		bitmap_port_gc_init(set, bitmap_port_gc);  	} else { -		map->memsize = bitmap_bytes(0, last_port - first_port); -		pr_debug("memsize: %zu\n", map->memsize); +		map->dsize = 0;  		if (!init_map_port(set, map, first_port, last_port)) {  			kfree(map);  			return -ENOMEM;  		} -		set->variant = &bitmap_port;  	}  	return 0;  } @@ -497,12 +340,15 @@ static struct ip_set_type bitmap_port_type = {  		[IPSET_ATTR_PORT]	= { .type = NLA_U16 },  		[IPSET_ATTR_PORT_TO]	= { .type = NLA_U16 },  		
[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 }, +		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },  	},  	.adt_policy	= {  		[IPSET_ATTR_PORT]	= { .type = NLA_U16 },  		[IPSET_ATTR_PORT_TO]	= { .type = NLA_U16 },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },  		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 }, +		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 }, +		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },  	},  	.me		= THIS_MODULE,  }; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 1ba9dbc0e10..f7713900798 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1,6 +1,6 @@  /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>   *                         Patrick Schaaf <bof@bof.de> - * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -15,7 +15,6 @@  #include <linux/ip.h>  #include <linux/skbuff.h>  #include <linux/spinlock.h> -#include <linux/netlink.h>  #include <linux/rculist.h>  #include <net/netlink.h> @@ -316,6 +315,29 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)  }  EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); +int +ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], +		      struct ip_set_ext *ext) +{ +	if (tb[IPSET_ATTR_TIMEOUT]) { +		if (!(set->extensions & IPSET_EXT_TIMEOUT)) +			return -IPSET_ERR_TIMEOUT; +		ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); +	} +	if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) { +		if (!(set->extensions & IPSET_EXT_COUNTER)) +			return -IPSET_ERR_COUNTER; +		if (tb[IPSET_ATTR_BYTES]) +			ext->bytes = be64_to_cpu(nla_get_be64( +						 tb[IPSET_ATTR_BYTES])); +		if (tb[IPSET_ATTR_PACKETS]) +			ext->packets = be64_to_cpu(nla_get_be64( +						   tb[IPSET_ATTR_PACKETS])); +	} +	return 0; +} +EXPORT_SYMBOL_GPL(ip_set_get_extensions); +  /*   * Creating/destroying/renaming/swapping affect the existence and   * the properties of a set. 
All of these can be executed from userspace @@ -366,8 +388,7 @@ ip_set_rcu_get(ip_set_id_t index)  int  ip_set_test(ip_set_id_t index, const struct sk_buff *skb, -	    const struct xt_action_param *par, -	    const struct ip_set_adt_opt *opt) +	    const struct xt_action_param *par, struct ip_set_adt_opt *opt)  {  	struct ip_set *set = ip_set_rcu_get(index);  	int ret = 0; @@ -392,7 +413,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,  		ret = 1;  	} else {  		/* --return-nomatch: invert matched element */ -		if ((opt->flags & IPSET_RETURN_NOMATCH) && +		if ((opt->cmdflags & IPSET_FLAG_RETURN_NOMATCH) &&  		    (set->type->features & IPSET_TYPE_NOMATCH) &&  		    (ret > 0 || ret == -ENOTEMPTY))  			ret = -ret; @@ -405,8 +426,7 @@ EXPORT_SYMBOL_GPL(ip_set_test);  int  ip_set_add(ip_set_id_t index, const struct sk_buff *skb, -	   const struct xt_action_param *par, -	   const struct ip_set_adt_opt *opt) +	   const struct xt_action_param *par, struct ip_set_adt_opt *opt)  {  	struct ip_set *set = ip_set_rcu_get(index);  	int ret; @@ -428,8 +448,7 @@ EXPORT_SYMBOL_GPL(ip_set_add);  int  ip_set_del(ip_set_id_t index, const struct sk_buff *skb, -	   const struct xt_action_param *par, -	   const struct ip_set_adt_opt *opt) +	   const struct xt_action_param *par, struct ip_set_adt_opt *opt)  {  	struct ip_set *set = ip_set_rcu_get(index);  	int ret = 0; @@ -1085,7 +1104,7 @@ static int  dump_init(struct netlink_callback *cb)  {  	struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); -	int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); +	int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));  	struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];  	struct nlattr *attr = (void *)nlh + min_len;  	u32 dump_type; @@ -1301,7 +1320,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,  		struct sk_buff *skb2;  		struct nlmsgerr *errmsg;  		size_t payload = sizeof(*errmsg) + nlmsg_len(nlh); -		int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); +		int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));  		struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];  		struct nlattr *cmdattr;  		u32 *errline; diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h new file mode 100644 index 00000000000..57beb1762b2 --- /dev/null +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -0,0 +1,1100 @@ +/* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _IP_SET_HASH_GEN_H +#define _IP_SET_HASH_GEN_H + +#include <linux/rcupdate.h> +#include <linux/jhash.h> +#include <linux/netfilter/ipset/ip_set_timeout.h> +#ifndef rcu_dereference_bh +#define rcu_dereference_bh(p)	rcu_dereference(p) +#endif + +#define CONCAT(a, b)		a##b +#define TOKEN(a, b)		CONCAT(a, b) + +/* Hashing which uses arrays to resolve clashing. The hash table is resized + * (doubled) when searching becomes too long. + * Internally jhash is used with the assumption that the size of the + * stored data is a multiple of sizeof(u32). If storage supports timeout, + * the timeout field must be the last one in the data structure - that field + * is ignored when computing the hash key. + * + * Readers and resizing + * + * Resizing can be triggered by userspace command only, and those + * are serialized by the nfnl mutex. 
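 * (An illustrative aside, not part of this change: the add path below,
 * mtype_add(), enters the table under RCU as in
 *
 *	rcu_read_lock_bh();
 *	t = rcu_dereference_bh(h->table);
 *	key = HKEY(value, h->initval, t->htable_bits);
 *	n = hbucket(t, key);
 *	...
 *	rcu_read_unlock_bh();
 *
 * while mtype_resize() publishes the doubled table with
 * rcu_assign_pointer() and calls synchronize_rcu_bh() before destroying
 * the old one.)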
During resizing the set is + * read-locked, so the only possible concurrent operations are + * the kernel side readers. Those must be protected by proper RCU locking. + */ + +/* Number of elements to store in an initial array block */ +#define AHASH_INIT_SIZE			4 +/* Max number of elements to store in an array block */ +#define AHASH_MAX_SIZE			(3*AHASH_INIT_SIZE) + +/* Max number of elements can be tuned */ +#ifdef IP_SET_HASH_WITH_MULTI +#define AHASH_MAX(h)			((h)->ahash_max) + +static inline u8 +tune_ahash_max(u8 curr, u32 multi) +{ +	u32 n; + +	if (multi < curr) +		return curr; + +	n = curr + AHASH_INIT_SIZE; +	/* Currently, at listing one hash bucket must fit into a message. +	 * Therefore we have a hard limit here. +	 */ +	return n > curr && n <= 64 ? n : curr; +} +#define TUNE_AHASH_MAX(h, multi)	\ +	((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi)) +#else +#define AHASH_MAX(h)			AHASH_MAX_SIZE +#define TUNE_AHASH_MAX(h, multi) +#endif + +/* A hash bucket */ +struct hbucket { +	void *value;		/* the array of the values */ +	u8 size;		/* size of the array */ +	u8 pos;			/* position of the first free entry */ +}; + +/* The hash table: the table size stored here in order to make resizing easy */ +struct htable { +	u8 htable_bits;		/* size of hash table == 2^htable_bits */ +	struct hbucket bucket[0]; /* hashtable buckets */ +}; + +#define hbucket(h, i)		(&((h)->bucket[i])) + +/* Book-keeping of the prefixes added to the set */ +struct net_prefixes { +	u8 cidr;		/* the different cidr values in the set */ +	u32 nets;		/* number of elements per cidr */ +}; + +/* Compute the hash table size */ +static size_t +htable_size(u8 hbits) +{ +	size_t hsize; + +	/* We must fit both into u32 in jhash and size_t */ +	if (hbits > 31) +		return 0; +	hsize = jhash_size(hbits); +	if ((((size_t)-1) - sizeof(struct htable))/sizeof(struct hbucket) +	    < hsize) +		return 0; + +	return hsize * sizeof(struct hbucket) + sizeof(struct htable); +} + +/* Compute htable_bits from the user input parameter hashsize */ +static u8 +htable_bits(u32 hashsize) +{ +	/* Assume that hashsize == 2^htable_bits */ +	u8 bits = fls(hashsize - 1); +	if (jhash_size(bits) != hashsize) +		/* Round up to the first 2^n value */ +		bits = fls(hashsize); + +	return bits; +} + +/* Destroy the hashtable part of the set */ +static void +ahash_destroy(struct htable *t) +{ +	struct hbucket *n; +	u32 i; + +	for (i = 0; i < jhash_size(t->htable_bits); i++) { +		n = hbucket(t, i); +		if (n->size) +			/* FIXME: use slab cache */ +			kfree(n->value); +	} + +	ip_set_free(t); +} + +static int +hbucket_elem_add(struct hbucket *n, u8 ahash_max, size_t dsize) +{ +	if (n->pos >= n->size) { +		void *tmp; + +		if (n->size >= ahash_max) +			/* Trigger rehashing */ +			return -EAGAIN; + +		tmp = kzalloc((n->size + AHASH_INIT_SIZE) * dsize, +			      GFP_ATOMIC); +		if (!tmp) +			return -ENOMEM; +		if (n->size) { +			memcpy(tmp, n->value, n->size * dsize); +			kfree(n->value); +		} +		n->value = tmp; +		n->size += AHASH_INIT_SIZE; +	} +	return 0; +} + +#ifdef IP_SET_HASH_WITH_NETS +#ifdef IP_SET_HASH_WITH_NETS_PACKED +/* When cidr is packed with nomatch, cidr - 1 is stored in the entry */ +#define CIDR(cidr)		(cidr + 1) +#else +#define CIDR(cidr)		(cidr) +#endif + +#define SET_HOST_MASK(family)	(family == AF_INET ? 
32 : 128) + +#ifdef IP_SET_HASH_WITH_MULTI +#define NETS_LENGTH(family)	(SET_HOST_MASK(family) + 1) +#else +#define NETS_LENGTH(family)	SET_HOST_MASK(family) +#endif + +#else +#define NETS_LENGTH(family)	0 +#endif /* IP_SET_HASH_WITH_NETS */ + +#define ext_timeout(e, h)	\ +(unsigned long *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_TIMEOUT]) +#define ext_counter(e, h)	\ +(struct ip_set_counter *)(((void *)(e)) + (h)->offset[IPSET_OFFSET_COUNTER]) + +#endif /* _IP_SET_HASH_GEN_H */ + +/* Family dependent templates */ + +#undef ahash_data +#undef mtype_data_equal +#undef mtype_do_data_match +#undef mtype_data_set_flags +#undef mtype_data_reset_flags +#undef mtype_data_netmask +#undef mtype_data_list +#undef mtype_data_next +#undef mtype_elem + +#undef mtype_add_cidr +#undef mtype_del_cidr +#undef mtype_ahash_memsize +#undef mtype_flush +#undef mtype_destroy +#undef mtype_gc_init +#undef mtype_same_set +#undef mtype_kadt +#undef mtype_uadt +#undef mtype + +#undef mtype_add +#undef mtype_del +#undef mtype_test_cidrs +#undef mtype_test +#undef mtype_expire +#undef mtype_resize +#undef mtype_head +#undef mtype_list +#undef mtype_gc +#undef mtype_gc_init +#undef mtype_variant +#undef mtype_data_match + +#undef HKEY + +#define mtype_data_equal	TOKEN(MTYPE, _data_equal) +#ifdef IP_SET_HASH_WITH_NETS +#define mtype_do_data_match	TOKEN(MTYPE, _do_data_match) +#else +#define mtype_do_data_match(d)	1 +#endif +#define mtype_data_set_flags	TOKEN(MTYPE, _data_set_flags) +#define mtype_data_reset_flags	TOKEN(MTYPE, _data_reset_flags) +#define mtype_data_netmask	TOKEN(MTYPE, _data_netmask) +#define mtype_data_list		TOKEN(MTYPE, _data_list) +#define mtype_data_next		TOKEN(MTYPE, _data_next) +#define mtype_elem		TOKEN(MTYPE, _elem) +#define mtype_add_cidr		TOKEN(MTYPE, _add_cidr) +#define mtype_del_cidr		TOKEN(MTYPE, _del_cidr) +#define mtype_ahash_memsize	TOKEN(MTYPE, _ahash_memsize) +#define mtype_flush		TOKEN(MTYPE, _flush) +#define mtype_destroy		TOKEN(MTYPE, _destroy) +#define mtype_gc_init		TOKEN(MTYPE, _gc_init) +#define mtype_same_set		TOKEN(MTYPE, _same_set) +#define mtype_kadt		TOKEN(MTYPE, _kadt) +#define mtype_uadt		TOKEN(MTYPE, _uadt) +#define mtype			MTYPE + +#define mtype_elem		TOKEN(MTYPE, _elem) +#define mtype_add		TOKEN(MTYPE, _add) +#define mtype_del		TOKEN(MTYPE, _del) +#define mtype_test_cidrs	TOKEN(MTYPE, _test_cidrs) +#define mtype_test		TOKEN(MTYPE, _test) +#define mtype_expire		TOKEN(MTYPE, _expire) +#define mtype_resize		TOKEN(MTYPE, _resize) +#define mtype_head		TOKEN(MTYPE, _head) +#define mtype_list		TOKEN(MTYPE, _list) +#define mtype_gc		TOKEN(MTYPE, _gc) +#define mtype_variant		TOKEN(MTYPE, _variant) +#define mtype_data_match	TOKEN(MTYPE, _data_match) + +#ifndef HKEY_DATALEN +#define HKEY_DATALEN		sizeof(struct mtype_elem) +#endif + +#define HKEY(data, initval, htable_bits)			\ +(jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval)	\ +	& jhash_mask(htable_bits)) + +#ifndef htype +#define htype			HTYPE + +/* The generic hash structure */ +struct htype { +	struct htable *table;	/* the hash table */ +	u32 maxelem;		/* max elements in the hash */ +	u32 elements;		/* current element (vs timeout) */ +	u32 initval;		/* random jhash init value */ +	u32 timeout;		/* timeout value, if enabled */ +	size_t dsize;		/* data struct size */ +	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */ +	struct timer_list gc;	/* garbage collection when timeout enabled */ +	struct mtype_elem next; /* temporary storage for uadd */ +#ifdef IP_SET_HASH_WITH_MULTI +	u8 ahash_max;		/* max elements 
in an array block */ +#endif +#ifdef IP_SET_HASH_WITH_NETMASK +	u8 netmask;		/* netmask value for subnets to store */ +#endif +#ifdef IP_SET_HASH_WITH_RBTREE +	struct rb_root rbtree; +#endif +#ifdef IP_SET_HASH_WITH_NETS +	struct net_prefixes nets[0]; /* book-keeping of prefixes */ +#endif +}; +#endif + +#ifdef IP_SET_HASH_WITH_NETS +/* Network cidr size book keeping when the hash stores different + * sized networks */ +static void +mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length) +{ +	int i, j; + +	/* Add in increasing prefix order, so larger cidr first */ +	for (i = 0, j = -1; i < nets_length && h->nets[i].nets; i++) { +		if (j != -1) +			continue; +		else if (h->nets[i].cidr < cidr) +			j = i; +		else if (h->nets[i].cidr == cidr) { +			h->nets[i].nets++; +			return; +		} +	} +	if (j != -1) { +		for (; i > j; i--) { +			h->nets[i].cidr = h->nets[i - 1].cidr; +			h->nets[i].nets = h->nets[i - 1].nets; +		} +	} +	h->nets[i].cidr = cidr; +	h->nets[i].nets = 1; +} + +static void +mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length) +{ +	u8 i, j; + +	for (i = 0; i < nets_length - 1 && h->nets[i].cidr != cidr; i++) +		; +	h->nets[i].nets--; + +	if (h->nets[i].nets != 0) +		return; + +	for (j = i; j < nets_length - 1 && h->nets[j].nets; j++) { +		h->nets[j].cidr = h->nets[j + 1].cidr; +		h->nets[j].nets = h->nets[j + 1].nets; +	} +} +#endif + +/* Calculate the actual memory size of the set data */ +static size_t +mtype_ahash_memsize(const struct htype *h, u8 nets_length) +{ +	u32 i; +	struct htable *t = h->table; +	size_t memsize = sizeof(*h) +			 + sizeof(*t) +#ifdef IP_SET_HASH_WITH_NETS +			 + sizeof(struct net_prefixes) * nets_length +#endif +			 + jhash_size(t->htable_bits) * sizeof(struct hbucket); + +	for (i = 0; i < jhash_size(t->htable_bits); i++) +		memsize += t->bucket[i].size * h->dsize; + +	return memsize; +} + +/* Flush a hash type of set: destroy all elements */ +static void +mtype_flush(struct ip_set *set) +{ +	struct htype *h = set->data; +	struct htable *t = h->table; +	struct hbucket *n; +	u32 i; + +	for (i = 0; i < jhash_size(t->htable_bits); i++) { +		n = hbucket(t, i); +		if (n->size) { +			n->size = n->pos = 0; +			/* FIXME: use slab cache */ +			kfree(n->value); +		} +	} +#ifdef IP_SET_HASH_WITH_NETS +	memset(h->nets, 0, sizeof(struct net_prefixes) +			   * NETS_LENGTH(set->family)); +#endif +	h->elements = 0; +} + +/* Destroy a hash type of set */ +static void +mtype_destroy(struct ip_set *set) +{ +	struct htype *h = set->data; + +	if (set->extensions & IPSET_EXT_TIMEOUT) +		del_timer_sync(&h->gc); + +	ahash_destroy(h->table); +#ifdef IP_SET_HASH_WITH_RBTREE +	rbtree_destroy(&h->rbtree); +#endif +	kfree(h); + +	set->data = NULL; +} + +static void +mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) +{ +	struct htype *h = set->data; + +	init_timer(&h->gc); +	h->gc.data = (unsigned long) set; +	h->gc.function = gc; +	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; +	add_timer(&h->gc); +	pr_debug("gc initialized, run in every %u\n", +		 IPSET_GC_PERIOD(h->timeout)); +} + +static bool +mtype_same_set(const struct ip_set *a, const struct ip_set *b) +{ +	const struct htype *x = a->data; +	const struct htype *y = b->data; + +	/* Resizing changes htable_bits, so we ignore it */ +	return x->maxelem == y->maxelem && +	       x->timeout == y->timeout && +#ifdef IP_SET_HASH_WITH_NETMASK +	       x->netmask == y->netmask && +#endif +	       a->extensions == b->extensions; +} + +/* Get the ith element from the array block n */ +#define 
ahash_data(n, i, dsize)	\ +	((struct mtype_elem *)((n)->value + ((i) * (dsize)))) + +/* Delete expired elements from the hashtable */ +static void +mtype_expire(struct htype *h, u8 nets_length, size_t dsize) +{ +	struct htable *t = h->table; +	struct hbucket *n; +	struct mtype_elem *data; +	u32 i; +	int j; + +	for (i = 0; i < jhash_size(t->htable_bits); i++) { +		n = hbucket(t, i); +		for (j = 0; j < n->pos; j++) { +			data = ahash_data(n, j, dsize); +			if (ip_set_timeout_expired(ext_timeout(data, h))) { +				pr_debug("expired %u/%u\n", i, j); +#ifdef IP_SET_HASH_WITH_NETS +				mtype_del_cidr(h, CIDR(data->cidr), +					       nets_length); +#endif +				if (j != n->pos - 1) +					/* Not last one */ +					memcpy(data, +					       ahash_data(n, n->pos - 1, dsize), +					       dsize); +				n->pos--; +				h->elements--; +			} +		} +		if (n->pos + AHASH_INIT_SIZE < n->size) { +			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) +					    * dsize, +					    GFP_ATOMIC); +			if (!tmp) +				/* Still try to delete expired elements */ +				continue; +			n->size -= AHASH_INIT_SIZE; +			memcpy(tmp, n->value, n->size * dsize); +			kfree(n->value); +			n->value = tmp; +		} +	} +} + +static void +mtype_gc(unsigned long ul_set) +{ +	struct ip_set *set = (struct ip_set *) ul_set; +	struct htype *h = set->data; + +	pr_debug("called\n"); +	write_lock_bh(&set->lock); +	mtype_expire(h, NETS_LENGTH(set->family), h->dsize); +	write_unlock_bh(&set->lock); + +	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ; +	add_timer(&h->gc); +} + +/* Resize a hash: create a new hash table with doubling the hashsize + * and inserting the elements to it. Repeat until we succeed or + * fail due to memory pressures. */ +static int +mtype_resize(struct ip_set *set, bool retried) +{ +	struct htype *h = set->data; +	struct htable *t, *orig = h->table; +	u8 htable_bits = orig->htable_bits; +#ifdef IP_SET_HASH_WITH_NETS +	u8 flags; +#endif +	struct mtype_elem *data; +	struct mtype_elem *d; +	struct hbucket *n, *m; +	u32 i, j; +	int ret; + +	/* Try to cleanup once */ +	if (SET_WITH_TIMEOUT(set) && !retried) { +		i = h->elements; +		write_lock_bh(&set->lock); +		mtype_expire(set->data, NETS_LENGTH(set->family), +			     h->dsize); +		write_unlock_bh(&set->lock); +		if (h->elements < i) +			return 0; +	} + +retry: +	ret = 0; +	htable_bits++; +	pr_debug("attempt to resize set %s from %u to %u, t %p\n", +		 set->name, orig->htable_bits, htable_bits, orig); +	if (!htable_bits) { +		/* In case we have plenty of memory :-) */ +		pr_warning("Cannot increase the hashsize of set %s further\n", +			   set->name); +		return -IPSET_ERR_HASH_FULL; +	} +	t = ip_set_alloc(sizeof(*t) +			 + jhash_size(htable_bits) * sizeof(struct hbucket)); +	if (!t) +		return -ENOMEM; +	t->htable_bits = htable_bits; + +	read_lock_bh(&set->lock); +	for (i = 0; i < jhash_size(orig->htable_bits); i++) { +		n = hbucket(orig, i); +		for (j = 0; j < n->pos; j++) { +			data = ahash_data(n, j, h->dsize); +#ifdef IP_SET_HASH_WITH_NETS +			flags = 0; +			mtype_data_reset_flags(data, &flags); +#endif +			m = hbucket(t, HKEY(data, h->initval, htable_bits)); +			ret = hbucket_elem_add(m, AHASH_MAX(h), h->dsize); +			if (ret < 0) { +#ifdef IP_SET_HASH_WITH_NETS +				mtype_data_reset_flags(data, &flags); +#endif +				read_unlock_bh(&set->lock); +				ahash_destroy(t); +				if (ret == -EAGAIN) +					goto retry; +				return ret; +			} +			d = ahash_data(m, m->pos++, h->dsize); +			memcpy(d, data, h->dsize); +#ifdef IP_SET_HASH_WITH_NETS +			mtype_data_reset_flags(d, &flags); 
+#endif
+		}
+	}
+
+	rcu_assign_pointer(h->table, t);
+	read_unlock_bh(&set->lock);
+
+	/* Give time to other readers of the set */
+	synchronize_rcu_bh();
+
+	pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
+		 orig->htable_bits, orig, t->htable_bits, t);
+	ahash_destroy(orig);
+
+	return 0;
+}
+
+/* Add an element to a hash and update the internal counters on success,
+ * otherwise report the proper error code. */
+static int
+mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
+	  struct ip_set_ext *mext, u32 flags)
+{
+	struct htype *h = set->data;
+	struct htable *t;
+	const struct mtype_elem *d = value;
+	struct mtype_elem *data;
+	struct hbucket *n;
+	int i, ret = 0;
+	int j = AHASH_MAX(h) + 1;
+	bool flag_exist = flags & IPSET_FLAG_EXIST;
+	u32 key, multi = 0;
+
+	if (SET_WITH_TIMEOUT(set) && h->elements >= h->maxelem)
+		/* FIXME: when set is full, we slow down here */
+		mtype_expire(h, NETS_LENGTH(set->family), h->dsize);
+
+	if (h->elements >= h->maxelem) {
+		if (net_ratelimit())
+			pr_warning("Set %s is full, maxelem %u reached\n",
+				   set->name, h->maxelem);
+		return -IPSET_ERR_HASH_FULL;
+	}
+
+	rcu_read_lock_bh();
+	t = rcu_dereference_bh(h->table);
+	key = HKEY(value, h->initval, t->htable_bits);
+	n = hbucket(t, key);
+	for (i = 0; i < n->pos; i++) {
+		data = ahash_data(n, i, h->dsize);
+		if (mtype_data_equal(data, d, &multi)) {
+			if (flag_exist ||
+			    (SET_WITH_TIMEOUT(set) &&
+			     ip_set_timeout_expired(ext_timeout(data, h)))) {
+				/* Only the extensions may be overwritten */
+				j = i;
+				goto reuse_slot;
+			} else {
+				ret = -IPSET_ERR_EXIST;
+				goto out;
+			}
+		}
+		/* Reuse the first timed-out entry */
+		if (SET_WITH_TIMEOUT(set) &&
+		    ip_set_timeout_expired(ext_timeout(data, h)) &&
+		    j != AHASH_MAX(h) + 1)
+			j = i;
+	}
+reuse_slot:
+	if (j != AHASH_MAX(h) + 1) {
+		/* Fill out reused slot */
+		data = ahash_data(n, j, h->dsize);
+#ifdef IP_SET_HASH_WITH_NETS
+		mtype_del_cidr(h, CIDR(data->cidr), NETS_LENGTH(set->family));
+		mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
+#endif
+	} else {
+		/* Use/create a new slot */
+		TUNE_AHASH_MAX(h, multi);
+		ret = hbucket_elem_add(n, AHASH_MAX(h), h->dsize);
+		if (ret != 0) {
+			if (ret == -EAGAIN)
+				mtype_data_next(&h->next, d);
+			goto out;
+		}
+		data = ahash_data(n, n->pos++, h->dsize);
+#ifdef IP_SET_HASH_WITH_NETS
+		mtype_add_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family));
+#endif
+		h->elements++;
+	}
+	memcpy(data, d, sizeof(struct mtype_elem));
+#ifdef IP_SET_HASH_WITH_NETS
+	mtype_data_set_flags(data, flags);
+#endif
+	if (SET_WITH_TIMEOUT(set))
+		ip_set_timeout_set(ext_timeout(data, h), ext->timeout);
+	if (SET_WITH_COUNTER(set))
+		ip_set_init_counter(ext_counter(data, h), ext);
+
+out:
+	rcu_read_unlock_bh();
+	return ret;
+}
+
+/* Delete an element from the hash: swap it with the last element
+ * and free up space if possible. 
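+ * (Because of the swap, element order within a bucket is not stable;
+ * once AHASH_INIT_SIZE slots of slack accumulate, the bucket array
+ * is reallocated that much smaller.)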
+ */ +static int +mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, +	  struct ip_set_ext *mext, u32 flags) +{ +	struct htype *h = set->data; +	struct htable *t = h->table; +	const struct mtype_elem *d = value; +	struct mtype_elem *data; +	struct hbucket *n; +	int i; +	u32 key, multi = 0; + +	key = HKEY(value, h->initval, t->htable_bits); +	n = hbucket(t, key); +	for (i = 0; i < n->pos; i++) { +		data = ahash_data(n, i, h->dsize); +		if (!mtype_data_equal(data, d, &multi)) +			continue; +		if (SET_WITH_TIMEOUT(set) && +		    ip_set_timeout_expired(ext_timeout(data, h))) +			return -IPSET_ERR_EXIST; +		if (i != n->pos - 1) +			/* Not last one */ +			memcpy(data, ahash_data(n, n->pos - 1, h->dsize), +			       h->dsize); + +		n->pos--; +		h->elements--; +#ifdef IP_SET_HASH_WITH_NETS +		mtype_del_cidr(h, CIDR(d->cidr), NETS_LENGTH(set->family)); +#endif +		if (n->pos + AHASH_INIT_SIZE < n->size) { +			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE) +					    * h->dsize, +					    GFP_ATOMIC); +			if (!tmp) +				return 0; +			n->size -= AHASH_INIT_SIZE; +			memcpy(tmp, n->value, n->size * h->dsize); +			kfree(n->value); +			n->value = tmp; +		} +		return 0; +	} + +	return -IPSET_ERR_EXIST; +} + +static inline int +mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext, +		 struct ip_set_ext *mext, struct ip_set *set, u32 flags) +{ +	if (SET_WITH_COUNTER(set)) +		ip_set_update_counter(ext_counter(data, +						  (struct htype *)(set->data)), +				      ext, mext, flags); +	return mtype_do_data_match(data); +} + +#ifdef IP_SET_HASH_WITH_NETS +/* Special test function which takes into account the different network + * sizes added to the set */ +static int +mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, +		 const struct ip_set_ext *ext, +		 struct ip_set_ext *mext, u32 flags) +{ +	struct htype *h = set->data; +	struct htable *t = h->table; +	struct hbucket *n; +	struct mtype_elem *data; +	int i, j = 0; +	u32 key, multi = 0; +	u8 nets_length = NETS_LENGTH(set->family); + +	pr_debug("test by nets\n"); +	for (; j < nets_length && h->nets[j].nets && !multi; j++) { +		mtype_data_netmask(d, h->nets[j].cidr); +		key = HKEY(d, h->initval, t->htable_bits); +		n = hbucket(t, key); +		for (i = 0; i < n->pos; i++) { +			data = ahash_data(n, i, h->dsize); +			if (!mtype_data_equal(data, d, &multi)) +				continue; +			if (SET_WITH_TIMEOUT(set)) { +				if (!ip_set_timeout_expired( +							ext_timeout(data, h))) +					return mtype_data_match(data, ext, +								mext, set, +								flags); +#ifdef IP_SET_HASH_WITH_MULTI +				multi = 0; +#endif +			} else +				return mtype_data_match(data, ext, +							mext, set, flags); +		} +	} +	return 0; +} +#endif + +/* Test whether the element is added to the set */ +static int +mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, +	   struct ip_set_ext *mext, u32 flags) +{ +	struct htype *h = set->data; +	struct htable *t = h->table; +	struct mtype_elem *d = value; +	struct hbucket *n; +	struct mtype_elem *data; +	int i; +	u32 key, multi = 0; + +#ifdef IP_SET_HASH_WITH_NETS +	/* If we test an IP address and not a network address, +	 * try all possible network sizes */ +	if (CIDR(d->cidr) == SET_HOST_MASK(set->family)) +		return mtype_test_cidrs(set, d, ext, mext, flags); +#endif + +	key = HKEY(d, h->initval, t->htable_bits); +	n = hbucket(t, key); +	for (i = 0; i < n->pos; i++) { +		data = ahash_data(n, i, h->dsize); +		if (mtype_data_equal(data, d, &multi) && +		    !(SET_WITH_TIMEOUT(set) && +		      
ip_set_timeout_expired(ext_timeout(data, h)))) +			return mtype_data_match(data, ext, mext, set, flags); +	} +	return 0; +} + +/* Reply a HEADER request: fill out the header part of the set */ +static int +mtype_head(struct ip_set *set, struct sk_buff *skb) +{ +	const struct htype *h = set->data; +	struct nlattr *nested; +	size_t memsize; + +	read_lock_bh(&set->lock); +	memsize = mtype_ahash_memsize(h, NETS_LENGTH(set->family)); +	read_unlock_bh(&set->lock); + +	nested = ipset_nest_start(skb, IPSET_ATTR_DATA); +	if (!nested) +		goto nla_put_failure; +	if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE, +			  htonl(jhash_size(h->table->htable_bits))) || +	    nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem))) +		goto nla_put_failure; +#ifdef IP_SET_HASH_WITH_NETMASK +	if (h->netmask != HOST_MASK && +	    nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask)) +		goto nla_put_failure; +#endif +	if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) || +	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || +	    ((set->extensions & IPSET_EXT_TIMEOUT) && +	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout))) || +	    ((set->extensions & IPSET_EXT_COUNTER) && +	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, +			   htonl(IPSET_FLAG_WITH_COUNTERS)))) +		goto nla_put_failure; +	ipset_nest_end(skb, nested); + +	return 0; +nla_put_failure: +	return -EMSGSIZE; +} + +/* Reply a LIST/SAVE request: dump the elements of the specified set */ +static int +mtype_list(const struct ip_set *set, +	   struct sk_buff *skb, struct netlink_callback *cb) +{ +	const struct htype *h = set->data; +	const struct htable *t = h->table; +	struct nlattr *atd, *nested; +	const struct hbucket *n; +	const struct mtype_elem *e; +	u32 first = cb->args[2]; +	/* We assume that one hash bucket fills into one page */ +	void *incomplete; +	int i; + +	atd = ipset_nest_start(skb, IPSET_ATTR_ADT); +	if (!atd) +		return -EMSGSIZE; +	pr_debug("list hash set %s\n", set->name); +	for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) { +		incomplete = skb_tail_pointer(skb); +		n = hbucket(t, cb->args[2]); +		pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n); +		for (i = 0; i < n->pos; i++) { +			e = ahash_data(n, i, h->dsize); +			if (SET_WITH_TIMEOUT(set) && +			    ip_set_timeout_expired(ext_timeout(e, h))) +				continue; +			pr_debug("list hash %lu hbucket %p i %u, data %p\n", +				 cb->args[2], n, i, e); +			nested = ipset_nest_start(skb, IPSET_ATTR_DATA); +			if (!nested) { +				if (cb->args[2] == first) { +					nla_nest_cancel(skb, atd); +					return -EMSGSIZE; +				} else +					goto nla_put_failure; +			} +			if (mtype_data_list(skb, e)) +				goto nla_put_failure; +			if (SET_WITH_TIMEOUT(set) && +			    nla_put_net32(skb, IPSET_ATTR_TIMEOUT, +					  htonl(ip_set_timeout_get( +						ext_timeout(e, h))))) +				goto nla_put_failure; +			if (SET_WITH_COUNTER(set) && +			    ip_set_put_counter(skb, ext_counter(e, h))) +				goto nla_put_failure; +			ipset_nest_end(skb, nested); +		} +	} +	ipset_nest_end(skb, atd); +	/* Set listing finished */ +	cb->args[2] = 0; + +	return 0; + +nla_put_failure: +	nlmsg_trim(skb, incomplete); +	ipset_nest_end(skb, atd); +	if (unlikely(first == cb->args[2])) { +		pr_warning("Can't list set %s: one bucket does not fit into " +			   "a message. 
Please report it!\n", set->name); +		cb->args[2] = 0; +		return -EMSGSIZE; +	} +	return 0; +} + +static int +TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, +	      const struct xt_action_param *par, +	      enum ipset_adt adt, struct ip_set_adt_opt *opt); + +static int +TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], +	      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); + +static const struct ip_set_type_variant mtype_variant = { +	.kadt	= mtype_kadt, +	.uadt	= mtype_uadt, +	.adt	= { +		[IPSET_ADD] = mtype_add, +		[IPSET_DEL] = mtype_del, +		[IPSET_TEST] = mtype_test, +	}, +	.destroy = mtype_destroy, +	.flush	= mtype_flush, +	.head	= mtype_head, +	.list	= mtype_list, +	.resize	= mtype_resize, +	.same_set = mtype_same_set, +}; + +#ifdef IP_SET_EMIT_CREATE +static int +TOKEN(HTYPE, _create)(struct ip_set *set, struct nlattr *tb[], u32 flags) +{ +	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; +	u32 cadt_flags = 0; +	u8 hbits; +#ifdef IP_SET_HASH_WITH_NETMASK +	u8 netmask; +#endif +	size_t hsize; +	struct HTYPE *h; + +	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) +		return -IPSET_ERR_INVALID_FAMILY; +#ifdef IP_SET_HASH_WITH_NETMASK +	netmask = set->family == NFPROTO_IPV4 ? 32 : 128; +	pr_debug("Create set %s with family %s\n", +		 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6"); +#endif + +	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) +		return -IPSET_ERR_PROTOCOL; + +	if (tb[IPSET_ATTR_HASHSIZE]) { +		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); +		if (hashsize < IPSET_MIMINAL_HASHSIZE) +			hashsize = IPSET_MIMINAL_HASHSIZE; +	} + +	if (tb[IPSET_ATTR_MAXELEM]) +		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); + +#ifdef IP_SET_HASH_WITH_NETMASK +	if (tb[IPSET_ATTR_NETMASK]) { +		netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); + +		if ((set->family == NFPROTO_IPV4 && netmask > 32) || +		    (set->family == NFPROTO_IPV6 && netmask > 128) || +		    netmask == 0) +			return -IPSET_ERR_INVALID_NETMASK; +	} +#endif + +	hsize = sizeof(*h); +#ifdef IP_SET_HASH_WITH_NETS +	hsize += sizeof(struct net_prefixes) * +		(set->family == NFPROTO_IPV4 ? 
32 : 128); +#endif +	h = kzalloc(hsize, GFP_KERNEL); +	if (!h) +		return -ENOMEM; + +	h->maxelem = maxelem; +#ifdef IP_SET_HASH_WITH_NETMASK +	h->netmask = netmask; +#endif +	get_random_bytes(&h->initval, sizeof(h->initval)); +	h->timeout = IPSET_NO_TIMEOUT; + +	hbits = htable_bits(hashsize); +	hsize = htable_size(hbits); +	if (hsize == 0) { +		kfree(h); +		return -ENOMEM; +	} +	h->table = ip_set_alloc(hsize); +	if (!h->table) { +		kfree(h); +		return -ENOMEM; +	} +	h->table->htable_bits = hbits; + +	set->data = h; +	if (set->family ==  NFPROTO_IPV4) +		set->variant = &TOKEN(HTYPE, 4_variant); +	else +		set->variant = &TOKEN(HTYPE, 6_variant); + +	if (tb[IPSET_ATTR_CADT_FLAGS]) +		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); +	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { +		set->extensions |= IPSET_EXT_COUNTER; +		if (tb[IPSET_ATTR_TIMEOUT]) { +			h->timeout = +				ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); +			set->extensions |= IPSET_EXT_TIMEOUT; +			if (set->family == NFPROTO_IPV4) { +				h->dsize = +					sizeof(struct TOKEN(HTYPE, 4ct_elem)); +				h->offset[IPSET_OFFSET_TIMEOUT] = +					offsetof(struct TOKEN(HTYPE, 4ct_elem), +						 timeout); +				h->offset[IPSET_OFFSET_COUNTER] = +					offsetof(struct TOKEN(HTYPE, 4ct_elem), +						 counter); +				TOKEN(HTYPE, 4_gc_init)(set, +					TOKEN(HTYPE, 4_gc)); +			} else { +				h->dsize = +					sizeof(struct TOKEN(HTYPE, 6ct_elem)); +				h->offset[IPSET_OFFSET_TIMEOUT] = +					offsetof(struct TOKEN(HTYPE, 6ct_elem), +						 timeout); +				h->offset[IPSET_OFFSET_COUNTER] = +					offsetof(struct TOKEN(HTYPE, 6ct_elem), +						 counter); +				TOKEN(HTYPE, 6_gc_init)(set, +					TOKEN(HTYPE, 6_gc)); +			} +		} else { +			if (set->family == NFPROTO_IPV4) { +				h->dsize = +					sizeof(struct TOKEN(HTYPE, 4c_elem)); +				h->offset[IPSET_OFFSET_COUNTER] = +					offsetof(struct TOKEN(HTYPE, 4c_elem), +						 counter); +			} else { +				h->dsize = +					sizeof(struct TOKEN(HTYPE, 6c_elem)); +				h->offset[IPSET_OFFSET_COUNTER] = +					offsetof(struct TOKEN(HTYPE, 6c_elem), +						 counter); +			} +		} +	} else if (tb[IPSET_ATTR_TIMEOUT]) { +		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); +		set->extensions |= IPSET_EXT_TIMEOUT; +		if (set->family == NFPROTO_IPV4) { +			h->dsize = sizeof(struct TOKEN(HTYPE, 4t_elem)); +			h->offset[IPSET_OFFSET_TIMEOUT] = +				offsetof(struct TOKEN(HTYPE, 4t_elem), +					 timeout); +			TOKEN(HTYPE, 4_gc_init)(set, TOKEN(HTYPE, 4_gc)); +		} else { +			h->dsize = sizeof(struct TOKEN(HTYPE, 6t_elem)); +			h->offset[IPSET_OFFSET_TIMEOUT] = +				offsetof(struct TOKEN(HTYPE, 6t_elem), +					 timeout); +			TOKEN(HTYPE, 6_gc_init)(set, TOKEN(HTYPE, 6_gc)); +		} +	} else { +		if (set->family == NFPROTO_IPV4) +			h->dsize = sizeof(struct TOKEN(HTYPE, 4_elem)); +		else +			h->dsize = sizeof(struct TOKEN(HTYPE, 6_elem)); +	} + +	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", +		 set->name, jhash_size(h->table->htable_bits), +		 h->table->htable_bits, h->maxelem, set->data, h->table); + +	return 0; +} +#endif /* IP_SET_EMIT_CREATE */ diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index b7d4cb475ae..c74e6e14cd9 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>   *   * This program is free software; you can redistribute it and/or modify   * it under the 
terms of the GNU General Public License version 2 as @@ -21,11 +21,10 @@  #include <linux/netfilter.h>  #include <linux/netfilter/ipset/pfxlen.h>  #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h>  #include <linux/netfilter/ipset/ip_set_hash.h>  #define REVISION_MIN	0 -#define REVISION_MAX	0 +#define REVISION_MAX	1	/* Counters support */  MODULE_LICENSE("GPL");  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -33,58 +32,47 @@ IP_SET_MODULE_DESC("hash:ip", REVISION_MIN, REVISION_MAX);  MODULE_ALIAS("ip_set_hash:ip");  /* Type specific function prefix */ -#define TYPE		hash_ip - -static bool -hash_ip_same_set(const struct ip_set *a, const struct ip_set *b); - -#define hash_ip4_same_set	hash_ip_same_set -#define hash_ip6_same_set	hash_ip_same_set +#define HTYPE		hash_ip +#define IP_SET_HASH_WITH_NETMASK -/* The type variant functions: IPv4 */ +/* IPv4 variants */ -/* Member elements without timeout */ +/* Member elements */  struct hash_ip4_elem { +	/* Zero valued IP addresses cannot be stored */  	__be32 ip;  }; -/* Member elements with timeout support */ -struct hash_ip4_telem { +struct hash_ip4t_elem {  	__be32 ip;  	unsigned long timeout;  }; -static inline bool -hash_ip4_data_equal(const struct hash_ip4_elem *ip1, -		    const struct hash_ip4_elem *ip2, -		    u32 *multi) -{ -	return ip1->ip == ip2->ip; -} +struct hash_ip4c_elem { +	__be32 ip; +	struct ip_set_counter counter; +}; -static inline bool -hash_ip4_data_isnull(const struct hash_ip4_elem *elem) -{ -	return elem->ip == 0; -} +struct hash_ip4ct_elem { +	__be32 ip; +	struct ip_set_counter counter; +	unsigned long timeout; +}; -static inline void -hash_ip4_data_copy(struct hash_ip4_elem *dst, const struct hash_ip4_elem *src) -{ -	dst->ip = src->ip; -} +/* Common functions */ -/* Zero valued IP addresses cannot be stored */ -static inline void -hash_ip4_data_zero_out(struct hash_ip4_elem *elem) +static inline bool +hash_ip4_data_equal(const struct hash_ip4_elem *e1, +		    const struct hash_ip4_elem *e2, +		    u32 *multi)  { -	elem->ip = 0; +	return e1->ip == e2->ip;  }  static inline bool -hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *data) +hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *e)  { -	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip)) +	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, e->ip))  		goto nla_put_failure;  	return 0; @@ -92,41 +80,26 @@ nla_put_failure:  	return 1;  } -static bool -hash_ip4_data_tlist(struct sk_buff *skb, const struct hash_ip4_elem *data) +static inline void +hash_ip4_data_next(struct hash_ip4_elem *next, const struct hash_ip4_elem *e)  { -	const struct hash_ip4_telem *tdata = -		(const struct hash_ip4_telem *)data; - -	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) || -	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT, -			  htonl(ip_set_timeout_get(tdata->timeout)))) -		goto nla_put_failure; - -	return 0; - -nla_put_failure: -	return 1; +	next->ip = e->ip;  } -#define IP_SET_HASH_WITH_NETMASK +#define MTYPE		hash_ip4  #define PF		4  #define HOST_MASK	32 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_ip4_data_next(struct ip_set_hash *h, const struct hash_ip4_elem *d) -{ -	h->next.ip = d->ip; -} +#include "ip_set_hash_gen.h"  static int  hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,  	      const struct xt_action_param *par, -	      enum ipset_adt adt, const struct ip_set_adt_opt *opt) +	      enum ipset_adt adt, struct ip_set_adt_opt *opt)  { -	const struct 
ip_set_hash *h = set->data; +	const struct hash_ip *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; +	struct hash_ip4_elem e = {}; +	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);  	__be32 ip;  	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip); @@ -134,43 +107,42 @@ hash_ip4_kadt(struct ip_set *set, const struct sk_buff *skb,  	if (ip == 0)  		return -EINVAL; -	return adtfn(set, &ip, opt_timeout(opt, h), opt->cmdflags); +	e.ip = ip; +	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);  }  static int  hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],  	      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_ip *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	u32 ip, ip_to, hosts, timeout = h->timeout; -	__be32 nip; +	struct hash_ip4_elem e = {}; +	struct ip_set_ext ext = IP_SET_INIT_UEXT(h); +	u32 ip, ip_to, hosts;  	int ret = 0;  	if (unlikely(!tb[IPSET_ATTR_IP] || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_LINENO])  		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); -	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); +	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || +	      ip_set_get_extensions(set, tb, &ext);  	if (ret)  		return ret;  	ip &= ip_set_hostmask(h->netmask); -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!with_timeout(h->timeout)) -			return -IPSET_ERR_TIMEOUT; -		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -	} -  	if (adt == IPSET_TEST) { -		nip = htonl(ip); -		if (nip == 0) +		e.ip = htonl(ip); +		if (e.ip == 0)  			return -IPSET_ERR_HASH_ELEM; -		return adtfn(set, &nip, timeout, flags); +		return adtfn(set, &e, &ext, &ext, flags);  	}  	ip_to = ip; @@ -193,10 +165,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],  	if (retried)  		ip = ntohl(h->next.ip);  	for (; !before(ip_to, ip); ip += hosts) { -		nip = htonl(ip); -		if (nip == 0) +		e.ip = htonl(ip); +		if (e.ip == 0)  			return -IPSET_ERR_HASH_ELEM; -		ret = adtfn(set, &nip, timeout, flags); +		ret = adtfn(set, &e, &ext, &ext, flags);  		if (ret && !ip_set_eexist(ret, flags))  			return ret; @@ -206,29 +178,31 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],  	return ret;  } -static bool -hash_ip_same_set(const struct ip_set *a, const struct ip_set *b) -{ -	const struct ip_set_hash *x = a->data; -	const struct ip_set_hash *y = b->data; +/* IPv6 variants */ -	/* Resizing changes htable_bits, so we ignore it */ -	return x->maxelem == y->maxelem && -	       x->timeout == y->timeout && -	       x->netmask == y->netmask; -} +/* Member elements */ +struct hash_ip6_elem { +	union nf_inet_addr ip; +}; -/* The type variant functions: IPv6 */ +struct hash_ip6t_elem { +	union nf_inet_addr ip; +	unsigned long timeout; +}; -struct hash_ip6_elem { +struct hash_ip6c_elem {  	union nf_inet_addr ip; +	struct ip_set_counter counter;  }; -struct hash_ip6_telem { +struct hash_ip6ct_elem {  	union nf_inet_addr ip; +	struct ip_set_counter counter;  	unsigned long timeout;  }; +/* Common functions */ +  static inline bool  hash_ip6_data_equal(const struct hash_ip6_elem *ip1,  		    const struct hash_ip6_elem *ip2, @@ -237,37 +211,16 @@ hash_ip6_data_equal(const struct hash_ip6_elem *ip1,  	return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6);  } 
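
The per-family element declarations above come in four layouts — plain, t (timeout), c (counter) and ct (counter plus timeout) — and the generic code never names the extension fields directly: it reaches them through byte offsets that the create function records with offsetof() on the chosen layout. A minimal sketch of how the ext_timeout()/ext_counter() accessors can resolve such an offset; the macro bodies here are an illustration only, while the offset array and the IPSET_OFFSET_* indices are the ones filled in by the _create() function later in this patch:

/* Illustrative sketch, not part of the patch: locate an extension
 * inside an opaque element from the offsets recorded at create time,
 * e.g. offsetof(struct hash_ip4ct_elem, timeout). */
#define ext_timeout(e, h)						\
	((unsigned long *)(((void *)(e)) +				\
			   (h)->offset[IPSET_OFFSET_TIMEOUT]))
#define ext_counter(e, h)						\
	((struct ip_set_counter *)(((void *)(e)) +			\
				   (h)->offset[IPSET_OFFSET_COUNTER]))
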
-static inline bool -hash_ip6_data_isnull(const struct hash_ip6_elem *elem) -{ -	return ipv6_addr_any(&elem->ip.in6); -} -  static inline void -hash_ip6_data_copy(struct hash_ip6_elem *dst, const struct hash_ip6_elem *src) +hash_ip6_netmask(union nf_inet_addr *ip, u8 prefix)  { -	dst->ip.in6 = src->ip.in6; -} - -static inline void -hash_ip6_data_zero_out(struct hash_ip6_elem *elem) -{ -	ipv6_addr_set(&elem->ip.in6, 0, 0, 0, 0); -} - -static inline void -ip6_netmask(union nf_inet_addr *ip, u8 prefix) -{ -	ip->ip6[0] &= ip_set_netmask6(prefix)[0]; -	ip->ip6[1] &= ip_set_netmask6(prefix)[1]; -	ip->ip6[2] &= ip_set_netmask6(prefix)[2]; -	ip->ip6[3] &= ip_set_netmask6(prefix)[3]; +	ip6_netmask(ip, prefix);  }  static bool -hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *data) +hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *e)  { -	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6)) +	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6))  		goto nla_put_failure;  	return 0; @@ -275,69 +228,55 @@ nla_put_failure:  	return 1;  } -static bool -hash_ip6_data_tlist(struct sk_buff *skb, const struct hash_ip6_elem *data) +static inline void +hash_ip6_data_next(struct hash_ip4_elem *next, const struct hash_ip6_elem *e)  { -	const struct hash_ip6_telem *e = -		(const struct hash_ip6_telem *)data; - -	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || -	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT, -			  htonl(ip_set_timeout_get(e->timeout)))) -		goto nla_put_failure; -	return 0; - -nla_put_failure: -	return 1;  } +#undef MTYPE  #undef PF  #undef HOST_MASK +#undef HKEY_DATALEN +#define MTYPE		hash_ip6  #define PF		6  #define HOST_MASK	128 -#include <linux/netfilter/ipset/ip_set_ahash.h> -static inline void -hash_ip6_data_next(struct ip_set_hash *h, const struct hash_ip6_elem *d) -{ -} +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h"  static int  hash_ip6_kadt(struct ip_set *set, const struct sk_buff *skb,  	      const struct xt_action_param *par, -	      enum ipset_adt adt, const struct ip_set_adt_opt *opt) +	      enum ipset_adt adt, struct ip_set_adt_opt *opt)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_ip *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	union nf_inet_addr ip; +	struct hash_ip6_elem e = {}; +	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); -	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &ip.in6); -	ip6_netmask(&ip, h->netmask); -	if (ipv6_addr_any(&ip.in6)) +	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); +	hash_ip6_netmask(&e.ip, h->netmask); +	if (ipv6_addr_any(&e.ip.in6))  		return -EINVAL; -	return adtfn(set, &ip, opt_timeout(opt, h), opt->cmdflags); +	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);  } -static const struct nla_policy hash_ip6_adt_policy[IPSET_ATTR_ADT_MAX + 1] = { -	[IPSET_ATTR_IP]		= { .type = NLA_NESTED }, -	[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 }, -	[IPSET_ATTR_LINENO]	= { .type = NLA_U32 }, -}; -  static int  hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],  	      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_ip *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	union nf_inet_addr ip; -	u32 timeout = h->timeout; +	struct hash_ip6_elem e = {}; +	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);  	int ret;  	if (unlikely(!tb[IPSET_ATTR_IP] ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || +		     !ip_set_optattr_netorder(tb, 
IPSET_ATTR_PACKETS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||  		     tb[IPSET_ATTR_IP_TO] ||  		     tb[IPSET_ATTR_CIDR]))  		return -IPSET_ERR_PROTOCOL; @@ -345,110 +284,20 @@ hash_ip6_uadt(struct ip_set *set, struct nlattr *tb[],  	if (tb[IPSET_ATTR_LINENO])  		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); -	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &ip); +	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || +	      ip_set_get_extensions(set, tb, &ext);  	if (ret)  		return ret; -	ip6_netmask(&ip, h->netmask); -	if (ipv6_addr_any(&ip.in6)) +	hash_ip6_netmask(&e.ip, h->netmask); +	if (ipv6_addr_any(&e.ip.in6))  		return -IPSET_ERR_HASH_ELEM; -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!with_timeout(h->timeout)) -			return -IPSET_ERR_TIMEOUT; -		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -	} - -	ret = adtfn(set, &ip, timeout, flags); +	ret = adtfn(set, &e, &ext, &ext, flags);  	return ip_set_eexist(ret, flags) ? 0 : ret;  } -/* Create hash:ip type of sets */ - -static int -hash_ip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) -{ -	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; -	u8 netmask, hbits; -	size_t hsize; -	struct ip_set_hash *h; - -	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) -		return -IPSET_ERR_INVALID_FAMILY; -	netmask = set->family == NFPROTO_IPV4 ? 32 : 128; -	pr_debug("Create set %s with family %s\n", -		 set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6"); - -	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) -		return -IPSET_ERR_PROTOCOL; - -	if (tb[IPSET_ATTR_HASHSIZE]) { -		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); -		if (hashsize < IPSET_MIMINAL_HASHSIZE) -			hashsize = IPSET_MIMINAL_HASHSIZE; -	} - -	if (tb[IPSET_ATTR_MAXELEM]) -		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); - -	if (tb[IPSET_ATTR_NETMASK]) { -		netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); - -		if ((set->family == NFPROTO_IPV4 && netmask > 32) || -		    (set->family == NFPROTO_IPV6 && netmask > 128) || -		    netmask == 0) -			return -IPSET_ERR_INVALID_NETMASK; -	} - -	h = kzalloc(sizeof(*h), GFP_KERNEL); -	if (!h) -		return -ENOMEM; - -	h->maxelem = maxelem; -	h->netmask = netmask; -	get_random_bytes(&h->initval, sizeof(h->initval)); -	h->timeout = IPSET_NO_TIMEOUT; - -	hbits = htable_bits(hashsize); -	hsize = htable_size(hbits); -	if (hsize == 0) { -		kfree(h); -		return -ENOMEM; -	} -	h->table = ip_set_alloc(hsize); -	if (!h->table) { -		kfree(h); -		return -ENOMEM; -	} -	h->table->htable_bits = hbits; - -	set->data = h; - -	if (tb[IPSET_ATTR_TIMEOUT]) { -		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - -		set->variant = set->family == NFPROTO_IPV4 -			? &hash_ip4_tvariant : &hash_ip6_tvariant; - -		if (set->family == NFPROTO_IPV4) -			hash_ip4_gc_init(set); -		else -			hash_ip6_gc_init(set); -	} else { -		set->variant = set->family == NFPROTO_IPV4 -			? 
&hash_ip4_variant : &hash_ip6_variant; -	} - -	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", -		 set->name, jhash_size(h->table->htable_bits), -		 h->table->htable_bits, h->maxelem, set->data, h->table); - -	return 0; -} -  static struct ip_set_type hash_ip_type __read_mostly = {  	.name		= "hash:ip",  	.protocol	= IPSET_PROTOCOL, @@ -465,6 +314,7 @@ static struct ip_set_type hash_ip_type __read_mostly = {  		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },  		[IPSET_ATTR_NETMASK]	= { .type = NLA_U8  }, +		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },  	},  	.adt_policy	= {  		[IPSET_ATTR_IP]		= { .type = NLA_NESTED }, @@ -472,6 +322,8 @@ static struct ip_set_type hash_ip_type __read_mostly = {  		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },  		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 }, +		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 }, +		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },  	},  	.me		= THIS_MODULE,  }; diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index d8f77bacae8..7a2d2bd98d0 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -21,12 +21,12 @@  #include <linux/netfilter.h>  #include <linux/netfilter/ipset/pfxlen.h>  #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h>  #include <linux/netfilter/ipset/ip_set_getport.h>  #include <linux/netfilter/ipset/ip_set_hash.h>  #define REVISION_MIN	0 -#define REVISION_MAX	1 /* SCTP and UDPLITE support added */ +/*			1    SCTP and UDPLITE support added */ +#define REVISION_MAX	2 /* Counters support added */  MODULE_LICENSE("GPL");  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -34,33 +34,45 @@ IP_SET_MODULE_DESC("hash:ip,port", REVISION_MIN, REVISION_MAX);  MODULE_ALIAS("ip_set_hash:ip,port");  /* Type specific function prefix */ -#define TYPE		hash_ipport +#define HTYPE		hash_ipport -static bool -hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b); +/* IPv4 variants */ -#define hash_ipport4_same_set	hash_ipport_same_set -#define hash_ipport6_same_set	hash_ipport_same_set +/* Member elements */ +struct hash_ipport4_elem { +	__be32 ip; +	__be16 port; +	u8 proto; +	u8 padding; +}; -/* The type variant functions: IPv4 */ +struct hash_ipport4t_elem { +	__be32 ip; +	__be16 port; +	u8 proto; +	u8 padding; +	unsigned long timeout; +}; -/* Member elements without timeout */ -struct hash_ipport4_elem { +struct hash_ipport4c_elem {  	__be32 ip;  	__be16 port;  	u8 proto;  	u8 padding; +	struct ip_set_counter counter;  }; -/* Member elements with timeout support */ -struct hash_ipport4_telem { +struct hash_ipport4ct_elem {  	__be32 ip;  	__be16 port;  	u8 proto;  	u8 padding; +	struct ip_set_counter counter;  	unsigned long timeout;  }; +/* Common functions */ +  static inline bool  hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1,  			const struct hash_ipport4_elem *ip2, @@ -71,27 +83,6 @@ hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1,  	       ip1->proto == ip2->proto;  } -static inline bool -hash_ipport4_data_isnull(const struct hash_ipport4_elem *elem) -{ 
-	return elem->proto == 0; -} - -static inline void -hash_ipport4_data_copy(struct hash_ipport4_elem *dst, -		       const struct hash_ipport4_elem *src) -{ -	dst->ip = src->ip; -	dst->port = src->port; -	dst->proto = src->proto; -} - -static inline void -hash_ipport4_data_zero_out(struct hash_ipport4_elem *elem) -{ -	elem->proto = 0; -} -  static bool  hash_ipport4_data_list(struct sk_buff *skb,  		       const struct hash_ipport4_elem *data) @@ -106,111 +97,91 @@ nla_put_failure:  	return 1;  } -static bool -hash_ipport4_data_tlist(struct sk_buff *skb, -			const struct hash_ipport4_elem *data) -{ -	const struct hash_ipport4_telem *tdata = -		(const struct hash_ipport4_telem *)data; - -	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) || -	    nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) || -	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || -	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT, -			  htonl(ip_set_timeout_get(tdata->timeout)))) -		goto nla_put_failure; -	return 0; - -nla_put_failure: -	return 1; -} - -#define PF		4 -#define HOST_MASK	32 -#include <linux/netfilter/ipset/ip_set_ahash.h> -  static inline void -hash_ipport4_data_next(struct ip_set_hash *h, +hash_ipport4_data_next(struct hash_ipport4_elem *next,  		       const struct hash_ipport4_elem *d)  { -	h->next.ip = d->ip; -	h->next.port = d->port; +	next->ip = d->ip; +	next->port = d->port;  } +#define MTYPE           hash_ipport4 +#define PF              4 +#define HOST_MASK       32 +#define HKEY_DATALEN	sizeof(struct hash_ipport4_elem) +#include "ip_set_hash_gen.h" +  static int  hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb,  		  const struct xt_action_param *par, -		  enum ipset_adt adt, const struct ip_set_adt_opt *opt) +		  enum ipset_adt adt, struct ip_set_adt_opt *opt)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_ipport *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_ipport4_elem data = { }; +	struct hash_ipport4_elem e = { }; +	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);  	if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, -				 &data.port, &data.proto)) +				 &e.port, &e.proto))  		return -EINVAL; -	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); - -	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); +	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); +	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);  }  static int  hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],  		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_ipport *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_ipport4_elem data = { }; +	struct hash_ipport4_elem e = { }; +	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);  	u32 ip, ip_to, p = 0, port, port_to; -	u32 timeout = h->timeout;  	bool with_ports = false;  	int ret;  	if (unlikely(!tb[IPSET_ATTR_IP] ||  		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_LINENO])  		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); -	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); +	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) 
|| +	      ip_set_get_extensions(set, tb, &ext);  	if (ret)  		return ret;  	if (tb[IPSET_ATTR_PORT]) -		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); +		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);  	else  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_PROTO]) { -		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); -		with_ports = ip_set_proto_with_ports(data.proto); +		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); +		with_ports = ip_set_proto_with_ports(e.proto); -		if (data.proto == 0) +		if (e.proto == 0)  			return -IPSET_ERR_INVALID_PROTO;  	} else  		return -IPSET_ERR_MISSING_PROTO; -	if (!(with_ports || data.proto == IPPROTO_ICMP)) -		data.port = 0; - -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!with_timeout(h->timeout)) -			return -IPSET_ERR_TIMEOUT; -		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -	} +	if (!(with_ports || e.proto == IPPROTO_ICMP)) +		e.port = 0;  	if (adt == IPSET_TEST ||  	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||  	      tb[IPSET_ATTR_PORT_TO])) { -		ret = adtfn(set, &data, timeout, flags); +		ret = adtfn(set, &e, &ext, &ext, flags);  		return ip_set_eexist(ret, flags) ? 0 : ret;  	} -	ip_to = ip = ntohl(data.ip); +	ip_to = ip = ntohl(e.ip);  	if (tb[IPSET_ATTR_IP_TO]) {  		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);  		if (ret) @@ -225,7 +196,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],  		ip_set_mask_from_to(ip, ip_to, cidr);  	} -	port_to = port = ntohs(data.port); +	port_to = port = ntohs(e.port);  	if (with_ports && tb[IPSET_ATTR_PORT_TO]) {  		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);  		if (port > port_to) @@ -238,9 +209,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],  		p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)  						       : port;  		for (; p <= port_to; p++) { -			data.ip = htonl(ip); -			data.port = htons(p); -			ret = adtfn(set, &data, timeout, flags); +			e.ip = htonl(ip); +			e.port = htons(p); +			ret = adtfn(set, &e, &ext, &ext, flags);  			if (ret && !ip_set_eexist(ret, flags))  				return ret; @@ -251,34 +222,42 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],  	return ret;  } -static bool -hash_ipport_same_set(const struct ip_set *a, const struct ip_set *b) -{ -	const struct ip_set_hash *x = a->data; -	const struct ip_set_hash *y = b->data; +/* IPv6 variants */ -	/* Resizing changes htable_bits, so we ignore it */ -	return x->maxelem == y->maxelem && -	       x->timeout == y->timeout; -} +struct hash_ipport6_elem { +	union nf_inet_addr ip; +	__be16 port; +	u8 proto; +	u8 padding; +}; -/* The type variant functions: IPv6 */ +struct hash_ipport6t_elem { +	union nf_inet_addr ip; +	__be16 port; +	u8 proto; +	u8 padding; +	unsigned long timeout; +}; -struct hash_ipport6_elem { +struct hash_ipport6c_elem {  	union nf_inet_addr ip;  	__be16 port;  	u8 proto;  	u8 padding; +	struct ip_set_counter counter;  }; -struct hash_ipport6_telem { +struct hash_ipport6ct_elem {  	union nf_inet_addr ip;  	__be16 port;  	u8 proto;  	u8 padding; +	struct ip_set_counter counter;  	unsigned long timeout;  }; +/* Common functions */ +  static inline bool  hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1,  			const struct hash_ipport6_elem *ip2, @@ -289,25 +268,6 @@ hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1,  	       ip1->proto == ip2->proto;  } -static inline bool -hash_ipport6_data_isnull(const struct hash_ipport6_elem *elem) -{ -	return elem->proto == 0; -} - -static inline void -hash_ipport6_data_copy(struct hash_ipport6_elem *dst, 
-		       const struct hash_ipport6_elem *src) -{ -	memcpy(dst, src, sizeof(*dst)); -} - -static inline void -hash_ipport6_data_zero_out(struct hash_ipport6_elem *elem) -{ -	elem->proto = 0; -} -  static bool  hash_ipport6_data_list(struct sk_buff *skb,  		       const struct hash_ipport6_elem *data) @@ -322,66 +282,52 @@ nla_put_failure:  	return 1;  } -static bool -hash_ipport6_data_tlist(struct sk_buff *skb, -			const struct hash_ipport6_elem *data) +static inline void +hash_ipport6_data_next(struct hash_ipport4_elem *next, +		       const struct hash_ipport6_elem *d)  { -	const struct hash_ipport6_telem *e = -		(const struct hash_ipport6_telem *)data; - -	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || -	    nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || -	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || -	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT, -			  htonl(ip_set_timeout_get(e->timeout)))) -		goto nla_put_failure; -	return 0; - -nla_put_failure: -	return 1; +	next->port = d->port;  } +#undef MTYPE  #undef PF  #undef HOST_MASK +#undef HKEY_DATALEN +#define MTYPE		hash_ipport6  #define PF		6  #define HOST_MASK	128 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_ipport6_data_next(struct ip_set_hash *h, -		       const struct hash_ipport6_elem *d) -{ -	h->next.port = d->port; -} +#define HKEY_DATALEN	sizeof(struct hash_ipport6_elem) +#define	IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h"  static int  hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb,  		  const struct xt_action_param *par, -		  enum ipset_adt adt, const struct ip_set_adt_opt *opt) +		  enum ipset_adt adt, struct ip_set_adt_opt *opt)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_ipport *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_ipport6_elem data = { }; +	struct hash_ipport6_elem e = { }; +	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);  	if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, -				 &data.port, &data.proto)) +				 &e.port, &e.proto))  		return -EINVAL; -	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); - -	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); +	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); +	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);  }  static int  hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],  		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_ipport *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_ipport6_elem data = { }; +	struct hash_ipport6_elem e = { }; +	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);  	u32 port, port_to; -	u32 timeout = h->timeout;  	bool with_ports = false;  	int ret; @@ -389,6 +335,8 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],  		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||  		     tb[IPSET_ATTR_IP_TO] ||  		     tb[IPSET_ATTR_CIDR]))  		return -IPSET_ERR_PROTOCOL; @@ -396,39 +344,34 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],  	if (tb[IPSET_ATTR_LINENO])  		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); -	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); +	ret = 
ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || +	      ip_set_get_extensions(set, tb, &ext);  	if (ret)  		return ret;  	if (tb[IPSET_ATTR_PORT]) -		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); +		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);  	else  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_PROTO]) { -		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); -		with_ports = ip_set_proto_with_ports(data.proto); +		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); +		with_ports = ip_set_proto_with_ports(e.proto); -		if (data.proto == 0) +		if (e.proto == 0)  			return -IPSET_ERR_INVALID_PROTO;  	} else  		return -IPSET_ERR_MISSING_PROTO; -	if (!(with_ports || data.proto == IPPROTO_ICMPV6)) -		data.port = 0; - -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!with_timeout(h->timeout)) -			return -IPSET_ERR_TIMEOUT; -		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -	} +	if (!(with_ports || e.proto == IPPROTO_ICMPV6)) +		e.port = 0;  	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { -		ret = adtfn(set, &data, timeout, flags); +		ret = adtfn(set, &e, &ext, &ext, flags);  		return ip_set_eexist(ret, flags) ? 0 : ret;  	} -	port = ntohs(data.port); +	port = ntohs(e.port);  	port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);  	if (port > port_to)  		swap(port, port_to); @@ -436,8 +379,8 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],  	if (retried)  		port = ntohs(h->next.port);  	for (; port <= port_to; port++) { -		data.port = htons(port); -		ret = adtfn(set, &data, timeout, flags); +		e.port = htons(port); +		ret = adtfn(set, &e, &ext, &ext, flags);  		if (ret && !ip_set_eexist(ret, flags))  			return ret; @@ -447,78 +390,6 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],  	return ret;  } -/* Create hash:ip type of sets */ - -static int -hash_ipport_create(struct ip_set *set, struct nlattr *tb[], u32 flags) -{ -	struct ip_set_hash *h; -	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; -	u8 hbits; -	size_t hsize; - -	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) -		return -IPSET_ERR_INVALID_FAMILY; - -	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) -		return -IPSET_ERR_PROTOCOL; - -	if (tb[IPSET_ATTR_HASHSIZE]) { -		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); -		if (hashsize < IPSET_MIMINAL_HASHSIZE) -			hashsize = IPSET_MIMINAL_HASHSIZE; -	} - -	if (tb[IPSET_ATTR_MAXELEM]) -		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); - -	h = kzalloc(sizeof(*h), GFP_KERNEL); -	if (!h) -		return -ENOMEM; - -	h->maxelem = maxelem; -	get_random_bytes(&h->initval, sizeof(h->initval)); -	h->timeout = IPSET_NO_TIMEOUT; - -	hbits = htable_bits(hashsize); -	hsize = htable_size(hbits); -	if (hsize == 0) { -		kfree(h); -		return -ENOMEM; -	} -	h->table = ip_set_alloc(hsize); -	if (!h->table) { -		kfree(h); -		return -ENOMEM; -	} -	h->table->htable_bits = hbits; - -	set->data = h; - -	if (tb[IPSET_ATTR_TIMEOUT]) { -		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - -		set->variant = set->family == NFPROTO_IPV4 -			? &hash_ipport4_tvariant : &hash_ipport6_tvariant; - -		if (set->family == NFPROTO_IPV4) -			hash_ipport4_gc_init(set); -		else -			hash_ipport6_gc_init(set); -	} else { -		set->variant = set->family == NFPROTO_IPV4 -			? 
&hash_ipport4_variant : &hash_ipport6_variant; -	} - -	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", -		 set->name, jhash_size(h->table->htable_bits), -		 h->table->htable_bits, h->maxelem, set->data, h->table); - -	return 0; -} -  static struct ip_set_type hash_ipport_type __read_mostly = {  	.name		= "hash:ip,port",  	.protocol	= IPSET_PROTOCOL, @@ -535,6 +406,7 @@ static struct ip_set_type hash_ipport_type __read_mostly = {  		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },  		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 }, +		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },  	},  	.adt_policy	= {  		[IPSET_ATTR_IP]		= { .type = NLA_NESTED }, @@ -545,6 +417,8 @@ static struct ip_set_type hash_ipport_type __read_mostly = {  		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },  		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 }, +		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 }, +		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },  	},  	.me		= THIS_MODULE,  }; diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 1da1e955f38..34e8a1acce4 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -21,12 +21,12 @@  #include <linux/netfilter.h>  #include <linux/netfilter/ipset/pfxlen.h>  #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h>  #include <linux/netfilter/ipset/ip_set_getport.h>  #include <linux/netfilter/ipset/ip_set_hash.h>  #define REVISION_MIN	0 -#define REVISION_MAX	1 /* SCTP and UDPLITE support added */ +/*			1    SCTP and UDPLITE support added */ +#define REVISION_MAX	2 /* Counters support added */  MODULE_LICENSE("GPL");  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -34,32 +34,44 @@ IP_SET_MODULE_DESC("hash:ip,port,ip", REVISION_MIN, REVISION_MAX);  MODULE_ALIAS("ip_set_hash:ip,port,ip");  /* Type specific function prefix */ -#define TYPE		hash_ipportip +#define HTYPE		hash_ipportip -static bool -hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b); +/* IPv4 variants */ -#define hash_ipportip4_same_set	hash_ipportip_same_set -#define hash_ipportip6_same_set	hash_ipportip_same_set +/* Member elements  */ +struct hash_ipportip4_elem { +	__be32 ip; +	__be32 ip2; +	__be16 port; +	u8 proto; +	u8 padding; +}; -/* The type variant functions: IPv4 */ +struct hash_ipportip4t_elem { +	__be32 ip; +	__be32 ip2; +	__be16 port; +	u8 proto; +	u8 padding; +	unsigned long timeout; +}; -/* Member elements without timeout */ -struct hash_ipportip4_elem { +struct hash_ipportip4c_elem {  	__be32 ip;  	__be32 ip2;  	__be16 port;  	u8 proto;  	u8 padding; +	struct ip_set_counter counter;  }; -/* Member elements with timeout support */ -struct hash_ipportip4_telem { +struct hash_ipportip4ct_elem {  	__be32 ip;  	__be32 ip2;  	__be16 port;  	u8 proto;  	u8 padding; +	struct ip_set_counter counter;  	unsigned long timeout;  }; @@ -74,25 +86,6 @@ hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,  	       ip1->proto == ip2->proto;  } -static inline bool -hash_ipportip4_data_isnull(const struct hash_ipportip4_elem *elem) -{ -	return elem->proto 
== 0; -} - -static inline void -hash_ipportip4_data_copy(struct hash_ipportip4_elem *dst, -			 const struct hash_ipportip4_elem *src) -{ -	memcpy(dst, src, sizeof(*dst)); -} - -static inline void -hash_ipportip4_data_zero_out(struct hash_ipportip4_elem *elem) -{ -	elem->proto = 0; -} -  static bool  hash_ipportip4_data_list(struct sk_buff *skb,  		       const struct hash_ipportip4_elem *data) @@ -108,117 +101,96 @@ nla_put_failure:  	return 1;  } -static bool -hash_ipportip4_data_tlist(struct sk_buff *skb, -			const struct hash_ipportip4_elem *data) +static inline void +hash_ipportip4_data_next(struct hash_ipportip4_elem *next, +			 const struct hash_ipportip4_elem *d)  { -	const struct hash_ipportip4_telem *tdata = -		(const struct hash_ipportip4_telem *)data; - -	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) || -	    nla_put_ipaddr4(skb, IPSET_ATTR_IP2, tdata->ip2) || -	    nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) || -	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || -	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT, -			  htonl(ip_set_timeout_get(tdata->timeout)))) -		goto nla_put_failure; -	return 0; - -nla_put_failure: -	return 1; +	next->ip = d->ip; +	next->port = d->port;  } +/* Common functions */ +#define MTYPE		hash_ipportip4  #define PF		4  #define HOST_MASK	32 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_ipportip4_data_next(struct ip_set_hash *h, -			 const struct hash_ipportip4_elem *d) -{ -	h->next.ip = d->ip; -	h->next.port = d->port; -} +#include "ip_set_hash_gen.h"  static int  hash_ipportip4_kadt(struct ip_set *set, const struct sk_buff *skb,  		    const struct xt_action_param *par, -		    enum ipset_adt adt, const struct ip_set_adt_opt *opt) +		    enum ipset_adt adt, struct ip_set_adt_opt *opt)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_ipportip *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_ipportip4_elem data = { }; +	struct hash_ipportip4_elem e = { }; +	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);  	if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, -				 &data.port, &data.proto)) +				 &e.port, &e.proto))  		return -EINVAL; -	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); -	ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2); - -	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); +	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); +	ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2); +	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);  }  static int  hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],  		    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_ipportip *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_ipportip4_elem data = { }; +	struct hash_ipportip4_elem e = { }; +	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);  	u32 ip, ip_to, p = 0, port, port_to; -	u32 timeout = h->timeout;  	bool with_ports = false;  	int ret;  	if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] ||  		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_LINENO])  		
*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); -	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &data.ip); +	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip) || +	      ip_set_get_extensions(set, tb, &ext);  	if (ret)  		return ret; -	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &data.ip2); +	ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP2], &e.ip2);  	if (ret)  		return ret;  	if (tb[IPSET_ATTR_PORT]) -		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); +		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);  	else  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_PROTO]) { -		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); -		with_ports = ip_set_proto_with_ports(data.proto); +		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); +		with_ports = ip_set_proto_with_ports(e.proto); -		if (data.proto == 0) +		if (e.proto == 0)  			return -IPSET_ERR_INVALID_PROTO;  	} else  		return -IPSET_ERR_MISSING_PROTO; -	if (!(with_ports || data.proto == IPPROTO_ICMP)) -		data.port = 0; - -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!with_timeout(h->timeout)) -			return -IPSET_ERR_TIMEOUT; -		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -	} +	if (!(with_ports || e.proto == IPPROTO_ICMP)) +		e.port = 0;  	if (adt == IPSET_TEST ||  	    !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] ||  	      tb[IPSET_ATTR_PORT_TO])) { -		ret = adtfn(set, &data, timeout, flags); +		ret = adtfn(set, &e, &ext, &ext, flags);  		return ip_set_eexist(ret, flags) ? 0 : ret;  	} -	ip_to = ip = ntohl(data.ip); +	ip_to = ip = ntohl(e.ip);  	if (tb[IPSET_ATTR_IP_TO]) {  		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);  		if (ret) @@ -233,7 +205,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],  		ip_set_mask_from_to(ip, ip_to, cidr);  	} -	port_to = port = ntohs(data.port); +	port_to = port = ntohs(e.port);  	if (with_ports && tb[IPSET_ATTR_PORT_TO]) {  		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);  		if (port > port_to) @@ -246,9 +218,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],  		p = retried && ip == ntohl(h->next.ip) ? 
ntohs(h->next.port)  						       : port;  		for (; p <= port_to; p++) { -			data.ip = htonl(ip); -			data.port = htons(p); -			ret = adtfn(set, &data, timeout, flags); +			e.ip = htonl(ip); +			e.port = htons(p); +			ret = adtfn(set, &e, &ext, &ext, flags);  			if (ret && !ip_set_eexist(ret, flags))  				return ret; @@ -259,36 +231,46 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],  	return ret;  } -static bool -hash_ipportip_same_set(const struct ip_set *a, const struct ip_set *b) -{ -	const struct ip_set_hash *x = a->data; -	const struct ip_set_hash *y = b->data; +/* IPv6 variants */ -	/* Resizing changes htable_bits, so we ignore it */ -	return x->maxelem == y->maxelem && -	       x->timeout == y->timeout; -} +struct hash_ipportip6_elem { +	union nf_inet_addr ip; +	union nf_inet_addr ip2; +	__be16 port; +	u8 proto; +	u8 padding; +}; -/* The type variant functions: IPv6 */ +struct hash_ipportip6t_elem { +	union nf_inet_addr ip; +	union nf_inet_addr ip2; +	__be16 port; +	u8 proto; +	u8 padding; +	unsigned long timeout; +}; -struct hash_ipportip6_elem { +struct hash_ipportip6c_elem {  	union nf_inet_addr ip;  	union nf_inet_addr ip2;  	__be16 port;  	u8 proto;  	u8 padding; +	struct ip_set_counter counter;  }; -struct hash_ipportip6_telem { +struct hash_ipportip6ct_elem {  	union nf_inet_addr ip;  	union nf_inet_addr ip2;  	__be16 port;  	u8 proto;  	u8 padding; +	struct ip_set_counter counter;  	unsigned long timeout;  }; +/* Common functions */ +  static inline bool  hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1,  			  const struct hash_ipportip6_elem *ip2, @@ -300,25 +282,6 @@ hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1,  	       ip1->proto == ip2->proto;  } -static inline bool -hash_ipportip6_data_isnull(const struct hash_ipportip6_elem *elem) -{ -	return elem->proto == 0; -} - -static inline void -hash_ipportip6_data_copy(struct hash_ipportip6_elem *dst, -			 const struct hash_ipportip6_elem *src) -{ -	memcpy(dst, src, sizeof(*dst)); -} - -static inline void -hash_ipportip6_data_zero_out(struct hash_ipportip6_elem *elem) -{ -	elem->proto = 0; -} -  static bool  hash_ipportip6_data_list(struct sk_buff *skb,  			 const struct hash_ipportip6_elem *data) @@ -334,68 +297,51 @@ nla_put_failure:  	return 1;  } -static bool -hash_ipportip6_data_tlist(struct sk_buff *skb, -			  const struct hash_ipportip6_elem *data) +static inline void +hash_ipportip6_data_next(struct hash_ipportip4_elem *next, +			 const struct hash_ipportip6_elem *d)  { -	const struct hash_ipportip6_telem *e = -		(const struct hash_ipportip6_telem *)data; - -	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || -	    nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) || -	    nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || -	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || -	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT, -			  htonl(ip_set_timeout_get(e->timeout)))) -		goto nla_put_failure; -	return 0; - -nla_put_failure: -	return 1; +	next->port = d->port;  } +#undef MTYPE  #undef PF  #undef HOST_MASK +#define MTYPE		hash_ipportip6  #define PF		6  #define HOST_MASK	128 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_ipportip6_data_next(struct ip_set_hash *h, -			 const struct hash_ipportip6_elem *d) -{ -	h->next.port = d->port; -} +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h"  static int  hash_ipportip6_kadt(struct ip_set *set, const struct sk_buff *skb,  		    const struct xt_action_param *par, -		    enum ipset_adt 
adt, const struct ip_set_adt_opt *opt) +		    enum ipset_adt adt, struct ip_set_adt_opt *opt)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_ipportip *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_ipportip6_elem data = { }; +	struct hash_ipportip6_elem e = { }; +	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);  	if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, -				 &data.port, &data.proto)) +				 &e.port, &e.proto))  		return -EINVAL; -	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); -	ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2.in6); - -	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); +	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); +	ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2.in6); +	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);  }  static int  hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],  		    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_ipportip *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_ipportip6_elem data = { }; +	struct hash_ipportip6_elem e = { }; +	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);  	u32 port, port_to; -	u32 timeout = h->timeout;  	bool with_ports = false;  	int ret; @@ -403,6 +349,8 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],  		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||  		     tb[IPSET_ATTR_IP_TO] ||  		     tb[IPSET_ATTR_CIDR]))  		return -IPSET_ERR_PROTOCOL; @@ -410,43 +358,38 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],  	if (tb[IPSET_ATTR_LINENO])  		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); -	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); +	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || +	      ip_set_get_extensions(set, tb, &ext);  	if (ret)  		return ret; -	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &data.ip2); +	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip2);  	if (ret)  		return ret;  	if (tb[IPSET_ATTR_PORT]) -		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); +		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);  	else  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_PROTO]) { -		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); -		with_ports = ip_set_proto_with_ports(data.proto); +		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); +		with_ports = ip_set_proto_with_ports(e.proto); -		if (data.proto == 0) +		if (e.proto == 0)  			return -IPSET_ERR_INVALID_PROTO;  	} else  		return -IPSET_ERR_MISSING_PROTO; -	if (!(with_ports || data.proto == IPPROTO_ICMPV6)) -		data.port = 0; - -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!with_timeout(h->timeout)) -			return -IPSET_ERR_TIMEOUT; -		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -	} +	if (!(with_ports || e.proto == IPPROTO_ICMPV6)) +		e.port = 0;  	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { -		ret = adtfn(set, &data, timeout, flags); +		ret = adtfn(set, &e, &ext, &ext, flags);  		return ip_set_eexist(ret, flags) ? 
0 : ret;  	} -	port = ntohs(data.port); +	port = ntohs(e.port);  	port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);  	if (port > port_to)  		swap(port, port_to); @@ -454,8 +397,8 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],  	if (retried)  		port = ntohs(h->next.port);  	for (; port <= port_to; port++) { -		data.port = htons(port); -		ret = adtfn(set, &data, timeout, flags); +		e.port = htons(port); +		ret = adtfn(set, &e, &ext, &ext, flags);  		if (ret && !ip_set_eexist(ret, flags))  			return ret; @@ -465,78 +408,6 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],  	return ret;  } -/* Create hash:ip type of sets */ - -static int -hash_ipportip_create(struct ip_set *set, struct nlattr *tb[], u32 flags) -{ -	struct ip_set_hash *h; -	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; -	u8 hbits; -	size_t hsize; - -	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) -		return -IPSET_ERR_INVALID_FAMILY; - -	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) -		return -IPSET_ERR_PROTOCOL; - -	if (tb[IPSET_ATTR_HASHSIZE]) { -		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); -		if (hashsize < IPSET_MIMINAL_HASHSIZE) -			hashsize = IPSET_MIMINAL_HASHSIZE; -	} - -	if (tb[IPSET_ATTR_MAXELEM]) -		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); - -	h = kzalloc(sizeof(*h), GFP_KERNEL); -	if (!h) -		return -ENOMEM; - -	h->maxelem = maxelem; -	get_random_bytes(&h->initval, sizeof(h->initval)); -	h->timeout = IPSET_NO_TIMEOUT; - -	hbits = htable_bits(hashsize); -	hsize = htable_size(hbits); -	if (hsize == 0) { -		kfree(h); -		return -ENOMEM; -	} -	h->table = ip_set_alloc(hsize); -	if (!h->table) { -		kfree(h); -		return -ENOMEM; -	} -	h->table->htable_bits = hbits; - -	set->data = h; - -	if (tb[IPSET_ATTR_TIMEOUT]) { -		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - -		set->variant = set->family == NFPROTO_IPV4 -			? &hash_ipportip4_tvariant : &hash_ipportip6_tvariant; - -		if (set->family == NFPROTO_IPV4) -			hash_ipportip4_gc_init(set); -		else -			hash_ipportip6_gc_init(set); -	} else { -		set->variant = set->family == NFPROTO_IPV4 -			? 
&hash_ipportip4_variant : &hash_ipportip6_variant; -	} - -	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", -		 set->name, jhash_size(h->table->htable_bits), -		 h->table->htable_bits, h->maxelem, set->data, h->table); - -	return 0; -} -  static struct ip_set_type hash_ipportip_type __read_mostly = {  	.name		= "hash:ip,port,ip",  	.protocol	= IPSET_PROTOCOL, @@ -552,6 +423,7 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {  		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },  		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 }, +		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },  	},  	.adt_policy	= {  		[IPSET_ATTR_IP]		= { .type = NLA_NESTED }, @@ -563,6 +435,8 @@ static struct ip_set_type hash_ipportip_type __read_mostly = {  		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },  		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 }, +		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 }, +		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },  	},  	.me		= THIS_MODULE,  }; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 10a30b4fc7d..c6a525373be 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -21,14 +21,14 @@  #include <linux/netfilter.h>  #include <linux/netfilter/ipset/pfxlen.h>  #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h>  #include <linux/netfilter/ipset/ip_set_getport.h>  #include <linux/netfilter/ipset/ip_set_hash.h>  #define REVISION_MIN	0  /*			1    SCTP and UDPLITE support added */  /*			2    Range as input support for IPv4 added */ -#define REVISION_MAX	3 /* nomatch flag support added */ +/*			3    nomatch flag support added */ +#define REVISION_MAX	4 /* Counters support added */  MODULE_LICENSE("GPL");  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -36,23 +36,19 @@ IP_SET_MODULE_DESC("hash:ip,port,net", REVISION_MIN, REVISION_MAX);  MODULE_ALIAS("ip_set_hash:ip,port,net");  /* Type specific function prefix */ -#define TYPE		hash_ipportnet - -static bool -hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b); - -#define hash_ipportnet4_same_set	hash_ipportnet_same_set -#define hash_ipportnet6_same_set	hash_ipportnet_same_set - -/* The type variant functions: IPv4 */ +#define HTYPE		hash_ipportnet  /* We squeeze the "nomatch" flag into cidr: we don't support cidr == 0   * However this way we have to store internally cidr - 1,   * dancing back and forth.   
*/  #define IP_SET_HASH_WITH_NETS_PACKED +#define IP_SET_HASH_WITH_PROTO +#define IP_SET_HASH_WITH_NETS + +/* IPv4 variants */ -/* Member elements without timeout */ +/* Member elements */  struct hash_ipportnet4_elem {  	__be32 ip;  	__be32 ip2; @@ -62,8 +58,7 @@ struct hash_ipportnet4_elem {  	u8 proto;  }; -/* Member elements with timeout support */ -struct hash_ipportnet4_telem { +struct hash_ipportnet4t_elem {  	__be32 ip;  	__be32 ip2;  	__be16 port; @@ -73,6 +68,29 @@ struct hash_ipportnet4_telem {  	unsigned long timeout;  }; +struct hash_ipportnet4c_elem { +	__be32 ip; +	__be32 ip2; +	__be16 port; +	u8 cidr:7; +	u8 nomatch:1; +	u8 proto; +	struct ip_set_counter counter; +}; + +struct hash_ipportnet4ct_elem { +	__be32 ip; +	__be32 ip2; +	__be16 port; +	u8 cidr:7; +	u8 nomatch:1; +	u8 proto; +	struct ip_set_counter counter; +	unsigned long timeout; +}; + +/* Common functions */ +  static inline bool  hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,  			   const struct hash_ipportnet4_elem *ip2, @@ -85,38 +103,22 @@ hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,  	       ip1->proto == ip2->proto;  } -static inline bool -hash_ipportnet4_data_isnull(const struct hash_ipportnet4_elem *elem) -{ -	return elem->proto == 0; -} - -static inline void -hash_ipportnet4_data_copy(struct hash_ipportnet4_elem *dst, -			  const struct hash_ipportnet4_elem *src) +static inline int +hash_ipportnet4_do_data_match(const struct hash_ipportnet4_elem *elem)  { -	memcpy(dst, src, sizeof(*dst)); +	return elem->nomatch ? -ENOTEMPTY : 1;  }  static inline void -hash_ipportnet4_data_flags(struct hash_ipportnet4_elem *dst, u32 flags) +hash_ipportnet4_data_set_flags(struct hash_ipportnet4_elem *elem, u32 flags)  { -	dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +	elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);  }  static inline void -hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *dst, u32 *flags) +hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *elem, u8 *flags)  { -	if (dst->nomatch) { -		*flags = IPSET_FLAG_NOMATCH; -		dst->nomatch = 0; -	} -} - -static inline int -hash_ipportnet4_data_match(const struct hash_ipportnet4_elem *elem) -{ -	return elem->nomatch ? -ENOTEMPTY : 1; +	swap(*flags, elem->nomatch);  }  static inline void @@ -126,12 +128,6 @@ hash_ipportnet4_data_netmask(struct hash_ipportnet4_elem *elem, u8 cidr)  	elem->cidr = cidr - 1;  } -static inline void -hash_ipportnet4_data_zero_out(struct hash_ipportnet4_elem *elem) -{ -	elem->proto = 0; -} -  static bool  hash_ipportnet4_data_list(struct sk_buff *skb,  			  const struct hash_ipportnet4_elem *data) @@ -152,81 +148,56 @@ nla_put_failure:  	return 1;  } -static bool -hash_ipportnet4_data_tlist(struct sk_buff *skb, -			   const struct hash_ipportnet4_elem *data) +static inline void +hash_ipportnet4_data_next(struct hash_ipportnet4_elem *next, +			  const struct hash_ipportnet4_elem *d)  { -	const struct hash_ipportnet4_telem *tdata = -		(const struct hash_ipportnet4_telem *)data; -	u32 flags = data->nomatch ? 
IPSET_FLAG_NOMATCH : 0; - -	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) || -	    nla_put_ipaddr4(skb, IPSET_ATTR_IP2, tdata->ip2) || -	    nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) || -	    nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr + 1) || -	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || -	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT, -			  htonl(ip_set_timeout_get(tdata->timeout))) || -	    (flags && -	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) -		goto nla_put_failure; -	return 0; - -nla_put_failure: -	return 1; +	next->ip = d->ip; +	next->port = d->port; +	next->ip2 = d->ip2;  } -#define IP_SET_HASH_WITH_PROTO -#define IP_SET_HASH_WITH_NETS - +#define MTYPE		hash_ipportnet4  #define PF		4  #define HOST_MASK	32 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_ipportnet4_data_next(struct ip_set_hash *h, -			  const struct hash_ipportnet4_elem *d) -{ -	h->next.ip = d->ip; -	h->next.port = d->port; -	h->next.ip2 = d->ip2; -} +#include "ip_set_hash_gen.h"  static int  hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,  		     const struct xt_action_param *par, -		     enum ipset_adt adt, const struct ip_set_adt_opt *opt) +		     enum ipset_adt adt, struct ip_set_adt_opt *opt)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_ipportnet *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_ipportnet4_elem data = { +	struct hash_ipportnet4_elem e = {  		.cidr = h->nets[0].cidr ? h->nets[0].cidr - 1 : HOST_MASK - 1  	}; +	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);  	if (adt == IPSET_TEST) -		data.cidr = HOST_MASK - 1; +		e.cidr = HOST_MASK - 1;  	if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, -				 &data.port, &data.proto)) +				 &e.port, &e.proto))  		return -EINVAL; -	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); -	ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2); -	data.ip2 &= ip_set_netmask(data.cidr + 1); +	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); +	ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2); +	e.ip2 &= ip_set_netmask(e.cidr + 1); -	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); +	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);  }  static int  hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],  		     enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_ipportnet *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_ipportnet4_elem data = { .cidr = HOST_MASK - 1 }; +	struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; +	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);  	u32 ip, ip_to, p = 0, port, port_to;  	u32 ip2_from, ip2_to, ip2_last, ip2; -	u32 timeout = h->timeout;  	bool with_ports = false;  	u8 cidr;  	int ret; @@ -235,13 +206,16 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],  		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_LINENO])  		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); -	ret = 
ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); +	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || +	      ip_set_get_extensions(set, tb, &ext);  	if (ret)  		return ret; @@ -253,46 +227,41 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],  		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);  		if (!cidr || cidr > HOST_MASK)  			return -IPSET_ERR_INVALID_CIDR; -		data.cidr = cidr - 1; +		e.cidr = cidr - 1;  	}  	if (tb[IPSET_ATTR_PORT]) -		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); +		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);  	else  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_PROTO]) { -		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); -		with_ports = ip_set_proto_with_ports(data.proto); +		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); +		with_ports = ip_set_proto_with_ports(e.proto); -		if (data.proto == 0) +		if (e.proto == 0)  			return -IPSET_ERR_INVALID_PROTO;  	} else  		return -IPSET_ERR_MISSING_PROTO; -	if (!(with_ports || data.proto == IPPROTO_ICMP)) -		data.port = 0; - -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!with_timeout(h->timeout)) -			return -IPSET_ERR_TIMEOUT; -		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -	} +	if (!(with_ports || e.proto == IPPROTO_ICMP)) +		e.port = 0; -	if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { +	if (tb[IPSET_ATTR_CADT_FLAGS]) {  		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);  		if (cadt_flags & IPSET_FLAG_NOMATCH) -			flags |= (cadt_flags << 16); +			flags |= (IPSET_FLAG_NOMATCH << 16);  	}  	with_ports = with_ports && tb[IPSET_ATTR_PORT_TO];  	if (adt == IPSET_TEST ||  	    !(tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_IP_TO] || with_ports ||  	      tb[IPSET_ATTR_IP2_TO])) { -		data.ip = htonl(ip); -		data.ip2 = htonl(ip2_from & ip_set_hostmask(data.cidr + 1)); -		ret = adtfn(set, &data, timeout, flags); -		return ip_set_eexist(ret, flags) ? 0 : ret; +		e.ip = htonl(ip); +		e.ip2 = htonl(ip2_from & ip_set_hostmask(e.cidr + 1)); +		ret = adtfn(set, &e, &ext, &ext, flags); +		return ip_set_enomatch(ret, flags, adt) ? 1 : +		       ip_set_eexist(ret, flags) ? 0 : ret;  	}  	ip_to = ip; @@ -310,7 +279,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],  		ip_set_mask_from_to(ip, ip_to, cidr);  	} -	port_to = port = ntohs(data.port); +	port_to = port = ntohs(e.port);  	if (tb[IPSET_ATTR_PORT_TO]) {  		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);  		if (port > port_to) @@ -326,28 +295,27 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],  			swap(ip2_from, ip2_to);  		if (ip2_from + UINT_MAX == ip2_to)  			return -IPSET_ERR_HASH_RANGE; -	} else { -		ip_set_mask_from_to(ip2_from, ip2_to, data.cidr + 1); -	} +	} else +		ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1);  	if (retried)  		ip = ntohl(h->next.ip);  	for (; !before(ip_to, ip); ip++) { -		data.ip = htonl(ip); +		e.ip = htonl(ip);  		p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)  						       : port;  		for (; p <= port_to; p++) { -			data.port = htons(p); +			e.port = htons(p);  			ip2 = retried  			      && ip == ntohl(h->next.ip)  			      && p == ntohs(h->next.port)  				? 
ntohl(h->next.ip2) : ip2_from;  			while (!after(ip2, ip2_to)) { -				data.ip2 = htonl(ip2); +				e.ip2 = htonl(ip2);  				ip2_last = ip_set_range_to_cidr(ip2, ip2_to,  								&cidr); -				data.cidr = cidr - 1; -				ret = adtfn(set, &data, timeout, flags); +				e.cidr = cidr - 1; +				ret = adtfn(set, &e, &ext, &ext, flags);  				if (ret && !ip_set_eexist(ret, flags))  					return ret; @@ -360,38 +328,50 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],  	return ret;  } -static bool -hash_ipportnet_same_set(const struct ip_set *a, const struct ip_set *b) -{ -	const struct ip_set_hash *x = a->data; -	const struct ip_set_hash *y = b->data; +/* IPv6 variants */ -	/* Resizing changes htable_bits, so we ignore it */ -	return x->maxelem == y->maxelem && -	       x->timeout == y->timeout; -} +struct hash_ipportnet6_elem { +	union nf_inet_addr ip; +	union nf_inet_addr ip2; +	__be16 port; +	u8 cidr:7; +	u8 nomatch:1; +	u8 proto; +}; -/* The type variant functions: IPv6 */ +struct hash_ipportnet6t_elem { +	union nf_inet_addr ip; +	union nf_inet_addr ip2; +	__be16 port; +	u8 cidr:7; +	u8 nomatch:1; +	u8 proto; +	unsigned long timeout; +}; -struct hash_ipportnet6_elem { +struct hash_ipportnet6c_elem {  	union nf_inet_addr ip;  	union nf_inet_addr ip2;  	__be16 port;  	u8 cidr:7;  	u8 nomatch:1;  	u8 proto; +	struct ip_set_counter counter;  }; -struct hash_ipportnet6_telem { +struct hash_ipportnet6ct_elem {  	union nf_inet_addr ip;  	union nf_inet_addr ip2;  	__be16 port;  	u8 cidr:7;  	u8 nomatch:1;  	u8 proto; +	struct ip_set_counter counter;  	unsigned long timeout;  }; +/* Common functions */ +  static inline bool  hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,  			   const struct hash_ipportnet6_elem *ip2, @@ -404,53 +384,22 @@ hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,  	       ip1->proto == ip2->proto;  } -static inline bool -hash_ipportnet6_data_isnull(const struct hash_ipportnet6_elem *elem) -{ -	return elem->proto == 0; -} - -static inline void -hash_ipportnet6_data_copy(struct hash_ipportnet6_elem *dst, -			  const struct hash_ipportnet6_elem *src) -{ -	memcpy(dst, src, sizeof(*dst)); -} - -static inline void -hash_ipportnet6_data_flags(struct hash_ipportnet6_elem *dst, u32 flags) -{ -	dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); -} - -static inline void -hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *dst, u32 *flags) -{ -	if (dst->nomatch) { -		*flags = IPSET_FLAG_NOMATCH; -		dst->nomatch = 0; -	} -} -  static inline int -hash_ipportnet6_data_match(const struct hash_ipportnet6_elem *elem) +hash_ipportnet6_do_data_match(const struct hash_ipportnet6_elem *elem)  {  	return elem->nomatch ? 
-ENOTEMPTY : 1;  }  static inline void -hash_ipportnet6_data_zero_out(struct hash_ipportnet6_elem *elem) +hash_ipportnet6_data_set_flags(struct hash_ipportnet6_elem *elem, u32 flags)  { -	elem->proto = 0; +	elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);  }  static inline void -ip6_netmask(union nf_inet_addr *ip, u8 prefix) +hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *elem, u8 *flags)  { -	ip->ip6[0] &= ip_set_netmask6(prefix)[0]; -	ip->ip6[1] &= ip_set_netmask6(prefix)[1]; -	ip->ip6[2] &= ip_set_netmask6(prefix)[2]; -	ip->ip6[3] &= ip_set_netmask6(prefix)[3]; +	swap(*flags, elem->nomatch);  }  static inline void @@ -480,78 +429,58 @@ nla_put_failure:  	return 1;  } -static bool -hash_ipportnet6_data_tlist(struct sk_buff *skb, -			   const struct hash_ipportnet6_elem *data) +static inline void +hash_ipportnet6_data_next(struct hash_ipportnet4_elem *next, +			  const struct hash_ipportnet6_elem *d)  { -	const struct hash_ipportnet6_telem *e = -		(const struct hash_ipportnet6_telem *)data; -	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - -	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || -	    nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) || -	    nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || -	    nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr + 1) || -	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || -	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT, -			  htonl(ip_set_timeout_get(e->timeout))) || -	    (flags && -	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) -		goto nla_put_failure; -	return 0; - -nla_put_failure: -	return 1; +	next->port = d->port;  } +#undef MTYPE  #undef PF  #undef HOST_MASK +#define MTYPE		hash_ipportnet6  #define PF		6  #define HOST_MASK	128 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_ipportnet6_data_next(struct ip_set_hash *h, -			  const struct hash_ipportnet6_elem *d) -{ -	h->next.port = d->port; -} +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h"  static int  hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,  		     const struct xt_action_param *par, -		     enum ipset_adt adt, const struct ip_set_adt_opt *opt) +		     enum ipset_adt adt, struct ip_set_adt_opt *opt)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_ipportnet *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_ipportnet6_elem data = { +	struct hash_ipportnet6_elem e = {  		.cidr = h->nets[0].cidr ? 
h->nets[0].cidr - 1 : HOST_MASK - 1  	}; +	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);  	if (adt == IPSET_TEST) -		data.cidr = HOST_MASK - 1; +		e.cidr = HOST_MASK - 1;  	if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, -				 &data.port, &data.proto)) +				 &e.port, &e.proto))  		return -EINVAL; -	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); -	ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &data.ip2.in6); -	ip6_netmask(&data.ip2, data.cidr + 1); +	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); +	ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2.in6); +	ip6_netmask(&e.ip2, e.cidr + 1); -	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); +	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);  }  static int  hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],  		     enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_ipportnet *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_ipportnet6_elem data = { .cidr = HOST_MASK - 1 }; +	struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 }; +	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);  	u32 port, port_to; -	u32 timeout = h->timeout;  	bool with_ports = false;  	u8 cidr;  	int ret; @@ -561,6 +490,8 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) ||  		     tb[IPSET_ATTR_IP_TO] ||  		     tb[IPSET_ATTR_CIDR]))  		return -IPSET_ERR_PROTOCOL; @@ -570,11 +501,12 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],  	if (tb[IPSET_ATTR_LINENO])  		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); -	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); +	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || +	      ip_set_get_extensions(set, tb, &ext);  	if (ret)  		return ret; -	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &data.ip2); +	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip2);  	if (ret)  		return ret; @@ -582,46 +514,41 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],  		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]);  		if (!cidr || cidr > HOST_MASK)  			return -IPSET_ERR_INVALID_CIDR; -		data.cidr = cidr - 1; +		e.cidr = cidr - 1;  	} -	ip6_netmask(&data.ip2, data.cidr + 1); +	ip6_netmask(&e.ip2, e.cidr + 1);  	if (tb[IPSET_ATTR_PORT]) -		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); +		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);  	else  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_PROTO]) { -		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); -		with_ports = ip_set_proto_with_ports(data.proto); +		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); +		with_ports = ip_set_proto_with_ports(e.proto); -		if (data.proto == 0) +		if (e.proto == 0)  			return -IPSET_ERR_INVALID_PROTO;  	} else  		return -IPSET_ERR_MISSING_PROTO; -	if (!(with_ports || data.proto == IPPROTO_ICMPV6)) -		data.port = 0; - -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!with_timeout(h->timeout)) -			return -IPSET_ERR_TIMEOUT; -		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -	} +	if (!(with_ports || e.proto == IPPROTO_ICMPV6)) +		e.port = 0; -	if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { +	if (tb[IPSET_ATTR_CADT_FLAGS]) {  		u32 cadt_flags = 
ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);  		if (cadt_flags & IPSET_FLAG_NOMATCH) -			flags |= (cadt_flags << 16); +			flags |= (IPSET_FLAG_NOMATCH << 16);  	}  	if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { -		ret = adtfn(set, &data, timeout, flags); -		return ip_set_eexist(ret, flags) ? 0 : ret; +		ret = adtfn(set, &e, &ext, &ext, flags); +		return ip_set_enomatch(ret, flags, adt) ? 1 : +		       ip_set_eexist(ret, flags) ? 0 : ret;  	} -	port = ntohs(data.port); +	port = ntohs(e.port);  	port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);  	if (port > port_to)  		swap(port, port_to); @@ -629,8 +556,8 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],  	if (retried)  		port = ntohs(h->next.port);  	for (; port <= port_to; port++) { -		data.port = htons(port); -		ret = adtfn(set, &data, timeout, flags); +		e.port = htons(port); +		ret = adtfn(set, &e, &ext, &ext, flags);  		if (ret && !ip_set_eexist(ret, flags))  			return ret; @@ -640,81 +567,6 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],  	return ret;  } -/* Create hash:ip type of sets */ - -static int -hash_ipportnet_create(struct ip_set *set, struct nlattr *tb[], u32 flags) -{ -	struct ip_set_hash *h; -	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; -	u8 hbits; -	size_t hsize; - -	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) -		return -IPSET_ERR_INVALID_FAMILY; - -	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) -		return -IPSET_ERR_PROTOCOL; - -	if (tb[IPSET_ATTR_HASHSIZE]) { -		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); -		if (hashsize < IPSET_MIMINAL_HASHSIZE) -			hashsize = IPSET_MIMINAL_HASHSIZE; -	} - -	if (tb[IPSET_ATTR_MAXELEM]) -		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); - -	h = kzalloc(sizeof(*h) -		    + sizeof(struct ip_set_hash_nets) -		      * (set->family == NFPROTO_IPV4 ? 32 : 128), GFP_KERNEL); -	if (!h) -		return -ENOMEM; - -	h->maxelem = maxelem; -	get_random_bytes(&h->initval, sizeof(h->initval)); -	h->timeout = IPSET_NO_TIMEOUT; - -	hbits = htable_bits(hashsize); -	hsize = htable_size(hbits); -	if (hsize == 0) { -		kfree(h); -		return -ENOMEM; -	} -	h->table = ip_set_alloc(hsize); -	if (!h->table) { -		kfree(h); -		return -ENOMEM; -	} -	h->table->htable_bits = hbits; - -	set->data = h; - -	if (tb[IPSET_ATTR_TIMEOUT]) { -		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - -		set->variant = set->family == NFPROTO_IPV4 -			? &hash_ipportnet4_tvariant -			: &hash_ipportnet6_tvariant; - -		if (set->family == NFPROTO_IPV4) -			hash_ipportnet4_gc_init(set); -		else -			hash_ipportnet6_gc_init(set); -	} else { -		set->variant = set->family == NFPROTO_IPV4 -			? 
&hash_ipportnet4_variant : &hash_ipportnet6_variant; -	} - -	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", -		 set->name, jhash_size(h->table->htable_bits), -		 h->table->htable_bits, h->maxelem, set->data, h->table); - -	return 0; -} -  static struct ip_set_type hash_ipportnet_type __read_mostly = {  	.name		= "hash:ip,port,net",  	.protocol	= IPSET_PROTOCOL, @@ -731,6 +583,7 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {  		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },  		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 }, +		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },  	},  	.adt_policy	= {  		[IPSET_ATTR_IP]		= { .type = NLA_NESTED }, @@ -745,6 +598,8 @@ static struct ip_set_type hash_ipportnet_type __read_mostly = {  		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },  		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 }, +		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 }, +		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },  	},  	.me		= THIS_MODULE,  }; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index d6a59154d71..da740ceb56a 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -20,12 +20,12 @@  #include <linux/netfilter.h>  #include <linux/netfilter/ipset/pfxlen.h>  #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h>  #include <linux/netfilter/ipset/ip_set_hash.h>  #define REVISION_MIN	0  /*			1    Range as input support for IPv4 added */ -#define REVISION_MAX	2 /* nomatch flag support added */ +/*			2    nomatch flag support added */ +#define REVISION_MAX	3 /* Counters support added */  MODULE_LICENSE("GPL");  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -33,33 +33,46 @@ IP_SET_MODULE_DESC("hash:net", REVISION_MIN, REVISION_MAX);  MODULE_ALIAS("ip_set_hash:net");  /* Type specific function prefix */ -#define TYPE		hash_net +#define HTYPE		hash_net +#define IP_SET_HASH_WITH_NETS -static bool -hash_net_same_set(const struct ip_set *a, const struct ip_set *b); +/* IPv4 variants */ -#define hash_net4_same_set	hash_net_same_set -#define hash_net6_same_set	hash_net_same_set +/* Member elements  */ +struct hash_net4_elem { +	__be32 ip; +	u16 padding0; +	u8 nomatch; +	u8 cidr; +}; -/* The type variant functions: IPv4 */ +struct hash_net4t_elem { +	__be32 ip; +	u16 padding0; +	u8 nomatch; +	u8 cidr; +	unsigned long timeout; +}; -/* Member elements without timeout */ -struct hash_net4_elem { +struct hash_net4c_elem {  	__be32 ip;  	u16 padding0;  	u8 nomatch;  	u8 cidr; +	struct ip_set_counter counter;  }; -/* Member elements with timeout support */ -struct hash_net4_telem { +struct hash_net4ct_elem {  	__be32 ip;  	u16 padding0;  	u8 nomatch;  	u8 cidr; +	struct ip_set_counter counter;  	unsigned long timeout;  }; +/* Common functions */ +  static inline bool  hash_net4_data_equal(const struct hash_net4_elem *ip1,  		     const struct hash_net4_elem *ip2, @@ -69,40 +82,22 @@ hash_net4_data_equal(const struct hash_net4_elem *ip1,  	       ip1->cidr == ip2->cidr;  } -static inline bool -hash_net4_data_isnull(const struct hash_net4_elem *elem) -{ -	
return elem->cidr == 0; -} - -static inline void -hash_net4_data_copy(struct hash_net4_elem *dst, -		    const struct hash_net4_elem *src) +static inline int +hash_net4_do_data_match(const struct hash_net4_elem *elem)  { -	dst->ip = src->ip; -	dst->cidr = src->cidr; -	dst->nomatch = src->nomatch; +	return elem->nomatch ? -ENOTEMPTY : 1;  }  static inline void -hash_net4_data_flags(struct hash_net4_elem *dst, u32 flags) +hash_net4_data_set_flags(struct hash_net4_elem *elem, u32 flags)  { -	dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +	elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;  }  static inline void -hash_net4_data_reset_flags(struct hash_net4_elem *dst, u32 *flags) +hash_net4_data_reset_flags(struct hash_net4_elem *elem, u8 *flags)  { -	if (dst->nomatch) { -		*flags = IPSET_FLAG_NOMATCH; -		dst->nomatch = 0; -	} -} - -static inline int -hash_net4_data_match(const struct hash_net4_elem *elem) -{ -	return elem->nomatch ? -ENOTEMPTY : 1; +	swap(*flags, elem->nomatch);  }  static inline void @@ -112,13 +107,6 @@ hash_net4_data_netmask(struct hash_net4_elem *elem, u8 cidr)  	elem->cidr = cidr;  } -/* Zero CIDR values cannot be stored */ -static inline void -hash_net4_data_zero_out(struct hash_net4_elem *elem) -{ -	elem->cidr = 0; -} -  static bool  hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data)  { @@ -135,106 +123,84 @@ nla_put_failure:  	return 1;  } -static bool -hash_net4_data_tlist(struct sk_buff *skb, const struct hash_net4_elem *data) +static inline void +hash_net4_data_next(struct hash_net4_elem *next, +		    const struct hash_net4_elem *d)  { -	const struct hash_net4_telem *tdata = -		(const struct hash_net4_telem *)data; -	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - -	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) || -	    nla_put_u8(skb, IPSET_ATTR_CIDR, tdata->cidr) || -	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT, -			  htonl(ip_set_timeout_get(tdata->timeout))) || -	    (flags && -	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) -		goto nla_put_failure; -	return 0; - -nla_put_failure: -	return 1; +	next->ip = d->ip;  } -#define IP_SET_HASH_WITH_NETS - +#define MTYPE		hash_net4  #define PF		4  #define HOST_MASK	32 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_net4_data_next(struct ip_set_hash *h, -		    const struct hash_net4_elem *d) -{ -	h->next.ip = d->ip; -} +#include "ip_set_hash_gen.h"  static int  hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb,  	       const struct xt_action_param *par, -	       enum ipset_adt adt, const struct ip_set_adt_opt *opt) +	       enum ipset_adt adt, struct ip_set_adt_opt *opt)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_net *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_net4_elem data = { +	struct hash_net4_elem e = {  		.cidr = h->nets[0].cidr ? 
h->nets[0].cidr : HOST_MASK  	}; +	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); -	if (data.cidr == 0) +	if (e.cidr == 0)  		return -EINVAL;  	if (adt == IPSET_TEST) -		data.cidr = HOST_MASK; +		e.cidr = HOST_MASK; -	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); -	data.ip &= ip_set_netmask(data.cidr); +	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); +	e.ip &= ip_set_netmask(e.cidr); -	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); +	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);  }  static int  hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],  	       enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_net *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_net4_elem data = { .cidr = HOST_MASK }; -	u32 timeout = h->timeout; +	struct hash_net4_elem e = { .cidr = HOST_MASK }; +	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);  	u32 ip = 0, ip_to, last;  	int ret;  	if (unlikely(!tb[IPSET_ATTR_IP] ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_LINENO])  		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); -	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); +	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || +	      ip_set_get_extensions(set, tb, &ext);  	if (ret)  		return ret;  	if (tb[IPSET_ATTR_CIDR]) { -		data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); -		if (!data.cidr || data.cidr > HOST_MASK) +		e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); +		if (!e.cidr || e.cidr > HOST_MASK)  			return -IPSET_ERR_INVALID_CIDR;  	} -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!with_timeout(h->timeout)) -			return -IPSET_ERR_TIMEOUT; -		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -	} - -	if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { +	if (tb[IPSET_ATTR_CADT_FLAGS]) {  		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);  		if (cadt_flags & IPSET_FLAG_NOMATCH) -			flags |= (cadt_flags << 16); +			flags |= (IPSET_FLAG_NOMATCH << 16);  	}  	if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { -		data.ip = htonl(ip & ip_set_hostmask(data.cidr)); -		ret = adtfn(set, &data, timeout, flags); -		return ip_set_eexist(ret, flags) ? 0 : ret; +		e.ip = htonl(ip & ip_set_hostmask(e.cidr)); +		ret = adtfn(set, &e, &ext, &ext, flags); +		return ip_set_enomatch(ret, flags, adt) ? 1 : +		       ip_set_eexist(ret, flags) ? 
0 : ret;  	}  	ip_to = ip; @@ -250,9 +216,9 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],  	if (retried)  		ip = ntohl(h->next.ip);  	while (!after(ip, ip_to)) { -		data.ip = htonl(ip); -		last = ip_set_range_to_cidr(ip, ip_to, &data.cidr); -		ret = adtfn(set, &data, timeout, flags); +		e.ip = htonl(ip); +		last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); +		ret = adtfn(set, &e, &ext, &ext, flags);  		if (ret && !ip_set_eexist(ret, flags))  			return ret;  		else @@ -262,34 +228,42 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],  	return ret;  } -static bool -hash_net_same_set(const struct ip_set *a, const struct ip_set *b) -{ -	const struct ip_set_hash *x = a->data; -	const struct ip_set_hash *y = b->data; +/* IPv6 variants */ -	/* Resizing changes htable_bits, so we ignore it */ -	return x->maxelem == y->maxelem && -	       x->timeout == y->timeout; -} +struct hash_net6_elem { +	union nf_inet_addr ip; +	u16 padding0; +	u8 nomatch; +	u8 cidr; +}; -/* The type variant functions: IPv6 */ +struct hash_net6t_elem { +	union nf_inet_addr ip; +	u16 padding0; +	u8 nomatch; +	u8 cidr; +	unsigned long timeout; +}; -struct hash_net6_elem { +struct hash_net6c_elem {  	union nf_inet_addr ip;  	u16 padding0;  	u8 nomatch;  	u8 cidr; +	struct ip_set_counter counter;  }; -struct hash_net6_telem { +struct hash_net6ct_elem {  	union nf_inet_addr ip;  	u16 padding0;  	u8 nomatch;  	u8 cidr; +	struct ip_set_counter counter;  	unsigned long timeout;  }; +/* Common functions */ +  static inline bool  hash_net6_data_equal(const struct hash_net6_elem *ip1,  		     const struct hash_net6_elem *ip2, @@ -299,55 +273,22 @@ hash_net6_data_equal(const struct hash_net6_elem *ip1,  	       ip1->cidr == ip2->cidr;  } -static inline bool -hash_net6_data_isnull(const struct hash_net6_elem *elem) -{ -	return elem->cidr == 0; -} - -static inline void -hash_net6_data_copy(struct hash_net6_elem *dst, -		    const struct hash_net6_elem *src) -{ -	dst->ip.in6 = src->ip.in6; -	dst->cidr = src->cidr; -	dst->nomatch = src->nomatch; -} - -static inline void -hash_net6_data_flags(struct hash_net6_elem *dst, u32 flags) -{ -	dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); -} - -static inline void -hash_net6_data_reset_flags(struct hash_net6_elem *dst, u32 *flags) -{ -	if (dst->nomatch) { -		*flags = IPSET_FLAG_NOMATCH; -		dst->nomatch = 0; -	} -} -  static inline int -hash_net6_data_match(const struct hash_net6_elem *elem) +hash_net6_do_data_match(const struct hash_net6_elem *elem)  {  	return elem->nomatch ? -ENOTEMPTY : 1;  }  static inline void -hash_net6_data_zero_out(struct hash_net6_elem *elem) +hash_net6_data_set_flags(struct hash_net6_elem *elem, u32 flags)  { -	elem->cidr = 0; +	elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;  }  static inline void -ip6_netmask(union nf_inet_addr *ip, u8 prefix) +hash_net6_data_reset_flags(struct hash_net6_elem *elem, u8 *flags)  { -	ip->ip6[0] &= ip_set_netmask6(prefix)[0]; -	ip->ip6[1] &= ip_set_netmask6(prefix)[1]; -	ip->ip6[2] &= ip_set_netmask6(prefix)[2]; -	ip->ip6[3] &= ip_set_netmask6(prefix)[3]; +	swap(*flags, elem->nomatch);  }  static inline void @@ -373,74 +314,60 @@ nla_put_failure:  	return 1;  } -static bool -hash_net6_data_tlist(struct sk_buff *skb, const struct hash_net6_elem *data) +static inline void +hash_net6_data_next(struct hash_net4_elem *next, +		    const struct hash_net6_elem *d)  { -	const struct hash_net6_telem *e = -		(const struct hash_net6_telem *)data; -	u32 flags = data->nomatch ? 
IPSET_FLAG_NOMATCH : 0; - -	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || -	    nla_put_u8(skb, IPSET_ATTR_CIDR, e->cidr) || -	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT, -			  htonl(ip_set_timeout_get(e->timeout))) || -	    (flags && -	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) -		goto nla_put_failure; -	return 0; - -nla_put_failure: -	return 1;  } +#undef MTYPE  #undef PF  #undef HOST_MASK +#define MTYPE		hash_net6  #define PF		6  #define HOST_MASK	128 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_net6_data_next(struct ip_set_hash *h, -		    const struct hash_net6_elem *d) -{ -} +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h"  static int  hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb,  	       const struct xt_action_param *par, -	       enum ipset_adt adt, const struct ip_set_adt_opt *opt) +	       enum ipset_adt adt, struct ip_set_adt_opt *opt)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_net *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_net6_elem data = { +	struct hash_net6_elem e = {  		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK  	}; +	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h); -	if (data.cidr == 0) +	if (e.cidr == 0)  		return -EINVAL;  	if (adt == IPSET_TEST) -		data.cidr = HOST_MASK; +		e.cidr = HOST_MASK; -	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); -	ip6_netmask(&data.ip, data.cidr); +	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); +	ip6_netmask(&e.ip, e.cidr); -	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); +	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);  }  static int  hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],  	       enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_net *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_net6_elem data = { .cidr = HOST_MASK }; -	u32 timeout = h->timeout; +	struct hash_net6_elem e = { .cidr = HOST_MASK }; +	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);  	int ret;  	if (unlikely(!tb[IPSET_ATTR_IP] ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))  		return -IPSET_ERR_PROTOCOL;  	if (unlikely(tb[IPSET_ATTR_IP_TO]))  		return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; @@ -448,107 +375,29 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],  	if (tb[IPSET_ATTR_LINENO])  		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); -	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); +	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || +	      ip_set_get_extensions(set, tb, &ext);  	if (ret)  		return ret;  	if (tb[IPSET_ATTR_CIDR]) -		data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); +		e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); -	if (!data.cidr || data.cidr > HOST_MASK) +	if (!e.cidr || e.cidr > HOST_MASK)  		return -IPSET_ERR_INVALID_CIDR; -	ip6_netmask(&data.ip, data.cidr); +	ip6_netmask(&e.ip, e.cidr); -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!with_timeout(h->timeout)) -			return -IPSET_ERR_TIMEOUT; -		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -	} - -	if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { +	if (tb[IPSET_ATTR_CADT_FLAGS]) {  		u32 cadt_flags = 
ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);  		if (cadt_flags & IPSET_FLAG_NOMATCH) -			flags |= (cadt_flags << 16); +			flags |= (IPSET_FLAG_NOMATCH << 16);  	} -	ret = adtfn(set, &data, timeout, flags); +	ret = adtfn(set, &e, &ext, &ext, flags); -	return ip_set_eexist(ret, flags) ? 0 : ret; -} - -/* Create hash:ip type of sets */ - -static int -hash_net_create(struct ip_set *set, struct nlattr *tb[], u32 flags) -{ -	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; -	struct ip_set_hash *h; -	u8 hbits; -	size_t hsize; - -	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) -		return -IPSET_ERR_INVALID_FAMILY; - -	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) -		return -IPSET_ERR_PROTOCOL; - -	if (tb[IPSET_ATTR_HASHSIZE]) { -		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); -		if (hashsize < IPSET_MIMINAL_HASHSIZE) -			hashsize = IPSET_MIMINAL_HASHSIZE; -	} - -	if (tb[IPSET_ATTR_MAXELEM]) -		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); - -	h = kzalloc(sizeof(*h) -		    + sizeof(struct ip_set_hash_nets) -		      * (set->family == NFPROTO_IPV4 ? 32 : 128), GFP_KERNEL); -	if (!h) -		return -ENOMEM; - -	h->maxelem = maxelem; -	get_random_bytes(&h->initval, sizeof(h->initval)); -	h->timeout = IPSET_NO_TIMEOUT; - -	hbits = htable_bits(hashsize); -	hsize = htable_size(hbits); -	if (hsize == 0) { -		kfree(h); -		return -ENOMEM; -	} -	h->table = ip_set_alloc(hsize); -	if (!h->table) { -		kfree(h); -		return -ENOMEM; -	} -	h->table->htable_bits = hbits; - -	set->data = h; - -	if (tb[IPSET_ATTR_TIMEOUT]) { -		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - -		set->variant = set->family == NFPROTO_IPV4 -			? &hash_net4_tvariant : &hash_net6_tvariant; - -		if (set->family == NFPROTO_IPV4) -			hash_net4_gc_init(set); -		else -			hash_net6_gc_init(set); -	} else { -		set->variant = set->family == NFPROTO_IPV4 -			? &hash_net4_variant : &hash_net6_variant; -	} - -	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", -		 set->name, jhash_size(h->table->htable_bits), -		 h->table->htable_bits, h->maxelem, set->data, h->table); - -	return 0; +	return ip_set_enomatch(ret, flags, adt) ? 1 : +	       ip_set_eexist(ret, flags) ? 
0 : ret;  }  static struct ip_set_type hash_net_type __read_mostly = { @@ -566,6 +415,7 @@ static struct ip_set_type hash_net_type __read_mostly = {  		[IPSET_ATTR_PROBES]	= { .type = NLA_U8 },  		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 }, +		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },  	},  	.adt_policy	= {  		[IPSET_ATTR_IP]		= { .type = NLA_NESTED }, @@ -573,6 +423,8 @@ static struct ip_set_type hash_net_type __read_mostly = {  		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },  		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 }, +		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 }, +		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },  	},  	.me		= THIS_MODULE,  }; diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index f2b0a3c3013..84ae6f6ce62 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2011-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -21,12 +21,12 @@  #include <linux/netfilter.h>  #include <linux/netfilter/ipset/pfxlen.h>  #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h>  #include <linux/netfilter/ipset/ip_set_hash.h>  #define REVISION_MIN	0  /*			1    nomatch flag support added */ -#define REVISION_MAX	2 /* /0 support added */ +/*			2    /0 support added */ +#define REVISION_MAX	3 /* Counters support added */  MODULE_LICENSE("GPL");  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -127,17 +127,14 @@ iface_add(struct rb_root *root, const char **iface)  }  /* Type specific function prefix */ -#define TYPE		hash_netiface - -static bool -hash_netiface_same_set(const struct ip_set *a, const struct ip_set *b); - -#define hash_netiface4_same_set	hash_netiface_same_set -#define hash_netiface6_same_set	hash_netiface_same_set +#define HTYPE		hash_netiface +#define IP_SET_HASH_WITH_NETS +#define IP_SET_HASH_WITH_RBTREE +#define IP_SET_HASH_WITH_MULTI  #define STREQ(a, b)	(strcmp(a, b) == 0) -/* The type variant functions: IPv4 */ +/* IPv4 variants */  struct hash_netiface4_elem_hashed {  	__be32 ip; @@ -147,8 +144,6 @@ struct hash_netiface4_elem_hashed {  	u8 elem;  }; -#define HKEY_DATALEN	sizeof(struct hash_netiface4_elem_hashed) -  /* Member elements without timeout */  struct hash_netiface4_elem {  	__be32 ip; @@ -159,17 +154,39 @@ struct hash_netiface4_elem {  	const char *iface;  }; -/* Member elements with timeout support */ -struct hash_netiface4_telem { +struct hash_netiface4t_elem { +	__be32 ip; +	u8 physdev; +	u8 cidr; +	u8 nomatch; +	u8 elem; +	const char *iface; +	unsigned long timeout; +}; + +struct hash_netiface4c_elem { +	__be32 ip; +	u8 physdev; +	u8 cidr; +	u8 nomatch; +	u8 elem; +	const char *iface; +	struct ip_set_counter counter; +}; + +struct hash_netiface4ct_elem {  	__be32 ip;  	u8 physdev;  	u8 cidr;  	u8 nomatch;  	u8 elem;  	const char *iface; +	struct ip_set_counter counter;  	unsigned long timeout;  }; +/* Common functions */ +  static inline bool  hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,  			  const struct hash_netiface4_elem *ip2, @@ -182,38 +199,22 @@ hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,  	       ip1->iface == ip2->iface;  
} -static inline bool -hash_netiface4_data_isnull(const struct hash_netiface4_elem *elem) -{ -	return elem->elem == 0; -} - -static inline void -hash_netiface4_data_copy(struct hash_netiface4_elem *dst, -			 const struct hash_netiface4_elem *src) +static inline int +hash_netiface4_do_data_match(const struct hash_netiface4_elem *elem)  { -	memcpy(dst, src, sizeof(*dst)); +	return elem->nomatch ? -ENOTEMPTY : 1;  }  static inline void -hash_netiface4_data_flags(struct hash_netiface4_elem *dst, u32 flags) +hash_netiface4_data_set_flags(struct hash_netiface4_elem *elem, u32 flags)  { -	dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +	elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;  }  static inline void -hash_netiface4_data_reset_flags(struct hash_netiface4_elem *dst, u32 *flags) -{ -	if (dst->nomatch) { -		*flags = IPSET_FLAG_NOMATCH; -		dst->nomatch = 0; -	} -} - -static inline int -hash_netiface4_data_match(const struct hash_netiface4_elem *elem) +hash_netiface4_data_reset_flags(struct hash_netiface4_elem *elem, u8 *flags)  { -	return elem->nomatch ? -ENOTEMPTY : 1; +	swap(*flags, elem->nomatch);  }  static inline void @@ -223,12 +224,6 @@ hash_netiface4_data_netmask(struct hash_netiface4_elem *elem, u8 cidr)  	elem->cidr = cidr;  } -static inline void -hash_netiface4_data_zero_out(struct hash_netiface4_elem *elem) -{ -	elem->elem = 0; -} -  static bool  hash_netiface4_data_list(struct sk_buff *skb,  			 const struct hash_netiface4_elem *data) @@ -249,66 +244,40 @@ nla_put_failure:  	return 1;  } -static bool -hash_netiface4_data_tlist(struct sk_buff *skb, -			  const struct hash_netiface4_elem *data) +static inline void +hash_netiface4_data_next(struct hash_netiface4_elem *next, +			 const struct hash_netiface4_elem *d)  { -	const struct hash_netiface4_telem *tdata = -		(const struct hash_netiface4_telem *)data; -	u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0; - -	if (data->nomatch) -		flags |= IPSET_FLAG_NOMATCH; -	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || -	    nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) || -	    nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) || -	    (flags && -	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))) || -	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT, -			  htonl(ip_set_timeout_get(tdata->timeout)))) -		goto nla_put_failure; - -	return 0; - -nla_put_failure: -	return 1; +	next->ip = d->ip;  } -#define IP_SET_HASH_WITH_NETS -#define IP_SET_HASH_WITH_RBTREE -#define IP_SET_HASH_WITH_MULTI - +#define MTYPE		hash_netiface4  #define PF		4  #define HOST_MASK	32 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_netiface4_data_next(struct ip_set_hash *h, -			 const struct hash_netiface4_elem *d) -{ -	h->next.ip = d->ip; -} +#define HKEY_DATALEN	sizeof(struct hash_netiface4_elem_hashed) +#include "ip_set_hash_gen.h"  static int  hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,  		    const struct xt_action_param *par, -		    enum ipset_adt adt, const struct ip_set_adt_opt *opt) +		    enum ipset_adt adt, struct ip_set_adt_opt *opt)  { -	struct ip_set_hash *h = set->data; +	struct hash_netiface *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_netiface4_elem data = { +	struct hash_netiface4_elem e = {  		.cidr = h->nets[0].cidr ? 
h->nets[0].cidr : HOST_MASK,  		.elem = 1,  	}; +	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);  	int ret; -	if (data.cidr == 0) +	if (e.cidr == 0)  		return -EINVAL;  	if (adt == IPSET_TEST) -		data.cidr = HOST_MASK; +		e.cidr = HOST_MASK; -	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); -	data.ip &= ip_set_netmask(data.cidr); +	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); +	e.ip &= ip_set_netmask(e.cidr);  #define IFACE(dir)	(par->dir ? par->dir->name : NULL)  #define PHYSDEV(dir)	(nf_bridge->dir ? nf_bridge->dir->name : NULL) @@ -320,72 +289,69 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,  		if (!nf_bridge)  			return -EINVAL; -		data.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); -		data.physdev = 1; +		e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); +		e.physdev = 1;  #else -		data.iface = NULL; +		e.iface = NULL;  #endif  	} else -		data.iface = SRCDIR ? IFACE(in) : IFACE(out); +		e.iface = SRCDIR ? IFACE(in) : IFACE(out); -	if (!data.iface) +	if (!e.iface)  		return -EINVAL; -	ret = iface_test(&h->rbtree, &data.iface); +	ret = iface_test(&h->rbtree, &e.iface);  	if (adt == IPSET_ADD) {  		if (!ret) { -			ret = iface_add(&h->rbtree, &data.iface); +			ret = iface_add(&h->rbtree, &e.iface);  			if (ret)  				return ret;  		}  	} else if (!ret)  		return ret; -	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); +	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);  }  static int  hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],  		    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)  { -	struct ip_set_hash *h = set->data; +	struct hash_netiface *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_netiface4_elem data = { .cidr = HOST_MASK, .elem = 1 }; +	struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; +	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);  	u32 ip = 0, ip_to, last; -	u32 timeout = h->timeout;  	char iface[IFNAMSIZ];  	int ret;  	if (unlikely(!tb[IPSET_ATTR_IP] ||  		     !tb[IPSET_ATTR_IFACE] ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_LINENO])  		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); -	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); +	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || +	      ip_set_get_extensions(set, tb, &ext);  	if (ret)  		return ret;  	if (tb[IPSET_ATTR_CIDR]) { -		data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); -		if (data.cidr > HOST_MASK) +		e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); +		if (e.cidr > HOST_MASK)  			return -IPSET_ERR_INVALID_CIDR;  	} -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!with_timeout(h->timeout)) -			return -IPSET_ERR_TIMEOUT; -		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -	} -  	strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE])); -	data.iface = iface; -	ret = iface_test(&h->rbtree, &data.iface); +	e.iface = iface; +	ret = iface_test(&h->rbtree, &e.iface);  	if (adt == IPSET_ADD) {  		if (!ret) { -			ret = iface_add(&h->rbtree, &data.iface); +			ret = iface_add(&h->rbtree, &e.iface);  			if (ret)  				return ret;  		} @@ -395,14 +361,15 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],  	if (tb[IPSET_ATTR_CADT_FLAGS]) {  		u32 cadt_flags = 
ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);  		if (cadt_flags & IPSET_FLAG_PHYSDEV) -			data.physdev = 1; -		if (adt == IPSET_ADD && (cadt_flags & IPSET_FLAG_NOMATCH)) -			flags |= (cadt_flags << 16); +			e.physdev = 1; +		if (cadt_flags & IPSET_FLAG_NOMATCH) +			flags |= (IPSET_FLAG_NOMATCH << 16);  	}  	if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { -		data.ip = htonl(ip & ip_set_hostmask(data.cidr)); -		ret = adtfn(set, &data, timeout, flags); -		return ip_set_eexist(ret, flags) ? 0 : ret; +		e.ip = htonl(ip & ip_set_hostmask(e.cidr)); +		ret = adtfn(set, &e, &ext, &ext, flags); +		return ip_set_enomatch(ret, flags, adt) ? 1 : +		       ip_set_eexist(ret, flags) ? 0 : ret;  	}  	if (tb[IPSET_ATTR_IP_TO]) { @@ -413,16 +380,15 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],  			swap(ip, ip_to);  		if (ip + UINT_MAX == ip_to)  			return -IPSET_ERR_HASH_RANGE; -	} else { -		ip_set_mask_from_to(ip, ip_to, data.cidr); -	} +	} else +		ip_set_mask_from_to(ip, ip_to, e.cidr);  	if (retried)  		ip = ntohl(h->next.ip);  	while (!after(ip, ip_to)) { -		data.ip = htonl(ip); -		last = ip_set_range_to_cidr(ip, ip_to, &data.cidr); -		ret = adtfn(set, &data, timeout, flags); +		e.ip = htonl(ip); +		last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); +		ret = adtfn(set, &e, &ext, &ext, flags);  		if (ret && !ip_set_eexist(ret, flags))  			return ret; @@ -433,18 +399,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],  	return ret;  } -static bool -hash_netiface_same_set(const struct ip_set *a, const struct ip_set *b) -{ -	const struct ip_set_hash *x = a->data; -	const struct ip_set_hash *y = b->data; - -	/* Resizing changes htable_bits, so we ignore it */ -	return x->maxelem == y->maxelem && -	       x->timeout == y->timeout; -} - -/* The type variant functions: IPv6 */ +/* IPv6 variants */  struct hash_netiface6_elem_hashed {  	union nf_inet_addr ip; @@ -454,8 +409,6 @@ struct hash_netiface6_elem_hashed {  	u8 elem;  }; -#define HKEY_DATALEN	sizeof(struct hash_netiface6_elem_hashed) -  struct hash_netiface6_elem {  	union nf_inet_addr ip;  	u8 physdev; @@ -465,16 +418,39 @@ struct hash_netiface6_elem {  	const char *iface;  }; -struct hash_netiface6_telem { +struct hash_netiface6t_elem { +	union nf_inet_addr ip; +	u8 physdev; +	u8 cidr; +	u8 nomatch; +	u8 elem; +	const char *iface; +	unsigned long timeout; +}; + +struct hash_netiface6c_elem {  	union nf_inet_addr ip;  	u8 physdev;  	u8 cidr;  	u8 nomatch;  	u8 elem;  	const char *iface; +	struct ip_set_counter counter; +}; + +struct hash_netiface6ct_elem { +	union nf_inet_addr ip; +	u8 physdev; +	u8 cidr; +	u8 nomatch; +	u8 elem; +	const char *iface; +	struct ip_set_counter counter;  	unsigned long timeout;  }; +/* Common functions */ +  static inline bool  hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,  			  const struct hash_netiface6_elem *ip2, @@ -487,53 +463,22 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,  	       ip1->iface == ip2->iface;  } -static inline bool -hash_netiface6_data_isnull(const struct hash_netiface6_elem *elem) -{ -	return elem->elem == 0; -} - -static inline void -hash_netiface6_data_copy(struct hash_netiface6_elem *dst, -			 const struct hash_netiface6_elem *src) -{ -	memcpy(dst, src, sizeof(*dst)); -} - -static inline void -hash_netiface6_data_flags(struct hash_netiface6_elem *dst, u32 flags) -{ -	dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); -} -  static inline int -hash_netiface6_data_match(const struct hash_netiface6_elem *elem) 
+hash_netiface6_do_data_match(const struct hash_netiface6_elem *elem)  {  	return elem->nomatch ? -ENOTEMPTY : 1;  }  static inline void -hash_netiface6_data_reset_flags(struct hash_netiface6_elem *dst, u32 *flags) -{ -	if (dst->nomatch) { -		*flags = IPSET_FLAG_NOMATCH; -		dst->nomatch = 0; -	} -} - -static inline void -hash_netiface6_data_zero_out(struct hash_netiface6_elem *elem) +hash_netiface6_data_set_flags(struct hash_netiface6_elem *elem, u32 flags)  { -	elem->elem = 0; +	elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;  }  static inline void -ip6_netmask(union nf_inet_addr *ip, u8 prefix) +hash_netiface6_data_reset_flags(struct hash_netiface6_elem *elem, u8 *flags)  { -	ip->ip6[0] &= ip_set_netmask6(prefix)[0]; -	ip->ip6[1] &= ip_set_netmask6(prefix)[1]; -	ip->ip6[2] &= ip_set_netmask6(prefix)[2]; -	ip->ip6[3] &= ip_set_netmask6(prefix)[3]; +	swap(*flags, elem->nomatch);  }  static inline void @@ -563,63 +508,45 @@ nla_put_failure:  	return 1;  } -static bool -hash_netiface6_data_tlist(struct sk_buff *skb, -			  const struct hash_netiface6_elem *data) +static inline void +hash_netiface6_data_next(struct hash_netiface4_elem *next, +			 const struct hash_netiface6_elem *d)  { -	const struct hash_netiface6_telem *e = -		(const struct hash_netiface6_telem *)data; -	u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0; - -	if (data->nomatch) -		flags |= IPSET_FLAG_NOMATCH; -	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || -	    nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) || -	    nla_put_string(skb, IPSET_ATTR_IFACE, data->iface) || -	    (flags && -	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags))) || -	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT, -			  htonl(ip_set_timeout_get(e->timeout)))) -		goto nla_put_failure; -	return 0; - -nla_put_failure: -	return 1;  } +#undef MTYPE  #undef PF  #undef HOST_MASK +#undef HKEY_DATALEN +#define MTYPE		hash_netiface6  #define PF		6  #define HOST_MASK	128 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_netiface6_data_next(struct ip_set_hash *h, -			 const struct hash_netiface6_elem *d) -{ -} +#define HKEY_DATALEN	sizeof(struct hash_netiface6_elem_hashed) +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h"  static int  hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,  		    const struct xt_action_param *par, -		    enum ipset_adt adt, const struct ip_set_adt_opt *opt) +		    enum ipset_adt adt, struct ip_set_adt_opt *opt)  { -	struct ip_set_hash *h = set->data; +	struct hash_netiface *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_netiface6_elem data = { +	struct hash_netiface6_elem e = {  		.cidr = h->nets[0].cidr ? h->nets[0].cidr : HOST_MASK,  		.elem = 1,  	}; +	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);  	int ret; -	if (data.cidr == 0) +	if (e.cidr == 0)  		return -EINVAL;  	if (adt == IPSET_TEST) -		data.cidr = HOST_MASK; +		e.cidr = HOST_MASK; -	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); -	ip6_netmask(&data.ip, data.cidr); +	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); +	ip6_netmask(&e.ip, e.cidr);  	if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {  #ifdef CONFIG_BRIDGE_NETFILTER @@ -627,44 +554,46 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,  		if (!nf_bridge)  			return -EINVAL; -		data.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); -		data.physdev = 1; +		e.iface = SRCDIR ? 
PHYSDEV(physindev) : PHYSDEV(physoutdev); +		e.physdev = 1;  #else -		data.iface = NULL; +		e.iface = NULL;  #endif  	} else -		data.iface = SRCDIR ? IFACE(in) : IFACE(out); +		e.iface = SRCDIR ? IFACE(in) : IFACE(out); -	if (!data.iface) +	if (!e.iface)  		return -EINVAL; -	ret = iface_test(&h->rbtree, &data.iface); +	ret = iface_test(&h->rbtree, &e.iface);  	if (adt == IPSET_ADD) {  		if (!ret) { -			ret = iface_add(&h->rbtree, &data.iface); +			ret = iface_add(&h->rbtree, &e.iface);  			if (ret)  				return ret;  		}  	} else if (!ret)  		return ret; -	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); +	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);  }  static int  hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],  		   enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)  { -	struct ip_set_hash *h = set->data; +	struct hash_netiface *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_netiface6_elem data = { .cidr = HOST_MASK, .elem = 1 }; -	u32 timeout = h->timeout; +	struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 }; +	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);  	char iface[IFNAMSIZ];  	int ret;  	if (unlikely(!tb[IPSET_ATTR_IP] ||  		     !tb[IPSET_ATTR_IFACE] ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))  		return -IPSET_ERR_PROTOCOL;  	if (unlikely(tb[IPSET_ATTR_IP_TO]))  		return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; @@ -672,28 +601,23 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],  	if (tb[IPSET_ATTR_LINENO])  		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); -	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); +	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || +	      ip_set_get_extensions(set, tb, &ext);  	if (ret)  		return ret;  	if (tb[IPSET_ATTR_CIDR]) -		data.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); -	if (data.cidr > HOST_MASK) +		e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); +	if (e.cidr > HOST_MASK)  		return -IPSET_ERR_INVALID_CIDR; -	ip6_netmask(&data.ip, data.cidr); - -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!with_timeout(h->timeout)) -			return -IPSET_ERR_TIMEOUT; -		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -	} +	ip6_netmask(&e.ip, e.cidr);  	strcpy(iface, nla_data(tb[IPSET_ATTR_IFACE])); -	data.iface = iface; -	ret = iface_test(&h->rbtree, &data.iface); +	e.iface = iface; +	ret = iface_test(&h->rbtree, &e.iface);  	if (adt == IPSET_ADD) {  		if (!ret) { -			ret = iface_add(&h->rbtree, &data.iface); +			ret = iface_add(&h->rbtree, &e.iface);  			if (ret)  				return ret;  		} @@ -703,90 +627,15 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],  	if (tb[IPSET_ATTR_CADT_FLAGS]) {  		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);  		if (cadt_flags & IPSET_FLAG_PHYSDEV) -			data.physdev = 1; -		if (adt == IPSET_ADD && (cadt_flags & IPSET_FLAG_NOMATCH)) -			flags |= (cadt_flags << 16); -	} - -	ret = adtfn(set, &data, timeout, flags); - -	return ip_set_eexist(ret, flags) ? 
0 : ret; -} - -/* Create hash:ip type of sets */ - -static int -hash_netiface_create(struct ip_set *set, struct nlattr *tb[], u32 flags) -{ -	struct ip_set_hash *h; -	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; -	u8 hbits; -	size_t hsize; - -	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) -		return -IPSET_ERR_INVALID_FAMILY; - -	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) -		return -IPSET_ERR_PROTOCOL; - -	if (tb[IPSET_ATTR_HASHSIZE]) { -		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); -		if (hashsize < IPSET_MIMINAL_HASHSIZE) -			hashsize = IPSET_MIMINAL_HASHSIZE; +			e.physdev = 1; +		if (cadt_flags & IPSET_FLAG_NOMATCH) +			flags |= (IPSET_FLAG_NOMATCH << 16);  	} -	if (tb[IPSET_ATTR_MAXELEM]) -		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); - -	h = kzalloc(sizeof(*h) -		    + sizeof(struct ip_set_hash_nets) -		      * (set->family == NFPROTO_IPV4 ? 32 : 128), GFP_KERNEL); -	if (!h) -		return -ENOMEM; +	ret = adtfn(set, &e, &ext, &ext, flags); -	h->maxelem = maxelem; -	get_random_bytes(&h->initval, sizeof(h->initval)); -	h->timeout = IPSET_NO_TIMEOUT; -	h->ahash_max = AHASH_MAX_SIZE; - -	hbits = htable_bits(hashsize); -	hsize = htable_size(hbits); -	if (hsize == 0) { -		kfree(h); -		return -ENOMEM; -	} -	h->table = ip_set_alloc(hsize); -	if (!h->table) { -		kfree(h); -		return -ENOMEM; -	} -	h->table->htable_bits = hbits; -	h->rbtree = RB_ROOT; - -	set->data = h; - -	if (tb[IPSET_ATTR_TIMEOUT]) { -		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - -		set->variant = set->family == NFPROTO_IPV4 -			? &hash_netiface4_tvariant : &hash_netiface6_tvariant; - -		if (set->family == NFPROTO_IPV4) -			hash_netiface4_gc_init(set); -		else -			hash_netiface6_gc_init(set); -	} else { -		set->variant = set->family == NFPROTO_IPV4 -			? &hash_netiface4_variant : &hash_netiface6_variant; -	} - -	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", -		 set->name, jhash_size(h->table->htable_bits), -		 h->table->htable_bits, h->maxelem, set->data, h->table); - -	return 0; +	return ip_set_enomatch(ret, flags, adt) ? 1 : +	       ip_set_eexist(ret, flags) ? 
0 : ret;  }  static struct ip_set_type hash_netiface_type __read_mostly = { @@ -806,6 +655,7 @@ static struct ip_set_type hash_netiface_type __read_mostly = {  		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },  		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 }, +		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },  	},  	.adt_policy	= {  		[IPSET_ATTR_IP]		= { .type = NLA_NESTED }, @@ -816,6 +666,8 @@ static struct ip_set_type hash_netiface_type __read_mostly = {  		[IPSET_ATTR_CIDR]	= { .type = NLA_U8 },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },  		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 }, +		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 }, +		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },  	},  	.me		= THIS_MODULE,  }; diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 349deb672a2..9a0869853be 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -20,14 +20,14 @@  #include <linux/netfilter.h>  #include <linux/netfilter/ipset/pfxlen.h>  #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h>  #include <linux/netfilter/ipset/ip_set_getport.h>  #include <linux/netfilter/ipset/ip_set_hash.h>  #define REVISION_MIN	0  /*			1    SCTP and UDPLITE support added */  /*			2    Range as input support for IPv4 added */ -#define REVISION_MAX	3 /* nomatch flag support added */ +/*			3    nomatch flag support added */ +#define REVISION_MAX	4 /* Counters support added */  MODULE_LICENSE("GPL");  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); @@ -35,15 +35,9 @@ IP_SET_MODULE_DESC("hash:net,port", REVISION_MIN, REVISION_MAX);  MODULE_ALIAS("ip_set_hash:net,port");  /* Type specific function prefix */ -#define TYPE		hash_netport - -static bool -hash_netport_same_set(const struct ip_set *a, const struct ip_set *b); - -#define hash_netport4_same_set	hash_netport_same_set -#define hash_netport6_same_set	hash_netport_same_set - -/* The type variant functions: IPv4 */ +#define HTYPE		hash_netport +#define IP_SET_HASH_WITH_PROTO +#define IP_SET_HASH_WITH_NETS  /* We squeeze the "nomatch" flag into cidr: we don't support cidr == 0   * However this way we have to store internally cidr - 1, @@ -51,7 +45,9 @@ hash_netport_same_set(const struct ip_set *a, const struct ip_set *b);   */  #define IP_SET_HASH_WITH_NETS_PACKED -/* Member elements without timeout */ +/* IPv4 variants */ + +/* Member elements */  struct hash_netport4_elem {  	__be32 ip;  	__be16 port; @@ -60,16 +56,36 @@ struct hash_netport4_elem {  	u8 nomatch:1;  }; -/* Member elements with timeout support */ -struct hash_netport4_telem { +struct hash_netport4t_elem { +	__be32 ip; +	__be16 port; +	u8 proto; +	u8 cidr:7; +	u8 nomatch:1; +	unsigned long timeout; +}; + +struct hash_netport4c_elem { +	__be32 ip; +	__be16 port; +	u8 proto; +	u8 cidr:7; +	u8 nomatch:1; +	struct ip_set_counter counter; +}; + +struct hash_netport4ct_elem {  	__be32 ip;  	__be16 port;  	u8 proto;  	u8 cidr:7;  	u8 nomatch:1; +	struct ip_set_counter counter;  	unsigned long timeout;  }; +/* Common functions */ +  static inline bool  hash_netport4_data_equal(const struct hash_netport4_elem *ip1,  			 
const struct hash_netport4_elem *ip2, @@ -81,42 +97,22 @@ hash_netport4_data_equal(const struct hash_netport4_elem *ip1,  	       ip1->cidr == ip2->cidr;  } -static inline bool -hash_netport4_data_isnull(const struct hash_netport4_elem *elem) -{ -	return elem->proto == 0; -} - -static inline void -hash_netport4_data_copy(struct hash_netport4_elem *dst, -			const struct hash_netport4_elem *src) +static inline int +hash_netport4_do_data_match(const struct hash_netport4_elem *elem)  { -	dst->ip = src->ip; -	dst->port = src->port; -	dst->proto = src->proto; -	dst->cidr = src->cidr; -	dst->nomatch = src->nomatch; +	return elem->nomatch ? -ENOTEMPTY : 1;  }  static inline void -hash_netport4_data_flags(struct hash_netport4_elem *dst, u32 flags) +hash_netport4_data_set_flags(struct hash_netport4_elem *elem, u32 flags)  { -	dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); +	elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);  }  static inline void -hash_netport4_data_reset_flags(struct hash_netport4_elem *dst, u32 *flags) +hash_netport4_data_reset_flags(struct hash_netport4_elem *elem, u8 *flags)  { -	if (dst->nomatch) { -		*flags = IPSET_FLAG_NOMATCH; -		dst->nomatch = 0; -	} -} - -static inline int -hash_netport4_data_match(const struct hash_netport4_elem *elem) -{ -	return elem->nomatch ? -ENOTEMPTY : 1; +	swap(*flags, elem->nomatch);  }  static inline void @@ -126,12 +122,6 @@ hash_netport4_data_netmask(struct hash_netport4_elem *elem, u8 cidr)  	elem->cidr = cidr - 1;  } -static inline void -hash_netport4_data_zero_out(struct hash_netport4_elem *elem) -{ -	elem->proto = 0; -} -  static bool  hash_netport4_data_list(struct sk_buff *skb,  			const struct hash_netport4_elem *data) @@ -151,77 +141,53 @@ nla_put_failure:  	return 1;  } -static bool -hash_netport4_data_tlist(struct sk_buff *skb, -			 const struct hash_netport4_elem *data) +static inline void +hash_netport4_data_next(struct hash_netport4_elem *next, +			const struct hash_netport4_elem *d)  { -	const struct hash_netport4_telem *tdata = -		(const struct hash_netport4_telem *)data; -	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - -	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, tdata->ip) || -	    nla_put_net16(skb, IPSET_ATTR_PORT, tdata->port) || -	    nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr + 1) || -	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || -	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT, -			  htonl(ip_set_timeout_get(tdata->timeout))) || -	    (flags && -	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) -		goto nla_put_failure; -	return 0; - -nla_put_failure: -	return 1; +	next->ip = d->ip; +	next->port = d->port;  } -#define IP_SET_HASH_WITH_PROTO -#define IP_SET_HASH_WITH_NETS - +#define MTYPE		hash_netport4  #define PF		4  #define HOST_MASK	32 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_netport4_data_next(struct ip_set_hash *h, -			const struct hash_netport4_elem *d) -{ -	h->next.ip = d->ip; -	h->next.port = d->port; -} +#include "ip_set_hash_gen.h"  static int  hash_netport4_kadt(struct ip_set *set, const struct sk_buff *skb,  		   const struct xt_action_param *par, -		   enum ipset_adt adt, const struct ip_set_adt_opt *opt) +		   enum ipset_adt adt, struct ip_set_adt_opt *opt)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_netport *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_netport4_elem data = { +	struct hash_netport4_elem e = {  		.cidr = h->nets[0].cidr ? 
h->nets[0].cidr - 1 : HOST_MASK - 1  	}; +	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);  	if (adt == IPSET_TEST) -		data.cidr = HOST_MASK - 1; +		e.cidr = HOST_MASK - 1;  	if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, -				 &data.port, &data.proto)) +				 &e.port, &e.proto))  		return -EINVAL; -	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip); -	data.ip &= ip_set_netmask(data.cidr + 1); +	ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); +	e.ip &= ip_set_netmask(e.cidr + 1); -	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); +	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);  }  static int  hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],  		   enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_netport *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_netport4_elem data = { .cidr = HOST_MASK - 1 }; +	struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; +	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);  	u32 port, port_to, p = 0, ip = 0, ip_to, last; -	u32 timeout = h->timeout;  	bool with_ports = false;  	u8 cidr;  	int ret; @@ -230,13 +196,16 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],  		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_LINENO])  		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); -	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); +	ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip) || +	      ip_set_get_extensions(set, tb, &ext);  	if (ret)  		return ret; @@ -244,47 +213,42 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],  		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);  		if (!cidr || cidr > HOST_MASK)  			return -IPSET_ERR_INVALID_CIDR; -		data.cidr = cidr - 1; +		e.cidr = cidr - 1;  	}  	if (tb[IPSET_ATTR_PORT]) -		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); +		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);  	else  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_PROTO]) { -		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); -		with_ports = ip_set_proto_with_ports(data.proto); +		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); +		with_ports = ip_set_proto_with_ports(e.proto); -		if (data.proto == 0) +		if (e.proto == 0)  			return -IPSET_ERR_INVALID_PROTO;  	} else  		return -IPSET_ERR_MISSING_PROTO; -	if (!(with_ports || data.proto == IPPROTO_ICMP)) -		data.port = 0; - -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!with_timeout(h->timeout)) -			return -IPSET_ERR_TIMEOUT; -		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -	} +	if (!(with_ports || e.proto == IPPROTO_ICMP)) +		e.port = 0;  	with_ports = with_ports && tb[IPSET_ATTR_PORT_TO]; -	if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { +	if (tb[IPSET_ATTR_CADT_FLAGS]) {  		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);  		if (cadt_flags & IPSET_FLAG_NOMATCH) -			flags |= (cadt_flags << 16); +			flags |= (IPSET_FLAG_NOMATCH << 16);  	}  	if (adt == IPSET_TEST || !(with_ports || tb[IPSET_ATTR_IP_TO])) { -		data.ip = htonl(ip & ip_set_hostmask(data.cidr + 1)); -		ret = adtfn(set, &data, timeout, 
flags); -		return ip_set_eexist(ret, flags) ? 0 : ret; +		e.ip = htonl(ip & ip_set_hostmask(e.cidr + 1)); +		ret = adtfn(set, &e, &ext, &ext, flags); +		return ip_set_enomatch(ret, flags, adt) ? 1 : +		       ip_set_eexist(ret, flags) ? 0 : ret;  	} -	port = port_to = ntohs(data.port); +	port = port_to = ntohs(e.port);  	if (tb[IPSET_ATTR_PORT_TO]) {  		port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);  		if (port_to < port) @@ -298,21 +262,20 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],  			swap(ip, ip_to);  		if (ip + UINT_MAX == ip_to)  			return -IPSET_ERR_HASH_RANGE; -	} else { -		ip_set_mask_from_to(ip, ip_to, data.cidr + 1); -	} +	} else +		ip_set_mask_from_to(ip, ip_to, e.cidr + 1);  	if (retried)  		ip = ntohl(h->next.ip);  	while (!after(ip, ip_to)) { -		data.ip = htonl(ip); +		e.ip = htonl(ip);  		last = ip_set_range_to_cidr(ip, ip_to, &cidr); -		data.cidr = cidr - 1; +		e.cidr = cidr - 1;  		p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)  						       : port;  		for (; p <= port_to; p++) { -			data.port = htons(p); -			ret = adtfn(set, &data, timeout, flags); +			e.port = htons(p); +			ret = adtfn(set, &e, &ext, &ext, flags);  			if (ret && !ip_set_eexist(ret, flags))  				return ret; @@ -324,36 +287,46 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],  	return ret;  } -static bool -hash_netport_same_set(const struct ip_set *a, const struct ip_set *b) -{ -	const struct ip_set_hash *x = a->data; -	const struct ip_set_hash *y = b->data; +/* IPv6 variants */ -	/* Resizing changes htable_bits, so we ignore it */ -	return x->maxelem == y->maxelem && -	       x->timeout == y->timeout; -} +struct hash_netport6_elem { +	union nf_inet_addr ip; +	__be16 port; +	u8 proto; +	u8 cidr:7; +	u8 nomatch:1; +}; -/* The type variant functions: IPv6 */ +struct hash_netport6t_elem { +	union nf_inet_addr ip; +	__be16 port; +	u8 proto; +	u8 cidr:7; +	u8 nomatch:1; +	unsigned long timeout; +}; -struct hash_netport6_elem { +struct hash_netport6c_elem {  	union nf_inet_addr ip;  	__be16 port;  	u8 proto;  	u8 cidr:7;  	u8 nomatch:1; +	struct ip_set_counter counter;  }; -struct hash_netport6_telem { +struct hash_netport6ct_elem {  	union nf_inet_addr ip;  	__be16 port;  	u8 proto;  	u8 cidr:7;  	u8 nomatch:1; +	struct ip_set_counter counter;  	unsigned long timeout;  }; +/* Common functions */ +  static inline bool  hash_netport6_data_equal(const struct hash_netport6_elem *ip1,  			 const struct hash_netport6_elem *ip2, @@ -365,53 +338,22 @@ hash_netport6_data_equal(const struct hash_netport6_elem *ip1,  	       ip1->cidr == ip2->cidr;  } -static inline bool -hash_netport6_data_isnull(const struct hash_netport6_elem *elem) -{ -	return elem->proto == 0; -} - -static inline void -hash_netport6_data_copy(struct hash_netport6_elem *dst, -			const struct hash_netport6_elem *src) -{ -	memcpy(dst, src, sizeof(*dst)); -} - -static inline void -hash_netport6_data_flags(struct hash_netport6_elem *dst, u32 flags) -{ -	dst->nomatch = !!(flags & IPSET_FLAG_NOMATCH); -} - -static inline void -hash_netport6_data_reset_flags(struct hash_netport6_elem *dst, u32 *flags) -{ -	if (dst->nomatch) { -		*flags = IPSET_FLAG_NOMATCH; -		dst->nomatch = 0; -	} -} -  static inline int -hash_netport6_data_match(const struct hash_netport6_elem *elem) +hash_netport6_do_data_match(const struct hash_netport6_elem *elem)  {  	return elem->nomatch ? 
-ENOTEMPTY : 1;  }  static inline void -hash_netport6_data_zero_out(struct hash_netport6_elem *elem) +hash_netport6_data_set_flags(struct hash_netport6_elem *elem, u32 flags)  { -	elem->proto = 0; +	elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);  }  static inline void -ip6_netmask(union nf_inet_addr *ip, u8 prefix) +hash_netport6_data_reset_flags(struct hash_netport6_elem *elem, u8 *flags)  { -	ip->ip6[0] &= ip_set_netmask6(prefix)[0]; -	ip->ip6[1] &= ip_set_netmask6(prefix)[1]; -	ip->ip6[2] &= ip_set_netmask6(prefix)[2]; -	ip->ip6[3] &= ip_set_netmask6(prefix)[3]; +	swap(*flags, elem->nomatch);  }  static inline void @@ -440,76 +382,57 @@ nla_put_failure:  	return 1;  } -static bool -hash_netport6_data_tlist(struct sk_buff *skb, -			 const struct hash_netport6_elem *data) +static inline void +hash_netport6_data_next(struct hash_netport4_elem *next, +			const struct hash_netport6_elem *d)  { -	const struct hash_netport6_telem *e = -		(const struct hash_netport6_telem *)data; -	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; - -	if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &e->ip.in6) || -	    nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || -	    nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr + 1) || -	    nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || -	    nla_put_net32(skb, IPSET_ATTR_TIMEOUT, -			  htonl(ip_set_timeout_get(e->timeout))) || -	    (flags && -	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) -		goto nla_put_failure; -	return 0; - -nla_put_failure: -	return 1; +	next->port = d->port;  } +#undef MTYPE  #undef PF  #undef HOST_MASK +#define MTYPE		hash_netport6  #define PF		6  #define HOST_MASK	128 -#include <linux/netfilter/ipset/ip_set_ahash.h> - -static inline void -hash_netport6_data_next(struct ip_set_hash *h, -			const struct hash_netport6_elem *d) -{ -	h->next.port = d->port; -} +#define IP_SET_EMIT_CREATE +#include "ip_set_hash_gen.h"  static int  hash_netport6_kadt(struct ip_set *set, const struct sk_buff *skb,  		   const struct xt_action_param *par, -		   enum ipset_adt adt, const struct ip_set_adt_opt *opt) +		   enum ipset_adt adt, struct ip_set_adt_opt *opt)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_netport *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_netport6_elem data = { +	struct hash_netport6_elem e = {  		.cidr = h->nets[0].cidr ? 
h->nets[0].cidr - 1 : HOST_MASK - 1,  	}; +	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, h);  	if (adt == IPSET_TEST) -		data.cidr = HOST_MASK - 1; +		e.cidr = HOST_MASK - 1;  	if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, -				 &data.port, &data.proto)) +				 &e.port, &e.proto))  		return -EINVAL; -	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &data.ip.in6); -	ip6_netmask(&data.ip, data.cidr + 1); +	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); +	ip6_netmask(&e.ip, e.cidr + 1); -	return adtfn(set, &data, opt_timeout(opt, h), opt->cmdflags); +	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);  }  static int  hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],  		   enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)  { -	const struct ip_set_hash *h = set->data; +	const struct hash_netport *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt]; -	struct hash_netport6_elem data = { .cidr = HOST_MASK  - 1 }; +	struct hash_netport6_elem e = { .cidr = HOST_MASK  - 1 }; +	struct ip_set_ext ext = IP_SET_INIT_UEXT(h);  	u32 port, port_to; -	u32 timeout = h->timeout;  	bool with_ports = false;  	u8 cidr;  	int ret; @@ -518,7 +441,9 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],  		     !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))  		return -IPSET_ERR_PROTOCOL;  	if (unlikely(tb[IPSET_ATTR_IP_TO]))  		return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; @@ -526,7 +451,8 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],  	if (tb[IPSET_ATTR_LINENO])  		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); -	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &data.ip); +	ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip) || +	      ip_set_get_extensions(set, tb, &ext);  	if (ret)  		return ret; @@ -534,45 +460,40 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],  		cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);  		if (!cidr || cidr > HOST_MASK)  			return -IPSET_ERR_INVALID_CIDR; -		data.cidr = cidr - 1; +		e.cidr = cidr - 1;  	} -	ip6_netmask(&data.ip, data.cidr + 1); +	ip6_netmask(&e.ip, e.cidr + 1);  	if (tb[IPSET_ATTR_PORT]) -		data.port = nla_get_be16(tb[IPSET_ATTR_PORT]); +		e.port = nla_get_be16(tb[IPSET_ATTR_PORT]);  	else  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_PROTO]) { -		data.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); -		with_ports = ip_set_proto_with_ports(data.proto); +		e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); +		with_ports = ip_set_proto_with_ports(e.proto); -		if (data.proto == 0) +		if (e.proto == 0)  			return -IPSET_ERR_INVALID_PROTO;  	} else  		return -IPSET_ERR_MISSING_PROTO; -	if (!(with_ports || data.proto == IPPROTO_ICMPV6)) -		data.port = 0; +	if (!(with_ports || e.proto == IPPROTO_ICMPV6)) +		e.port = 0; -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!with_timeout(h->timeout)) -			return -IPSET_ERR_TIMEOUT; -		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -	} - -	if (tb[IPSET_ATTR_CADT_FLAGS] && adt == IPSET_ADD) { +	if (tb[IPSET_ATTR_CADT_FLAGS]) {  		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);  		if (cadt_flags & IPSET_FLAG_NOMATCH) -			flags |= (cadt_flags << 16); +			flags |= (IPSET_FLAG_NOMATCH << 16);  	}  	if 
(adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { -		ret = adtfn(set, &data, timeout, flags); -		return ip_set_eexist(ret, flags) ? 0 : ret; +		ret = adtfn(set, &e, &ext, &ext, flags); +		return ip_set_enomatch(ret, flags, adt) ? 1 : +		       ip_set_eexist(ret, flags) ? 0 : ret;  	} -	port = ntohs(data.port); +	port = ntohs(e.port);  	port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]);  	if (port > port_to)  		swap(port, port_to); @@ -580,8 +501,8 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],  	if (retried)  		port = ntohs(h->next.port);  	for (; port <= port_to; port++) { -		data.port = htons(port); -		ret = adtfn(set, &data, timeout, flags); +		e.port = htons(port); +		ret = adtfn(set, &e, &ext, &ext, flags);  		if (ret && !ip_set_eexist(ret, flags))  			return ret; @@ -591,80 +512,6 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],  	return ret;  } -/* Create hash:ip type of sets */ - -static int -hash_netport_create(struct ip_set *set, struct nlattr *tb[], u32 flags) -{ -	struct ip_set_hash *h; -	u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; -	u8 hbits; -	size_t hsize; - -	if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) -		return -IPSET_ERR_INVALID_FAMILY; - -	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) -		return -IPSET_ERR_PROTOCOL; - -	if (tb[IPSET_ATTR_HASHSIZE]) { -		hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); -		if (hashsize < IPSET_MIMINAL_HASHSIZE) -			hashsize = IPSET_MIMINAL_HASHSIZE; -	} - -	if (tb[IPSET_ATTR_MAXELEM]) -		maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); - -	h = kzalloc(sizeof(*h) -		    + sizeof(struct ip_set_hash_nets) -		      * (set->family == NFPROTO_IPV4 ? 32 : 128), GFP_KERNEL); -	if (!h) -		return -ENOMEM; - -	h->maxelem = maxelem; -	get_random_bytes(&h->initval, sizeof(h->initval)); -	h->timeout = IPSET_NO_TIMEOUT; - -	hbits = htable_bits(hashsize); -	hsize = htable_size(hbits); -	if (hsize == 0) { -		kfree(h); -		return -ENOMEM; -	} -	h->table = ip_set_alloc(hsize); -	if (!h->table) { -		kfree(h); -		return -ENOMEM; -	} -	h->table->htable_bits = hbits; - -	set->data = h; - -	if (tb[IPSET_ATTR_TIMEOUT]) { -		h->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); - -		set->variant = set->family == NFPROTO_IPV4 -			? &hash_netport4_tvariant : &hash_netport6_tvariant; - -		if (set->family == NFPROTO_IPV4) -			hash_netport4_gc_init(set); -		else -			hash_netport6_gc_init(set); -	} else { -		set->variant = set->family == NFPROTO_IPV4 -			? 
&hash_netport4_variant : &hash_netport6_variant; -	} - -	pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", -		 set->name, jhash_size(h->table->htable_bits), -		 h->table->htable_bits, h->maxelem, set->data, h->table); - -	return 0; -} -  static struct ip_set_type hash_netport_type __read_mostly = {  	.name		= "hash:net,port",  	.protocol	= IPSET_PROTOCOL, @@ -681,6 +528,7 @@ static struct ip_set_type hash_netport_type __read_mostly = {  		[IPSET_ATTR_RESIZE]	= { .type = NLA_U8  },  		[IPSET_ATTR_PROTO]	= { .type = NLA_U8 },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 }, +		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },  	},  	.adt_policy	= {  		[IPSET_ATTR_IP]		= { .type = NLA_NESTED }, @@ -692,6 +540,8 @@ static struct ip_set_type hash_netport_type __read_mostly = {  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },  		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 },  		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 }, +		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 }, +		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },  	},  	.me		= THIS_MODULE,  }; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 09c744aa898..979b8c90e42 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2008-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> +/* Copyright (C) 2008-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -13,30 +13,53 @@  #include <linux/errno.h>  #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h>  #include <linux/netfilter/ipset/ip_set_list.h>  #define REVISION_MIN	0 -#define REVISION_MAX	0 +#define REVISION_MAX	1 /* Counters support added */  MODULE_LICENSE("GPL");  MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");  IP_SET_MODULE_DESC("list:set", REVISION_MIN, REVISION_MAX);  MODULE_ALIAS("ip_set_list:set"); -/* Member elements without and with timeout */ +/* Member elements  */  struct set_elem {  	ip_set_id_t id;  }; -struct set_telem { -	ip_set_id_t id; +struct sett_elem { +	struct { +		ip_set_id_t id; +	} __attribute__ ((aligned)); +	unsigned long timeout; +}; + +struct setc_elem { +	struct { +		ip_set_id_t id; +	} __attribute__ ((aligned)); +	struct ip_set_counter counter; +}; + +struct setct_elem { +	struct { +		ip_set_id_t id; +	} __attribute__ ((aligned)); +	struct ip_set_counter counter;  	unsigned long timeout;  }; +struct set_adt_elem { +	ip_set_id_t id; +	ip_set_id_t refid; +	int before; +}; +  /* Type structure */  struct list_set {  	size_t dsize;		/* element size */ +	size_t offset[IPSET_OFFSET_MAX]; /* Offsets to extensions */  	u32 size;		/* size of set list array */  	u32 timeout;		/* timeout value */  	struct timer_list gc;	/* garbage collection */ @@ -49,179 +72,311 @@ list_set_elem(const struct list_set *map, u32 id)  	return (struct set_elem *)((void *)map->members + id * map->dsize);  } -static inline struct set_telem * -list_set_telem(const struct list_set *map, u32 id) -{ -	return (struct set_telem *)((void *)map->members + id * map->dsize); -} +#define ext_timeout(e, m)	\ +(unsigned long *)((void *)(e) + (m)->offset[IPSET_OFFSET_TIMEOUT]) +#define ext_counter(e, m)	\ +(struct ip_set_counter *)((void *)(e) + (m)->offset[IPSET_OFFSET_COUNTER]) -static inline bool -list_set_timeout(const struct list_set *map, u32 id) +static int +list_set_ktest(struct ip_set 
*set, const struct sk_buff *skb, +	       const struct xt_action_param *par, +	       struct ip_set_adt_opt *opt, const struct ip_set_ext *ext)  { -	const struct set_telem *elem = list_set_telem(map, id); +	struct list_set *map = set->data; +	struct set_elem *e; +	u32 i, cmdflags = opt->cmdflags; +	int ret; -	return ip_set_timeout_test(elem->timeout); +	/* Don't lookup sub-counters at all */ +	opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS; +	if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE) +		opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE; +	for (i = 0; i < map->size; i++) { +		e = list_set_elem(map, i); +		if (e->id == IPSET_INVALID_ID) +			return 0; +		if (SET_WITH_TIMEOUT(set) && +		    ip_set_timeout_expired(ext_timeout(e, map))) +			continue; +		ret = ip_set_test(e->id, skb, par, opt); +		if (ret > 0) { +			if (SET_WITH_COUNTER(set)) +				ip_set_update_counter(ext_counter(e, map), +						      ext, &opt->ext, +						      cmdflags); +			return ret; +		} +	} +	return 0;  } -static inline bool -list_set_expired(const struct list_set *map, u32 id) +static int +list_set_kadd(struct ip_set *set, const struct sk_buff *skb, +	      const struct xt_action_param *par, +	      struct ip_set_adt_opt *opt, const struct ip_set_ext *ext)  { -	const struct set_telem *elem = list_set_telem(map, id); +	struct list_set *map = set->data; +	struct set_elem *e; +	u32 i; +	int ret; -	return ip_set_timeout_expired(elem->timeout); +	for (i = 0; i < map->size; i++) { +		e = list_set_elem(map, i); +		if (e->id == IPSET_INVALID_ID) +			return 0; +		if (SET_WITH_TIMEOUT(set) && +		    ip_set_timeout_expired(ext_timeout(e, map))) +			continue; +		ret = ip_set_add(e->id, skb, par, opt); +		if (ret == 0) +			return ret; +	} +	return 0;  } -/* Set list without and with timeout */ -  static int -list_set_kadt(struct ip_set *set, const struct sk_buff *skb, +list_set_kdel(struct ip_set *set, const struct sk_buff *skb,  	      const struct xt_action_param *par, -	      enum ipset_adt adt, const struct ip_set_adt_opt *opt) +	      struct ip_set_adt_opt *opt, const struct ip_set_ext *ext)  {  	struct list_set *map = set->data; -	struct set_elem *elem; +	struct set_elem *e;  	u32 i;  	int ret;  	for (i = 0; i < map->size; i++) { -		elem = list_set_elem(map, i); -		if (elem->id == IPSET_INVALID_ID) +		e = list_set_elem(map, i); +		if (e->id == IPSET_INVALID_ID)  			return 0; -		if (with_timeout(map->timeout) && list_set_expired(map, i)) +		if (SET_WITH_TIMEOUT(set) && +		    ip_set_timeout_expired(ext_timeout(e, map)))  			continue; -		switch (adt) { -		case IPSET_TEST: -			ret = ip_set_test(elem->id, skb, par, opt); -			if (ret > 0) -				return ret; -			break; -		case IPSET_ADD: -			ret = ip_set_add(elem->id, skb, par, opt); -			if (ret == 0) -				return ret; -			break; -		case IPSET_DEL: -			ret = ip_set_del(elem->id, skb, par, opt); -			if (ret == 0) -				return ret; -			break; -		default: -			break; -		} +		ret = ip_set_del(e->id, skb, par, opt); +		if (ret == 0) +			return ret; +	} +	return 0; +} + +static int +list_set_kadt(struct ip_set *set, const struct sk_buff *skb, +	      const struct xt_action_param *par, +	      enum ipset_adt adt, struct ip_set_adt_opt *opt) +{ +	struct list_set *map = set->data; +	struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, map); + +	switch (adt) { +	case IPSET_TEST: +		return list_set_ktest(set, skb, par, opt, &ext); +	case IPSET_ADD: +		return list_set_kadd(set, skb, par, opt, &ext); +	case IPSET_DEL: +		return list_set_kdel(set, skb, par, opt, &ext); +	default: +		break;  	}  
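/*
 * After this rework, list_set_kadt() is only a dispatcher: the old
 * in-line switch over TEST/ADD/DEL is split into list_set_ktest(),
 * list_set_kadd() and list_set_kdel() above.  Each helper walks the
 * member array, stops at the first IPSET_INVALID_ID slot, skips
 * entries whose timeout extension has expired, and on a successful
 * TEST match list_set_ktest() additionally folds the packet into the
 * element's counter extension via ip_set_update_counter().
 */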
	return -EINVAL;  }  static bool -id_eq(const struct list_set *map, u32 i, ip_set_id_t id) +id_eq(const struct ip_set *set, u32 i, ip_set_id_t id)  { -	const struct set_elem *elem; +	const struct list_set *map = set->data; +	const struct set_elem *e; -	if (i < map->size) { -		elem = list_set_elem(map, i); -		return elem->id == id; -	} +	if (i >= map->size) +		return 0; -	return 0; +	e = list_set_elem(map, i); +	return !!(e->id == id && +		 !(SET_WITH_TIMEOUT(set) && +		   ip_set_timeout_expired(ext_timeout(e, map))));  } -static bool -id_eq_timeout(const struct list_set *map, u32 i, ip_set_id_t id) +static int +list_set_add(struct ip_set *set, u32 i, struct set_adt_elem *d, +	     const struct ip_set_ext *ext)  { -	const struct set_elem *elem; +	struct list_set *map = set->data; +	struct set_elem *e = list_set_elem(map, i); -	if (i < map->size) { -		elem = list_set_elem(map, i); -		return !!(elem->id == id && -			  !(with_timeout(map->timeout) && -			    list_set_expired(map, i))); +	if (e->id != IPSET_INVALID_ID) { +		if (i == map->size - 1) +			/* Last element replaced: e.g. add new,before,last */ +			ip_set_put_byindex(e->id); +		else { +			struct set_elem *x = list_set_elem(map, map->size - 1); + +			/* Last element pushed off */ +			if (x->id != IPSET_INVALID_ID) +				ip_set_put_byindex(x->id); +			memmove(list_set_elem(map, i + 1), e, +				map->dsize * (map->size - (i + 1))); +		}  	} +	e->id = d->id; +	if (SET_WITH_TIMEOUT(set)) +		ip_set_timeout_set(ext_timeout(e, map), ext->timeout); +	if (SET_WITH_COUNTER(set)) +		ip_set_init_counter(ext_counter(e, map), ext);  	return 0;  } -static void -list_elem_add(struct list_set *map, u32 i, ip_set_id_t id) +static int +list_set_del(struct ip_set *set, u32 i)  { -	struct set_elem *e; +	struct list_set *map = set->data; +	struct set_elem *e = list_set_elem(map, i); -	for (; i < map->size; i++) { -		e = list_set_elem(map, i); -		swap(e->id, id); -		if (e->id == IPSET_INVALID_ID) -			break; -	} +	ip_set_put_byindex(e->id); + +	if (i < map->size - 1) +		memmove(e, list_set_elem(map, i + 1), +			map->dsize * (map->size - (i + 1))); + +	/* Last element */ +	e = list_set_elem(map, map->size - 1); +	e->id = IPSET_INVALID_ID; +	return 0;  }  static void -list_elem_tadd(struct list_set *map, u32 i, ip_set_id_t id, -	       unsigned long timeout) +set_cleanup_entries(struct ip_set *set)  { -	struct set_telem *e; +	struct list_set *map = set->data; +	struct set_elem *e; +	u32 i; -	for (; i < map->size; i++) { -		e = list_set_telem(map, i); -		swap(e->id, id); -		swap(e->timeout, timeout); -		if (e->id == IPSET_INVALID_ID) -			break; +	for (i = 0; i < map->size; i++) { +		e = list_set_elem(map, i); +		if (e->id != IPSET_INVALID_ID && +		    ip_set_timeout_expired(ext_timeout(e, map))) +			list_set_del(set, i);  	}  }  static int -list_set_add(struct list_set *map, u32 i, ip_set_id_t id, -	     unsigned long timeout) +list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, +	       struct ip_set_ext *mext, u32 flags)  { -	const struct set_elem *e = list_set_elem(map, i); +	struct list_set *map = set->data; +	struct set_adt_elem *d = value; +	struct set_elem *e; +	u32 i; +	int ret; -	if (e->id != IPSET_INVALID_ID) { -		const struct set_elem *x = list_set_elem(map, map->size - 1); +	for (i = 0; i < map->size; i++) { +		e = list_set_elem(map, i); +		if (e->id == IPSET_INVALID_ID) +			return 0; +		else if (SET_WITH_TIMEOUT(set) && +			 ip_set_timeout_expired(ext_timeout(e, map))) +			continue; +		else if (e->id != d->id) +			continue; -	
	/* Last element replaced or pushed off */ -		if (x->id != IPSET_INVALID_ID) -			ip_set_put_byindex(x->id); +		if (d->before == 0) +			return 1; +		else if (d->before > 0) +			ret = id_eq(set, i + 1, d->refid); +		else +			ret = i > 0 && id_eq(set, i - 1, d->refid); +		return ret;  	} -	if (with_timeout(map->timeout)) -		list_elem_tadd(map, i, id, ip_set_timeout_set(timeout)); -	else -		list_elem_add(map, i, id); -  	return 0;  } +  static int -list_set_del(struct list_set *map, u32 i) +list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, +	      struct ip_set_ext *mext, u32 flags)  { -	struct set_elem *a = list_set_elem(map, i), *b; +	struct list_set *map = set->data; +	struct set_adt_elem *d = value; +	struct set_elem *e; +	bool flag_exist = flags & IPSET_FLAG_EXIST; +	u32 i, ret = 0; -	ip_set_put_byindex(a->id); +	/* Check already added element */ +	for (i = 0; i < map->size; i++) { +		e = list_set_elem(map, i); +		if (e->id == IPSET_INVALID_ID) +			goto insert; +		else if (SET_WITH_TIMEOUT(set) && +			 ip_set_timeout_expired(ext_timeout(e, map))) +			continue; +		else if (e->id != d->id) +			continue; -	for (; i < map->size - 1; i++) { -		b = list_set_elem(map, i + 1); -		a->id = b->id; -		if (with_timeout(map->timeout)) -			((struct set_telem *)a)->timeout = -				((struct set_telem *)b)->timeout; -		a = b; -		if (a->id == IPSET_INVALID_ID) -			break; +		if ((d->before > 1 && !id_eq(set, i + 1, d->refid)) || +		    (d->before < 0 && +		     (i == 0 || !id_eq(set, i - 1, d->refid)))) +			/* Before/after doesn't match */ +			return -IPSET_ERR_REF_EXIST; +		if (!flag_exist) +			/* Can't re-add */ +			return -IPSET_ERR_EXIST; +		/* Update extensions */ +		if (SET_WITH_TIMEOUT(set)) +			ip_set_timeout_set(ext_timeout(e, map), ext->timeout); +		if (SET_WITH_COUNTER(set)) +			ip_set_init_counter(ext_counter(e, map), ext); +		/* Set is already added to the list */ +		ip_set_put_byindex(d->id); +		return 0;  	} -	/* Last element */ -	a->id = IPSET_INVALID_ID; -	return 0; +insert: +	ret = -IPSET_ERR_LIST_FULL; +	for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) { +		e = list_set_elem(map, i); +		if (e->id == IPSET_INVALID_ID) +			ret = d->before != 0 ? -IPSET_ERR_REF_EXIST +				: list_set_add(set, i, d, ext); +		else if (e->id != d->refid) +			continue; +		else if (d->before > 0) +			ret = list_set_add(set, i, d, ext); +		else if (i + 1 < map->size) +			ret = list_set_add(set, i + 1, d, ext); +	} + +	return ret;  } -static void -cleanup_entries(struct list_set *map) +static int +list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext, +	      struct ip_set_ext *mext, u32 flags)  { -	struct set_telem *e; +	struct list_set *map = set->data; +	struct set_adt_elem *d = value; +	struct set_elem *e;  	u32 i;  	for (i = 0; i < map->size; i++) { -		e = list_set_telem(map, i); -		if (e->id != IPSET_INVALID_ID && list_set_expired(map, i)) -			list_set_del(map, i); +		e = list_set_elem(map, i); +		if (e->id == IPSET_INVALID_ID) +			return d->before != 0 ? 
-IPSET_ERR_REF_EXIST +					      : -IPSET_ERR_EXIST; +		else if (SET_WITH_TIMEOUT(set) && +			 ip_set_timeout_expired(ext_timeout(e, map))) +			continue; +		else if (e->id != d->id) +			continue; + +		if (d->before == 0) +			return list_set_del(set, i); +		else if (d->before > 0) { +			if (!id_eq(set, i + 1, d->refid)) +				return -IPSET_ERR_REF_EXIST; +			return list_set_del(set, i); +		} else if (i == 0 || !id_eq(set, i - 1, d->refid)) +			return -IPSET_ERR_REF_EXIST; +		else +			return list_set_del(set, i);  	} +	return -IPSET_ERR_EXIST;  }  static int @@ -229,26 +384,27 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],  	      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)  {  	struct list_set *map = set->data; -	bool with_timeout = with_timeout(map->timeout); -	bool flag_exist = flags & IPSET_FLAG_EXIST; -	int before = 0; -	u32 timeout = map->timeout; -	ip_set_id_t id, refid = IPSET_INVALID_ID; -	const struct set_elem *elem; +	ipset_adtfn adtfn = set->variant->adt[adt]; +	struct set_adt_elem e = { .refid = IPSET_INVALID_ID }; +	struct ip_set_ext ext = IP_SET_INIT_UEXT(map);  	struct ip_set *s; -	u32 i;  	int ret = 0;  	if (unlikely(!tb[IPSET_ATTR_NAME] ||  		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES)))  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_LINENO])  		*lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); -	id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s); -	if (id == IPSET_INVALID_ID) +	ret = ip_set_get_extensions(set, tb, &ext); +	if (ret) +		return ret; +	e.id = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAME]), &s); +	if (e.id == IPSET_INVALID_ID)  		return -IPSET_ERR_NAME;  	/* "Loop detection" */  	if (s->type->features & IPSET_TYPE_NAME) { @@ -258,115 +414,34 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[],  	if (tb[IPSET_ATTR_CADT_FLAGS]) {  		u32 f = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); -		before = f & IPSET_FLAG_BEFORE; +		e.before = f & IPSET_FLAG_BEFORE;  	} -	if (before && !tb[IPSET_ATTR_NAMEREF]) { +	if (e.before && !tb[IPSET_ATTR_NAMEREF]) {  		ret = -IPSET_ERR_BEFORE;  		goto finish;  	}  	if (tb[IPSET_ATTR_NAMEREF]) { -		refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]), -					  &s); -		if (refid == IPSET_INVALID_ID) { +		e.refid = ip_set_get_byname(nla_data(tb[IPSET_ATTR_NAMEREF]), +					    &s); +		if (e.refid == IPSET_INVALID_ID) {  			ret = -IPSET_ERR_NAMEREF;  			goto finish;  		} -		if (!before) -			before = -1; +		if (!e.before) +			e.before = -1;  	} -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!with_timeout) { -			ret = -IPSET_ERR_TIMEOUT; -			goto finish; -		} -		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); -	} -	if (with_timeout && adt != IPSET_TEST) -		cleanup_entries(map); +	if (adt != IPSET_TEST && SET_WITH_TIMEOUT(set)) +		set_cleanup_entries(set); -	switch (adt) { -	case IPSET_TEST: -		for (i = 0; i < map->size && !ret; i++) { -			elem = list_set_elem(map, i); -			if (elem->id == IPSET_INVALID_ID || -			    (before != 0 && i + 1 >= map->size)) -				break; -			else if (with_timeout && list_set_expired(map, i)) -				continue; -			else if (before > 0 && elem->id == id) -				ret = id_eq_timeout(map, i + 1, refid); -			else if (before < 0 && elem->id == refid) -				ret = id_eq_timeout(map, i + 1, id); -			else if (before == 0 && elem->id == id) -				
ret = 1; -		} -		break; -	case IPSET_ADD: -		for (i = 0; i < map->size; i++) { -			elem = list_set_elem(map, i); -			if (elem->id != id) -				continue; -			if (!(with_timeout && flag_exist)) { -				ret = -IPSET_ERR_EXIST; -				goto finish; -			} else { -				struct set_telem *e = list_set_telem(map, i); - -				if ((before > 1 && -				     !id_eq(map, i + 1, refid)) || -				    (before < 0 && -				     (i == 0 || !id_eq(map, i - 1, refid)))) { -					ret = -IPSET_ERR_EXIST; -					goto finish; -				} -				e->timeout = ip_set_timeout_set(timeout); -				ip_set_put_byindex(id); -				ret = 0; -				goto finish; -			} -		} -		ret = -IPSET_ERR_LIST_FULL; -		for (i = 0; i < map->size && ret == -IPSET_ERR_LIST_FULL; i++) { -			elem = list_set_elem(map, i); -			if (elem->id == IPSET_INVALID_ID) -				ret = before != 0 ? -IPSET_ERR_REF_EXIST -					: list_set_add(map, i, id, timeout); -			else if (elem->id != refid) -				continue; -			else if (before > 0) -				ret = list_set_add(map, i, id, timeout); -			else if (i + 1 < map->size) -				ret = list_set_add(map, i + 1, id, timeout); -		} -		break; -	case IPSET_DEL: -		ret = -IPSET_ERR_EXIST; -		for (i = 0; i < map->size && ret == -IPSET_ERR_EXIST; i++) { -			elem = list_set_elem(map, i); -			if (elem->id == IPSET_INVALID_ID) { -				ret = before != 0 ? -IPSET_ERR_REF_EXIST -						  : -IPSET_ERR_EXIST; -				break; -			} else if (elem->id == id && -				   (before == 0 || -				    (before > 0 && id_eq(map, i + 1, refid)))) -				ret = list_set_del(map, i); -			else if (elem->id == refid && -				 before < 0 && id_eq(map, i + 1, id)) -				ret = list_set_del(map, i + 1); -		} -		break; -	default: -		break; -	} +	ret = adtfn(set, &e, &ext, &ext, flags);  finish: -	if (refid != IPSET_INVALID_ID) -		ip_set_put_byindex(refid); +	if (e.refid != IPSET_INVALID_ID) +		ip_set_put_byindex(e.refid);  	if (adt != IPSET_ADD || ret) -		ip_set_put_byindex(id); +		ip_set_put_byindex(e.id);  	return ip_set_eexist(ret, flags) ? 
0 : ret;  } @@ -375,14 +450,14 @@ static void  list_set_flush(struct ip_set *set)  {  	struct list_set *map = set->data; -	struct set_elem *elem; +	struct set_elem *e;  	u32 i;  	for (i = 0; i < map->size; i++) { -		elem = list_set_elem(map, i); -		if (elem->id != IPSET_INVALID_ID) { -			ip_set_put_byindex(elem->id); -			elem->id = IPSET_INVALID_ID; +		e = list_set_elem(map, i); +		if (e->id != IPSET_INVALID_ID) { +			ip_set_put_byindex(e->id); +			e->id = IPSET_INVALID_ID;  		}  	}  } @@ -392,7 +467,7 @@ list_set_destroy(struct ip_set *set)  {  	struct list_set *map = set->data; -	if (with_timeout(map->timeout)) +	if (SET_WITH_TIMEOUT(set))  		del_timer_sync(&map->gc);  	list_set_flush(set);  	kfree(map); @@ -410,8 +485,11 @@ list_set_head(struct ip_set *set, struct sk_buff *skb)  	if (!nested)  		goto nla_put_failure;  	if (nla_put_net32(skb, IPSET_ATTR_SIZE, htonl(map->size)) || -	    (with_timeout(map->timeout) && +	    (SET_WITH_TIMEOUT(set) &&  	     nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(map->timeout))) || +	    (SET_WITH_COUNTER(set) && +	     nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, +			   htonl(IPSET_FLAG_WITH_COUNTERS))) ||  	    nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||  	    nla_put_net32(skb, IPSET_ATTR_MEMSIZE,  			  htonl(sizeof(*map) + map->size * map->dsize))) @@ -440,7 +518,8 @@ list_set_list(const struct ip_set *set,  		e = list_set_elem(map, i);  		if (e->id == IPSET_INVALID_ID)  			goto finish; -		if (with_timeout(map->timeout) && list_set_expired(map, i)) +		if (SET_WITH_TIMEOUT(set) && +		    ip_set_timeout_expired(ext_timeout(e, map)))  			continue;  		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);  		if (!nested) { @@ -453,13 +532,14 @@ list_set_list(const struct ip_set *set,  		if (nla_put_string(skb, IPSET_ATTR_NAME,  				   ip_set_name_byindex(e->id)))  			goto nla_put_failure; -		if (with_timeout(map->timeout)) { -			const struct set_telem *te = -				(const struct set_telem *) e; -			__be32 to = htonl(ip_set_timeout_get(te->timeout)); -			if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, to)) -				goto nla_put_failure; -		} +		if (SET_WITH_TIMEOUT(set) && +		    nla_put_net32(skb, IPSET_ATTR_TIMEOUT, +				  htonl(ip_set_timeout_get( +						ext_timeout(e, map))))) +			goto nla_put_failure; +		if (SET_WITH_COUNTER(set) && +		    ip_set_put_counter(skb, ext_counter(e, map))) +			goto nla_put_failure;  		ipset_nest_end(skb, nested);  	}  finish: @@ -485,12 +565,18 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b)  	const struct list_set *y = b->data;  	return x->size == y->size && -	       x->timeout == y->timeout; +	       x->timeout == y->timeout && +	       a->extensions == b->extensions;  } -static const struct ip_set_type_variant list_set = { +static const struct ip_set_type_variant set_variant = {  	.kadt	= list_set_kadt,  	.uadt	= list_set_uadt, +	.adt	= { +		[IPSET_ADD] = list_set_uadd, +		[IPSET_DEL] = list_set_udel, +		[IPSET_TEST] = list_set_utest, +	},  	.destroy = list_set_destroy,  	.flush	= list_set_flush,  	.head	= list_set_head, @@ -505,7 +591,7 @@ list_set_gc(unsigned long ul_set)  	struct list_set *map = set->data;  	write_lock_bh(&set->lock); -	cleanup_entries(map); +	set_cleanup_entries(set);  	write_unlock_bh(&set->lock);  	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ; @@ -513,20 +599,20 @@ list_set_gc(unsigned long ul_set)  }  static void -list_set_gc_init(struct ip_set *set) +list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))  {  	struct list_set *map = 
set->data;  	init_timer(&map->gc);  	map->gc.data = (unsigned long) set; -	map->gc.function = list_set_gc; +	map->gc.function = gc;  	map->gc.expires = jiffies + IPSET_GC_PERIOD(map->timeout) * HZ;  	add_timer(&map->gc);  }  /* Create list:set type of sets */ -static bool +static struct list_set *  init_list_set(struct ip_set *set, u32 size, size_t dsize,  	      unsigned long timeout)  { @@ -536,7 +622,7 @@ init_list_set(struct ip_set *set, u32 size, size_t dsize,  	map = kzalloc(sizeof(*map) + size * dsize, GFP_KERNEL);  	if (!map) -		return false; +		return NULL;  	map->size = size;  	map->dsize = dsize; @@ -548,16 +634,19 @@ init_list_set(struct ip_set *set, u32 size, size_t dsize,  		e->id = IPSET_INVALID_ID;  	} -	return true; +	return map;  }  static int  list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)  { -	u32 size = IP_SET_LIST_DEFAULT_SIZE; +	struct list_set *map; +	u32 size = IP_SET_LIST_DEFAULT_SIZE, cadt_flags = 0; +	unsigned long timeout = 0;  	if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_SIZE) || -		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT))) +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || +		     !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS)))  		return -IPSET_ERR_PROTOCOL;  	if (tb[IPSET_ATTR_SIZE]) @@ -565,18 +654,46 @@ list_set_create(struct ip_set *set, struct nlattr *tb[], u32 flags)  	if (size < IP_SET_LIST_MIN_SIZE)  		size = IP_SET_LIST_MIN_SIZE; -	if (tb[IPSET_ATTR_TIMEOUT]) { -		if (!init_list_set(set, size, sizeof(struct set_telem), -				   ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]))) +	if (tb[IPSET_ATTR_CADT_FLAGS]) +		cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); +	if (tb[IPSET_ATTR_TIMEOUT]) +		timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); +	set->variant = &set_variant; +	if (cadt_flags & IPSET_FLAG_WITH_COUNTERS) { +		set->extensions |= IPSET_EXT_COUNTER; +		if (tb[IPSET_ATTR_TIMEOUT]) { +			map = init_list_set(set, size, +					sizeof(struct setct_elem), timeout); +			if (!map) +				return -ENOMEM; +			set->extensions |= IPSET_EXT_TIMEOUT; +			map->offset[IPSET_OFFSET_TIMEOUT] = +				offsetof(struct setct_elem, timeout); +			map->offset[IPSET_OFFSET_COUNTER] = +				offsetof(struct setct_elem, counter); +			list_set_gc_init(set, list_set_gc); +		} else { +			map = init_list_set(set, size, +					    sizeof(struct setc_elem), 0); +			if (!map) +				return -ENOMEM; +			map->offset[IPSET_OFFSET_COUNTER] = +				offsetof(struct setc_elem, counter); +		} +	} else if (tb[IPSET_ATTR_TIMEOUT]) { +		map = init_list_set(set, size, +				    sizeof(struct sett_elem), timeout); +		if (!map)  			return -ENOMEM; - -		list_set_gc_init(set); +		set->extensions |= IPSET_EXT_TIMEOUT; +		map->offset[IPSET_OFFSET_TIMEOUT] = +			offsetof(struct sett_elem, timeout); +		list_set_gc_init(set, list_set_gc);  	} else { -		if (!init_list_set(set, size, sizeof(struct set_elem), -				   IPSET_NO_TIMEOUT)) +		map = init_list_set(set, size, sizeof(struct set_elem), 0); +		if (!map)  			return -ENOMEM;  	} -	set->variant = &list_set;  	return 0;  } @@ -592,6 +709,7 @@ static struct ip_set_type list_set_type __read_mostly = {  	.create_policy	= {  		[IPSET_ATTR_SIZE]	= { .type = NLA_U32 },  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 }, +		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 },  	},  	.adt_policy	= {  		[IPSET_ATTR_NAME]	= { .type = NLA_STRING, @@ -601,6 +719,8 @@ static struct ip_set_type list_set_type __read_mostly = {  		[IPSET_ATTR_TIMEOUT]	= { .type = NLA_U32 },  		[IPSET_ATTR_LINENO]	= { .type = NLA_U32 }, 
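
The create path above sizes each element for exactly the extensions requested (setct_elem for counter+timeout, setc_elem for counter only, sett_elem for timeout only) and records where each extension lives with offsetof(), so generic code such as ext_timeout(e, map) can reach it without knowing the concrete struct. A self-contained sketch of the technique, with placeholder names:

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	struct elem_ct {		/* element carrying both extensions */
		int id;
		uint64_t bytes;
		unsigned long timeout;
	};

	enum { OFF_TIMEOUT, OFF_COUNTER, OFF_MAX };

	/* reach an optional extension through a recorded offset */
	static unsigned long *ext_timeout(void *e, const size_t off[OFF_MAX])
	{
		return (unsigned long *)((char *)e + off[OFF_TIMEOUT]);
	}

	int main(void)
	{
		size_t off[OFF_MAX] = {
			[OFF_TIMEOUT] = offsetof(struct elem_ct, timeout),
			[OFF_COUNTER] = offsetof(struct elem_ct, bytes),
		};
		struct elem_ct e = { .id = 1 };

		*ext_timeout(&e, off) = 30;
		printf("id %d, timeout %lu\n", e.id, e.timeout);
		return 0;
	}
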
 		[IPSET_ATTR_CADT_FLAGS]	= { .type = NLA_U32 }, +		[IPSET_ATTR_BYTES]	= { .type = NLA_U64 }, +		[IPSET_ATTR_PACKETS]	= { .type = NLA_U64 },  	},  	.me		= THIS_MODULE,  }; diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 0b779d7df88..dfd7b65b3d2 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -58,6 +58,18 @@ static inline void ip_vs_app_put(struct ip_vs_app *app)  	module_put(app->module);  } +static void ip_vs_app_inc_destroy(struct ip_vs_app *inc) +{ +	kfree(inc->timeout_table); +	kfree(inc); +} + +static void ip_vs_app_inc_rcu_free(struct rcu_head *head) +{ +	struct ip_vs_app *inc = container_of(head, struct ip_vs_app, rcu_head); + +	ip_vs_app_inc_destroy(inc); +}  /*   *	Allocate/initialize app incarnation and register it in proto apps. @@ -106,8 +118,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,  	return 0;    out: -	kfree(inc->timeout_table); -	kfree(inc); +	ip_vs_app_inc_destroy(inc);  	return ret;  } @@ -131,8 +142,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)  	list_del(&inc->a_list); -	kfree(inc->timeout_table); -	kfree(inc); +	call_rcu(&inc->rcu_head, ip_vs_app_inc_rcu_free);  } @@ -144,9 +154,9 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc)  {  	int result; -	atomic_inc(&inc->usecnt); -	if (unlikely((result = ip_vs_app_get(inc->app)) != 1)) -		atomic_dec(&inc->usecnt); +	result = ip_vs_app_get(inc->app); +	if (result) +		atomic_inc(&inc->usecnt);  	return result;  } @@ -156,8 +166,8 @@ int ip_vs_app_inc_get(struct ip_vs_app *inc)   */  void ip_vs_app_inc_put(struct ip_vs_app *inc)  { -	ip_vs_app_put(inc->app);  	atomic_dec(&inc->usecnt); +	ip_vs_app_put(inc->app);  } @@ -218,6 +228,7 @@ out_unlock:  /*   *	ip_vs_app unregistration routine   *	We are sure there are no app incarnations attached to services + *	Caller should use synchronize_rcu() or rcu_barrier()   */  void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)  { @@ -341,14 +352,14 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,  				 unsigned int flag, __u32 seq, int diff)  {  	/* spinlock is to keep updating cp->flags atomic */ -	spin_lock(&cp->lock); +	spin_lock_bh(&cp->lock);  	if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {  		vseq->previous_delta = vseq->delta;  		vseq->delta += diff;  		vseq->init_seq = seq;  		cp->flags |= flag;  	} -	spin_unlock(&cp->lock); +	spin_unlock_bh(&cp->lock);  }  static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 704e514e02a..a083bda322b 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -79,51 +79,21 @@ static unsigned int ip_vs_conn_rnd __read_mostly;  struct ip_vs_aligned_lock  { -	rwlock_t	l; +	spinlock_t	l;  } __attribute__((__aligned__(SMP_CACHE_BYTES)));  /* lock array for conn table */  static struct ip_vs_aligned_lock  __ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned; -static inline void ct_read_lock(unsigned int key) -{ -	read_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_read_unlock(unsigned int key) -{ -	read_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_write_lock(unsigned int key) -{ -	write_lock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_write_unlock(unsigned int key) -{ -	
write_unlock(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_read_lock_bh(unsigned int key) -{ -	read_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} - -static inline void ct_read_unlock_bh(unsigned int key) -{ -	read_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); -} -  static inline void ct_write_lock_bh(unsigned int key)  { -	write_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); +	spin_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);  }  static inline void ct_write_unlock_bh(unsigned int key)  { -	write_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); +	spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l);  } @@ -197,13 +167,13 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)  	/* Hash by protocol, client address and port */  	hash = ip_vs_conn_hashkey_conn(cp); -	ct_write_lock(hash); +	ct_write_lock_bh(hash);  	spin_lock(&cp->lock);  	if (!(cp->flags & IP_VS_CONN_F_HASHED)) { -		hlist_add_head(&cp->c_list, &ip_vs_conn_tab[hash]);  		cp->flags |= IP_VS_CONN_F_HASHED;  		atomic_inc(&cp->refcnt); +		hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]);  		ret = 1;  	} else {  		pr_err("%s(): request for already hashed, called from %pF\n", @@ -212,7 +182,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)  	}  	spin_unlock(&cp->lock); -	ct_write_unlock(hash); +	ct_write_unlock_bh(hash);  	return ret;  } @@ -220,7 +190,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)  /*   *	UNhashes ip_vs_conn from ip_vs_conn_tab. - *	returns bool success. + *	returns bool success. Caller should hold conn reference.   */  static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)  { @@ -230,11 +200,11 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)  	/* unhash it and decrease its reference counter */  	hash = ip_vs_conn_hashkey_conn(cp); -	ct_write_lock(hash); +	ct_write_lock_bh(hash);  	spin_lock(&cp->lock);  	if (cp->flags & IP_VS_CONN_F_HASHED) { -		hlist_del(&cp->c_list); +		hlist_del_rcu(&cp->c_list);  		cp->flags &= ~IP_VS_CONN_F_HASHED;  		atomic_dec(&cp->refcnt);  		ret = 1; @@ -242,7 +212,37 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)  		ret = 0;  	spin_unlock(&cp->lock); -	ct_write_unlock(hash); +	ct_write_unlock_bh(hash); + +	return ret; +} + +/* Try to unlink ip_vs_conn from ip_vs_conn_tab. + * returns bool success. + */ +static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) +{ +	unsigned int hash; +	bool ret; + +	hash = ip_vs_conn_hashkey_conn(cp); + +	ct_write_lock_bh(hash); +	spin_lock(&cp->lock); + +	if (cp->flags & IP_VS_CONN_F_HASHED) { +		ret = false; +		/* Decrease refcnt and unlink conn only if we are last user */ +		if (atomic_cmpxchg(&cp->refcnt, 1, 0) == 1) { +			hlist_del_rcu(&cp->c_list); +			cp->flags &= ~IP_VS_CONN_F_HASHED; +			ret = true; +		} +	} else +		ret = atomic_read(&cp->refcnt) ? 
false : true; + +	spin_unlock(&cp->lock); +	ct_write_unlock_bh(hash);  	return ret;  } @@ -262,24 +262,25 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)  	hash = ip_vs_conn_hashkey_param(p, false); -	ct_read_lock(hash); +	rcu_read_lock(); -	hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { -		if (cp->af == p->af && -		    p->cport == cp->cport && p->vport == cp->vport && +	hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { +		if (p->cport == cp->cport && p->vport == cp->vport && +		    cp->af == p->af &&  		    ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&  		    ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&  		    ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&  		    p->protocol == cp->protocol &&  		    ip_vs_conn_net_eq(cp, p->net)) { +			if (!__ip_vs_conn_get(cp)) +				continue;  			/* HIT */ -			atomic_inc(&cp->refcnt); -			ct_read_unlock(hash); +			rcu_read_unlock();  			return cp;  		}  	} -	ct_read_unlock(hash); +	rcu_read_unlock();  	return NULL;  } @@ -346,14 +347,16 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)  	hash = ip_vs_conn_hashkey_param(p, false); -	ct_read_lock(hash); +	rcu_read_lock(); -	hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { -		if (!ip_vs_conn_net_eq(cp, p->net)) -			continue; -		if (p->pe_data && p->pe->ct_match) { -			if (p->pe == cp->pe && p->pe->ct_match(p, cp)) -				goto out; +	hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { +		if (unlikely(p->pe_data && p->pe->ct_match)) { +			if (!ip_vs_conn_net_eq(cp, p->net)) +				continue; +			if (p->pe == cp->pe && p->pe->ct_match(p, cp)) { +				if (__ip_vs_conn_get(cp)) +					goto out; +			}  			continue;  		} @@ -363,17 +366,18 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)  		     * p->vaddr is a fwmark */  		    ip_vs_addr_equal(p->protocol == IPPROTO_IP ? 
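
Two halves of one protocol meet in the hunks above: lookups walk the bucket under rcu_read_lock() and take a reference only if the entry is still live, while ip_vs_conn_unlink() retires an entry only when atomic_cmpxchg(&cp->refcnt, 1, 0) proves the caller was the last user. A kernel-style sketch of the reader side, assuming (as the skip-on-failure pattern implies) that __ip_vs_conn_get() wraps atomic_inc_not_zero(); struct names are placeholders:

	#include <linux/atomic.h>
	#include <linux/rculist.h>

	struct conn {
		struct hlist_node c_list;
		atomic_t refcnt;
		int key;
	};

	static struct conn *conn_lookup(struct hlist_head *head, int key)
	{
		struct conn *cp;

		rcu_read_lock();
		hlist_for_each_entry_rcu(cp, head, c_list) {
			if (cp->key != key)
				continue;
			if (!atomic_inc_not_zero(&cp->refcnt))
				continue;	/* lost the race with unlink */
			rcu_read_unlock();	/* reference pins cp from here */
			return cp;
		}
		rcu_read_unlock();
		return NULL;
	}

Because the cmpxchg winner drops refcnt to 0 before unhashing, a concurrent reader either obtains a full reference or sees the failed increment and keeps scanning — never a half-dead entry.
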
AF_UNSPEC :  				     p->af, p->vaddr, &cp->vaddr) && -		    p->cport == cp->cport && p->vport == cp->vport && +		    p->vport == cp->vport && p->cport == cp->cport &&  		    cp->flags & IP_VS_CONN_F_TEMPLATE && -		    p->protocol == cp->protocol) -			goto out; +		    p->protocol == cp->protocol && +		    ip_vs_conn_net_eq(cp, p->net)) { +			if (__ip_vs_conn_get(cp)) +				goto out; +		}  	}  	cp = NULL;    out: -	if (cp) -		atomic_inc(&cp->refcnt); -	ct_read_unlock(hash); +	rcu_read_unlock();  	IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",  		      ip_vs_proto_name(p->protocol), @@ -398,23 +402,24 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)  	 */  	hash = ip_vs_conn_hashkey_param(p, true); -	ct_read_lock(hash); +	rcu_read_lock(); -	hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { -		if (cp->af == p->af && -		    p->vport == cp->cport && p->cport == cp->dport && +	hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { +		if (p->vport == cp->cport && p->cport == cp->dport && +		    cp->af == p->af &&  		    ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&  		    ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&  		    p->protocol == cp->protocol &&  		    ip_vs_conn_net_eq(cp, p->net)) { +			if (!__ip_vs_conn_get(cp)) +				continue;  			/* HIT */ -			atomic_inc(&cp->refcnt);  			ret = cp;  			break;  		}  	} -	ct_read_unlock(hash); +	rcu_read_unlock();  	IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",  		      ip_vs_proto_name(p->protocol), @@ -457,13 +462,13 @@ void ip_vs_conn_put(struct ip_vs_conn *cp)  void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport)  {  	if (ip_vs_conn_unhash(cp)) { -		spin_lock(&cp->lock); +		spin_lock_bh(&cp->lock);  		if (cp->flags & IP_VS_CONN_F_NO_CPORT) {  			atomic_dec(&ip_vs_conn_no_cport_cnt);  			cp->flags &= ~IP_VS_CONN_F_NO_CPORT;  			cp->cport = cport;  		} -		spin_unlock(&cp->lock); +		spin_unlock_bh(&cp->lock);  		/* hash on new dport */  		ip_vs_conn_hash(cp); @@ -549,7 +554,7 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)  		return;  	/* Increase the refcnt counter of the dest */ -	atomic_inc(&dest->refcnt); +	ip_vs_dest_hold(dest);  	conn_flags = atomic_read(&dest->conn_flags);  	if (cp->protocol != IPPROTO_UDP) @@ -606,20 +611,22 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)   * Check if there is a destination for the connection, if so   * bind the connection to the destination.   
*/ -struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) +void ip_vs_try_bind_dest(struct ip_vs_conn *cp)  {  	struct ip_vs_dest *dest; +	rcu_read_lock();  	dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,  			       cp->dport, &cp->vaddr, cp->vport,  			       cp->protocol, cp->fwmark, cp->flags);  	if (dest) {  		struct ip_vs_proto_data *pd; -		spin_lock(&cp->lock); +		spin_lock_bh(&cp->lock);  		if (cp->dest) { -			spin_unlock(&cp->lock); -			return dest; +			spin_unlock_bh(&cp->lock); +			rcu_read_unlock(); +			return;  		}  		/* Applications work depending on the forwarding method @@ -628,7 +635,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)  			ip_vs_unbind_app(cp);  		ip_vs_bind_dest(cp, dest); -		spin_unlock(&cp->lock); +		spin_unlock_bh(&cp->lock);  		/* Update its packet transmitter */  		cp->packet_xmit = NULL; @@ -643,7 +650,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)  		if (pd && atomic_read(&pd->appcnt))  			ip_vs_bind_app(cp, pd->pp);  	} -	return dest; +	rcu_read_unlock();  } @@ -695,12 +702,7 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)  			dest->flags &= ~IP_VS_DEST_F_OVERLOAD;  	} -	/* -	 * Simply decrease the refcnt of the dest, because the -	 * dest will be either in service's destination list -	 * or in the trash. -	 */ -	atomic_dec(&dest->refcnt); +	ip_vs_dest_put(dest);  }  static int expire_quiescent_template(struct netns_ipvs *ipvs, @@ -757,41 +759,36 @@ int ip_vs_check_template(struct ip_vs_conn *ct)  		 * Simply decrease the refcnt of the template,  		 * don't restart its timer.  		 */ -		atomic_dec(&ct->refcnt); +		__ip_vs_conn_put(ct);  		return 0;  	}  	return 1;  } +static void ip_vs_conn_rcu_free(struct rcu_head *head) +{ +	struct ip_vs_conn *cp = container_of(head, struct ip_vs_conn, +					     rcu_head); + +	ip_vs_pe_put(cp->pe); +	kfree(cp->pe_data); +	kmem_cache_free(ip_vs_conn_cachep, cp); +} +  static void ip_vs_conn_expire(unsigned long data)  {  	struct ip_vs_conn *cp = (struct ip_vs_conn *)data;  	struct net *net = ip_vs_conn_net(cp);  	struct netns_ipvs *ipvs = net_ipvs(net); -	cp->timeout = 60*HZ; - -	/* -	 *	hey, I'm using it -	 */ -	atomic_inc(&cp->refcnt); -  	/*  	 *	do I control anybody?  	 
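
Freeing becomes deferred as well: ip_vs_conn_rcu_free() above runs only after every CPU has left its RCU read-side critical section, so a reader that found the connection just before it was unhashed can still dereference it safely. The unlink-then-defer shape, sketched with placeholder names:

	#include <linux/rculist.h>
	#include <linux/slab.h>

	struct conn {
		struct hlist_node c_list;
		struct rcu_head rcu_head;
	};

	static void conn_rcu_free(struct rcu_head *head)
	{
		struct conn *cp = container_of(head, struct conn, rcu_head);

		/* per-object teardown (pe module ref, pe_data) belongs here,
		 * exactly as ip_vs_conn_rcu_free() does above */
		kfree(cp);
	}

	static void conn_release(struct conn *cp)
	{
		hlist_del_rcu(&cp->c_list);		/* under the bucket lock */
		call_rcu(&cp->rcu_head, conn_rcu_free);	/* free after grace period */
	}
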
*/  	if (atomic_read(&cp->n_control))  		goto expire_later; -	/* -	 *	unhash it if it is hashed in the conn table -	 */ -	if (!ip_vs_conn_unhash(cp) && !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) -		goto expire_later; - -	/* -	 *	refcnt==1 implies I'm the only one referrer -	 */ -	if (likely(atomic_read(&cp->refcnt) == 1)) { +	/* Unlink conn if not referenced anymore */ +	if (likely(ip_vs_conn_unlink(cp))) {  		/* delete the timer if it is activated by other users */  		del_timer(&cp->timer); @@ -810,38 +807,41 @@ static void ip_vs_conn_expire(unsigned long data)  				ip_vs_conn_drop_conntrack(cp);  		} -		ip_vs_pe_put(cp->pe); -		kfree(cp->pe_data);  		if (unlikely(cp->app != NULL))  			ip_vs_unbind_app(cp);  		ip_vs_unbind_dest(cp);  		if (cp->flags & IP_VS_CONN_F_NO_CPORT)  			atomic_dec(&ip_vs_conn_no_cport_cnt); +		call_rcu(&cp->rcu_head, ip_vs_conn_rcu_free);  		atomic_dec(&ipvs->conn_count); - -		kmem_cache_free(ip_vs_conn_cachep, cp);  		return;  	} -	/* hash it back to the table */ -	ip_vs_conn_hash(cp); -    expire_later: -	IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n", -		  atomic_read(&cp->refcnt)-1, +	IP_VS_DBG(7, "delayed: conn->refcnt=%d conn->n_control=%d\n", +		  atomic_read(&cp->refcnt),  		  atomic_read(&cp->n_control)); +	atomic_inc(&cp->refcnt); +	cp->timeout = 60*HZ; +  	if (ipvs->sync_state & IP_VS_STATE_MASTER)  		ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs));  	ip_vs_conn_put(cp);  } - +/* Modify timer, so that it expires as soon as possible. + * Can be called without reference only if under RCU lock. + */  void ip_vs_conn_expire_now(struct ip_vs_conn *cp)  { -	if (del_timer(&cp->timer)) -		mod_timer(&cp->timer, jiffies); +	/* Using mod_timer_pending will ensure the timer is not +	 * modified after the final del_timer in ip_vs_conn_expire. +	 */ +	if (timer_pending(&cp->timer) && +	    time_after(cp->timer.expires, jiffies)) +		mod_timer_pending(&cp->timer, jiffies);  } @@ -858,7 +858,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,  	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,  							   p->protocol); -	cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); +	cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);  	if (cp == NULL) {  		IP_VS_ERR_RL("%s(): no memory\n", __func__);  		return NULL; @@ -869,13 +869,13 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,  	ip_vs_conn_net_set(cp, p->net);  	cp->af		   = p->af;  	cp->protocol	   = p->protocol; -	ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); +	ip_vs_addr_set(p->af, &cp->caddr, p->caddr);  	cp->cport	   = p->cport; -	ip_vs_addr_copy(p->af, &cp->vaddr, p->vaddr); +	ip_vs_addr_set(p->af, &cp->vaddr, p->vaddr);  	cp->vport	   = p->vport;  	/* proto should only be IPPROTO_IP if d_addr is a fwmark */ -	ip_vs_addr_copy(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af, -			&cp->daddr, daddr); +	ip_vs_addr_set(p->protocol == IPPROTO_IP ? 
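
Note the new re-arm rule in ip_vs_conn_expire_now() above: the timer is only shortened while it is still pending, so the call is safe without holding a reference and can never resurrect a timer after the final del_timer() in the expire path. Sketched:

	#include <linux/jiffies.h>
	#include <linux/timer.h>

	static void expire_now(struct timer_list *timer)
	{
		/* no-op once the expire path has done its final del_timer() */
		if (timer_pending(timer) && time_after(timer->expires, jiffies))
			mod_timer_pending(timer, jiffies);
	}

The switch from kmem_cache_zalloc() to kmem_cache_alloc() just below is also why cp->control, cp->pe, the sequence deltas and friends are now assigned explicitly — a field missed in ip_vs_conn_new() would be uninitialized memory rather than zero.
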
AF_UNSPEC : p->af, +		       &cp->daddr, daddr);  	cp->dport          = dport;  	cp->flags	   = flags;  	cp->fwmark         = fwmark; @@ -884,6 +884,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,  		cp->pe = p->pe;  		cp->pe_data = p->pe_data;  		cp->pe_data_len = p->pe_data_len; +	} else { +		cp->pe = NULL; +		cp->pe_data = NULL; +		cp->pe_data_len = 0;  	}  	spin_lock_init(&cp->lock); @@ -894,18 +898,28 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,  	 */  	atomic_set(&cp->refcnt, 1); +	cp->control = NULL;  	atomic_set(&cp->n_control, 0);  	atomic_set(&cp->in_pkts, 0); +	cp->packet_xmit = NULL; +	cp->app = NULL; +	cp->app_data = NULL; +	/* reset struct ip_vs_seq */ +	cp->in_seq.delta = 0; +	cp->out_seq.delta = 0; +  	atomic_inc(&ipvs->conn_count);  	if (flags & IP_VS_CONN_F_NO_CPORT)  		atomic_inc(&ip_vs_conn_no_cport_cnt);  	/* Bind the connection with a destination server */ +	cp->dest = NULL;  	ip_vs_bind_dest(cp, dest);  	/* Set its state and timeout */  	cp->state = 0; +	cp->old_state = 0;  	cp->timeout = 3*HZ;  	cp->sync_endtime = jiffies & ~3UL; @@ -952,24 +966,29 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)  	struct ip_vs_iter_state *iter = seq->private;  	for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { -		ct_read_lock_bh(idx); -		hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { +		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { +			/* __ip_vs_conn_get() is not needed by +			 * ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show +			 */  			if (pos-- == 0) {  				iter->l = &ip_vs_conn_tab[idx];  				return cp;  			}  		} -		ct_read_unlock_bh(idx); +		rcu_read_unlock(); +		rcu_read_lock();  	}  	return NULL;  }  static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos) +	__acquires(RCU)  {  	struct ip_vs_iter_state *iter = seq->private;  	iter->l = NULL; +	rcu_read_lock();  	return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;  } @@ -977,6 +996,7 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)  {  	struct ip_vs_conn *cp = v;  	struct ip_vs_iter_state *iter = seq->private; +	struct hlist_node *e;  	struct hlist_head *l = iter->l;  	int idx; @@ -985,31 +1005,27 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)  		return ip_vs_conn_array(seq, 0);  	/* more on same hash chain? 
*/ -	if (cp->c_list.next) -		return hlist_entry(cp->c_list.next, struct ip_vs_conn, c_list); +	e = rcu_dereference(hlist_next_rcu(&cp->c_list)); +	if (e) +		return hlist_entry(e, struct ip_vs_conn, c_list);  	idx = l - ip_vs_conn_tab; -	ct_read_unlock_bh(idx); -  	while (++idx < ip_vs_conn_tab_size) { -		ct_read_lock_bh(idx); -		hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { +		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {  			iter->l = &ip_vs_conn_tab[idx];  			return cp;  		} -		ct_read_unlock_bh(idx); +		rcu_read_unlock(); +		rcu_read_lock();  	}  	iter->l = NULL;  	return NULL;  }  static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) +	__releases(RCU)  { -	struct ip_vs_iter_state *iter = seq->private; -	struct hlist_head *l = iter->l; - -	if (l) -		ct_read_unlock_bh(l - ip_vs_conn_tab); +	rcu_read_unlock();  }  static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) @@ -1188,7 +1204,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)  void ip_vs_random_dropentry(struct net *net)  {  	int idx; -	struct ip_vs_conn *cp; +	struct ip_vs_conn *cp, *cp_c;  	/*  	 * Randomly scan 1/32 of the whole table every second @@ -1199,9 +1215,9 @@ void ip_vs_random_dropentry(struct net *net)  		/*  		 *  Lock is actually needed in this loop.  		 */ -		ct_write_lock_bh(hash); +		rcu_read_lock(); -		hlist_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { +		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {  			if (cp->flags & IP_VS_CONN_F_TEMPLATE)  				/* connection template */  				continue; @@ -1228,12 +1244,15 @@ void ip_vs_random_dropentry(struct net *net)  			IP_VS_DBG(4, "del connection\n");  			ip_vs_conn_expire_now(cp); -			if (cp->control) { +			cp_c = cp->control; +			/* cp->control is valid only with reference to cp */ +			if (cp_c && __ip_vs_conn_get(cp)) {  				IP_VS_DBG(4, "del conn template\n"); -				ip_vs_conn_expire_now(cp->control); +				ip_vs_conn_expire_now(cp_c); +				__ip_vs_conn_put(cp);  			}  		} -		ct_write_unlock_bh(hash); +		rcu_read_unlock();  	}  } @@ -1244,7 +1263,7 @@ void ip_vs_random_dropentry(struct net *net)  static void ip_vs_conn_flush(struct net *net)  {  	int idx; -	struct ip_vs_conn *cp; +	struct ip_vs_conn *cp, *cp_c;  	struct netns_ipvs *ipvs = net_ipvs(net);  flush_again: @@ -1252,19 +1271,22 @@ flush_again:  		/*  		 *  Lock is actually needed in this loop.  		 
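
The /proc iterator above now runs entirely under RCU: the next entry in a bucket comes from rcu_dereference(hlist_next_rcu(...)), and the read lock is dropped and re-taken between buckets so a long table walk does not hold up grace periods. Condensed to a sketch (placeholder struct, not the IPVS iterator):

	#include <linux/rculist.h>

	struct conn { struct hlist_node c_list; };

	/* caller holds rcu_read_lock(); *idx is the current bucket */
	static struct conn *iter_next(struct conn *cp, struct hlist_head *tab,
				      int nbuckets, int *idx)
	{
		struct hlist_node *e = rcu_dereference(hlist_next_rcu(&cp->c_list));

		if (e)
			return hlist_entry(e, struct conn, c_list);

		while (++*idx < nbuckets) {
			rcu_read_unlock();	/* bound reader section length */
			rcu_read_lock();
			hlist_for_each_entry_rcu(cp, &tab[*idx], c_list)
				return cp;	/* first entry of next bucket */
		}
		return NULL;
	}
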
*/ -		ct_write_lock_bh(idx); +		rcu_read_lock(); -		hlist_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { +		hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {  			if (!ip_vs_conn_net_eq(cp, net))  				continue;  			IP_VS_DBG(4, "del connection\n");  			ip_vs_conn_expire_now(cp); -			if (cp->control) { +			cp_c = cp->control; +			/* cp->control is valid only with reference to cp */ +			if (cp_c && __ip_vs_conn_get(cp)) {  				IP_VS_DBG(4, "del conn template\n"); -				ip_vs_conn_expire_now(cp->control); +				ip_vs_conn_expire_now(cp_c); +				__ip_vs_conn_put(cp);  			}  		} -		ct_write_unlock_bh(idx); +		rcu_read_unlock();  	}  	/* the counter may be not NULL, because maybe some conn entries @@ -1331,7 +1353,7 @@ int __init ip_vs_conn_init(void)  		INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]);  	for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++)  { -		rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); +		spin_lock_init(&__ip_vs_conntbl_lock_array[idx].l);  	}  	/* calculate the random value for connection hash */ @@ -1342,6 +1364,8 @@ int __init ip_vs_conn_init(void)  void ip_vs_conn_cleanup(void)  { +	/* Wait all ip_vs_conn_rcu_free() callbacks to complete */ +	rcu_barrier();  	/* Release the empty cache */  	kmem_cache_destroy(ip_vs_conn_cachep);  	vfree(ip_vs_conn_tab); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 61f49d24171..085b5880ab0 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -69,10 +69,7 @@ EXPORT_SYMBOL(ip_vs_conn_put);  EXPORT_SYMBOL(ip_vs_get_debug_level);  #endif -int ip_vs_net_id __read_mostly; -#ifdef IP_VS_GENERIC_NETNS -EXPORT_SYMBOL(ip_vs_net_id); -#endif +static int ip_vs_net_id __read_mostly;  /* netns cnt used for uniqueness */  static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); @@ -206,7 +203,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,  {  	ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,  			      vport, p); -	p->pe = svc->pe; +	p->pe = rcu_dereference(svc->pe);  	if (p->pe && p->pe->fill_param)  		return p->pe->fill_param(p, skb); @@ -238,7 +235,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,  	/* Mask saddr with the netmask to adjust template granularity */  #ifdef CONFIG_IP_VS_IPV6  	if (svc->af == AF_INET6) -		ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, svc->netmask); +		ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, +				 (__force __u32) svc->netmask);  	else  #endif  		snet.ip = iph->saddr.ip & svc->netmask; @@ -299,12 +297,15 @@ ip_vs_sched_persist(struct ip_vs_service *svc,  	/* Check if a template already exists */  	ct = ip_vs_ct_in_get(¶m);  	if (!ct || !ip_vs_check_template(ct)) { +		struct ip_vs_scheduler *sched; +  		/*  		 * No template found or the dest of the connection  		 * template is not available.  		 * return *ignored=0 i.e. 
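
The rcu_barrier() added to ip_vs_conn_cleanup() above is the module-unload counterpart of call_rcu(): synchronize_rcu() only waits for readers, while rcu_barrier() waits for all queued callbacks to finish — and those callbacks free into the very cache being destroyed. A sketch, with conn_cachep standing in for ip_vs_conn_cachep:

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	static struct kmem_cache *conn_cachep;

	static void conn_cleanup(void)
	{
		/* let every pending conn_rcu_free() run first ... */
		rcu_barrier();
		/* ... then it is safe to tear down their backing cache */
		kmem_cache_destroy(conn_cachep);
	}
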
ICMP and NF_DROP  		 */ -		dest = svc->scheduler->schedule(svc, skb); +		sched = rcu_dereference(svc->scheduler); +		dest = sched->schedule(svc, skb);  		if (!dest) {  			IP_VS_DBG(1, "p-schedule: no dest found.\n");  			kfree(param.pe_data); @@ -394,6 +395,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,  {  	struct ip_vs_protocol *pp = pd->pp;  	struct ip_vs_conn *cp = NULL; +	struct ip_vs_scheduler *sched;  	struct ip_vs_dest *dest;  	__be16 _ports[2], *pptr;  	unsigned int flags; @@ -449,7 +451,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,  		return NULL;  	} -	dest = svc->scheduler->schedule(svc, skb); +	sched = rcu_dereference(svc->scheduler); +	dest = sched->schedule(svc, skb);  	if (dest == NULL) {  		IP_VS_DBG(1, "Schedule: no dest found.\n");  		return NULL; @@ -507,7 +510,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,  	pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);  	if (pptr == NULL) { -		ip_vs_service_put(svc);  		return NF_DROP;  	} @@ -533,8 +535,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,  				      IP_VS_CONN_F_ONE_PACKET : 0;  		union nf_inet_addr daddr =  { .all = { 0, 0, 0, 0 } }; -		ip_vs_service_put(svc); -  		/* create a new connection entry */  		IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);  		{ @@ -571,12 +571,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,  	 * listed in the ipvs table), pass the packets, because it is  	 * not ipvs job to decide to drop the packets.  	 */ -	if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT)) { -		ip_vs_service_put(svc); +	if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT))  		return NF_ACCEPT; -	} - -	ip_vs_service_put(svc);  	/*  	 * Notify the client that the destination is unreachable, and @@ -588,9 +584,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,  #ifdef CONFIG_IP_VS_IPV6  	if (svc->af == AF_INET6) {  		if (!skb->dev) { -			struct net *net = dev_net(skb_dst(skb)->dev); +			struct net *net_ = dev_net(skb_dst(skb)->dev); -			skb->dev = net->loopback_dev; +			skb->dev = net_->loopback_dev;  		}  		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);  	} else @@ -643,8 +639,11 @@ static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)  static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)  { -	int err = ip_defrag(skb, user); +	int err; +	local_bh_disable(); +	err = ip_defrag(skb, user); +	local_bh_enable();  	if (!err)  		ip_send_check(ip_hdr(skb)); @@ -1164,9 +1163,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)  					 sizeof(_ports), _ports, &iph);  		if (pptr == NULL)  			return NF_ACCEPT;	/* Not for me */ -		if (ip_vs_lookup_real_service(net, af, iph.protocol, -					      &iph.saddr, -					      pptr[0])) { +		if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr, +					   pptr[0])) {  			/*  			 * Notify the real server: there is no  			 * existing entry if it is not RST @@ -1181,9 +1179,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)  						iph.len)))) {  #ifdef CONFIG_IP_VS_IPV6  				if (af == AF_INET6) { -					struct net *net = -						dev_net(skb_dst(skb)->dev); -  					if (!skb->dev)  						skb->dev = net->loopback_dev;  					icmpv6_send(skb, @@ -1226,13 +1221,7 @@ ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,  		   const struct net_device *in, const struct net_device *out,  		   int (*okfn)(struct sk_buff *))  { -	unsigned int verdict; - -	/* 
Disable BH in LOCAL_OUT until all places are fixed */ -	local_bh_disable(); -	verdict = ip_vs_out(hooknum, skb, AF_INET); -	local_bh_enable(); -	return verdict; +	return ip_vs_out(hooknum, skb, AF_INET);  }  #ifdef CONFIG_IP_VS_IPV6 @@ -1259,13 +1248,7 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,  		   const struct net_device *in, const struct net_device *out,  		   int (*okfn)(struct sk_buff *))  { -	unsigned int verdict; - -	/* Disable BH in LOCAL_OUT until all places are fixed */ -	local_bh_disable(); -	verdict = ip_vs_out(hooknum, skb, AF_INET6); -	local_bh_enable(); -	return verdict; +	return ip_vs_out(hooknum, skb, AF_INET6);  }  #endif @@ -1401,10 +1384,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)  				goto ignore_ipip;  			/* Prefer the resulting PMTU */  			if (dest) { -				spin_lock(&dest->dst_lock); -				if (dest->dst_cache) -					mtu = dst_mtu(dest->dst_cache); -				spin_unlock(&dest->dst_lock); +				struct ip_vs_dest_dst *dest_dst; + +				rcu_read_lock(); +				dest_dst = rcu_dereference(dest->dest_dst); +				if (dest_dst) +					mtu = dst_mtu(dest_dst->dst_cache); +				rcu_read_unlock();  			}  			if (mtu > 68 + sizeof(struct iphdr))  				mtu -= sizeof(struct iphdr); @@ -1720,13 +1706,7 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,  		     const struct net_device *in, const struct net_device *out,  		     int (*okfn)(struct sk_buff *))  { -	unsigned int verdict; - -	/* Disable BH in LOCAL_OUT until all places are fixed */ -	local_bh_disable(); -	verdict = ip_vs_in(hooknum, skb, AF_INET); -	local_bh_enable(); -	return verdict; +	return ip_vs_in(hooknum, skb, AF_INET);  }  #ifdef CONFIG_IP_VS_IPV6 @@ -1785,13 +1765,7 @@ ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,  		     const struct net_device *in, const struct net_device *out,  		     int (*okfn)(struct sk_buff *))  { -	unsigned int verdict; - -	/* Disable BH in LOCAL_OUT until all places are fixed */ -	local_bh_disable(); -	verdict = ip_vs_in(hooknum, skb, AF_INET6); -	local_bh_enable(); -	return verdict; +	return ip_vs_in(hooknum, skb, AF_INET6);  }  #endif diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 9e2d1cccd1e..5b142fb1648 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -55,9 +55,6 @@  /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. 
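
The PMTU probe above no longer takes dest->dst_lock on the packet path: the cached route moved behind an RCU-managed ip_vs_dest_dst, so readers pay only rcu_read_lock(). The read side, sketched with placeholder types:

	#include <linux/rcupdate.h>
	#include <net/dst.h>

	struct dest_dst { struct dst_entry *dst_cache; };
	struct dest     { struct dest_dst __rcu *dest_dst; };

	static unsigned int cached_mtu(struct dest *dest, unsigned int fallback)
	{
		struct dest_dst *dd;
		unsigned int mtu = fallback;

		rcu_read_lock();
		dd = rcu_dereference(dest->dest_dst);
		if (dd)
			mtu = dst_mtu(dd->dst_cache);
		rcu_read_unlock();
		return mtu;
	}

The writer swaps the pointer under dst_lock and frees the old entry through call_rcu(), as the ip_vs_ctl.c hunks further down show.
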
*/  static DEFINE_MUTEX(__ip_vs_mutex); -/* lock for service table */ -static DEFINE_RWLOCK(__ip_vs_svc_lock); -  /* sysctl variables */  #ifdef CONFIG_IP_VS_DEBUG @@ -71,7 +68,7 @@ int ip_vs_get_debug_level(void)  /*  Protos */ -static void __ip_vs_del_service(struct ip_vs_service *svc); +static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup);  #ifdef CONFIG_IP_VS_IPV6 @@ -257,9 +254,9 @@ ip_vs_use_count_dec(void)  #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)  /* the service table hashed by <protocol, addr, port> */ -static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; +static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];  /* the service table hashed by fwmark */ -static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; +static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];  /* @@ -271,16 +268,18 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,  {  	register unsigned int porth = ntohs(port);  	__be32 addr_fold = addr->ip; +	__u32 ahash;  #ifdef CONFIG_IP_VS_IPV6  	if (af == AF_INET6)  		addr_fold = addr->ip6[0]^addr->ip6[1]^  			    addr->ip6[2]^addr->ip6[3];  #endif -	addr_fold ^= ((size_t)net>>8); +	ahash = ntohl(addr_fold); +	ahash ^= ((size_t) net >> 8); -	return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) -		& IP_VS_SVC_TAB_MASK; +	return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) & +	       IP_VS_SVC_TAB_MASK;  }  /* @@ -312,13 +311,13 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)  		 */  		hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,  					 &svc->addr, svc->port); -		list_add(&svc->s_list, &ip_vs_svc_table[hash]); +		hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);  	} else {  		/*  		 *  Hash it by fwmark in svc_fwm_table  		 */  		hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); -		list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); +		hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);  	}  	svc->flags |= IP_VS_SVC_F_HASHED; @@ -342,10 +341,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)  	if (svc->fwmark == 0) {  		/* Remove it from the svc_table table */ -		list_del(&svc->s_list); +		hlist_del_rcu(&svc->s_list);  	} else {  		/* Remove it from the svc_fwm_table table */ -		list_del(&svc->f_list); +		hlist_del_rcu(&svc->f_list);  	}  	svc->flags &= ~IP_VS_SVC_F_HASHED; @@ -367,7 +366,7 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol,  	/* Check for "full" addressed entries */  	hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); -	list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ +	hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {  		if ((svc->af == af)  		    && ip_vs_addr_equal(af, &svc->addr, vaddr)  		    && (svc->port == vport) @@ -394,7 +393,7 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)  	/* Check for fwmark addressed entries */  	hash = ip_vs_svc_fwm_hashkey(net, fwmark); -	list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { +	hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {  		if (svc->fwmark == fwmark && svc->af == af  		    && net_eq(svc->net, net)) {  			/* HIT */ @@ -405,15 +404,14 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)  	return NULL;  } +/* Find service, called under RCU lock */  struct ip_vs_service * -ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, -		  const union nf_inet_addr *vaddr, __be16 vport) +ip_vs_service_find(struct net *net, int af, __u32 fwmark, 
__u16 protocol, +		   const union nf_inet_addr *vaddr, __be16 vport)  {  	struct ip_vs_service *svc;  	struct netns_ipvs *ipvs = net_ipvs(net); -	read_lock(&__ip_vs_svc_lock); -  	/*  	 *	Check the table hashed by fwmark first  	 */ @@ -449,10 +447,6 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,  	}    out: -	if (svc) -		atomic_inc(&svc->usecnt); -	read_unlock(&__ip_vs_svc_lock); -  	IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",  		      fwmark, ip_vs_proto_name(protocol),  		      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), @@ -469,6 +463,13 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)  	dest->svc = svc;  } +static void ip_vs_service_free(struct ip_vs_service *svc) +{ +	if (svc->stats.cpustats) +		free_percpu(svc->stats.cpustats); +	kfree(svc); +} +  static void  __ip_vs_unbind_svc(struct ip_vs_dest *dest)  { @@ -476,12 +477,11 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest)  	dest->svc = NULL;  	if (atomic_dec_and_test(&svc->refcnt)) { -		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", +		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",  			      svc->fwmark,  			      IP_VS_DBG_ADDR(svc->af, &svc->addr), -			      ntohs(svc->port), atomic_read(&svc->usecnt)); -		free_percpu(svc->stats.cpustats); -		kfree(svc); +			      ntohs(svc->port)); +		ip_vs_service_free(svc);  	}  } @@ -506,17 +506,13 @@ static inline unsigned int ip_vs_rs_hashkey(int af,  		& IP_VS_RTAB_MASK;  } -/* - *	Hashes ip_vs_dest in rs_table by <proto,addr,port>. - *	should be called with locked tables. - */ -static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) +/* Hash ip_vs_dest in rs_table by <proto,addr,port>. */ +static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)  {  	unsigned int hash; -	if (!list_empty(&dest->d_list)) { -		return 0; -	} +	if (dest->in_rs_table) +		return;  	/*  	 *	Hash by proto,addr,port, @@ -524,64 +520,51 @@ static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)  	 */  	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); -	list_add(&dest->d_list, &ipvs->rs_table[hash]); - -	return 1; +	hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]); +	dest->in_rs_table = 1;  } -/* - *	UNhashes ip_vs_dest from rs_table. - *	should be called with locked tables. - */ -static int ip_vs_rs_unhash(struct ip_vs_dest *dest) +/* Unhash ip_vs_dest from rs_table. */ +static void ip_vs_rs_unhash(struct ip_vs_dest *dest)  {  	/*  	 * Remove it from the rs_table table.  	 */ -	if (!list_empty(&dest->d_list)) { -		list_del_init(&dest->d_list); +	if (dest->in_rs_table) { +		hlist_del_rcu(&dest->d_list); +		dest->in_rs_table = 0;  	} - -	return 1;  } -/* - *	Lookup real service by <proto,addr,port> in the real service table. 
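
A small but easy-to-miss detail in ip_vs_rs_hash()/ip_vs_rs_unhash() above: once the list is RCU-managed, an unlink may not reinitialize the node (readers still traverse its ->next), so membership can no longer be tested with list_empty() as the deleted code did — hence the explicit in_rs_table flag. Sketch:

	#include <linux/rculist.h>

	struct dest {
		struct hlist_node d_list;
		int in_rs_table;
	};

	static void rs_unhash(struct dest *dest)
	{
		if (dest->in_rs_table) {
			/* leaves ->next intact for concurrent readers,
			 * so the node never looks "empty" afterwards */
			hlist_del_rcu(&dest->d_list);
			dest->in_rs_table = 0;
		}
	}
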
- */ -struct ip_vs_dest * -ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, -			  const union nf_inet_addr *daddr, -			  __be16 dport) +/* Check if real service by <proto,addr,port> is present */ +bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol, +			    const union nf_inet_addr *daddr, __be16 dport)  {  	struct netns_ipvs *ipvs = net_ipvs(net);  	unsigned int hash;  	struct ip_vs_dest *dest; -	/* -	 *	Check for "full" addressed entries -	 *	Return the first found entry -	 */ +	/* Check for "full" addressed entries */  	hash = ip_vs_rs_hashkey(af, daddr, dport); -	read_lock(&ipvs->rs_lock); -	list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { -		if ((dest->af == af) -		    && ip_vs_addr_equal(af, &dest->addr, daddr) -		    && (dest->port == dport) -		    && ((dest->protocol == protocol) || -			dest->vfwmark)) { +	rcu_read_lock(); +	hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { +		if (dest->port == dport && +		    dest->af == af && +		    ip_vs_addr_equal(af, &dest->addr, daddr) && +		    (dest->protocol == protocol || dest->vfwmark)) {  			/* HIT */ -			read_unlock(&ipvs->rs_lock); -			return dest; +			rcu_read_unlock(); +			return true;  		}  	} -	read_unlock(&ipvs->rs_lock); +	rcu_read_unlock(); -	return NULL; +	return false;  } -/* - *	Lookup destination by {addr,port} in the given service +/* Lookup destination by {addr,port} in the given service + * Called under RCU lock.   */  static struct ip_vs_dest *  ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, @@ -592,7 +575,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,  	/*  	 * Find the destination for the given service  	 */ -	list_for_each_entry(dest, &svc->destinations, n_list) { +	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {  		if ((dest->af == svc->af)  		    && ip_vs_addr_equal(svc->af, &dest->addr, daddr)  		    && (dest->port == dport)) { @@ -606,13 +589,11 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,  /*   * Find destination by {daddr,dport,vaddr,protocol} - * Cretaed to be used in ip_vs_process_message() in + * Created to be used in ip_vs_process_message() in   * the backup synchronization daemon. It finds the   * destination to be bound to the received connection   * on the backup. - * - * ip_vs_lookup_real_service() looked promissing, but - * seems not working as expected. + * Called under RCU lock, no refcnt is returned.   
*/  struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,  				   const union nf_inet_addr *daddr, @@ -625,7 +606,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,  	struct ip_vs_service *svc;  	__be16 port = dport; -	svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport); +	svc = ip_vs_service_find(net, af, fwmark, protocol, vaddr, vport);  	if (!svc)  		return NULL;  	if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) @@ -633,12 +614,31 @@ struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,  	dest = ip_vs_lookup_dest(svc, daddr, port);  	if (!dest)  		dest = ip_vs_lookup_dest(svc, daddr, port ^ dport); -	if (dest) -		atomic_inc(&dest->refcnt); -	ip_vs_service_put(svc);  	return dest;  } +void ip_vs_dest_dst_rcu_free(struct rcu_head *head) +{ +	struct ip_vs_dest_dst *dest_dst = container_of(head, +						       struct ip_vs_dest_dst, +						       rcu_head); + +	dst_release(dest_dst->dst_cache); +	kfree(dest_dst); +} + +/* Release dest_dst and dst_cache for dest in user context */ +static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest) +{ +	struct ip_vs_dest_dst *old; + +	old = rcu_dereference_protected(dest->dest_dst, 1); +	if (old) { +		RCU_INIT_POINTER(dest->dest_dst, NULL); +		call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); +	} +} +  /*   *  Lookup dest by {svc,addr,port} in the destination trash.   *  The destination trash is used to hold the destinations that are removed @@ -653,19 +653,25 @@ static struct ip_vs_dest *  ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,  		     __be16 dport)  { -	struct ip_vs_dest *dest, *nxt; +	struct ip_vs_dest *dest;  	struct netns_ipvs *ipvs = net_ipvs(svc->net);  	/*  	 * Find the destination in trash  	 */ -	list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { +	spin_lock_bh(&ipvs->dest_trash_lock); +	list_for_each_entry(dest, &ipvs->dest_trash, t_list) {  		IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "  			      "dest->refcnt=%d\n",  			      dest->vfwmark,  			      IP_VS_DBG_ADDR(svc->af, &dest->addr),  			      ntohs(dest->port),  			      atomic_read(&dest->refcnt)); +		/* We can not reuse dest while in grace period +		 * because conns still can use dest->svc +		 */ +		if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) +			continue;  		if (dest->af == svc->af &&  		    ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&  		    dest->port == dport && @@ -675,29 +681,27 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,  		     (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&  		      dest->vport == svc->port))) {  			/* HIT */ -			return dest; -		} - -		/* -		 * Try to purge the destination from trash if not referenced -		 */ -		if (atomic_read(&dest->refcnt) == 1) { -			IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u " -				      "from trash\n", -				      dest->vfwmark, -				      IP_VS_DBG_ADDR(svc->af, &dest->addr), -				      ntohs(dest->port)); -			list_del(&dest->n_list); -			ip_vs_dst_reset(dest); -			__ip_vs_unbind_svc(dest); -			free_percpu(dest->stats.cpustats); -			kfree(dest); +			list_del(&dest->t_list); +			ip_vs_dest_hold(dest); +			goto out;  		}  	} -	return NULL; +	dest = NULL; + +out: +	spin_unlock_bh(&ipvs->dest_trash_lock); + +	return dest;  } +static void ip_vs_dest_free(struct ip_vs_dest *dest) +{ +	__ip_vs_dst_cache_reset(dest); +	__ip_vs_unbind_svc(dest); +	free_percpu(dest->stats.cpustats); +	kfree(dest); +}  /*   *  Clean up all the destinations in the 
trash @@ -706,19 +710,18 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,   *  When the ip_vs_control_clearup is activated by ipvs module exit,   *  the service tables must have been flushed and all the connections   *  are expired, and the refcnt of each destination in the trash must - *  be 1, so we simply release them here. + *  be 0, so we simply release them here.   */  static void ip_vs_trash_cleanup(struct net *net)  {  	struct ip_vs_dest *dest, *nxt;  	struct netns_ipvs *ipvs = net_ipvs(net); -	list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { -		list_del(&dest->n_list); -		ip_vs_dst_reset(dest); -		__ip_vs_unbind_svc(dest); -		free_percpu(dest->stats.cpustats); -		kfree(dest); +	del_timer_sync(&ipvs->dest_trash_timer); +	/* No need to use dest_trash_lock */ +	list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) { +		list_del(&dest->t_list); +		ip_vs_dest_free(dest);  	}  } @@ -768,6 +771,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,  		    struct ip_vs_dest_user_kern *udest, int add)  {  	struct netns_ipvs *ipvs = net_ipvs(svc->net); +	struct ip_vs_scheduler *sched;  	int conn_flags;  	/* set the weight and the flags */ @@ -783,9 +787,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,  		 *    Put the real service in rs_table if not present.  		 *    For now only for NAT!  		 */ -		write_lock_bh(&ipvs->rs_lock);  		ip_vs_rs_hash(ipvs, dest); -		write_unlock_bh(&ipvs->rs_lock);  	}  	atomic_set(&dest->conn_flags, conn_flags); @@ -809,27 +811,20 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,  	dest->l_threshold = udest->l_threshold;  	spin_lock_bh(&dest->dst_lock); -	ip_vs_dst_reset(dest); +	__ip_vs_dst_cache_reset(dest);  	spin_unlock_bh(&dest->dst_lock); -	if (add) -		ip_vs_start_estimator(svc->net, &dest->stats); - -	write_lock_bh(&__ip_vs_svc_lock); - -	/* Wait until all other svc users go away */ -	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - +	sched = rcu_dereference_protected(svc->scheduler, 1);  	if (add) { -		list_add(&dest->n_list, &svc->destinations); +		ip_vs_start_estimator(svc->net, &dest->stats); +		list_add_rcu(&dest->n_list, &svc->destinations);  		svc->num_dests++; +		if (sched->add_dest) +			sched->add_dest(svc, dest); +	} else { +		if (sched->upd_dest) +			sched->upd_dest(svc, dest);  	} - -	/* call the update_service, because server weight may be changed */ -	if (svc->scheduler->update_service) -		svc->scheduler->update_service(svc); - -	write_unlock_bh(&__ip_vs_svc_lock);  } @@ -881,7 +876,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,  	atomic_set(&dest->persistconns, 0);  	atomic_set(&dest->refcnt, 1); -	INIT_LIST_HEAD(&dest->d_list); +	INIT_HLIST_NODE(&dest->d_list);  	spin_lock_init(&dest->dst_lock);  	spin_lock_init(&dest->stats.lock);  	__ip_vs_update_dest(svc, dest, udest, 1); @@ -923,10 +918,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)  	ip_vs_addr_copy(svc->af, &daddr, &udest->addr); -	/* -	 * Check if the dest already exists in the list -	 */ +	/* We use function that requires RCU lock */ +	rcu_read_lock();  	dest = ip_vs_lookup_dest(svc, &daddr, dport); +	rcu_read_unlock();  	if (dest != NULL) {  		IP_VS_DBG(1, "%s(): dest already exists\n", __func__); @@ -948,11 +943,6 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)  			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),  			      ntohs(dest->vport)); 
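
__ip_vs_update_dest() above also shows the new scheduler contract: instead of quiescing every service user (the removed IP_VS_WAIT_WHILE on svc->usecnt) and firing one coarse update_service() hook, the scheduler is told exactly which destination was added or updated. A kernel-style sketch of the shape, with placeholder types:

	#include <linux/rcupdate.h>

	struct svc;
	struct dest;

	struct sched_ops {
		int (*add_dest)(struct svc *svc, struct dest *dest);
		int (*upd_dest)(struct svc *svc, struct dest *dest);
		int (*del_dest)(struct svc *svc, struct dest *dest);
	};

	struct svc { struct sched_ops __rcu *sched; };

	static void dest_added(struct svc *svc, struct dest *dest)
	{
		/* update side runs under the config mutex, hence the "1" */
		struct sched_ops *sched = rcu_dereference_protected(svc->sched, 1);

		if (sched->add_dest)
			sched->add_dest(svc, dest);
	}
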
-		/* -		 * Get the destination from the trash -		 */ -		list_del(&dest->n_list); -  		__ip_vs_update_dest(svc, dest, udest, 1);  		ret = 0;  	} else { @@ -992,10 +982,10 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)  	ip_vs_addr_copy(svc->af, &daddr, &udest->addr); -	/* -	 *  Lookup the destination list -	 */ +	/* We use function that requires RCU lock */ +	rcu_read_lock();  	dest = ip_vs_lookup_dest(svc, &daddr, dport); +	rcu_read_unlock();  	if (dest == NULL) {  		IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__); @@ -1008,11 +998,21 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)  	return 0;  } +static void ip_vs_dest_wait_readers(struct rcu_head *head) +{ +	struct ip_vs_dest *dest = container_of(head, struct ip_vs_dest, +					       rcu_head); + +	/* End of grace period after unlinking */ +	clear_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); +} +  /*   *	Delete a destination (must be already unlinked from the service)   */ -static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) +static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest, +			     bool cleanup)  {  	struct netns_ipvs *ipvs = net_ipvs(net); @@ -1021,38 +1021,24 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)  	/*  	 *  Remove it from the d-linked list with the real services.  	 */ -	write_lock_bh(&ipvs->rs_lock);  	ip_vs_rs_unhash(dest); -	write_unlock_bh(&ipvs->rs_lock); -	/* -	 *  Decrease the refcnt of the dest, and free the dest -	 *  if nobody refers to it (refcnt=0). Otherwise, throw -	 *  the destination into the trash. -	 */ -	if (atomic_dec_and_test(&dest->refcnt)) { -		IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n", -			      dest->vfwmark, -			      IP_VS_DBG_ADDR(dest->af, &dest->addr), -			      ntohs(dest->port)); -		ip_vs_dst_reset(dest); -		/* simply decrease svc->refcnt here, let the caller check -		   and release the service if nobody refers to it. -		   Only user context can release destination and service, -		   and only one user context can update virtual service at a -		   time, so the operation here is OK */ -		atomic_dec(&dest->svc->refcnt); -		free_percpu(dest->stats.cpustats); -		kfree(dest); -	} else { -		IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " -			      "dest->refcnt=%d\n", -			      IP_VS_DBG_ADDR(dest->af, &dest->addr), -			      ntohs(dest->port), -			      atomic_read(&dest->refcnt)); -		list_add(&dest->n_list, &ipvs->dest_trash); -		atomic_inc(&dest->refcnt); +	if (!cleanup) { +		set_bit(IP_VS_DEST_STATE_REMOVING, &dest->state); +		call_rcu(&dest->rcu_head, ip_vs_dest_wait_readers);  	} + +	spin_lock_bh(&ipvs->dest_trash_lock); +	IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", +		      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), +		      atomic_read(&dest->refcnt)); +	if (list_empty(&ipvs->dest_trash) && !cleanup) +		mod_timer(&ipvs->dest_trash_timer, +			  jiffies + IP_VS_DEST_TRASH_PERIOD); +	/* dest lives in trash without reference */ +	list_add(&dest->t_list, &ipvs->dest_trash); +	spin_unlock_bh(&ipvs->dest_trash_lock); +	ip_vs_dest_put(dest);  } @@ -1068,14 +1054,16 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,  	/*  	 *  Remove it from the d-linked destination list.  	 
*/ -	list_del(&dest->n_list); +	list_del_rcu(&dest->n_list);  	svc->num_dests--; -	/* -	 *  Call the update_service function of its scheduler -	 */ -	if (svcupd && svc->scheduler->update_service) -			svc->scheduler->update_service(svc); +	if (svcupd) { +		struct ip_vs_scheduler *sched; + +		sched = rcu_dereference_protected(svc->scheduler, 1); +		if (sched->del_dest) +			sched->del_dest(svc, dest); +	}  } @@ -1090,37 +1078,56 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)  	EnterFunction(2); +	/* We use function that requires RCU lock */ +	rcu_read_lock();  	dest = ip_vs_lookup_dest(svc, &udest->addr, dport); +	rcu_read_unlock();  	if (dest == NULL) {  		IP_VS_DBG(1, "%s(): destination not found!\n", __func__);  		return -ENOENT;  	} -	write_lock_bh(&__ip_vs_svc_lock); - -	/* -	 *	Wait until all other svc users go away. -	 */ -	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); -  	/*  	 *	Unlink dest from the service  	 */  	__ip_vs_unlink_dest(svc, dest, 1); -	write_unlock_bh(&__ip_vs_svc_lock); -  	/*  	 *	Delete the destination  	 */ -	__ip_vs_del_dest(svc->net, dest); +	__ip_vs_del_dest(svc->net, dest, false);  	LeaveFunction(2);  	return 0;  } +static void ip_vs_dest_trash_expire(unsigned long data) +{ +	struct net *net = (struct net *) data; +	struct netns_ipvs *ipvs = net_ipvs(net); +	struct ip_vs_dest *dest, *next; + +	spin_lock(&ipvs->dest_trash_lock); +	list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { +		/* Skip if dest is in grace period */ +		if (test_bit(IP_VS_DEST_STATE_REMOVING, &dest->state)) +			continue; +		if (atomic_read(&dest->refcnt) > 0) +			continue; +		IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", +			      dest->vfwmark, +			      IP_VS_DBG_ADDR(dest->svc->af, &dest->addr), +			      ntohs(dest->port)); +		list_del(&dest->t_list); +		ip_vs_dest_free(dest); +	} +	if (!list_empty(&ipvs->dest_trash)) +		mod_timer(&ipvs->dest_trash_timer, +			  jiffies + IP_VS_DEST_TRASH_PERIOD); +	spin_unlock(&ipvs->dest_trash_lock); +}  /*   *	Add a service into the service hash table @@ -1157,9 +1164,13 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,  	}  #ifdef CONFIG_IP_VS_IPV6 -	if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { -		ret = -EINVAL; -		goto out_err; +	if (u->af == AF_INET6) { +		__u32 plen = (__force __u32) u->netmask; + +		if (plen < 1 || plen > 128) { +			ret = -EINVAL; +			goto out_err; +		}  	}  #endif @@ -1176,7 +1187,6 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,  	}  	/* I'm the first user of the service */ -	atomic_set(&svc->usecnt, 0);  	atomic_set(&svc->refcnt, 0);  	svc->af = u->af; @@ -1190,7 +1200,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,  	svc->net = net;  	INIT_LIST_HEAD(&svc->destinations); -	rwlock_init(&svc->sched_lock); +	spin_lock_init(&svc->sched_lock);  	spin_lock_init(&svc->stats.lock);  	/* Bind the scheduler */ @@ -1200,7 +1210,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,  	sched = NULL;  	/* Bind the ct retriever */ -	ip_vs_bind_pe(svc, pe); +	RCU_INIT_POINTER(svc->pe, pe);  	pe = NULL;  	/* Update the virtual service counters */ @@ -1216,9 +1226,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,  		ipvs->num_services++;  	/* Hash the service into the service table */ -	write_lock_bh(&__ip_vs_svc_lock);  	ip_vs_svc_hash(svc); -	write_unlock_bh(&__ip_vs_svc_lock);  	*svc_p = svc;  	/* Now there is a service - full throttle 
*/ @@ -1228,15 +1236,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,   out_err:  	if (svc != NULL) { -		ip_vs_unbind_scheduler(svc); -		if (svc->inc) { -			local_bh_disable(); -			ip_vs_app_inc_put(svc->inc); -			local_bh_enable(); -		} -		if (svc->stats.cpustats) -			free_percpu(svc->stats.cpustats); -		kfree(svc); +		ip_vs_unbind_scheduler(svc, sched); +		ip_vs_service_free(svc);  	}  	ip_vs_scheduler_put(sched);  	ip_vs_pe_put(pe); @@ -1280,18 +1281,27 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)  	}  #ifdef CONFIG_IP_VS_IPV6 -	if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { -		ret = -EINVAL; -		goto out; +	if (u->af == AF_INET6) { +		__u32 plen = (__force __u32) u->netmask; + +		if (plen < 1 || plen > 128) { +			ret = -EINVAL; +			goto out; +		}  	}  #endif -	write_lock_bh(&__ip_vs_svc_lock); - -	/* -	 * Wait until all other svc users go away. -	 */ -	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); +	old_sched = rcu_dereference_protected(svc->scheduler, 1); +	if (sched != old_sched) { +		/* Bind the new scheduler */ +		ret = ip_vs_bind_scheduler(svc, sched); +		if (ret) { +			old_sched = sched; +			goto out; +		} +		/* Unbind the old scheduler on success */ +		ip_vs_unbind_scheduler(svc, old_sched); +	}  	/*  	 * Set the flags and timeout value @@ -1300,57 +1310,30 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)  	svc->timeout = u->timeout * HZ;  	svc->netmask = u->netmask; -	old_sched = svc->scheduler; -	if (sched != old_sched) { -		/* -		 * Unbind the old scheduler -		 */ -		if ((ret = ip_vs_unbind_scheduler(svc))) { -			old_sched = sched; -			goto out_unlock; -		} +	old_pe = rcu_dereference_protected(svc->pe, 1); +	if (pe != old_pe) +		rcu_assign_pointer(svc->pe, pe); -		/* -		 * Bind the new scheduler -		 */ -		if ((ret = ip_vs_bind_scheduler(svc, sched))) { -			/* -			 * If ip_vs_bind_scheduler fails, restore the old -			 * scheduler. -			 * The main reason of failure is out of memory. -			 * -			 * The question is if the old scheduler can be -			 * restored all the time. TODO: if it cannot be -			 * restored some time, we must delete the service, -			 * otherwise the system may crash. -			 */ -			ip_vs_bind_scheduler(svc, old_sched); -			old_sched = sched; -			goto out_unlock; -		} -	} - -	old_pe = svc->pe; -	if (pe != old_pe) { -		ip_vs_unbind_pe(svc); -		ip_vs_bind_pe(svc, pe); -	} - -out_unlock: -	write_unlock_bh(&__ip_vs_svc_lock);  out:  	ip_vs_scheduler_put(old_sched);  	ip_vs_pe_put(old_pe);  	return ret;  } +static void ip_vs_service_rcu_free(struct rcu_head *head) +{ +	struct ip_vs_service *svc; + +	svc = container_of(head, struct ip_vs_service, rcu_head); +	ip_vs_service_free(svc); +}  /*   *	Delete a service from the service list   *	- The service must be unlinked, unlocked and not referenced!   
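
The scheduler swap in ip_vs_edit_service() above reverses the old, fragile order: the replacement is bound first and the old scheduler is released only on success, so a failed bind (typically -ENOMEM) leaves the service running with its previous scheduler instead of attempting the dubious "re-bind the old one and hope" recovery the deleted code carried. The resulting invariant, annotated:

	old_sched = rcu_dereference_protected(svc->scheduler, 1);
	if (sched != old_sched) {
		ret = ip_vs_bind_scheduler(svc, sched);
		if (ret) {
			old_sched = sched;	/* put the unused new one */
			goto out;		/* svc keeps a working scheduler */
		}
		ip_vs_unbind_scheduler(svc, old_sched);	/* only after success */
	}

At no point does svc->scheduler hold anything a concurrent RCU reader could not safely call through.
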
*	- We are called under _bh lock   */ -static void __ip_vs_del_service(struct ip_vs_service *svc) +static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)  {  	struct ip_vs_dest *dest, *nxt;  	struct ip_vs_scheduler *old_sched; @@ -1366,27 +1349,20 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)  	ip_vs_stop_estimator(svc->net, &svc->stats);  	/* Unbind scheduler */ -	old_sched = svc->scheduler; -	ip_vs_unbind_scheduler(svc); +	old_sched = rcu_dereference_protected(svc->scheduler, 1); +	ip_vs_unbind_scheduler(svc, old_sched);  	ip_vs_scheduler_put(old_sched); -	/* Unbind persistence engine */ -	old_pe = svc->pe; -	ip_vs_unbind_pe(svc); +	/* Unbind persistence engine, keep svc->pe */ +	old_pe = rcu_dereference_protected(svc->pe, 1);  	ip_vs_pe_put(old_pe); -	/* Unbind app inc */ -	if (svc->inc) { -		ip_vs_app_inc_put(svc->inc); -		svc->inc = NULL; -	} -  	/*  	 *    Unlink the whole destination list  	 */  	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {  		__ip_vs_unlink_dest(svc, dest, 0); -		__ip_vs_del_dest(svc->net, dest); +		__ip_vs_del_dest(svc->net, dest, cleanup);  	}  	/* @@ -1400,13 +1376,12 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)  	/*  	 *    Free the service if nobody refers to it  	 */ -	if (atomic_read(&svc->refcnt) == 0) { -		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n", +	if (atomic_dec_and_test(&svc->refcnt)) { +		IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n",  			      svc->fwmark,  			      IP_VS_DBG_ADDR(svc->af, &svc->addr), -			      ntohs(svc->port), atomic_read(&svc->usecnt)); -		free_percpu(svc->stats.cpustats); -		kfree(svc); +			      ntohs(svc->port)); +		call_rcu(&svc->rcu_head, ip_vs_service_rcu_free);  	}  	/* decrease the module use count */ @@ -1416,23 +1391,16 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)  /*   * Unlink a service from list and try to delete it if its refcnt reached 0   */ -static void ip_vs_unlink_service(struct ip_vs_service *svc) +static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup)  { +	/* Hold svc to avoid double release from dest_trash */ +	atomic_inc(&svc->refcnt);  	/*  	 * Unhash it from the service table  	 */ -	write_lock_bh(&__ip_vs_svc_lock); -  	ip_vs_svc_unhash(svc); -	/* -	 * Wait until all the svc users go away. 
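The removal just below is the heart of this series: the old code parked writers in IP_VS_WAIT_WHILE() until svc->usecnt drained to zero. The replacement keeps a plain refcount and defers the final free to an RCU callback, so the writer never spins waiting for readers. A minimal sketch of that shape, using hypothetical example_* names rather than the real IPVS types:

#include <linux/atomic.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct example_svc {
	struct hlist_node	s_list;
	atomic_t		refcnt;
	struct rcu_head		rcu_head;
};

static void example_svc_rcu_free(struct rcu_head *head)
{
	kfree(container_of(head, struct example_svc, rcu_head));
}

/* called under the config mutex; no waiting on readers */
static void example_del_svc(struct example_svc *svc)
{
	hlist_del_rcu(&svc->s_list);		/* readers keep walking safely */
	if (atomic_dec_and_test(&svc->refcnt))	/* drop the table's reference */
		call_rcu(&svc->rcu_head, example_svc_rcu_free);
}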
-	 */ -	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); - -	__ip_vs_del_service(svc); - -	write_unlock_bh(&__ip_vs_svc_lock); +	__ip_vs_del_service(svc, cleanup);  }  /* @@ -1442,7 +1410,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc)  {  	if (svc == NULL)  		return -EEXIST; -	ip_vs_unlink_service(svc); +	ip_vs_unlink_service(svc, false);  	return 0;  } @@ -1451,19 +1419,20 @@ static int ip_vs_del_service(struct ip_vs_service *svc)  /*   *	Flush all the virtual services   */ -static int ip_vs_flush(struct net *net) +static int ip_vs_flush(struct net *net, bool cleanup)  {  	int idx; -	struct ip_vs_service *svc, *nxt; +	struct ip_vs_service *svc; +	struct hlist_node *n;  	/*  	 * Flush the service table hashed by <netns,protocol,addr,port>  	 */  	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { -		list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], -					 s_list) { +		hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx], +					  s_list) {  			if (net_eq(svc->net, net)) -				ip_vs_unlink_service(svc); +				ip_vs_unlink_service(svc, cleanup);  		}  	} @@ -1471,10 +1440,10 @@ static int ip_vs_flush(struct net *net)  	 * Flush the service table hashed by fwmark  	 */  	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { -		list_for_each_entry_safe(svc, nxt, -					 &ip_vs_svc_fwm_table[idx], f_list) { +		hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx], +					  f_list) {  			if (net_eq(svc->net, net)) -				ip_vs_unlink_service(svc); +				ip_vs_unlink_service(svc, cleanup);  		}  	} @@ -1490,32 +1459,32 @@ void ip_vs_service_net_cleanup(struct net *net)  	EnterFunction(2);  	/* Check for "full" addressed entries */  	mutex_lock(&__ip_vs_mutex); -	ip_vs_flush(net); +	ip_vs_flush(net, true);  	mutex_unlock(&__ip_vs_mutex);  	LeaveFunction(2);  } -/* - * Release dst hold by dst_cache - */ + +/* Put all references for device (dst_cache) */  static inline void -__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev) +ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev)  { +	struct ip_vs_dest_dst *dest_dst; +  	spin_lock_bh(&dest->dst_lock); -	if (dest->dst_cache && dest->dst_cache->dev == dev) { +	dest_dst = rcu_dereference_protected(dest->dest_dst, 1); +	if (dest_dst && dest_dst->dst_cache->dev == dev) {  		IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",  			      dev->name,  			      IP_VS_DBG_ADDR(dest->af, &dest->addr),  			      ntohs(dest->port),  			      atomic_read(&dest->refcnt)); -		ip_vs_dst_reset(dest); +		__ip_vs_dst_cache_reset(dest);  	}  	spin_unlock_bh(&dest->dst_lock);  } -/* - * Netdev event receiver - * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to - * a device that is "unregister" it must be released. 
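Before the reworked notifier comment below, note the access pattern it depends on: dest->dest_dst is now an __rcu pointer, cleared under dest->dst_lock and freed only after a grace period. The reset helper itself (__ip_vs_dst_cache_reset) is not part of this diff; the following is only a sketch of the pattern, with hypothetical example_* names:

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct example_dst {
	struct rcu_head		rcu_head;
	/* cached route state would live here */
};

struct example_dest {
	spinlock_t		dst_lock;
	struct example_dst __rcu *dest_dst;
};

static void example_dst_reset(struct example_dest *dest)
{
	struct example_dst *old;

	spin_lock_bh(&dest->dst_lock);
	/* dst_lock serializes writers, hence the _protected form */
	old = rcu_dereference_protected(dest->dest_dst, 1);
	RCU_INIT_POINTER(dest->dest_dst, NULL);
	spin_unlock_bh(&dest->dst_lock);
	if (old)
		kfree_rcu(old, rcu_head);	/* readers may still hold old */
}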
+/* Netdev event receiver + * Currently only NETDEV_DOWN is handled to release refs to cached dsts   */  static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,  			    void *ptr) @@ -1527,35 +1496,37 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,  	struct ip_vs_dest *dest;  	unsigned int idx; -	if (event != NETDEV_UNREGISTER || !ipvs) +	if (event != NETDEV_DOWN || !ipvs)  		return NOTIFY_DONE;  	IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);  	EnterFunction(2);  	mutex_lock(&__ip_vs_mutex);  	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { -		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { +		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {  			if (net_eq(svc->net, net)) {  				list_for_each_entry(dest, &svc->destinations,  						    n_list) { -					__ip_vs_dev_reset(dest, dev); +					ip_vs_forget_dev(dest, dev);  				}  			}  		} -		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { +		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {  			if (net_eq(svc->net, net)) {  				list_for_each_entry(dest, &svc->destinations,  						    n_list) { -					__ip_vs_dev_reset(dest, dev); +					ip_vs_forget_dev(dest, dev);  				}  			}  		}  	} -	list_for_each_entry(dest, &ipvs->dest_trash, n_list) { -		__ip_vs_dev_reset(dest, dev); +	spin_lock_bh(&ipvs->dest_trash_lock); +	list_for_each_entry(dest, &ipvs->dest_trash, t_list) { +		ip_vs_forget_dev(dest, dev);  	} +	spin_unlock_bh(&ipvs->dest_trash_lock);  	mutex_unlock(&__ip_vs_mutex);  	LeaveFunction(2);  	return NOTIFY_DONE; @@ -1568,12 +1539,10 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)  {  	struct ip_vs_dest *dest; -	write_lock_bh(&__ip_vs_svc_lock);  	list_for_each_entry(dest, &svc->destinations, n_list) {  		ip_vs_zero_stats(&dest->stats);  	}  	ip_vs_zero_stats(&svc->stats); -	write_unlock_bh(&__ip_vs_svc_lock);  	return 0;  } @@ -1583,14 +1552,14 @@ static int ip_vs_zero_all(struct net *net)  	struct ip_vs_service *svc;  	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { -		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { +		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {  			if (net_eq(svc->net, net))  				ip_vs_zero_service(svc);  		}  	}  	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { -		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { +		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {  			if (net_eq(svc->net, net))  				ip_vs_zero_service(svc);  		} @@ -1918,7 +1887,7 @@ static struct ctl_table vs_vars[] = {  struct ip_vs_iter {  	struct seq_net_private p;  /* Do not move this, netns depends upon it*/ -	struct list_head *table; +	struct hlist_head *table;  	int bucket;  }; @@ -1951,7 +1920,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)  	/* look in hash by protocol */  	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { -		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { +		hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {  			if (net_eq(svc->net, net) && pos-- == 0) {  				iter->table = ip_vs_svc_table;  				iter->bucket = idx; @@ -1962,7 +1931,8 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)  	/* keep looking in fwmark */  	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { -		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { +		hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx], +					 f_list) {  			if (net_eq(svc->net, net) && pos-- == 0) {  				iter->table = 
ip_vs_svc_fwm_table;  				iter->bucket = idx; @@ -1975,17 +1945,16 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)  }  static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) -__acquires(__ip_vs_svc_lock) +	__acquires(RCU)  { - -	read_lock_bh(&__ip_vs_svc_lock); +	rcu_read_lock();  	return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;  }  static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)  { -	struct list_head *e; +	struct hlist_node *e;  	struct ip_vs_iter *iter;  	struct ip_vs_service *svc; @@ -1998,13 +1967,14 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)  	if (iter->table == ip_vs_svc_table) {  		/* next service in table hashed by protocol */ -		if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket]) -			return list_entry(e, struct ip_vs_service, s_list); - +		e = rcu_dereference(hlist_next_rcu(&svc->s_list)); +		if (e) +			return hlist_entry(e, struct ip_vs_service, s_list);  		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { -			list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket], -					    s_list) { +			hlist_for_each_entry_rcu(svc, +						 &ip_vs_svc_table[iter->bucket], +						 s_list) {  				return svc;  			}  		} @@ -2015,13 +1985,15 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)  	}  	/* next service in hashed by fwmark */ -	if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket]) -		return list_entry(e, struct ip_vs_service, f_list); +	e = rcu_dereference(hlist_next_rcu(&svc->f_list)); +	if (e) +		return hlist_entry(e, struct ip_vs_service, f_list);   scan_fwmark:  	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { -		list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket], -				    f_list) +		hlist_for_each_entry_rcu(svc, +					 &ip_vs_svc_fwm_table[iter->bucket], +					 f_list)  			return svc;  	} @@ -2029,9 +2001,9 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)  }  static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) -__releases(__ip_vs_svc_lock) +	__releases(RCU)  { -	read_unlock_bh(&__ip_vs_svc_lock); +	rcu_read_unlock();  } @@ -2049,6 +2021,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)  		const struct ip_vs_service *svc = v;  		const struct ip_vs_iter *iter = seq->private;  		const struct ip_vs_dest *dest; +		struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);  		if (iter->table == ip_vs_svc_table) {  #ifdef CONFIG_IP_VS_IPV6 @@ -2057,18 +2030,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)  					   ip_vs_proto_name(svc->protocol),  					   &svc->addr.in6,  					   ntohs(svc->port), -					   svc->scheduler->name); +					   sched->name);  			else  #endif  				seq_printf(seq, "%s  %08X:%04X %s %s ",  					   ip_vs_proto_name(svc->protocol),  					   ntohl(svc->addr.ip),  					   ntohs(svc->port), -					   svc->scheduler->name, +					   sched->name,  					   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");  		} else {  			seq_printf(seq, "FWM  %08X %s %s", -				   svc->fwmark, svc->scheduler->name, +				   svc->fwmark, sched->name,  				   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");  		} @@ -2079,7 +2052,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)  		else  			seq_putc(seq, '\n'); -		list_for_each_entry(dest, &svc->destinations, n_list) { +		list_for_each_entry_rcu(dest, &svc->destinations, n_list) {  #ifdef CONFIG_IP_VS_IPV6  			if (dest->af == AF_INET6)  				seq_printf(seq, @@ -2173,7 
+2146,7 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)  {  	struct net *net = seq_file_single_net(seq);  	struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; -	struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats; +	struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;  	struct ip_vs_stats_user rates;  	int i; @@ -2389,7 +2362,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)  	if (cmd == IP_VS_SO_SET_FLUSH) {  		/* Flush the virtual service */ -		ret = ip_vs_flush(net); +		ret = ip_vs_flush(net, false);  		goto out_unlock;  	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {  		/* Set timeout values for (tcp tcpfin udp) */ @@ -2424,11 +2397,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)  	}  	/* Lookup the exact service by <protocol, addr, port> or fwmark */ +	rcu_read_lock();  	if (usvc.fwmark == 0)  		svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,  					   &usvc.addr, usvc.port);  	else  		svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); +	rcu_read_unlock();  	if (cmd != IP_VS_SO_SET_ADD  	    && (svc == NULL || svc->protocol != usvc.protocol)) { @@ -2480,11 +2455,14 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)  static void  ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)  { +	struct ip_vs_scheduler *sched; + +	sched = rcu_dereference_protected(src->scheduler, 1);  	dst->protocol = src->protocol;  	dst->addr = src->addr.ip;  	dst->port = src->port;  	dst->fwmark = src->fwmark; -	strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name)); +	strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name));  	dst->flags = src->flags;  	dst->timeout = src->timeout / HZ;  	dst->netmask = src->netmask; @@ -2503,7 +2481,7 @@ __ip_vs_get_service_entries(struct net *net,  	int ret = 0;  	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { -		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { +		hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {  			/* Only expose IPv4 entries to old interface */  			if (svc->af != AF_INET || !net_eq(svc->net, net))  				continue; @@ -2522,7 +2500,7 @@ __ip_vs_get_service_entries(struct net *net,  	}  	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { -		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { +		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {  			/* Only expose IPv4 entries to old interface */  			if (svc->af != AF_INET || !net_eq(svc->net, net))  				continue; @@ -2551,11 +2529,13 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,  	union nf_inet_addr addr = { .ip = get->addr };  	int ret = 0; +	rcu_read_lock();  	if (get->fwmark)  		svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);  	else  		svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,  					   get->port); +	rcu_read_unlock();  	if (svc) {  		int count = 0; @@ -2738,12 +2718,14 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)  		entry = (struct ip_vs_service_entry *)arg;  		addr.ip = entry->addr; +		rcu_read_lock();  		if (entry->fwmark)  			svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);  		else  			svc = __ip_vs_service_find(net, AF_INET,  						   entry->protocol, &addr,  						   entry->port); +		rcu_read_unlock();  		if (svc) {  			ip_vs_copy_service(entry, svc);  			if (copy_to_user(user, entry, sizeof(*entry)) != 0) @@ -2900,6 +2882,8 @@ nla_put_failure:  static int 
ip_vs_genl_fill_service(struct sk_buff *skb,  				   struct ip_vs_service *svc)  { +	struct ip_vs_scheduler *sched; +	struct ip_vs_pe *pe;  	struct nlattr *nl_service;  	struct ip_vs_flags flags = { .flags = svc->flags,  				     .mask = ~0 }; @@ -2916,16 +2900,17 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,  	} else {  		if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||  		    nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) || -		    nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port)) +		    nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port))  			goto nla_put_failure;  	} -	if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) || -	    (svc->pe && -	     nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) || +	sched = rcu_dereference_protected(svc->scheduler, 1); +	pe = rcu_dereference_protected(svc->pe, 1); +	if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) || +	    (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||  	    nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||  	    nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || -	    nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask)) +	    nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))  		goto nla_put_failure;  	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))  		goto nla_put_failure; @@ -2971,7 +2956,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,  	mutex_lock(&__ip_vs_mutex);  	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { -		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { +		hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {  			if (++idx <= start || !net_eq(svc->net, net))  				continue;  			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { @@ -2982,7 +2967,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,  	}  	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { -		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { +		hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {  			if (++idx <= start || !net_eq(svc->net, net))  				continue;  			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { @@ -3038,15 +3023,17 @@ static int ip_vs_genl_parse_service(struct net *net,  	} else {  		usvc->protocol = nla_get_u16(nla_protocol);  		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr)); -		usvc->port = nla_get_u16(nla_port); +		usvc->port = nla_get_be16(nla_port);  		usvc->fwmark = 0;  	} +	rcu_read_lock();  	if (usvc->fwmark)  		svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);  	else  		svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,  					   &usvc->addr, usvc->port); +	rcu_read_unlock();  	*ret_svc = svc;  	/* If a full entry was requested, check for the additional fields */ @@ -3076,7 +3063,7 @@ static int ip_vs_genl_parse_service(struct net *net,  		usvc->sched_name = nla_data(nla_sched);  		usvc->pe_name = nla_pe ? 
nla_data(nla_pe) : NULL;  		usvc->timeout = nla_get_u32(nla_timeout); -		usvc->netmask = nla_get_u32(nla_netmask); +		usvc->netmask = nla_get_be32(nla_netmask);  	}  	return 0; @@ -3102,7 +3089,7 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)  		return -EMSGSIZE;  	if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) || -	    nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) || +	    nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||  	    nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,  			(atomic_read(&dest->conn_flags) &  			 IP_VS_CONN_F_FWD_MASK)) || @@ -3211,7 +3198,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,  	memset(udest, 0, sizeof(*udest));  	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); -	udest->port = nla_get_u16(nla_port); +	udest->port = nla_get_be16(nla_port);  	/* If a full entry was requested, check for the additional fields */  	if (full_entry) { @@ -3236,8 +3223,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,  	return 0;  } -static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state, -				  const char *mcast_ifn, __be32 syncid) +static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, +				  const char *mcast_ifn, __u32 syncid)  {  	struct nlattr *nl_daemon; @@ -3258,8 +3245,8 @@ nla_put_failure:  	return -EMSGSIZE;  } -static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state, -				  const char *mcast_ifn, __be32 syncid, +static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, +				  const char *mcast_ifn, __u32 syncid,  				  struct netlink_callback *cb)  {  	void *hdr; @@ -3398,7 +3385,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)  	mutex_lock(&__ip_vs_mutex);  	if (cmd == IPVS_CMD_FLUSH) { -		ret = ip_vs_flush(net); +		ret = ip_vs_flush(net, false);  		goto out;  	} else if (cmd == IPVS_CMD_SET_CONFIG) {  		ret = ip_vs_genl_set_config(net, info->attrs); @@ -3790,13 +3777,14 @@ int __net_init ip_vs_control_net_init(struct net *net)  	int idx;  	struct netns_ipvs *ipvs = net_ipvs(net); -	rwlock_init(&ipvs->rs_lock); -  	/* Initialize rs_table */  	for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) -		INIT_LIST_HEAD(&ipvs->rs_table[idx]); +		INIT_HLIST_HEAD(&ipvs->rs_table[idx]);  	INIT_LIST_HEAD(&ipvs->dest_trash); +	spin_lock_init(&ipvs->dest_trash_lock); +	setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, +		    (unsigned long) net);  	atomic_set(&ipvs->ftpsvc_counter, 0);  	atomic_set(&ipvs->nullsvc_counter, 0); @@ -3826,6 +3814,10 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net)  {  	struct netns_ipvs *ipvs = net_ipvs(net); +	/* Some dest can be in grace period even before cleanup, we have to +	 * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called. +	 */ +	rcu_barrier();  	ip_vs_trash_cleanup(net);  	ip_vs_stop_estimator(net, &ipvs->tot_stats);  	ip_vs_control_net_cleanup_sysctl(net); @@ -3871,10 +3863,10 @@ int __init ip_vs_control_init(void)  	EnterFunction(2); -	/* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ +	/* Initialize svc_table, ip_vs_svc_fwm_table */  	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { -		INIT_LIST_HEAD(&ip_vs_svc_table[idx]); -		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); +		INIT_HLIST_HEAD(&ip_vs_svc_table[idx]); +		INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]);  	}  	smp_wmb();	/* Do we really need it now ? 
*/ diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index 7f3b0cc00b7..ccab120df45 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -51,7 +51,7 @@   *      IPVS DH bucket   */  struct ip_vs_dh_bucket { -	struct ip_vs_dest       *dest;          /* real server (cache) */ +	struct ip_vs_dest __rcu	*dest;	/* real server (cache) */  };  /* @@ -64,6 +64,10 @@ struct ip_vs_dh_bucket {  #define IP_VS_DH_TAB_SIZE               (1 << IP_VS_DH_TAB_BITS)  #define IP_VS_DH_TAB_MASK               (IP_VS_DH_TAB_SIZE - 1) +struct ip_vs_dh_state { +	struct ip_vs_dh_bucket		buckets[IP_VS_DH_TAB_SIZE]; +	struct rcu_head			rcu_head; +};  /*   *	Returns hash value for IPVS DH entry @@ -85,10 +89,9 @@ static inline unsigned int ip_vs_dh_hashkey(int af, const union nf_inet_addr *ad   *      Get ip_vs_dest associated with supplied parameters.   */  static inline struct ip_vs_dest * -ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl, -	     const union nf_inet_addr *addr) +ip_vs_dh_get(int af, struct ip_vs_dh_state *s, const union nf_inet_addr *addr)  { -	return (tbl[ip_vs_dh_hashkey(af, addr)]).dest; +	return rcu_dereference(s->buckets[ip_vs_dh_hashkey(af, addr)].dest);  } @@ -96,25 +99,30 @@ ip_vs_dh_get(int af, struct ip_vs_dh_bucket *tbl,   *      Assign all the hash buckets of the specified table with the service.   */  static int -ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc) +ip_vs_dh_reassign(struct ip_vs_dh_state *s, struct ip_vs_service *svc)  {  	int i;  	struct ip_vs_dh_bucket *b;  	struct list_head *p;  	struct ip_vs_dest *dest; +	bool empty; -	b = tbl; +	b = &s->buckets[0];  	p = &svc->destinations; +	empty = list_empty(p);  	for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { -		if (list_empty(p)) { -			b->dest = NULL; -		} else { +		dest = rcu_dereference_protected(b->dest, 1); +		if (dest) +			ip_vs_dest_put(dest); +		if (empty) +			RCU_INIT_POINTER(b->dest, NULL); +		else {  			if (p == &svc->destinations)  				p = p->next;  			dest = list_entry(p, struct ip_vs_dest, n_list); -			atomic_inc(&dest->refcnt); -			b->dest = dest; +			ip_vs_dest_hold(dest); +			RCU_INIT_POINTER(b->dest, dest);  			p = p->next;  		} @@ -127,16 +135,18 @@ ip_vs_dh_assign(struct ip_vs_dh_bucket *tbl, struct ip_vs_service *svc)  /*   *      Flush all the hash buckets of the specified table.   
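Both ip_vs_dh_reassign() and ip_vs_dh_flush() in the hunks below reduce to the same single-bucket update: drop the old dest's reference, take one on the new dest, and publish the __rcu pointer. A condensed sketch with hypothetical example_* names (the hold/put helpers stand in for ip_vs_dest_hold/ip_vs_dest_put):

#include <linux/rcupdate.h>

struct example_dest;			/* opaque; refcounted elsewhere */
void example_dest_hold(struct example_dest *d);
void example_dest_put(struct example_dest *d);

struct example_bucket {
	struct example_dest __rcu *dest;
};

/* caller holds the service's update-side lock */
static void example_bucket_swap(struct example_bucket *b,
				struct example_dest *new)
{
	struct example_dest *old = rcu_dereference_protected(b->dest, 1);

	if (old)
		example_dest_put(old);		/* bucket drops its ref */
	if (new) {
		example_dest_hold(new);		/* bucket takes a ref */
		rcu_assign_pointer(b->dest, new); /* publish with barrier */
	} else {
		RCU_INIT_POINTER(b->dest, NULL); /* NULL needs no barrier */
	}
}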
*/ -static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl) +static void ip_vs_dh_flush(struct ip_vs_dh_state *s)  {  	int i;  	struct ip_vs_dh_bucket *b; +	struct ip_vs_dest *dest; -	b = tbl; +	b = &s->buckets[0];  	for (i=0; i<IP_VS_DH_TAB_SIZE; i++) { -		if (b->dest) { -			atomic_dec(&b->dest->refcnt); -			b->dest = NULL; +		dest = rcu_dereference_protected(b->dest, 1); +		if (dest) { +			ip_vs_dest_put(dest); +			RCU_INIT_POINTER(b->dest, NULL);  		}  		b++;  	} @@ -145,51 +155,46 @@ static void ip_vs_dh_flush(struct ip_vs_dh_bucket *tbl)  static int ip_vs_dh_init_svc(struct ip_vs_service *svc)  { -	struct ip_vs_dh_bucket *tbl; +	struct ip_vs_dh_state *s;  	/* allocate the DH table for this service */ -	tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE, -		      GFP_KERNEL); -	if (tbl == NULL) +	s = kzalloc(sizeof(struct ip_vs_dh_state), GFP_KERNEL); +	if (s == NULL)  		return -ENOMEM; -	svc->sched_data = tbl; +	svc->sched_data = s;  	IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) allocated for "  		  "current service\n",  		  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); -	/* assign the hash buckets with the updated service */ -	ip_vs_dh_assign(tbl, svc); +	/* assign the hash buckets with current dests */ +	ip_vs_dh_reassign(s, svc);  	return 0;  } -static int ip_vs_dh_done_svc(struct ip_vs_service *svc) +static void ip_vs_dh_done_svc(struct ip_vs_service *svc)  { -	struct ip_vs_dh_bucket *tbl = svc->sched_data; +	struct ip_vs_dh_state *s = svc->sched_data;  	/* got to clean up hash buckets here */ -	ip_vs_dh_flush(tbl); +	ip_vs_dh_flush(s);  	/* release the table itself */ -	kfree(svc->sched_data); +	kfree_rcu(s, rcu_head);  	IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",  		  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE); - -	return 0;  } -static int ip_vs_dh_update_svc(struct ip_vs_service *svc) +static int ip_vs_dh_dest_changed(struct ip_vs_service *svc, +				 struct ip_vs_dest *dest)  { -	struct ip_vs_dh_bucket *tbl = svc->sched_data; - -	/* got to clean up hash buckets here */ -	ip_vs_dh_flush(tbl); +	struct ip_vs_dh_state *s = svc->sched_data;  	/* assign the hash buckets with the updated service */ -	ip_vs_dh_assign(tbl, svc); +	ip_vs_dh_reassign(s, svc);  	return 0;  } @@ -212,19 +217,20 @@ static struct ip_vs_dest *  ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)  {  	struct ip_vs_dest *dest; -	struct ip_vs_dh_bucket *tbl; +	struct ip_vs_dh_state *s;  	struct ip_vs_iphdr iph;  	ip_vs_fill_iph_addr_only(svc->af, skb, &iph);  	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); -	tbl = (struct ip_vs_dh_bucket *)svc->sched_data; -	dest = ip_vs_dh_get(svc->af, tbl, &iph.daddr); +	s = (struct ip_vs_dh_state *) svc->sched_data; +	dest = ip_vs_dh_get(svc->af, s, &iph.daddr);  	if (!dest  	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)  	    || atomic_read(&dest->weight) <= 0  	    || is_overloaded(dest)) { +		ip_vs_scheduler_err(svc, "no destination available");  		return NULL;  	} @@ -248,7 +254,8 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler =  	.n_list =		LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),  	.init_service =		ip_vs_dh_init_svc,  	.done_service =		ip_vs_dh_done_svc, -	.update_service =	ip_vs_dh_update_svc, +	.add_dest =		ip_vs_dh_dest_changed, +	.del_dest =		ip_vs_dh_dest_changed,  	.schedule =		ip_vs_dh_schedule,  }; @@ -262,6 +269,7 @@ static int __init ip_vs_dh_init(void)  static void __exit ip_vs_dh_cleanup(void)  {  	unregister_ip_vs_scheduler(&ip_vs_dh_scheduler); +	synchronize_rcu();  } diff --git 
a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 0fac6017b6f..6bee6d0c73a 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -56,7 +56,7 @@   * Make a summary from each cpu   */  static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, -				 struct ip_vs_cpu_stats *stats) +				 struct ip_vs_cpu_stats __percpu *stats)  {  	int i; diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 4f53a5f0443..77c173282f3 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -267,10 +267,12 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,  			 * hopefully it will succeed on the retransmitted  			 * packet.  			 */ +			rcu_read_lock();  			ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,  						       iph->ihl * 4,  						       start-data, end-start,  						       buf, buf_len); +			rcu_read_unlock();  			if (ret) {  				ip_vs_nfct_expect_related(skb, ct, n_cp,  							  IPPROTO_TCP, 0, 0); @@ -480,6 +482,7 @@ static int __init ip_vs_ftp_init(void)  	int rv;  	rv = register_pernet_subsys(&ip_vs_ftp_ops); +	/* rcu_barrier() is called by netns on error */  	return rv;  } @@ -489,6 +492,7 @@ static int __init ip_vs_ftp_init(void)  static void __exit ip_vs_ftp_exit(void)  {  	unregister_pernet_subsys(&ip_vs_ftp_ops); +	/* rcu_barrier() is called by netns */  } diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index fdd89b9564e..5ea26bd8774 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -90,11 +90,12 @@   *      IP address and its destination server   */  struct ip_vs_lblc_entry { -	struct list_head        list; +	struct hlist_node	list;  	int			af;		/* address family */  	union nf_inet_addr      addr;           /* destination IP address */ -	struct ip_vs_dest       *dest;          /* real server (cache) */ +	struct ip_vs_dest __rcu	*dest;          /* real server (cache) */  	unsigned long           lastuse;        /* last used time */ +	struct rcu_head		rcu_head;  }; @@ -102,12 +103,14 @@ struct ip_vs_lblc_entry {   *      IPVS lblc hash table   */  struct ip_vs_lblc_table { -	struct list_head        bucket[IP_VS_LBLC_TAB_SIZE];  /* hash bucket */ +	struct rcu_head		rcu_head; +	struct hlist_head	bucket[IP_VS_LBLC_TAB_SIZE];  /* hash bucket */ +	struct timer_list       periodic_timer; /* collect stale entries */  	atomic_t                entries;        /* number of entries */  	int                     max_size;       /* maximum size of entries */ -	struct timer_list       periodic_timer; /* collect stale entries */  	int                     rover;          /* rover for expire check */  	int                     counter;        /* counter for no expire */ +	bool			dead;  }; @@ -129,13 +132,16 @@ static ctl_table vs_vars_table[] = {  static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)  { -	list_del(&en->list); +	struct ip_vs_dest *dest; + +	hlist_del_rcu(&en->list);  	/*  	 * We don't kfree dest because it is referred either by its service  	 * or the trash dest list.  	 
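The switch to hlist_del_rcu()/kfree_rcu() in the LBLC hunks below is forced by the read side: lookups now walk the bucket under nothing but rcu_read_lock(), so an unlinked entry may still be examined until a grace period elapses. A sketch of such a reader, with hypothetical example_* names:

#include <linux/rculist.h>
#include <linux/types.h>

struct example_entry {
	struct hlist_node	list;
	u32			key;
};

/* read side that makes kfree_rcu() necessary: the caller holds only
 * rcu_read_lock() while this walks the bucket */
static struct example_entry *example_lookup(struct hlist_head *head, u32 key)
{
	struct example_entry *en;

	hlist_for_each_entry_rcu(en, head, list)
		if (en->key == key)
			return en;
	return NULL;
}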
*/ -	atomic_dec(&en->dest->refcnt); -	kfree(en); +	dest = rcu_dereference_protected(en->dest, 1); +	ip_vs_dest_put(dest); +	kfree_rcu(en, rcu_head);  } @@ -165,15 +171,12 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)  {  	unsigned int hash = ip_vs_lblc_hashkey(en->af, &en->addr); -	list_add(&en->list, &tbl->bucket[hash]); +	hlist_add_head_rcu(&en->list, &tbl->bucket[hash]);  	atomic_inc(&tbl->entries);  } -/* - *  Get ip_vs_lblc_entry associated with supplied parameters. Called under read - *  lock - */ +/* Get ip_vs_lblc_entry associated with supplied parameters. */  static inline struct ip_vs_lblc_entry *  ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,  	       const union nf_inet_addr *addr) @@ -181,7 +184,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,  	unsigned int hash = ip_vs_lblc_hashkey(af, addr);  	struct ip_vs_lblc_entry *en; -	list_for_each_entry(en, &tbl->bucket[hash], list) +	hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list)  		if (ip_vs_addr_equal(af, &en->addr, addr))  			return en; @@ -191,7 +194,7 @@ ip_vs_lblc_get(int af, struct ip_vs_lblc_table *tbl,  /*   * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP - * address to a server. Called under write lock. + * address to a server. Called under spin lock.   */  static inline struct ip_vs_lblc_entry *  ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, @@ -209,14 +212,20 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,  		ip_vs_addr_copy(dest->af, &en->addr, daddr);  		en->lastuse = jiffies; -		atomic_inc(&dest->refcnt); -		en->dest = dest; +		ip_vs_dest_hold(dest); +		RCU_INIT_POINTER(en->dest, dest);  		ip_vs_lblc_hash(tbl, en); -	} else if (en->dest != dest) { -		atomic_dec(&en->dest->refcnt); -		atomic_inc(&dest->refcnt); -		en->dest = dest; +	} else { +		struct ip_vs_dest *old_dest; + +		old_dest = rcu_dereference_protected(en->dest, 1); +		if (old_dest != dest) { +			ip_vs_dest_put(old_dest); +			ip_vs_dest_hold(dest); +			/* No ordering constraints for refcnt */ +			RCU_INIT_POINTER(en->dest, dest); +		}  	}  	return en; @@ -226,17 +235,22 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr,  /*   *      Flush all the entries of the specified table.   
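The tbl->dead flag added in the flush path below closes a race with the packet path: both insertion and flush take svc->sched_lock, so once the flag is set no new entry can land in a table that is being torn down. A sketch of the insert side, with hypothetical example_* names:

#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct example_tbl {
	bool			dead;	/* set by the flusher under the lock */
	struct hlist_head	bucket[1];
};

struct example_entry {
	struct hlist_node	list;
};

static void example_cache_add(spinlock_t *sched_lock,
			      struct example_tbl *tbl,
			      struct example_entry *en)
{
	spin_lock_bh(sched_lock);
	if (!tbl->dead)			/* never insert into a dying table */
		hlist_add_head_rcu(&en->list, &tbl->bucket[0]);
	spin_unlock_bh(sched_lock);
}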
*/ -static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl) +static void ip_vs_lblc_flush(struct ip_vs_service *svc)  { -	struct ip_vs_lblc_entry *en, *nxt; +	struct ip_vs_lblc_table *tbl = svc->sched_data; +	struct ip_vs_lblc_entry *en; +	struct hlist_node *next;  	int i; +	spin_lock_bh(&svc->sched_lock); +	tbl->dead = 1;  	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { -		list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { +		hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {  			ip_vs_lblc_free(en);  			atomic_dec(&tbl->entries);  		}  	} +	spin_unlock_bh(&svc->sched_lock);  }  static int sysctl_lblc_expiration(struct ip_vs_service *svc) @@ -252,15 +266,16 @@ static int sysctl_lblc_expiration(struct ip_vs_service *svc)  static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)  {  	struct ip_vs_lblc_table *tbl = svc->sched_data; -	struct ip_vs_lblc_entry *en, *nxt; +	struct ip_vs_lblc_entry *en; +	struct hlist_node *next;  	unsigned long now = jiffies;  	int i, j;  	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {  		j = (j + 1) & IP_VS_LBLC_TAB_MASK; -		write_lock(&svc->sched_lock); -		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { +		spin_lock(&svc->sched_lock); +		hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {  			if (time_before(now,  					en->lastuse +  					sysctl_lblc_expiration(svc))) @@ -269,7 +284,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)  			ip_vs_lblc_free(en);  			atomic_dec(&tbl->entries);  		} -		write_unlock(&svc->sched_lock); +		spin_unlock(&svc->sched_lock);  	}  	tbl->rover = j;  } @@ -293,7 +308,8 @@ static void ip_vs_lblc_check_expire(unsigned long data)  	unsigned long now = jiffies;  	int goal;  	int i, j; -	struct ip_vs_lblc_entry *en, *nxt; +	struct ip_vs_lblc_entry *en; +	struct hlist_node *next;  	if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {  		/* do full expiration check */ @@ -314,8 +330,8 @@ static void ip_vs_lblc_check_expire(unsigned long data)  	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {  		j = (j + 1) & IP_VS_LBLC_TAB_MASK; -		write_lock(&svc->sched_lock); -		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { +		spin_lock(&svc->sched_lock); +		hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {  			if (time_before(now, en->lastuse + ENTRY_TIMEOUT))  				continue; @@ -323,7 +339,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)  			atomic_dec(&tbl->entries);  			goal--;  		} -		write_unlock(&svc->sched_lock); +		spin_unlock(&svc->sched_lock);  		if (goal <= 0)  			break;  	} @@ -354,11 +370,12 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)  	 *    Initialize the hash buckets  	 */  	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { -		INIT_LIST_HEAD(&tbl->bucket[i]); +		INIT_HLIST_HEAD(&tbl->bucket[i]);  	}  	tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;  	tbl->rover = 0;  	tbl->counter = 1; +	tbl->dead = 0;  	/*  	 *    Hook periodic timer for garbage collection @@ -371,7 +388,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)  } -static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) +static void ip_vs_lblc_done_svc(struct ip_vs_service *svc)  {  	struct ip_vs_lblc_table *tbl = svc->sched_data; @@ -379,14 +396,12 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)  	del_timer_sync(&tbl->periodic_timer);  	/* got to clean up table entries here */ -	ip_vs_lblc_flush(tbl); +	ip_vs_lblc_flush(svc);  	/* release the table itself */ -	kfree(tbl); +	kfree_rcu(tbl, rcu_head);  	IP_VS_DBG(6, "LBLC hash table 
(memory=%Zdbytes) released\n",  		  sizeof(*tbl)); - -	return 0;  } @@ -408,7 +423,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)  	 * The server with weight=0 is quiesced and will not receive any  	 * new connection.  	 */ -	list_for_each_entry(dest, &svc->destinations, n_list) { +	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {  		if (dest->flags & IP_VS_DEST_F_OVERLOAD)  			continue;  		if (atomic_read(&dest->weight) > 0) { @@ -423,7 +438,7 @@ __ip_vs_lblc_schedule(struct ip_vs_service *svc)  	 *    Find the destination with the least load.  	 */    nextstage: -	list_for_each_entry_continue(dest, &svc->destinations, n_list) { +	list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {  		if (dest->flags & IP_VS_DEST_F_OVERLOAD)  			continue; @@ -457,7 +472,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)  	if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {  		struct ip_vs_dest *d; -		list_for_each_entry(d, &svc->destinations, n_list) { +		list_for_each_entry_rcu(d, &svc->destinations, n_list) {  			if (atomic_read(&d->activeconns)*2  			    < atomic_read(&d->weight)) {  				return 1; @@ -484,7 +499,6 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)  	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);  	/* First look in our cache */ -	read_lock(&svc->sched_lock);  	en = ip_vs_lblc_get(svc->af, tbl, &iph.daddr);  	if (en) {  		/* We only hold a read lock, but this is atomic */ @@ -499,14 +513,11 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)  		 * free up entries from the trash at any time.  		 */ -		if (en->dest->flags & IP_VS_DEST_F_AVAILABLE) -			dest = en->dest; +		dest = rcu_dereference(en->dest); +		if ((dest->flags & IP_VS_DEST_F_AVAILABLE) && +		    atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) +			goto out;  	} -	read_unlock(&svc->sched_lock); - -	/* If the destination has a weight and is not overloaded, use it */ -	if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) -		goto out;  	/* No cache entry or it is invalid, time to schedule */  	dest = __ip_vs_lblc_schedule(svc); @@ -516,9 +527,10 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)  	}  	/* If we fail to create a cache entry, we'll just use the valid dest */ -	write_lock(&svc->sched_lock); -	ip_vs_lblc_new(tbl, &iph.daddr, dest); -	write_unlock(&svc->sched_lock); +	spin_lock_bh(&svc->sched_lock); +	if (!tbl->dead) +		ip_vs_lblc_new(tbl, &iph.daddr, dest); +	spin_unlock_bh(&svc->sched_lock);  out:  	IP_VS_DBG_BUF(6, "LBLC: destination IP address %s --> server %s:%d\n", @@ -621,6 +633,7 @@ static void __exit ip_vs_lblc_cleanup(void)  {  	unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);  	unregister_pernet_subsys(&ip_vs_lblc_ops); +	synchronize_rcu();  } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index c03b6a3ade2..50123c2ab48 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -89,40 +89,44 @@   */  struct ip_vs_dest_set_elem {  	struct list_head	list;          /* list link */ -	struct ip_vs_dest       *dest;          /* destination server */ +	struct ip_vs_dest __rcu *dest;         /* destination server */ +	struct rcu_head		rcu_head;  };  struct ip_vs_dest_set {  	atomic_t                size;           /* set size */  	unsigned long           lastmod;        /* last modified time */  	struct list_head	list;           /* destination list */ -	rwlock_t	        
lock;           /* lock for this list */  }; -static struct ip_vs_dest_set_elem * -ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) +static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set, +				  struct ip_vs_dest *dest, bool check)  {  	struct ip_vs_dest_set_elem *e; -	list_for_each_entry(e, &set->list, list) { -		if (e->dest == dest) -			/* already existed */ -			return NULL; +	if (check) { +		list_for_each_entry(e, &set->list, list) { +			struct ip_vs_dest *d; + +			d = rcu_dereference_protected(e->dest, 1); +			if (d == dest) +				/* already existed */ +				return; +		}  	}  	e = kmalloc(sizeof(*e), GFP_ATOMIC);  	if (e == NULL) -		return NULL; +		return; -	atomic_inc(&dest->refcnt); -	e->dest = dest; +	ip_vs_dest_hold(dest); +	RCU_INIT_POINTER(e->dest, dest); -	list_add(&e->list, &set->list); +	list_add_rcu(&e->list, &set->list);  	atomic_inc(&set->size);  	set->lastmod = jiffies; -	return e;  }  static void @@ -131,13 +135,16 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)  	struct ip_vs_dest_set_elem *e;  	list_for_each_entry(e, &set->list, list) { -		if (e->dest == dest) { +		struct ip_vs_dest *d; + +		d = rcu_dereference_protected(e->dest, 1); +		if (d == dest) {  			/* HIT */  			atomic_dec(&set->size);  			set->lastmod = jiffies; -			atomic_dec(&e->dest->refcnt); -			list_del(&e->list); -			kfree(e); +			ip_vs_dest_put(dest); +			list_del_rcu(&e->list); +			kfree_rcu(e, rcu_head);  			break;  		}  	} @@ -147,17 +154,18 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)  {  	struct ip_vs_dest_set_elem *e, *ep; -	write_lock(&set->lock);  	list_for_each_entry_safe(e, ep, &set->list, list) { +		struct ip_vs_dest *d; + +		d = rcu_dereference_protected(e->dest, 1);  		/*  		 * We don't kfree dest because it is referred either  		 * by its service or by the trash dest list.  		 
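With the per-set rwlock gone, e->dest accesses in the hunks below split into two flavors: rcu_dereference() on the schedule path and rcu_dereference_protected() where svc->sched_lock already serializes writers (the bare condition 1 mirrors this patch's own calls). A sketch of the distinction, with hypothetical example_* names:

#include <linux/rcupdate.h>

struct example_dest;

struct example_elem {
	struct example_dest __rcu *dest;
};

/* read side: caller is inside rcu_read_lock() */
static struct example_dest *example_get(struct example_elem *e)
{
	return rcu_dereference(e->dest);
}

/* update side: caller holds the serializing lock, so no read-side
 * critical section is required */
static struct example_dest *example_get_locked(struct example_elem *e)
{
	return rcu_dereference_protected(e->dest, 1);
}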
*/ -		atomic_dec(&e->dest->refcnt); -		list_del(&e->list); -		kfree(e); +		ip_vs_dest_put(d); +		list_del_rcu(&e->list); +		kfree_rcu(e, rcu_head);  	} -	write_unlock(&set->lock);  }  /* get weighted least-connection node in the destination set */ @@ -171,8 +179,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)  		return NULL;  	/* select the first destination server, whose weight > 0 */ -	list_for_each_entry(e, &set->list, list) { -		least = e->dest; +	list_for_each_entry_rcu(e, &set->list, list) { +		least = rcu_dereference(e->dest);  		if (least->flags & IP_VS_DEST_F_OVERLOAD)  			continue; @@ -186,8 +194,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)  	/* find the destination with the weighted least load */    nextstage: -	list_for_each_entry(e, &set->list, list) { -		dest = e->dest; +	list_for_each_entry_continue_rcu(e, &set->list, list) { +		dest = rcu_dereference(e->dest);  		if (dest->flags & IP_VS_DEST_F_OVERLOAD)  			continue; @@ -224,7 +232,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)  	/* select the first destination server, whose weight > 0 */  	list_for_each_entry(e, &set->list, list) { -		most = e->dest; +		most = rcu_dereference_protected(e->dest, 1);  		if (atomic_read(&most->weight) > 0) {  			moh = ip_vs_dest_conn_overhead(most);  			goto nextstage; @@ -234,8 +242,8 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)  	/* find the destination with the weighted most load */    nextstage: -	list_for_each_entry(e, &set->list, list) { -		dest = e->dest; +	list_for_each_entry_continue(e, &set->list, list) { +		dest = rcu_dereference_protected(e->dest, 1);  		doh = ip_vs_dest_conn_overhead(dest);  		/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */  		if ((moh * atomic_read(&dest->weight) < @@ -262,11 +270,12 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)   *      IP address and its destination server set   */  struct ip_vs_lblcr_entry { -	struct list_head        list; +	struct hlist_node       list;  	int			af;		/* address family */  	union nf_inet_addr      addr;           /* destination IP address */  	struct ip_vs_dest_set   set;            /* destination server set */  	unsigned long           lastuse;        /* last used time */ +	struct rcu_head		rcu_head;  }; @@ -274,12 +283,14 @@ struct ip_vs_lblcr_entry {   *      IPVS lblcr hash table   */  struct ip_vs_lblcr_table { -	struct list_head        bucket[IP_VS_LBLCR_TAB_SIZE];  /* hash bucket */ +	struct rcu_head		rcu_head; +	struct hlist_head	bucket[IP_VS_LBLCR_TAB_SIZE];  /* hash bucket */  	atomic_t                entries;        /* number of entries */  	int                     max_size;       /* maximum size of entries */  	struct timer_list       periodic_timer; /* collect stale entries */  	int                     rover;          /* rover for expire check */  	int                     counter;        /* counter for no expire */ +	bool			dead;  }; @@ -302,9 +313,9 @@ static ctl_table vs_vars_table[] = {  static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)  { -	list_del(&en->list); +	hlist_del_rcu(&en->list);  	ip_vs_dest_set_eraseall(&en->set); -	kfree(en); +	kfree_rcu(en, rcu_head);  } @@ -334,15 +345,12 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en)  {  	unsigned int hash = ip_vs_lblcr_hashkey(en->af, &en->addr); -	list_add(&en->list, &tbl->bucket[hash]); +	
hlist_add_head_rcu(&en->list, &tbl->bucket[hash]);  	atomic_inc(&tbl->entries);  } -/* - *  Get ip_vs_lblcr_entry associated with supplied parameters. Called under - *  read lock. - */ +/* Get ip_vs_lblcr_entry associated with supplied parameters. */  static inline struct ip_vs_lblcr_entry *  ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,  		const union nf_inet_addr *addr) @@ -350,7 +358,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,  	unsigned int hash = ip_vs_lblcr_hashkey(af, addr);  	struct ip_vs_lblcr_entry *en; -	list_for_each_entry(en, &tbl->bucket[hash], list) +	hlist_for_each_entry_rcu(en, &tbl->bucket[hash], list)  		if (ip_vs_addr_equal(af, &en->addr, addr))  			return en; @@ -360,7 +368,7 @@ ip_vs_lblcr_get(int af, struct ip_vs_lblcr_table *tbl,  /*   * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination - * IP address to a server. Called under write lock. + * IP address to a server. Called under spin lock.   */  static inline struct ip_vs_lblcr_entry *  ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, @@ -381,14 +389,14 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,  		/* initialize its dest set */  		atomic_set(&(en->set.size), 0);  		INIT_LIST_HEAD(&en->set.list); -		rwlock_init(&en->set.lock); + +		ip_vs_dest_set_insert(&en->set, dest, false);  		ip_vs_lblcr_hash(tbl, en); +		return en;  	} -	write_lock(&en->set.lock); -	ip_vs_dest_set_insert(&en->set, dest); -	write_unlock(&en->set.lock); +	ip_vs_dest_set_insert(&en->set, dest, true);  	return en;  } @@ -397,17 +405,21 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr,  /*   *      Flush all the entries of the specified table.   */ -static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl) +static void ip_vs_lblcr_flush(struct ip_vs_service *svc)  { +	struct ip_vs_lblcr_table *tbl = svc->sched_data;  	int i; -	struct ip_vs_lblcr_entry *en, *nxt; +	struct ip_vs_lblcr_entry *en; +	struct hlist_node *next; -	/* No locking required, only called during cleanup. 
*/ +	spin_lock_bh(&svc->sched_lock); +	tbl->dead = 1;  	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { -		list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { +		hlist_for_each_entry_safe(en, next, &tbl->bucket[i], list) {  			ip_vs_lblcr_free(en);  		}  	} +	spin_unlock_bh(&svc->sched_lock);  }  static int sysctl_lblcr_expiration(struct ip_vs_service *svc) @@ -425,13 +437,14 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)  	struct ip_vs_lblcr_table *tbl = svc->sched_data;  	unsigned long now = jiffies;  	int i, j; -	struct ip_vs_lblcr_entry *en, *nxt; +	struct ip_vs_lblcr_entry *en; +	struct hlist_node *next;  	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {  		j = (j + 1) & IP_VS_LBLCR_TAB_MASK; -		write_lock(&svc->sched_lock); -		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { +		spin_lock(&svc->sched_lock); +		hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {  			if (time_after(en->lastuse +  				       sysctl_lblcr_expiration(svc), now))  				continue; @@ -439,7 +452,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)  			ip_vs_lblcr_free(en);  			atomic_dec(&tbl->entries);  		} -		write_unlock(&svc->sched_lock); +		spin_unlock(&svc->sched_lock);  	}  	tbl->rover = j;  } @@ -463,7 +476,8 @@ static void ip_vs_lblcr_check_expire(unsigned long data)  	unsigned long now = jiffies;  	int goal;  	int i, j; -	struct ip_vs_lblcr_entry *en, *nxt; +	struct ip_vs_lblcr_entry *en; +	struct hlist_node *next;  	if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {  		/* do full expiration check */ @@ -484,8 +498,8 @@ static void ip_vs_lblcr_check_expire(unsigned long data)  	for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {  		j = (j + 1) & IP_VS_LBLCR_TAB_MASK; -		write_lock(&svc->sched_lock); -		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { +		spin_lock(&svc->sched_lock); +		hlist_for_each_entry_safe(en, next, &tbl->bucket[j], list) {  			if (time_before(now, en->lastuse+ENTRY_TIMEOUT))  				continue; @@ -493,7 +507,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data)  			atomic_dec(&tbl->entries);  			goal--;  		} -		write_unlock(&svc->sched_lock); +		spin_unlock(&svc->sched_lock);  		if (goal <= 0)  			break;  	} @@ -523,11 +537,12 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)  	 *    Initialize the hash buckets  	 */  	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { -		INIT_LIST_HEAD(&tbl->bucket[i]); +		INIT_HLIST_HEAD(&tbl->bucket[i]);  	}  	tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;  	tbl->rover = 0;  	tbl->counter = 1; +	tbl->dead = 0;  	/*  	 *    Hook periodic timer for garbage collection @@ -540,7 +555,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)  } -static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) +static void ip_vs_lblcr_done_svc(struct ip_vs_service *svc)  {  	struct ip_vs_lblcr_table *tbl = svc->sched_data; @@ -548,14 +563,12 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc)  	del_timer_sync(&tbl->periodic_timer);  	/* got to clean up table entries here */ -	ip_vs_lblcr_flush(tbl); +	ip_vs_lblcr_flush(svc);  	/* release the table itself */ -	kfree(tbl); +	kfree_rcu(tbl, rcu_head);  	IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n",  		  sizeof(*tbl)); - -	return 0;  } @@ -577,7 +590,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)  	 * The server with weight=0 is quiesced and will not receive any  	 * new connection.  	 
*/ -	list_for_each_entry(dest, &svc->destinations, n_list) { +	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {  		if (dest->flags & IP_VS_DEST_F_OVERLOAD)  			continue; @@ -593,7 +606,7 @@ __ip_vs_lblcr_schedule(struct ip_vs_service *svc)  	 *    Find the destination with the least load.  	 */    nextstage: -	list_for_each_entry_continue(dest, &svc->destinations, n_list) { +	list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {  		if (dest->flags & IP_VS_DEST_F_OVERLOAD)  			continue; @@ -627,7 +640,7 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc)  	if (atomic_read(&dest->activeconns) > atomic_read(&dest->weight)) {  		struct ip_vs_dest *d; -		list_for_each_entry(d, &svc->destinations, n_list) { +		list_for_each_entry_rcu(d, &svc->destinations, n_list) {  			if (atomic_read(&d->activeconns)*2  			    < atomic_read(&d->weight)) {  				return 1; @@ -646,7 +659,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)  {  	struct ip_vs_lblcr_table *tbl = svc->sched_data;  	struct ip_vs_iphdr iph; -	struct ip_vs_dest *dest = NULL; +	struct ip_vs_dest *dest;  	struct ip_vs_lblcr_entry *en;  	ip_vs_fill_iph_addr_only(svc->af, skb, &iph); @@ -654,53 +667,46 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)  	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);  	/* First look in our cache */ -	read_lock(&svc->sched_lock);  	en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);  	if (en) { -		/* We only hold a read lock, but this is atomic */  		en->lastuse = jiffies;  		/* Get the least loaded destination */ -		read_lock(&en->set.lock);  		dest = ip_vs_dest_set_min(&en->set); -		read_unlock(&en->set.lock);  		/* More than one destination + enough time passed by, cleanup */  		if (atomic_read(&en->set.size) > 1 && -				time_after(jiffies, en->set.lastmod + +		    time_after(jiffies, en->set.lastmod +  				sysctl_lblcr_expiration(svc))) { -			struct ip_vs_dest *m; +			spin_lock_bh(&svc->sched_lock); +			if (atomic_read(&en->set.size) > 1) { +				struct ip_vs_dest *m; -			write_lock(&en->set.lock); -			m = ip_vs_dest_set_max(&en->set); -			if (m) -				ip_vs_dest_set_erase(&en->set, m); -			write_unlock(&en->set.lock); +				m = ip_vs_dest_set_max(&en->set); +				if (m) +					ip_vs_dest_set_erase(&en->set, m); +			} +			spin_unlock_bh(&svc->sched_lock);  		}  		/* If the destination is not overloaded, use it */ -		if (dest && !is_overloaded(dest, svc)) { -			read_unlock(&svc->sched_lock); +		if (dest && !is_overloaded(dest, svc))  			goto out; -		}  		/* The cache entry is invalid, time to schedule */  		dest = __ip_vs_lblcr_schedule(svc);  		if (!dest) {  			ip_vs_scheduler_err(svc, "no destination available"); -			read_unlock(&svc->sched_lock);  			return NULL;  		}  		/* Update our cache entry */ -		write_lock(&en->set.lock); -		ip_vs_dest_set_insert(&en->set, dest); -		write_unlock(&en->set.lock); -	} -	read_unlock(&svc->sched_lock); - -	if (dest) +		spin_lock_bh(&svc->sched_lock); +		if (!tbl->dead) +			ip_vs_dest_set_insert(&en->set, dest, true); +		spin_unlock_bh(&svc->sched_lock);  		goto out; +	}  	/* No cache entry, time to schedule */  	dest = __ip_vs_lblcr_schedule(svc); @@ -710,9 +716,10 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)  	}  	/* If we fail to create a cache entry, we'll just use the valid dest */ -	write_lock(&svc->sched_lock); -	ip_vs_lblcr_new(tbl, &iph.daddr, dest); -	write_unlock(&svc->sched_lock); +	spin_lock_bh(&svc->sched_lock); +	if (!tbl->dead) +		
ip_vs_lblcr_new(tbl, &iph.daddr, dest); +	spin_unlock_bh(&svc->sched_lock);  out:  	IP_VS_DBG_BUF(6, "LBLCR: destination IP address %s --> server %s:%d\n", @@ -814,6 +821,7 @@ static void __exit ip_vs_lblcr_cleanup(void)  {  	unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);  	unregister_pernet_subsys(&ip_vs_lblcr_ops); +	synchronize_rcu();  } diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index f391819c0cc..5128e338a74 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -42,7 +42,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)  	 * served, but no new connection is assigned to the server.  	 */ -	list_for_each_entry(dest, &svc->destinations, n_list) { +	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {  		if ((dest->flags & IP_VS_DEST_F_OVERLOAD) ||  		    atomic_read(&dest->weight) == 0)  			continue; @@ -84,6 +84,7 @@ static int __init ip_vs_lc_init(void)  static void __exit ip_vs_lc_cleanup(void)  {  	unregister_ip_vs_scheduler(&ip_vs_lc_scheduler); +	synchronize_rcu();  }  module_init(ip_vs_lc_init); diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index 984d9c137d8..646cfd4baa7 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -75,7 +75,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)  	 * new connections.  	 */ -	list_for_each_entry(dest, &svc->destinations, n_list) { +	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {  		if (dest->flags & IP_VS_DEST_F_OVERLOAD ||  		    !atomic_read(&dest->weight)) @@ -133,6 +133,7 @@ static int __init ip_vs_nq_init(void)  static void __exit ip_vs_nq_cleanup(void)  {  	unregister_ip_vs_scheduler(&ip_vs_nq_scheduler); +	synchronize_rcu();  }  module_init(ip_vs_nq_init); diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index 5cf859ccb31..1a82b29ce8e 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c @@ -13,20 +13,8 @@  /* IPVS pe list */  static LIST_HEAD(ip_vs_pe); -/* lock for service table */ -static DEFINE_SPINLOCK(ip_vs_pe_lock); - -/* Bind a service with a pe */ -void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe) -{ -	svc->pe = pe; -} - -/* Unbind a service from its pe */ -void ip_vs_unbind_pe(struct ip_vs_service *svc) -{ -	svc->pe = NULL; -} +/* semaphore for IPVS PEs. 
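The PE list conversion below follows the standard registration-list recipe: writers are rare and serialize on a mutex, while lookups walk the list under RCU alone. A sketch with hypothetical example_* names:

#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/string.h>

struct example_pe {
	struct list_head	n_list;
	const char		*name;
};

static LIST_HEAD(example_pe_list);
static DEFINE_MUTEX(example_pe_mutex);

/* writers serialize on the mutex; readers are never blocked */
static void example_pe_register(struct example_pe *pe)
{
	mutex_lock(&example_pe_mutex);
	list_add_rcu(&pe->n_list, &example_pe_list);
	mutex_unlock(&example_pe_mutex);
}

/* readers only need rcu_read_lock(), held by the caller here */
static struct example_pe *example_pe_find(const char *name)
{
	struct example_pe *pe;

	list_for_each_entry_rcu(pe, &example_pe_list, n_list)
		if (!strcmp(pe->name, name))
			return pe;
	return NULL;
}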
*/ +static DEFINE_MUTEX(ip_vs_pe_mutex);  /* Get pe in the pe list by name */  struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) @@ -36,9 +24,8 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)  	IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,  		  pe_name); -	spin_lock_bh(&ip_vs_pe_lock); - -	list_for_each_entry(pe, &ip_vs_pe, n_list) { +	rcu_read_lock(); +	list_for_each_entry_rcu(pe, &ip_vs_pe, n_list) {  		/* Test and get the modules atomically */  		if (pe->module &&  		    !try_module_get(pe->module)) { @@ -47,14 +34,14 @@ struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)  		}  		if (strcmp(pe_name, pe->name)==0) {  			/* HIT */ -			spin_unlock_bh(&ip_vs_pe_lock); +			rcu_read_unlock();  			return pe;  		}  		if (pe->module)  			module_put(pe->module);  	} +	rcu_read_unlock(); -	spin_unlock_bh(&ip_vs_pe_lock);  	return NULL;  } @@ -83,22 +70,13 @@ int register_ip_vs_pe(struct ip_vs_pe *pe)  	/* increase the module use count */  	ip_vs_use_count_inc(); -	spin_lock_bh(&ip_vs_pe_lock); - -	if (!list_empty(&pe->n_list)) { -		spin_unlock_bh(&ip_vs_pe_lock); -		ip_vs_use_count_dec(); -		pr_err("%s(): [%s] pe already linked\n", -		       __func__, pe->name); -		return -EINVAL; -	} - +	mutex_lock(&ip_vs_pe_mutex);  	/* Make sure that the pe with this name doesn't exist  	 * in the pe list.  	 */  	list_for_each_entry(tmp, &ip_vs_pe, n_list) {  		if (strcmp(tmp->name, pe->name) == 0) { -			spin_unlock_bh(&ip_vs_pe_lock); +			mutex_unlock(&ip_vs_pe_mutex);  			ip_vs_use_count_dec();  			pr_err("%s(): [%s] pe already existed "  			       "in the system\n", __func__, pe->name); @@ -106,8 +84,8 @@ int register_ip_vs_pe(struct ip_vs_pe *pe)  		}  	}  	/* Add it into the d-linked pe list */ -	list_add(&pe->n_list, &ip_vs_pe); -	spin_unlock_bh(&ip_vs_pe_lock); +	list_add_rcu(&pe->n_list, &ip_vs_pe); +	mutex_unlock(&ip_vs_pe_mutex);  	pr_info("[%s] pe registered.\n", pe->name); @@ -118,17 +96,10 @@ EXPORT_SYMBOL_GPL(register_ip_vs_pe);  /* Unregister a pe from the pe list */  int unregister_ip_vs_pe(struct ip_vs_pe *pe)  { -	spin_lock_bh(&ip_vs_pe_lock); -	if (list_empty(&pe->n_list)) { -		spin_unlock_bh(&ip_vs_pe_lock); -		pr_err("%s(): [%s] pe is not in the list. 
failed\n", -		       __func__, pe->name); -		return -EINVAL; -	} - +	mutex_lock(&ip_vs_pe_mutex);  	/* Remove it from the d-linked pe list */ -	list_del(&pe->n_list); -	spin_unlock_bh(&ip_vs_pe_lock); +	list_del_rcu(&pe->n_list); +	mutex_unlock(&ip_vs_pe_mutex);  	/* decrease the module use count */  	ip_vs_use_count_dec(); diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 12475ef88da..9ef22bdce9f 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -13,7 +13,8 @@ static const char *ip_vs_dbg_callid(char *buf, size_t buf_len,  				    const char *callid, size_t callid_len,  				    int *idx)  { -	size_t len = min(min(callid_len, (size_t)64), buf_len - *idx - 1); +	size_t max_len = 64; +	size_t len = min3(max_len, callid_len, buf_len - *idx - 1);  	memcpy(buf + *idx, callid, len);  	buf[*idx+len] = '\0';  	*idx += len + 1; @@ -37,14 +38,10 @@ static int get_callid(const char *dptr, unsigned int dataoff,  		if (ret > 0)  			break;  		if (!ret) -			return 0; +			return -EINVAL;  		dataoff += *matchoff;  	} -	/* Empty callid is useless */ -	if (!*matchlen) -		return -EINVAL; -  	/* Too large is useless */  	if (*matchlen > IP_VS_PEDATA_MAXLEN)  		return -EINVAL; @@ -172,6 +169,7 @@ static int __init ip_vs_sip_init(void)  static void __exit ip_vs_sip_cleanup(void)  {  	unregister_ip_vs_pe(&ip_vs_sip_pe); +	synchronize_rcu();  }  module_init(ip_vs_sip_init); diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index cd1d7298f7b..86464881cd2 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -27,9 +27,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,  	if (sch == NULL)  		return 0;  	net = skb_net(skb); +	rcu_read_lock();  	if ((sch->type == SCTP_CID_INIT) && -	    (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, -				     &iph->daddr, sh->dest))) { +	    (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, +				      &iph->daddr, sh->dest))) {  		int ignored;  		if (ip_vs_todrop(net_ipvs(net))) { @@ -37,7 +38,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,  			 * It seems that we are very loaded.  			 
* We have to drop this packet :(  			 */ -			ip_vs_service_put(svc); +			rcu_read_unlock();  			*verdict = NF_DROP;  			return 0;  		} @@ -49,14 +50,13 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,  		if (!*cpp && ignored <= 0) {  			if (!ignored)  				*verdict = ip_vs_leave(svc, skb, pd, iph); -			else { -				ip_vs_service_put(svc); +			else  				*verdict = NF_DROP; -			} +			rcu_read_unlock();  			return 0;  		} -		ip_vs_service_put(svc);  	} +	rcu_read_unlock();  	/* NF_ACCEPT */  	return 1;  } @@ -208,7 +208,7 @@ enum ipvs_sctp_event_t {  	IP_VS_SCTP_EVE_LAST  }; -static enum ipvs_sctp_event_t sctp_events[255] = { +static enum ipvs_sctp_event_t sctp_events[256] = {  	IP_VS_SCTP_EVE_DATA_CLI,  	IP_VS_SCTP_EVE_INIT_CLI,  	IP_VS_SCTP_EVE_INIT_ACK_CLI, @@ -994,9 +994,9 @@ static void  sctp_state_transition(struct ip_vs_conn *cp, int direction,  		const struct sk_buff *skb, struct ip_vs_proto_data *pd)  { -	spin_lock(&cp->lock); +	spin_lock_bh(&cp->lock);  	set_sctp_state(pd, cp, direction, skb); -	spin_unlock(&cp->lock); +	spin_unlock_bh(&cp->lock);  }  static inline __u16 sctp_app_hashkey(__be16 port) @@ -1016,30 +1016,25 @@ static int sctp_register_app(struct net *net, struct ip_vs_app *inc)  	hash = sctp_app_hashkey(port); -	spin_lock_bh(&ipvs->sctp_app_lock);  	list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {  		if (i->port == port) {  			ret = -EEXIST;  			goto out;  		}  	} -	list_add(&inc->p_list, &ipvs->sctp_apps[hash]); +	list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);  	atomic_inc(&pd->appcnt);  out: -	spin_unlock_bh(&ipvs->sctp_app_lock);  	return ret;  }  static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)  { -	struct netns_ipvs *ipvs = net_ipvs(net);  	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); -	spin_lock_bh(&ipvs->sctp_app_lock);  	atomic_dec(&pd->appcnt); -	list_del(&inc->p_list); -	spin_unlock_bh(&ipvs->sctp_app_lock); +	list_del_rcu(&inc->p_list);  }  static int sctp_app_conn_bind(struct ip_vs_conn *cp) @@ -1055,12 +1050,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)  	/* Lookup application incarnations and bind the right one */  	hash = sctp_app_hashkey(cp->vport); -	spin_lock(&ipvs->sctp_app_lock); -	list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) { +	rcu_read_lock(); +	list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {  		if (inc->port == cp->vport) {  			if (unlikely(!ip_vs_app_inc_get(inc)))  				break; -			spin_unlock(&ipvs->sctp_app_lock); +			rcu_read_unlock();  			IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"  					"%s:%u to app %s on port %u\n", @@ -1076,7 +1071,7 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp)  			goto out;  		}  	} -	spin_unlock(&ipvs->sctp_app_lock); +	rcu_read_unlock();  out:  	return result;  } @@ -1090,7 +1085,6 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)  	struct netns_ipvs *ipvs = net_ipvs(net);  	ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); -	spin_lock_init(&ipvs->sctp_app_lock);  	pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,  							sizeof(sctp_timeouts));  	if (!pd->timeout_table) diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 9af653a7582..50a15944c6c 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -47,9 +47,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,  	}  	net = skb_net(skb);  	/* No !th->ack check to 
allow scheduling on SYN+ACK for Active FTP */ +	rcu_read_lock();  	if (th->syn && -	    (svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, -				     &iph->daddr, th->dest))) { +	    (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, +				      &iph->daddr, th->dest))) {  		int ignored;  		if (ip_vs_todrop(net_ipvs(net))) { @@ -57,7 +58,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,  			 * It seems that we are very loaded.  			 * We have to drop this packet :(  			 */ -			ip_vs_service_put(svc); +			rcu_read_unlock();  			*verdict = NF_DROP;  			return 0;  		} @@ -70,14 +71,13 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,  		if (!*cpp && ignored <= 0) {  			if (!ignored)  				*verdict = ip_vs_leave(svc, skb, pd, iph); -			else { -				ip_vs_service_put(svc); +			else  				*verdict = NF_DROP; -			} +			rcu_read_unlock();  			return 0;  		} -		ip_vs_service_put(svc);  	} +	rcu_read_unlock();  	/* NF_ACCEPT */  	return 1;  } @@ -557,9 +557,9 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,  	if (th == NULL)  		return; -	spin_lock(&cp->lock); +	spin_lock_bh(&cp->lock);  	set_tcp_state(pd, cp, direction, th); -	spin_unlock(&cp->lock); +	spin_unlock_bh(&cp->lock);  }  static inline __u16 tcp_app_hashkey(__be16 port) @@ -580,18 +580,16 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc)  	hash = tcp_app_hashkey(port); -	spin_lock_bh(&ipvs->tcp_app_lock);  	list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {  		if (i->port == port) {  			ret = -EEXIST;  			goto out;  		}  	} -	list_add(&inc->p_list, &ipvs->tcp_apps[hash]); +	list_add_rcu(&inc->p_list, &ipvs->tcp_apps[hash]);  	atomic_inc(&pd->appcnt);    out: -	spin_unlock_bh(&ipvs->tcp_app_lock);  	return ret;  } @@ -599,13 +597,10 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc)  static void  tcp_unregister_app(struct net *net, struct ip_vs_app *inc)  { -	struct netns_ipvs *ipvs = net_ipvs(net);  	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); -	spin_lock_bh(&ipvs->tcp_app_lock);  	atomic_dec(&pd->appcnt); -	list_del(&inc->p_list); -	spin_unlock_bh(&ipvs->tcp_app_lock); +	list_del_rcu(&inc->p_list);  } @@ -624,12 +619,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)  	/* Lookup application incarnations and bind the right one */  	hash = tcp_app_hashkey(cp->vport); -	spin_lock(&ipvs->tcp_app_lock); -	list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) { +	rcu_read_lock(); +	list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) {  		if (inc->port == cp->vport) {  			if (unlikely(!ip_vs_app_inc_get(inc)))  				break; -			spin_unlock(&ipvs->tcp_app_lock); +			rcu_read_unlock();  			IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"  				      "%s:%u to app %s on port %u\n", @@ -646,7 +641,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)  			goto out;  		}  	} -	spin_unlock(&ipvs->tcp_app_lock); +	rcu_read_unlock();    out:  	return result; @@ -660,11 +655,11 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)  {  	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); -	spin_lock(&cp->lock); +	spin_lock_bh(&cp->lock);  	cp->state = IP_VS_TCP_S_LISTEN;  	cp->timeout = (pd ? 
pd->timeout_table[IP_VS_TCP_S_LISTEN]  			   : tcp_timeouts[IP_VS_TCP_S_LISTEN]); -	spin_unlock(&cp->lock); +	spin_unlock_bh(&cp->lock);  }  /* --------------------------------------------- @@ -676,7 +671,6 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)  	struct netns_ipvs *ipvs = net_ipvs(net);  	ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); -	spin_lock_init(&ipvs->tcp_app_lock);  	pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,  							sizeof(tcp_timeouts));  	if (!pd->timeout_table) diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 503a842c90d..b62a3c0ff9b 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -44,8 +44,9 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,  		return 0;  	}  	net = skb_net(skb); -	svc = ip_vs_service_get(net, af, skb->mark, iph->protocol, -				&iph->daddr, uh->dest); +	rcu_read_lock(); +	svc = ip_vs_service_find(net, af, skb->mark, iph->protocol, +				 &iph->daddr, uh->dest);  	if (svc) {  		int ignored; @@ -54,7 +55,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,  			 * It seems that we are very loaded.  			 * We have to drop this packet :(  			 */ -			ip_vs_service_put(svc); +			rcu_read_unlock();  			*verdict = NF_DROP;  			return 0;  		} @@ -67,14 +68,13 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,  		if (!*cpp && ignored <= 0) {  			if (!ignored)  				*verdict = ip_vs_leave(svc, skb, pd, iph); -			else { -				ip_vs_service_put(svc); +			else  				*verdict = NF_DROP; -			} +			rcu_read_unlock();  			return 0;  		} -		ip_vs_service_put(svc);  	} +	rcu_read_unlock();  	/* NF_ACCEPT */  	return 1;  } @@ -359,19 +359,16 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc)  	hash = udp_app_hashkey(port); - -	spin_lock_bh(&ipvs->udp_app_lock);  	list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {  		if (i->port == port) {  			ret = -EEXIST;  			goto out;  		}  	} -	list_add(&inc->p_list, &ipvs->udp_apps[hash]); +	list_add_rcu(&inc->p_list, &ipvs->udp_apps[hash]);  	atomic_inc(&pd->appcnt);    out: -	spin_unlock_bh(&ipvs->udp_app_lock);  	return ret;  } @@ -380,12 +377,9 @@ static void  udp_unregister_app(struct net *net, struct ip_vs_app *inc)  {  	struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); -	struct netns_ipvs *ipvs = net_ipvs(net); -	spin_lock_bh(&ipvs->udp_app_lock);  	atomic_dec(&pd->appcnt); -	list_del(&inc->p_list); -	spin_unlock_bh(&ipvs->udp_app_lock); +	list_del_rcu(&inc->p_list);  } @@ -403,12 +397,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)  	/* Lookup application incarnations and bind the right one */  	hash = udp_app_hashkey(cp->vport); -	spin_lock(&ipvs->udp_app_lock); -	list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) { +	rcu_read_lock(); +	list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) {  		if (inc->port == cp->vport) {  			if (unlikely(!ip_vs_app_inc_get(inc)))  				break; -			spin_unlock(&ipvs->udp_app_lock); +			rcu_read_unlock();  			IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"  				      "%s:%u to app %s on port %u\n", @@ -425,7 +419,7 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)  			goto out;  		}  	} -	spin_unlock(&ipvs->udp_app_lock); +	rcu_read_unlock();    out:  	return result; @@ -467,7 +461,6 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd)  	struct netns_ipvs *ipvs = 
net_ipvs(net);  	ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); -	spin_lock_init(&ipvs->udp_app_lock);  	pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,  							sizeof(udp_timeouts));  	if (!pd->timeout_table) diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index c49b388d108..c35986c793d 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -35,9 +35,18 @@ static int ip_vs_rr_init_svc(struct ip_vs_service *svc)  } -static int ip_vs_rr_update_svc(struct ip_vs_service *svc) +static int ip_vs_rr_del_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest)  { -	svc->sched_data = &svc->destinations; +	struct list_head *p; + +	spin_lock_bh(&svc->sched_lock); +	p = (struct list_head *) svc->sched_data; +	/* dest is already unlinked, so p->prev is not valid but +	 * p->next is valid, use it to reach previous entry. +	 */ +	if (p == &dest->n_list) +		svc->sched_data = p->next->prev; +	spin_unlock_bh(&svc->sched_lock);  	return 0;  } @@ -48,36 +57,41 @@ static int ip_vs_rr_update_svc(struct ip_vs_service *svc)  static struct ip_vs_dest *  ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)  { -	struct list_head *p, *q; -	struct ip_vs_dest *dest; +	struct list_head *p; +	struct ip_vs_dest *dest, *last; +	int pass = 0;  	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); -	write_lock(&svc->sched_lock); -	p = (struct list_head *)svc->sched_data; -	p = p->next; -	q = p; +	spin_lock_bh(&svc->sched_lock); +	p = (struct list_head *) svc->sched_data; +	last = dest = list_entry(p, struct ip_vs_dest, n_list); +  	do { -		/* skip list head */ -		if (q == &svc->destinations) { -			q = q->next; -			continue; +		list_for_each_entry_continue_rcu(dest, +						 &svc->destinations, +						 n_list) { +			if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && +			    atomic_read(&dest->weight) > 0) +				/* HIT */ +				goto out; +			if (dest == last) +				goto stop;  		} +		pass++; +		/* Previous dest could be unlinked, do not loop forever. +		 * If we stay at head there is no need for 2nd pass. 
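Condensing the rewritten round-robin walk above (identifiers as in the patch; a sketch, not the complete function): sched_data now remembers the last-used position as a dest rather than a bare list cursor, the walk resumes from it with the RCU continue-iterator, and at most one extra pass guards against the saved dest having been unlinked in the meantime, which is also why ip_vs_rr_del_dest() above repoints sched_data when the current dest is removed.

	spin_lock_bh(&svc->sched_lock);
	p = (struct list_head *)svc->sched_data;	/* saved position */
	last = dest = list_entry(p, struct ip_vs_dest, n_list);
	do {
		list_for_each_entry_continue_rcu(dest, &svc->destinations,
						 n_list) {
			if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
			    atomic_read(&dest->weight) > 0)
				goto out;		/* HIT */
			if (dest == last)
				goto stop;		/* full circle */
		}
		pass++;
	} while (pass < 2 && p != &svc->destinations);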
+		 */ +	} while (pass < 2 && p != &svc->destinations); -		dest = list_entry(q, struct ip_vs_dest, n_list); -		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && -		    atomic_read(&dest->weight) > 0) -			/* HIT */ -			goto out; -		q = q->next; -	} while (q != p); -	write_unlock(&svc->sched_lock); +stop: +	spin_unlock_bh(&svc->sched_lock);  	ip_vs_scheduler_err(svc, "no destination available");  	return NULL;    out: -	svc->sched_data = q; -	write_unlock(&svc->sched_lock); +	svc->sched_data = &dest->n_list; +	spin_unlock_bh(&svc->sched_lock);  	IP_VS_DBG_BUF(6, "RR: server %s:%u "  		      "activeconns %d refcnt %d weight %d\n",  		      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), @@ -94,7 +108,8 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = {  	.module =		THIS_MODULE,  	.n_list =		LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),  	.init_service =		ip_vs_rr_init_svc, -	.update_service =	ip_vs_rr_update_svc, +	.add_dest =		NULL, +	.del_dest =		ip_vs_rr_del_dest,  	.schedule =		ip_vs_rr_schedule,  }; @@ -106,6 +121,7 @@ static int __init ip_vs_rr_init(void)  static void __exit ip_vs_rr_cleanup(void)  {  	unregister_ip_vs_scheduler(&ip_vs_rr_scheduler); +	synchronize_rcu();  }  module_init(ip_vs_rr_init); diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index d6bf20d6cdb..4dbcda6258b 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -35,8 +35,8 @@ EXPORT_SYMBOL(ip_vs_scheduler_err);   */  static LIST_HEAD(ip_vs_schedulers); -/* lock for service table */ -static DEFINE_SPINLOCK(ip_vs_sched_lock); +/* semaphore for schedulers */ +static DEFINE_MUTEX(ip_vs_sched_mutex);  /* @@ -47,8 +47,6 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,  {  	int ret; -	svc->scheduler = scheduler; -  	if (scheduler->init_service) {  		ret = scheduler->init_service(svc);  		if (ret) { @@ -56,7 +54,7 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,  			return ret;  		}  	} - +	rcu_assign_pointer(svc->scheduler, scheduler);  	return 0;  } @@ -64,22 +62,19 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,  /*   *  Unbind a service with its scheduler   */ -int ip_vs_unbind_scheduler(struct ip_vs_service *svc) +void ip_vs_unbind_scheduler(struct ip_vs_service *svc, +			    struct ip_vs_scheduler *sched)  { -	struct ip_vs_scheduler *sched = svc->scheduler; +	struct ip_vs_scheduler *cur_sched; -	if (!sched) -		return 0; +	cur_sched = rcu_dereference_protected(svc->scheduler, 1); +	/* This check proves that old 'sched' was installed */ +	if (!cur_sched) +		return; -	if (sched->done_service) { -		if (sched->done_service(svc) != 0) { -			pr_err("%s(): done error\n", __func__); -			return -EINVAL; -		} -	} - -	svc->scheduler = NULL; -	return 0; +	if (sched->done_service) +		sched->done_service(svc); +	/* svc->scheduler can not be set to NULL */  } @@ -92,7 +87,7 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)  	IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name); -	spin_lock_bh(&ip_vs_sched_lock); +	mutex_lock(&ip_vs_sched_mutex);  	list_for_each_entry(sched, &ip_vs_schedulers, n_list) {  		/* @@ -106,14 +101,14 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)  		}  		if (strcmp(sched_name, sched->name)==0) {  			/* HIT */ -			spin_unlock_bh(&ip_vs_sched_lock); +			mutex_unlock(&ip_vs_sched_mutex);  			return sched;  		}  		if (sched->module)  			module_put(sched->module);  	} -	spin_unlock_bh(&ip_vs_sched_lock); +	mutex_unlock(&ip_vs_sched_mutex);  	
return NULL;  } @@ -153,21 +148,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)  void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)  { +	struct ip_vs_scheduler *sched; + +	sched = rcu_dereference(svc->scheduler);  	if (svc->fwmark) {  		IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n", -			     svc->scheduler->name, svc->fwmark, -			     svc->fwmark, msg); +			     sched->name, svc->fwmark, svc->fwmark, msg);  #ifdef CONFIG_IP_VS_IPV6  	} else if (svc->af == AF_INET6) {  		IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n", -			     svc->scheduler->name, -			     ip_vs_proto_name(svc->protocol), +			     sched->name, ip_vs_proto_name(svc->protocol),  			     &svc->addr.in6, ntohs(svc->port), msg);  #endif  	} else {  		IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n", -			     svc->scheduler->name, -			     ip_vs_proto_name(svc->protocol), +			     sched->name, ip_vs_proto_name(svc->protocol),  			     &svc->addr.ip, ntohs(svc->port), msg);  	}  } @@ -192,10 +187,10 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)  	/* increase the module use count */  	ip_vs_use_count_inc(); -	spin_lock_bh(&ip_vs_sched_lock); +	mutex_lock(&ip_vs_sched_mutex);  	if (!list_empty(&scheduler->n_list)) { -		spin_unlock_bh(&ip_vs_sched_lock); +		mutex_unlock(&ip_vs_sched_mutex);  		ip_vs_use_count_dec();  		pr_err("%s(): [%s] scheduler already linked\n",  		       __func__, scheduler->name); @@ -208,7 +203,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)  	 */  	list_for_each_entry(sched, &ip_vs_schedulers, n_list) {  		if (strcmp(scheduler->name, sched->name) == 0) { -			spin_unlock_bh(&ip_vs_sched_lock); +			mutex_unlock(&ip_vs_sched_mutex);  			ip_vs_use_count_dec();  			pr_err("%s(): [%s] scheduler already existed "  			       "in the system\n", __func__, scheduler->name); @@ -219,7 +214,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)  	 *	Add it into the d-linked scheduler list  	 */  	list_add(&scheduler->n_list, &ip_vs_schedulers); -	spin_unlock_bh(&ip_vs_sched_lock); +	mutex_unlock(&ip_vs_sched_mutex);  	pr_info("[%s] scheduler registered.\n", scheduler->name); @@ -237,9 +232,9 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)  		return -EINVAL;  	} -	spin_lock_bh(&ip_vs_sched_lock); +	mutex_lock(&ip_vs_sched_mutex);  	if (list_empty(&scheduler->n_list)) { -		spin_unlock_bh(&ip_vs_sched_lock); +		mutex_unlock(&ip_vs_sched_mutex);  		pr_err("%s(): [%s] scheduler is not in the list. failed\n",  		       __func__, scheduler->name);  		return -EINVAL; @@ -249,7 +244,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)  	 *	Remove it from the d-linked scheduler list  	 */  	list_del(&scheduler->n_list); -	spin_unlock_bh(&ip_vs_sched_lock); +	mutex_unlock(&ip_vs_sched_mutex);  	/* decrease the module use count */  	ip_vs_use_count_dec(); diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index 89ead246ed3..f3205925359 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -79,7 +79,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)  	 * new connections.  	 */ -	list_for_each_entry(dest, &svc->destinations, n_list) { +	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {  		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&  		    atomic_read(&dest->weight) > 0) {  			least = dest; @@ -94,7 +94,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)  	 *    Find the destination with the least load.  	 
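The comment just above opens stage two of the SED scan. Condensed, the two-stage pattern this scheduler (and WLC further down) now runs under RCU looks as follows (identifiers as in the patch; a sketch, not the complete function):

	/* stage 1: seed 'least' with the first usable dest */
	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
		    atomic_read(&dest->weight) > 0) {
			least = dest;
			loh = ip_vs_sed_dest_overhead(least);
			goto nextstage;
		}
	}
	return NULL;				/* nothing usable */
  nextstage:
	/* stage 2: continue from 'least', keeping the lowest
	 * overhead per weight; cross-multiplied to avoid division
	 */
	list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
			continue;
		doh = ip_vs_sed_dest_overhead(dest);
		if (loh * atomic_read(&dest->weight) >
		    doh * atomic_read(&least->weight)) {
			least = dest;
			loh = doh;
		}
	}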
*/    nextstage: -	list_for_each_entry_continue(dest, &svc->destinations, n_list) { +	list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {  		if (dest->flags & IP_VS_DEST_F_OVERLOAD)  			continue;  		doh = ip_vs_sed_dest_overhead(dest); @@ -134,6 +134,7 @@ static int __init ip_vs_sed_init(void)  static void __exit ip_vs_sed_cleanup(void)  {  	unregister_ip_vs_scheduler(&ip_vs_sed_scheduler); +	synchronize_rcu();  }  module_init(ip_vs_sed_init); diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index e3312699462..0df269d7c99 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -53,7 +53,7 @@   *      IPVS SH bucket   */  struct ip_vs_sh_bucket { -	struct ip_vs_dest       *dest;          /* real server (cache) */ +	struct ip_vs_dest __rcu	*dest;	/* real server (cache) */  };  /* @@ -66,6 +66,10 @@ struct ip_vs_sh_bucket {  #define IP_VS_SH_TAB_SIZE               (1 << IP_VS_SH_TAB_BITS)  #define IP_VS_SH_TAB_MASK               (IP_VS_SH_TAB_SIZE - 1) +struct ip_vs_sh_state { +	struct ip_vs_sh_bucket		buckets[IP_VS_SH_TAB_SIZE]; +	struct rcu_head			rcu_head; +};  /*   *	Returns hash value for IPVS SH entry @@ -87,10 +91,9 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad   *      Get ip_vs_dest associated with supplied parameters.   */  static inline struct ip_vs_dest * -ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl, -	     const union nf_inet_addr *addr) +ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr)  { -	return (tbl[ip_vs_sh_hashkey(af, addr)]).dest; +	return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest);  } @@ -98,27 +101,32 @@ ip_vs_sh_get(int af, struct ip_vs_sh_bucket *tbl,   *      Assign all the hash buckets of the specified table with the service.   */  static int -ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc) +ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc)  {  	int i;  	struct ip_vs_sh_bucket *b;  	struct list_head *p;  	struct ip_vs_dest *dest;  	int d_count; +	bool empty; -	b = tbl; +	b = &s->buckets[0];  	p = &svc->destinations; +	empty = list_empty(p);  	d_count = 0;  	for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { -		if (list_empty(p)) { -			b->dest = NULL; -		} else { +		dest = rcu_dereference_protected(b->dest, 1); +		if (dest) +			ip_vs_dest_put(dest); +		if (empty) +			RCU_INIT_POINTER(b->dest, NULL); +		else {  			if (p == &svc->destinations)  				p = p->next;  			dest = list_entry(p, struct ip_vs_dest, n_list); -			atomic_inc(&dest->refcnt); -			b->dest = dest; +			ip_vs_dest_hold(dest); +			RCU_INIT_POINTER(b->dest, dest);  			IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n",  				      i, IP_VS_DBG_ADDR(svc->af, &dest->addr), @@ -140,16 +148,18 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)  /*   *      Flush all the hash buckets of the specified table.   
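Before the flush routine below, note the update-side discipline the SH rewrite above establishes for the __rcu bucket pointers: readers only ever go through rcu_dereference() (ip_vs_sh_get), while writers, already serialized by the caller, swap a bucket's dest like this (condensed; new_dest stands in for the entry picked from svc->destinations):

	dest = rcu_dereference_protected(b->dest, 1);	/* writer side */
	if (dest)
		ip_vs_dest_put(dest);		/* release the old reference */
	ip_vs_dest_hold(new_dest);		/* take a reference first */
	RCU_INIT_POINTER(b->dest, new_dest);	/* then publish */

RCU_INIT_POINTER() rather than rcu_assign_pointer() is enough here because the dest being published is a long-lived, fully initialized object, not something freshly constructed that readers could observe half-built.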
*/ -static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl) +static void ip_vs_sh_flush(struct ip_vs_sh_state *s)  {  	int i;  	struct ip_vs_sh_bucket *b; +	struct ip_vs_dest *dest; -	b = tbl; +	b = &s->buckets[0];  	for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { -		if (b->dest) { -			atomic_dec(&b->dest->refcnt); -			b->dest = NULL; +		dest = rcu_dereference_protected(b->dest, 1); +		if (dest) { +			ip_vs_dest_put(dest); +			RCU_INIT_POINTER(b->dest, NULL);  		}  		b++;  	} @@ -158,51 +168,46 @@ static void ip_vs_sh_flush(struct ip_vs_sh_bucket *tbl)  static int ip_vs_sh_init_svc(struct ip_vs_service *svc)  { -	struct ip_vs_sh_bucket *tbl; +	struct ip_vs_sh_state *s;  	/* allocate the SH table for this service */ -	tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE, -		      GFP_KERNEL); -	if (tbl == NULL) +	s = kzalloc(sizeof(struct ip_vs_sh_state), GFP_KERNEL); +	if (s == NULL)  		return -ENOMEM; -	svc->sched_data = tbl; +	svc->sched_data = s;  	IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "  		  "current service\n",  		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); -	/* assign the hash buckets with the updated service */ -	ip_vs_sh_assign(tbl, svc); +	/* assign the hash buckets with current dests */ +	ip_vs_sh_reassign(s, svc);  	return 0;  } -static int ip_vs_sh_done_svc(struct ip_vs_service *svc) +static void ip_vs_sh_done_svc(struct ip_vs_service *svc)  { -	struct ip_vs_sh_bucket *tbl = svc->sched_data; +	struct ip_vs_sh_state *s = svc->sched_data;  	/* got to clean up hash buckets here */ -	ip_vs_sh_flush(tbl); +	ip_vs_sh_flush(s);  	/* release the table itself */ -	kfree(svc->sched_data); +	kfree_rcu(s, rcu_head);  	IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",  		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); - -	return 0;  } -static int ip_vs_sh_update_svc(struct ip_vs_service *svc) +static int ip_vs_sh_dest_changed(struct ip_vs_service *svc, +				 struct ip_vs_dest *dest)  { -	struct ip_vs_sh_bucket *tbl = svc->sched_data; - -	/* got to clean up hash buckets here */ -	ip_vs_sh_flush(tbl); +	struct ip_vs_sh_state *s = svc->sched_data;  	/* assign the hash buckets with the updated service */ -	ip_vs_sh_assign(tbl, svc); +	ip_vs_sh_reassign(s, svc);  	return 0;  } @@ -225,15 +230,15 @@ static struct ip_vs_dest *  ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)  {  	struct ip_vs_dest *dest; -	struct ip_vs_sh_bucket *tbl; +	struct ip_vs_sh_state *s;  	struct ip_vs_iphdr iph;  	ip_vs_fill_iph_addr_only(svc->af, skb, &iph);  	IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); -	tbl = (struct ip_vs_sh_bucket *)svc->sched_data; -	dest = ip_vs_sh_get(svc->af, tbl, &iph.saddr); +	s = (struct ip_vs_sh_state *) svc->sched_data; +	dest = ip_vs_sh_get(svc->af, s, &iph.saddr);  	if (!dest  	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)  	    || atomic_read(&dest->weight) <= 0 @@ -262,7 +267,9 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler =  	.n_list	 =		LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),  	.init_service =		ip_vs_sh_init_svc,  	.done_service =		ip_vs_sh_done_svc, -	.update_service =	ip_vs_sh_update_svc, +	.add_dest =		ip_vs_sh_dest_changed, +	.del_dest =		ip_vs_sh_dest_changed, +	.upd_dest =		ip_vs_sh_dest_changed,  	.schedule =		ip_vs_sh_schedule,  }; @@ -276,6 +283,7 @@ static int __init ip_vs_sh_init(void)  static void __exit ip_vs_sh_cleanup(void)  {  	unregister_ip_vs_scheduler(&ip_vs_sh_scheduler); +	synchronize_rcu();  } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 
44fd10c539a..f6046d9af8d 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -246,7 +246,7 @@ struct ip_vs_sync_thread_data {  struct ip_vs_sync_mesg_v0 {  	__u8                    nr_conns;  	__u8                    syncid; -	__u16                   size; +	__be16                  size;  	/* ip_vs_sync_conn entries start here */  }; @@ -255,7 +255,7 @@ struct ip_vs_sync_mesg_v0 {  struct ip_vs_sync_mesg {  	__u8			reserved;	/* must be zero */  	__u8			syncid; -	__u16			size; +	__be16			size;  	__u8			nr_conns;  	__s8			version;	/* SYNC_PROTO_VER  */  	__u16			spare; @@ -335,7 +335,7 @@ ip_vs_sync_buff_create(struct netns_ipvs *ipvs)  	sb->mesg->reserved = 0;  /* old nr_conns i.e. must be zero now */  	sb->mesg->version = SYNC_PROTO_VER;  	sb->mesg->syncid = ipvs->master_syncid; -	sb->mesg->size = sizeof(struct ip_vs_sync_mesg); +	sb->mesg->size = htons(sizeof(struct ip_vs_sync_mesg));  	sb->mesg->nr_conns = 0;  	sb->mesg->spare = 0;  	sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); @@ -418,7 +418,7 @@ ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)  	mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;  	mesg->nr_conns = 0;  	mesg->syncid = ipvs->master_syncid; -	mesg->size = sizeof(struct ip_vs_sync_mesg_v0); +	mesg->size = htons(sizeof(struct ip_vs_sync_mesg_v0));  	sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);  	sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;  	sb->firstuse = jiffies; @@ -531,9 +531,9 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,  	if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))  		return; -	spin_lock(&ipvs->sync_buff_lock); +	spin_lock_bh(&ipvs->sync_buff_lock);  	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { -		spin_unlock(&ipvs->sync_buff_lock); +		spin_unlock_bh(&ipvs->sync_buff_lock);  		return;  	} @@ -552,7 +552,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,  	if (!buff) {  		buff = ip_vs_sync_buff_create_v0(ipvs);  		if (!buff) { -			spin_unlock(&ipvs->sync_buff_lock); +			spin_unlock_bh(&ipvs->sync_buff_lock);  			pr_err("ip_vs_sync_buff_create failed.\n");  			return;  		} @@ -582,7 +582,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,  	}  	m->nr_conns++; -	m->size += len; +	m->size = htons(ntohs(m->size) + len);  	buff->head += len;  	/* check if there is a space for next one */ @@ -590,7 +590,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,  		sb_queue_tail(ipvs, ms);  		ms->sync_buff = NULL;  	} -	spin_unlock(&ipvs->sync_buff_lock); +	spin_unlock_bh(&ipvs->sync_buff_lock);  	/* synchronize its controller if it has */  	cp = cp->control; @@ -641,9 +641,9 @@ sloop:  		pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);  	} -	spin_lock(&ipvs->sync_buff_lock); +	spin_lock_bh(&ipvs->sync_buff_lock);  	if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) { -		spin_unlock(&ipvs->sync_buff_lock); +		spin_unlock_bh(&ipvs->sync_buff_lock);  		return;  	} @@ -683,7 +683,7 @@ sloop:  	if (!buff) {  		buff = ip_vs_sync_buff_create(ipvs);  		if (!buff) { -			spin_unlock(&ipvs->sync_buff_lock); +			spin_unlock_bh(&ipvs->sync_buff_lock);  			pr_err("ip_vs_sync_buff_create failed.\n");  			return;  		} @@ -693,7 +693,7 @@ sloop:  	p = buff->head;  	buff->head += pad + len; -	m->size += pad + len; +	m->size = htons(ntohs(m->size) + pad + len);  	/* Add ev. padding from prev. 
sync_conn */  	while (pad--)  		*(p++) = 0; @@ -750,7 +750,7 @@ sloop:  		}  	} -	spin_unlock(&ipvs->sync_buff_lock); +	spin_unlock_bh(&ipvs->sync_buff_lock);  control:  	/* synchronize its controller if it has */ @@ -843,7 +843,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,  		kfree(param->pe_data);  		dest = cp->dest; -		spin_lock(&cp->lock); +		spin_lock_bh(&cp->lock);  		if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&  		    !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {  			if (flags & IP_VS_CONN_F_INACTIVE) { @@ -857,24 +857,21 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,  		flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;  		flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;  		cp->flags = flags; -		spin_unlock(&cp->lock); -		if (!dest) { -			dest = ip_vs_try_bind_dest(cp); -			if (dest) -				atomic_dec(&dest->refcnt); -		} +		spin_unlock_bh(&cp->lock); +		if (!dest) +			ip_vs_try_bind_dest(cp);  	} else {  		/*  		 * Find the appropriate destination for the connection.  		 * If it is not found the connection will remain unbound  		 * but still handled.  		 */ +		rcu_read_lock();  		dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,  				       param->vport, protocol, fwmark, flags);  		cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); -		if (dest) -			atomic_dec(&dest->refcnt); +		rcu_read_unlock();  		if (!cp) {  			if (param->pe_data)  				kfree(param->pe_data); @@ -1178,10 +1175,8 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,  		IP_VS_DBG(2, "BACKUP, message header too short\n");  		return;  	} -	/* Convert size back to host byte order */ -	m2->size = ntohs(m2->size); -	if (buflen != m2->size) { +	if (buflen != ntohs(m2->size)) {  		IP_VS_DBG(2, "BACKUP, bogus message size\n");  		return;  	} @@ -1547,10 +1542,7 @@ ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)  	int msize;  	int ret; -	msize = msg->size; - -	/* Put size in network byte order */ -	msg->size = htons(msg->size); +	msize = ntohs(msg->size);  	ret = ip_vs_send_async(sock, (char *)msg, msize);  	if (ret >= 0 || ret == -EAGAIN) @@ -1692,11 +1684,7 @@ static int sync_thread_backup(void *data)  				break;  			} -			/* disable bottom half, because it accesses the data -			   shared by softirq while getting/creating conns */ -			local_bh_disable();  			ip_vs_process_message(tinfo->net, tinfo->buf, len); -			local_bh_enable();  		}  	} diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index bc1bfc48a17..c60a81c4ce9 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -51,7 +51,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)  	 * new connections.  	 */ -	list_for_each_entry(dest, &svc->destinations, n_list) { +	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {  		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&  		    atomic_read(&dest->weight) > 0) {  			least = dest; @@ -66,7 +66,7 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)  	 *    Find the destination with the least load.  	 
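Before the WLC hunk continues, one detail of the ip_vs_sync.c changes above deserves a note: the message size field is now declared __be16 and stays in network byte order for the whole life of the buffer, so the old in-place ntohs()/htons() flips at send and receive time disappear. Both sides now convert explicitly at the point of use (lines as in the patch; the receive side condensed):

	m->size = htons(ntohs(m->size) + len);	/* accumulate, staying __be16 */

	if (buflen != ntohs(m->size))		/* validate in host order */
		return;				/* bogus message size */

Keeping the field __be16 end to end also lets sparse type-check the byte order instead of relying on convention.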
*/    nextstage: -	list_for_each_entry_continue(dest, &svc->destinations, n_list) { +	list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {  		if (dest->flags & IP_VS_DEST_F_OVERLOAD)  			continue;  		doh = ip_vs_dest_conn_overhead(dest); @@ -106,6 +106,7 @@ static int __init ip_vs_wlc_init(void)  static void __exit ip_vs_wlc_cleanup(void)  {  	unregister_ip_vs_scheduler(&ip_vs_wlc_scheduler); +	synchronize_rcu();  }  module_init(ip_vs_wlc_init); diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 231be7dd547..0e68555bceb 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -29,14 +29,45 @@  #include <net/ip_vs.h> +/* The WRR algorithm depends on some calculations: + * - mw: maximum weight + * - di: weight step, greatest common divisor from all weights + * - cw: current required weight + * As a result, all weights are in the [di..mw] range with a step=di. + * + * First, we start with cw = mw and select dests with weight >= cw. + * Then cw is reduced by di and all dests are checked again. + * Last pass should be with cw = di. We have mw/di passes in total: + * + * pass 1: cw = max weight + * pass 2: cw = max weight - di + * pass 3: cw = max weight - 2 * di + * ... + * last pass: cw = di + * + * Weights are supposed to be >= di but we run in parallel with + * weight changes, so it is possible for some dest weight to be reduced + * below di, which is bad if it is the only available dest. + * + * So, we modify how mw is calculated: it is reduced by (di - 1), + * so that the last cw is 1 and we catch such dests with weight below di: + * pass 1: cw = max weight - (di - 1) + * pass 2: cw = max weight - di - (di - 1) + * pass 3: cw = max weight - 2 * di - (di - 1) + * ... + * last pass: cw = 1 + * + */ +  /*   * current destination pointer for weighted round-robin scheduling   */  struct ip_vs_wrr_mark { -	struct list_head *cl;	/* current list head */ +	struct ip_vs_dest *cl;	/* current dest or head */  	int cw;			/* current weight */  	int mw;			/* maximum weight */  	int di;			/* decreasing interval */ +	struct rcu_head		rcu_head;  }; @@ -88,36 +119,41 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)  	if (mark == NULL)  		return -ENOMEM; -	mark->cl = &svc->destinations; -	mark->cw = 0; -	mark->mw = ip_vs_wrr_max_weight(svc); +	mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list);  	mark->di = ip_vs_wrr_gcd_weight(svc); +	mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1); +	mark->cw = mark->mw;  	svc->sched_data = mark;  	return 0;  } -static int ip_vs_wrr_done_svc(struct ip_vs_service *svc) +static void ip_vs_wrr_done_svc(struct ip_vs_service *svc)  { +	struct ip_vs_wrr_mark *mark = svc->sched_data; +  	/*  	 *    Release the mark variable  	 */ -	kfree(svc->sched_data); - -	return 0; +	kfree_rcu(mark, rcu_head);  } -static int ip_vs_wrr_update_svc(struct ip_vs_service *svc) +static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc, +				  struct ip_vs_dest *dest)  {  	struct ip_vs_wrr_mark *mark = svc->sched_data; -	mark->cl = &svc->destinations; -	mark->mw = ip_vs_wrr_max_weight(svc); +	spin_lock_bh(&svc->sched_lock); +	mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list);  	mark->di = ip_vs_wrr_gcd_weight(svc); -	if (mark->cw > mark->mw) -		mark->cw = 0; +	mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1); +	if (mark->cw > mark->mw || !mark->cw) +		mark->cw = mark->mw; +	else if (mark->di > 1) +		mark->cw = (mark->cw / mark->di) * mark->di + 1; +	spin_unlock_bh(&svc->sched_lock);  
	return 0;  } @@ -128,80 +164,79 @@ static int ip_vs_wrr_update_svc(struct ip_vs_service *svc)  static struct ip_vs_dest *  ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)  { -	struct ip_vs_dest *dest; +	struct ip_vs_dest *dest, *last, *stop = NULL;  	struct ip_vs_wrr_mark *mark = svc->sched_data; -	struct list_head *p; +	bool last_pass = false, restarted = false;  	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); -	/* -	 * This loop will always terminate, because mark->cw in (0, max_weight] -	 * and at least one server has its weight equal to max_weight. -	 */ -	write_lock(&svc->sched_lock); -	p = mark->cl; +	spin_lock_bh(&svc->sched_lock); +	dest = mark->cl; +	/* No available dests? */ +	if (mark->mw == 0) +		goto err_noavail; +	last = dest; +	/* Stop only after all dests were checked for weight >= 1 (last pass) */  	while (1) { -		if (mark->cl == &svc->destinations) { -			/* it is at the head of the destination list */ - -			if (mark->cl == mark->cl->next) { -				/* no dest entry */ -				ip_vs_scheduler_err(svc, -					"no destination available: " -					"no destinations present"); -				dest = NULL; -				goto out; -			} - -			mark->cl = svc->destinations.next; -			mark->cw -= mark->di; -			if (mark->cw <= 0) { -				mark->cw = mark->mw; -				/* -				 * Still zero, which means no available servers. -				 */ -				if (mark->cw == 0) { -					mark->cl = &svc->destinations; -					ip_vs_scheduler_err(svc, -						"no destination available"); -					dest = NULL; -					goto out; -				} -			} -		} else -			mark->cl = mark->cl->next; - -		if (mark->cl != &svc->destinations) { -			/* not at the head of the list */ -			dest = list_entry(mark->cl, struct ip_vs_dest, n_list); +		list_for_each_entry_continue_rcu(dest, +						 &svc->destinations, +						 n_list) {  			if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && -			    atomic_read(&dest->weight) >= mark->cw) { -				/* got it */ -				break; -			} +			    atomic_read(&dest->weight) >= mark->cw) +				goto found; +			if (dest == stop) +				goto err_over;  		} - -		if (mark->cl == p && mark->cw == mark->di) { -			/* back to the start, and no dest is found. -			   It is only possible when all dests are OVERLOADED */ -			dest = NULL; -			ip_vs_scheduler_err(svc, -				"no destination available: " -				"all destinations are overloaded"); -			goto out; +		mark->cw -= mark->di; +		if (mark->cw <= 0) { +			mark->cw = mark->mw; +			/* Stop if we tried last pass from first dest: +			 * 1. last_pass: we started checks when cw > di but +			 *	then all dests were checked for w >= 1 +			 * 2. last was head: the first and only traversal +			 *	was for weight >= 1, for all dests. +			 */ +			if (last_pass || +			    &last->n_list == &svc->destinations) +				goto err_over; +			restarted = true; +		} +		last_pass = mark->cw <= mark->di; +		if (last_pass && restarted && +		    &last->n_list != &svc->destinations) { +			/* First traversal was for w >= 1 but only +			 * for dests after 'last', now do the same +			 * for all dests up to 'last'. 
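To make the mw/di/cw arithmetic from the header comment concrete, a small worked example (weights chosen purely for illustration):

	weights {4, 2}:	di = gcd(4, 2) = 2,  mw = 4 - (di - 1) = 3
	pass 1: cw = 3	->  only the weight-4 dest qualifies
	pass 2: cw = 1	->  both dests qualify, including one whose
			    weight was concurrently lowered below di

Because the maximum weight is a multiple of di, every value in the sequence mw, mw - di, ..., 1 is congruent to 1 modulo di; the (mark->cw / mark->di) * mark->di + 1 expression in ip_vs_wrr_dest_changed() above simply snaps a stale cw back onto that grid after di changes.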
+			 */ +			stop = last;  		}  	} +found:  	IP_VS_DBG_BUF(6, "WRR: server %s:%u "  		      "activeconns %d refcnt %d weight %d\n",  		      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),  		      atomic_read(&dest->activeconns),  		      atomic_read(&dest->refcnt),  		      atomic_read(&dest->weight)); +	mark->cl = dest;    out: -	write_unlock(&svc->sched_lock); +	spin_unlock_bh(&svc->sched_lock);  	return dest; + +err_noavail: +	mark->cl = dest; +	dest = NULL; +	ip_vs_scheduler_err(svc, "no destination available"); +	goto out; + +err_over: +	mark->cl = dest; +	dest = NULL; +	ip_vs_scheduler_err(svc, "no destination available: " +			    "all destinations are overloaded"); +	goto out;  } @@ -212,7 +247,9 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = {  	.n_list =		LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),  	.init_service =		ip_vs_wrr_init_svc,  	.done_service =		ip_vs_wrr_done_svc, -	.update_service =	ip_vs_wrr_update_svc, +	.add_dest =		ip_vs_wrr_dest_changed, +	.del_dest =		ip_vs_wrr_dest_changed, +	.upd_dest =		ip_vs_wrr_dest_changed,  	.schedule =		ip_vs_wrr_schedule,  }; @@ -224,6 +261,7 @@ static int __init ip_vs_wrr_init(void)  static void __exit ip_vs_wrr_cleanup(void)  {  	unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler); +	synchronize_rcu();  }  module_init(ip_vs_wrr_init); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index ee6b7a9f1ec..b75ff6429a0 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -17,6 +17,8 @@   * - not all connections have destination server, for example,   * connections in backup server when fwmark is used   * - bypass connections use daddr from packet + * - we can use dst without ref while sending in RCU section, we use + * ref when returning NF_ACCEPT for NAT-ed packet via loopback   * LOCAL_OUT rules:   * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)   * - skb->pkt_type is not set yet @@ -51,39 +53,54 @@ enum {  				      */  	IP_VS_RT_MODE_CONNECT	= 8, /* Always bind route to saddr */  	IP_VS_RT_MODE_KNOWN_NH	= 16,/* Route via remote addr */ +	IP_VS_RT_MODE_TUNNEL	= 32,/* Tunnel mode */  }; +static inline struct ip_vs_dest_dst *ip_vs_dest_dst_alloc(void) +{ +	return kmalloc(sizeof(struct ip_vs_dest_dst), GFP_ATOMIC); +} + +static inline void ip_vs_dest_dst_free(struct ip_vs_dest_dst *dest_dst) +{ +	kfree(dest_dst); +} +  /*   *      Destination cache to speed up outgoing route lookup   */  static inline void -__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst, -		u32 dst_cookie) +__ip_vs_dst_set(struct ip_vs_dest *dest, struct ip_vs_dest_dst *dest_dst, +		struct dst_entry *dst, u32 dst_cookie)  { -	struct dst_entry *old_dst; +	struct ip_vs_dest_dst *old; + +	old = rcu_dereference_protected(dest->dest_dst, +					lockdep_is_held(&dest->dst_lock)); -	old_dst = dest->dst_cache; -	dest->dst_cache = dst; -	dest->dst_rtos = rtos; -	dest->dst_cookie = dst_cookie; -	dst_release(old_dst); +	if (dest_dst) { +		dest_dst->dst_cache = dst; +		dest_dst->dst_cookie = dst_cookie; +	} +	rcu_assign_pointer(dest->dest_dst, dest_dst); + +	if (old) +		call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);  } -static inline struct dst_entry * -__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) +static inline struct ip_vs_dest_dst * +__ip_vs_dst_check(struct ip_vs_dest *dest)  { -	struct dst_entry *dst = dest->dst_cache; +	struct ip_vs_dest_dst *dest_dst = rcu_dereference(dest->dest_dst); +	struct dst_entry *dst; -	if (!dst) +	if (!dest_dst) 
 		return NULL; -	if ((dst->obsolete || rtos != dest->dst_rtos) && -	    dst->ops->check(dst, dest->dst_cookie) == NULL) { -		dest->dst_cache = NULL; -		dst_release(dst); +	dst = dest_dst->dst_cache; +	if (dst->obsolete && +	    dst->ops->check(dst, dest_dst->dst_cookie) == NULL)  		return NULL; -	} -	dst_hold(dst); -	return dst; +	return dest_dst;  }  static inline bool @@ -104,7 +121,7 @@ __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu)  /* Get route to daddr, update *saddr, optionally bind route to saddr */  static struct rtable *do_output_route4(struct net *net, __be32 daddr, -				       u32 rtos, int rt_mode, __be32 *saddr) +				       int rt_mode, __be32 *saddr)  {  	struct flowi4 fl4;  	struct rtable *rt; @@ -113,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr,  	memset(&fl4, 0, sizeof(fl4));  	fl4.daddr = daddr;  	fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; -	fl4.flowi4_tos = rtos;  	fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?  			   FLOWI_FLAG_KNOWN_NH : 0; @@ -124,7 +140,7 @@ retry:  		if (PTR_ERR(rt) == -EINVAL && *saddr &&  		    rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {  			*saddr = 0; -			flowi4_update_output(&fl4, 0, rtos, daddr, 0); +			flowi4_update_output(&fl4, 0, 0, daddr, 0);  			goto retry;  		}  		IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); @@ -132,7 +148,7 @@ retry:  	} else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {  		ip_rt_put(rt);  		*saddr = fl4.saddr; -		flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr); +		flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr);  		loop++;  		goto retry;  	} @@ -141,113 +157,140 @@ retry:  }  /* Get route to destination or remote server */ -static struct rtable * +static int  __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, -		   __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr) +		   __be32 daddr, int rt_mode, __be32 *ret_saddr)  {  	struct net *net = dev_net(skb_dst(skb)->dev); +	struct netns_ipvs *ipvs = net_ipvs(net); +	struct ip_vs_dest_dst *dest_dst;  	struct rtable *rt;			/* Route to the other host */  	struct rtable *ort;			/* Original route */ -	int local; +	struct iphdr *iph; +	__be16 df; +	int mtu; +	int local, noref = 1;  	if (dest) { -		spin_lock(&dest->dst_lock); -		if (!(rt = (struct rtable *) -		      __ip_vs_dst_check(dest, rtos))) { -			rt = do_output_route4(net, dest->addr.ip, rtos, -					      rt_mode, &dest->dst_saddr.ip); +		dest_dst = __ip_vs_dst_check(dest); +		if (likely(dest_dst)) +			rt = (struct rtable *) dest_dst->dst_cache; +		else { +			dest_dst = ip_vs_dest_dst_alloc(); +			spin_lock_bh(&dest->dst_lock); +			if (!dest_dst) { +				__ip_vs_dst_set(dest, NULL, NULL, 0); +				spin_unlock_bh(&dest->dst_lock); +				goto err_unreach; +			} +			rt = do_output_route4(net, dest->addr.ip, rt_mode, +					      &dest_dst->dst_saddr.ip);  			if (!rt) { -				spin_unlock(&dest->dst_lock); -				return NULL; +				__ip_vs_dst_set(dest, NULL, NULL, 0); +				spin_unlock_bh(&dest->dst_lock); +				ip_vs_dest_dst_free(dest_dst); +				goto err_unreach;  			} -			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); -			IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, " -				  "rtos=%X\n", -				  &dest->addr.ip, &dest->dst_saddr.ip, -				  atomic_read(&rt->dst.__refcnt), rtos); +			__ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); +			spin_unlock_bh(&dest->dst_lock); +			IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n", +				  &dest->addr.ip, &dest_dst->dst_saddr.ip, +				  
atomic_read(&rt->dst.__refcnt));  		}  		daddr = dest->addr.ip;  		if (ret_saddr) -			*ret_saddr = dest->dst_saddr.ip; -		spin_unlock(&dest->dst_lock); +			*ret_saddr = dest_dst->dst_saddr.ip;  	} else {  		__be32 saddr = htonl(INADDR_ANY); +		noref = 0; +  		/* For such unconfigured boxes avoid many route lookups  		 * for performance reasons because we do not remember saddr  		 */  		rt_mode &= ~IP_VS_RT_MODE_CONNECT; -		rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr); +		rt = do_output_route4(net, daddr, rt_mode, &saddr);  		if (!rt) -			return NULL; +			goto err_unreach;  		if (ret_saddr)  			*ret_saddr = saddr;  	} -	local = rt->rt_flags & RTCF_LOCAL; +	local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0;  	if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &  	      rt_mode)) {  		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",  			     (rt->rt_flags & RTCF_LOCAL) ?  			     "local":"non-local", &daddr); -		ip_rt_put(rt); -		return NULL; +		goto err_put;  	} -	if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && -	    !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) { -		IP_VS_DBG_RL("Redirect from non-local address %pI4 to local " -			     "requires NAT method, dest: %pI4\n", -			     &ip_hdr(skb)->daddr, &daddr); -		ip_rt_put(rt); -		return NULL; +	iph = ip_hdr(skb); +	if (likely(!local)) { +		if (unlikely(ipv4_is_loopback(iph->saddr))) { +			IP_VS_DBG_RL("Stopping traffic from loopback address " +				     "%pI4 to non-local address, dest: %pI4\n", +				     &iph->saddr, &daddr); +			goto err_put; +		} +	} else { +		ort = skb_rtable(skb); +		if (!(rt_mode & IP_VS_RT_MODE_RDR) && +		    !(ort->rt_flags & RTCF_LOCAL)) { +			IP_VS_DBG_RL("Redirect from non-local address %pI4 to " +				     "local requires NAT method, dest: %pI4\n", +				     &iph->daddr, &daddr); +			goto err_put; +		} +		/* skb to local stack, preserve old route */ +		if (!noref) +			ip_rt_put(rt); +		return local;  	} -	if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) { -		IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 " -			     "to non-local address, dest: %pI4\n", -			     &ip_hdr(skb)->saddr, &daddr); -		ip_rt_put(rt); -		return NULL; + +	if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) { +		mtu = dst_mtu(&rt->dst); +		df = iph->frag_off & htons(IP_DF); +	} else { +		struct sock *sk = skb->sk; + +		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); +		if (mtu < 68) { +			IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); +			goto err_put; +		} +		ort = skb_rtable(skb); +		if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT) +			ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); +		/* MTU check allowed? */ +		df = sysctl_pmtu_disc(ipvs) ? 
iph->frag_off & htons(IP_DF) : 0;  	} -	return rt; -} +	/* MTU checking */ +	if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) { +		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); +		IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr); +		goto err_put; +	} -/* Reroute packet to local IPv4 stack after DNAT */ -static int -__ip_vs_reroute_locally(struct sk_buff *skb) -{ -	struct rtable *rt = skb_rtable(skb); -	struct net_device *dev = rt->dst.dev; -	struct net *net = dev_net(dev); -	struct iphdr *iph = ip_hdr(skb); +	skb_dst_drop(skb); +	if (noref) { +		if (!local) +			skb_dst_set_noref_force(skb, &rt->dst); +		else +			skb_dst_set(skb, dst_clone(&rt->dst)); +	} else +		skb_dst_set(skb, &rt->dst); -	if (rt_is_input_route(rt)) { -		unsigned long orefdst = skb->_skb_refdst; +	return local; -		if (ip_route_input(skb, iph->daddr, iph->saddr, -				   iph->tos, skb->dev)) -			return 0; -		refdst_drop(orefdst); -	} else { -		struct flowi4 fl4 = { -			.daddr = iph->daddr, -			.saddr = iph->saddr, -			.flowi4_tos = RT_TOS(iph->tos), -			.flowi4_mark = skb->mark, -		}; +err_put: +	if (!noref) +		ip_rt_put(rt); +	return -1; -		rt = ip_route_output_key(net, &fl4); -		if (IS_ERR(rt)) -			return 0; -		if (!(rt->rt_flags & RTCF_LOCAL)) { -			ip_rt_put(rt); -			return 0; -		} -		/* Drop old route. */ -		skb_dst_drop(skb); -		skb_dst_set(skb, &rt->dst); -	} -	return 1; +err_unreach: +	dst_link_failure(skb); +	return -1;  }  #ifdef CONFIG_IP_VS_IPV6 @@ -294,44 +337,57 @@ out_err:  /*   * Get route to destination or remote server   */ -static struct rt6_info * +static int  __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,  		      struct in6_addr *daddr, struct in6_addr *ret_saddr, -		      int do_xfrm, int rt_mode) +		      struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode)  {  	struct net *net = dev_net(skb_dst(skb)->dev); +	struct ip_vs_dest_dst *dest_dst;  	struct rt6_info *rt;			/* Route to the other host */  	struct rt6_info *ort;			/* Original route */  	struct dst_entry *dst; -	int local; +	int mtu; +	int local, noref = 1;  	if (dest) { -		spin_lock(&dest->dst_lock); -		rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0); -		if (!rt) { +		dest_dst = __ip_vs_dst_check(dest); +		if (likely(dest_dst)) +			rt = (struct rt6_info *) dest_dst->dst_cache; +		else {  			u32 cookie; +			dest_dst = ip_vs_dest_dst_alloc(); +			spin_lock_bh(&dest->dst_lock); +			if (!dest_dst) { +				__ip_vs_dst_set(dest, NULL, NULL, 0); +				spin_unlock_bh(&dest->dst_lock); +				goto err_unreach; +			}  			dst = __ip_vs_route_output_v6(net, &dest->addr.in6, -						      &dest->dst_saddr.in6, +						      &dest_dst->dst_saddr.in6,  						      do_xfrm);  			if (!dst) { -				spin_unlock(&dest->dst_lock); -				return NULL; +				__ip_vs_dst_set(dest, NULL, NULL, 0); +				spin_unlock_bh(&dest->dst_lock); +				ip_vs_dest_dst_free(dest_dst); +				goto err_unreach;  			}  			rt = (struct rt6_info *) dst;  			cookie = rt->rt6i_node ? 
rt->rt6i_node->fn_sernum : 0; -			__ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie); +			__ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); +			spin_unlock_bh(&dest->dst_lock);  			IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", -				  &dest->addr.in6, &dest->dst_saddr.in6, +				  &dest->addr.in6, &dest_dst->dst_saddr.in6,  				  atomic_read(&rt->dst.__refcnt));  		}  		if (ret_saddr) -			*ret_saddr = dest->dst_saddr.in6; -		spin_unlock(&dest->dst_lock); +			*ret_saddr = dest_dst->dst_saddr.in6;  	} else { +		noref = 0;  		dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);  		if (!dst) -			return NULL; +			goto err_unreach;  		rt = (struct rt6_info *) dst;  	} @@ -340,86 +396,137 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,  	      rt_mode)) {  		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",  			     local ? "local":"non-local", daddr); -		dst_release(&rt->dst); -		return NULL; +		goto err_put;  	} -	if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && -	    !((ort = (struct rt6_info *) skb_dst(skb)) && -	      __ip_vs_is_local_route6(ort))) { -		IP_VS_DBG_RL("Redirect from non-local address %pI6c to local " -			     "requires NAT method, dest: %pI6c\n", -			     &ipv6_hdr(skb)->daddr, daddr); -		dst_release(&rt->dst); -		return NULL; +	if (likely(!local)) { +		if (unlikely((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && +			     ipv6_addr_type(&ipv6_hdr(skb)->saddr) & +					    IPV6_ADDR_LOOPBACK)) { +			IP_VS_DBG_RL("Stopping traffic from loopback address " +				     "%pI6c to non-local address, " +				     "dest: %pI6c\n", +				     &ipv6_hdr(skb)->saddr, daddr); +			goto err_put; +		} +	} else { +		ort = (struct rt6_info *) skb_dst(skb); +		if (!(rt_mode & IP_VS_RT_MODE_RDR) && +		    !__ip_vs_is_local_route6(ort)) { +			IP_VS_DBG_RL("Redirect from non-local address %pI6c " +				     "to local requires NAT method, " +				     "dest: %pI6c\n", +				     &ipv6_hdr(skb)->daddr, daddr); +			goto err_put; +		} +		/* skb to local stack, preserve old route */ +		if (!noref) +			dst_release(&rt->dst); +		return local;  	} -	if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && -		     ipv6_addr_type(&ipv6_hdr(skb)->saddr) & -				    IPV6_ADDR_LOOPBACK)) { -		IP_VS_DBG_RL("Stopping traffic from loopback address %pI6c " -			     "to non-local address, dest: %pI6c\n", -			     &ipv6_hdr(skb)->saddr, daddr); -		dst_release(&rt->dst); -		return NULL; + +	/* MTU checking */ +	if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) +		mtu = dst_mtu(&rt->dst); +	else { +		struct sock *sk = skb->sk; + +		mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); +		if (mtu < IPV6_MIN_MTU) { +			IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, +				     IPV6_MIN_MTU); +			goto err_put; +		} +		ort = (struct rt6_info *) skb_dst(skb); +		if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT) +			ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);  	} -	return rt; +	if (unlikely(__mtu_check_toobig_v6(skb, mtu))) { +		if (!skb->dev) +			skb->dev = net->loopback_dev; +		/* only send ICMP too big on first fragment */ +		if (!ipvsh->fragoffs) +			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); +		IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr); +		goto err_put; +	} + +	skb_dst_drop(skb); +	if (noref) { +		if (!local) +			skb_dst_set_noref_force(skb, &rt->dst); +		else +			skb_dst_set(skb, dst_clone(&rt->dst)); +	} else +		skb_dst_set(skb, &rt->dst); + +	return local; + +err_put: +	if (!noref) +		dst_release(&rt->dst); +	return -1; + 
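Both address families above follow the same dst-cache discipline, so it is worth restating compactly (identifiers as in the patch; error handling elided, a sketch rather than the full code): the fast path validates the cached route without any lock, relying on the caller's RCU read section, and only a miss takes dest->dst_lock to rebuild and republish the entry, the old one being freed via call_rcu():

	dest_dst = rcu_dereference(dest->dest_dst);	/* fast path */
	if (dest_dst) {
		struct dst_entry *dst = dest_dst->dst_cache;

		if (dst->obsolete &&
		    dst->ops->check(dst, dest_dst->dst_cookie) == NULL)
			dest_dst = NULL;		/* stale, rebuild */
	}
	if (!dest_dst) {
		dest_dst = ip_vs_dest_dst_alloc();
		spin_lock_bh(&dest->dst_lock);
		/* ... route lookup fills dest_dst ... */
		__ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
		spin_unlock_bh(&dest->dst_lock);
	}

Because transmission itself happens inside the same RCU read section, the non-local path can attach the route with skb_dst_set_noref_force() and skip the reference count entirely; only packets headed back to the local stack still take a real dst reference.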
+err_unreach: +	dst_link_failure(skb); +	return -1;  }  #endif -/* - *	Release dest->dst_cache before a dest is removed - */ -void -ip_vs_dst_reset(struct ip_vs_dest *dest) +/* return NF_ACCEPT to allow forwarding or other NF_xxx on error */ +static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb, +					    struct ip_vs_conn *cp)  { -	struct dst_entry *old_dst; +	int ret = NF_ACCEPT; -	old_dst = dest->dst_cache; -	dest->dst_cache = NULL; -	dst_release(old_dst); -	dest->dst_saddr.ip = 0; +	skb->ipvs_property = 1; +	if (unlikely(cp->flags & IP_VS_CONN_F_NFCT)) +		ret = ip_vs_confirm_conntrack(skb); +	if (ret == NF_ACCEPT) { +		nf_reset(skb); +		skb_forward_csum(skb); +	} +	return ret;  } -#define IP_VS_XMIT_TUNNEL(skb, cp)				\ -({								\ -	int __ret = NF_ACCEPT;					\ -								\ -	(skb)->ipvs_property = 1;				\ -	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\ -		__ret = ip_vs_confirm_conntrack(skb);		\ -	if (__ret == NF_ACCEPT) {				\ -		nf_reset(skb);					\ -		skb_forward_csum(skb);				\ -	}							\ -	__ret;							\ -}) +/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ +static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, +					 struct ip_vs_conn *cp, int local) +{ +	int ret = NF_STOLEN; -#define IP_VS_XMIT_NAT(pf, skb, cp, local)		\ -do {							\ -	(skb)->ipvs_property = 1;			\ -	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\ -		ip_vs_notrack(skb);			\ -	else						\ -		ip_vs_update_conntrack(skb, cp, 1);	\ -	if (local)					\ -		return NF_ACCEPT;			\ -	skb_forward_csum(skb);				\ -	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\ -		skb_dst(skb)->dev, dst_output);		\ -} while (0) +	skb->ipvs_property = 1; +	if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) +		ip_vs_notrack(skb); +	else +		ip_vs_update_conntrack(skb, cp, 1); +	if (!local) { +		skb_forward_csum(skb); +		NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, +			dst_output); +	} else +		ret = NF_ACCEPT; +	return ret; +} -#define IP_VS_XMIT(pf, skb, cp, local)			\ -do {							\ -	(skb)->ipvs_property = 1;			\ -	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\ -		ip_vs_notrack(skb);			\ -	if (local)					\ -		return NF_ACCEPT;			\ -	skb_forward_csum(skb);				\ -	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\ -		skb_dst(skb)->dev, dst_output);		\ -} while (0) +/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */ +static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, +				     struct ip_vs_conn *cp, int local) +{ +	int ret = NF_STOLEN; + +	skb->ipvs_property = 1; +	if (likely(!(cp->flags & IP_VS_CONN_F_NFCT))) +		ip_vs_notrack(skb); +	if (!local) { +		skb_forward_csum(skb); +		NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, +			dst_output); +	} else +		ret = NF_ACCEPT; +	return ret; +}  /* @@ -430,7 +537,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,  		struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)  {  	/* we do not touch skb and do not need pskb ptr */ -	IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); +	return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);  } @@ -443,52 +550,29 @@ int  ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,  		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)  { -	struct rtable *rt;			/* Route to the other host */  	struct iphdr  *iph = ip_hdr(skb); -	int    mtu;  	EnterFunction(10); -	if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos), -				      IP_VS_RT_MODE_NON_LOCAL, NULL))) -		goto tx_error_icmp; - -	/* MTU checking */ -	mtu = dst_mtu(&rt->dst); -	if ((skb->len 
> mtu) && (iph->frag_off & htons(IP_DF)) && -	    !skb_is_gso(skb)) { -		ip_rt_put(rt); -		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); -		IP_VS_DBG_RL("%s(): frag needed\n", __func__); +	rcu_read_lock(); +	if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL, +			       NULL) < 0)  		goto tx_error; -	} -	/* -	 * Call ip_send_check because we are not sure it is called -	 * after ip_defrag. Is copy-on-write needed? -	 */ -	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) { -		ip_rt_put(rt); -		return NF_STOLEN; -	} -	ip_send_check(ip_hdr(skb)); - -	/* drop old route */ -	skb_dst_drop(skb); -	skb_dst_set(skb, &rt->dst); +	ip_send_check(iph);  	/* Another hack: avoid icmp_send in ip_fragment */  	skb->local_df = 1; -	IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); +	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); +	rcu_read_unlock();  	LeaveFunction(10);  	return NF_STOLEN; - tx_error_icmp: -	dst_link_failure(skb);   tx_error:  	kfree_skb(skb); +	rcu_read_unlock();  	LeaveFunction(10);  	return NF_STOLEN;  } @@ -496,60 +580,27 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,  #ifdef CONFIG_IP_VS_IPV6  int  ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, -		     struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) +		     struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)  { -	struct rt6_info *rt;			/* Route to the other host */ -	int    mtu; -  	EnterFunction(10); -	rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr.in6, NULL, 0, -				   IP_VS_RT_MODE_NON_LOCAL); -	if (!rt) -		goto tx_error_icmp; - -	/* MTU checking */ -	mtu = dst_mtu(&rt->dst); -	if (__mtu_check_toobig_v6(skb, mtu)) { -		if (!skb->dev) { -			struct net *net = dev_net(skb_dst(skb)->dev); - -			skb->dev = net->loopback_dev; -		} -		/* only send ICMP too big on first fragment */ -		if (!iph->fragoffs) -			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); -		dst_release(&rt->dst); -		IP_VS_DBG_RL("%s(): frag needed\n", __func__); +	rcu_read_lock(); +	if (__ip_vs_get_out_rt_v6(skb, NULL, &ipvsh->daddr.in6, NULL, +				  ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)  		goto tx_error; -	} - -	/* -	 * Call ip_send_check because we are not sure it is called -	 * after ip_defrag. Is copy-on-write needed? 
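
/*
 * Sketch, not part of this patch: the calling convention the reworked
 * route helpers impose on every transmitter, as in ip_vs_bypass_xmit()
 * above.  They return -1 on error and the "local" flag on success, and
 * attach the route to the skb themselves, so callers run the whole
 * sequence under rcu_read_lock().  sketch_xmit() and its mode mask are
 * illustrative only.
 */
static int sketch_xmit(struct sk_buff *skb, struct ip_vs_conn *cp)
{
	int local;

	rcu_read_lock();
	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				   IP_VS_RT_MODE_LOCAL |
				   IP_VS_RT_MODE_NON_LOCAL, NULL);
	if (local < 0) {
		/* route lookup failed: free the skb, report it stolen */
		kfree_skb(skb);
		rcu_read_unlock();
		return NF_STOLEN;
	}
	/* local == 1 delivers to the local stack, local == 0 forwards */
	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, local);
	rcu_read_unlock();
	return NF_STOLEN;
}
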
-	 */ -	skb = skb_share_check(skb, GFP_ATOMIC); -	if (unlikely(skb == NULL)) { -		dst_release(&rt->dst); -		return NF_STOLEN; -	} - -	/* drop old route */ -	skb_dst_drop(skb); -	skb_dst_set(skb, &rt->dst);  	/* Another hack: avoid icmp_send in ip_fragment */  	skb->local_df = 1; -	IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); +	ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); +	rcu_read_unlock();  	LeaveFunction(10);  	return NF_STOLEN; - tx_error_icmp: -	dst_link_failure(skb);   tx_error:  	kfree_skb(skb); +	rcu_read_unlock();  	LeaveFunction(10);  	return NF_STOLEN;  } @@ -564,29 +615,30 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,  	       struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)  {  	struct rtable *rt;		/* Route to the other host */ -	int mtu; -	struct iphdr *iph = ip_hdr(skb); -	int local; +	int local, rc, was_input;  	EnterFunction(10); +	rcu_read_lock();  	/* check if it is a connection of no-client-port */  	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {  		__be16 _pt, *p; -		p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt); + +		p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);  		if (p == NULL)  			goto tx_error;  		ip_vs_conn_fill_cport(cp, *p);  		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));  	} -	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, -				      RT_TOS(iph->tos), -				      IP_VS_RT_MODE_LOCAL | -					IP_VS_RT_MODE_NON_LOCAL | -					IP_VS_RT_MODE_RDR, NULL))) -		goto tx_error_icmp; -	local = rt->rt_flags & RTCF_LOCAL; +	was_input = rt_is_input_route(skb_rtable(skb)); +	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, +				   IP_VS_RT_MODE_LOCAL | +				   IP_VS_RT_MODE_NON_LOCAL | +				   IP_VS_RT_MODE_RDR, NULL); +	if (local < 0) +		goto tx_error; +	rt = skb_rtable(skb);  	/*  	 * Avoid duplicate tuple in reply direction for NAT traffic  	 * to local address when connection is sync-ed @@ -600,57 +652,31 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,  			IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,  					 "ip_vs_nat_xmit(): "  					 "stopping DNAT to local address"); -			goto tx_error_put; +			goto tx_error;  		}  	}  #endif  	/* From world but DNAT to loopback address? */ -	if (local && ipv4_is_loopback(cp->daddr.ip) && -	    rt_is_input_route(skb_rtable(skb))) { +	if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {  		IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "  				 "stopping DNAT to loopback address"); -		goto tx_error_put; -	} - -	/* MTU checking */ -	mtu = dst_mtu(&rt->dst); -	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && -	    !skb_is_gso(skb)) { -		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); -		IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, -				 "ip_vs_nat_xmit(): frag needed for"); -		goto tx_error_put; +		goto tx_error;  	}  	/* copy-on-write the packet before mangling it */  	if (!skb_make_writable(skb, sizeof(struct iphdr))) -		goto tx_error_put; +		goto tx_error;  	if (skb_cow(skb, rt->dst.dev->hard_header_len)) -		goto tx_error_put; +		goto tx_error;  	/* mangle the packet */  	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh)) -		goto tx_error_put; +		goto tx_error;  	ip_hdr(skb)->daddr = cp->daddr.ip;  	ip_send_check(ip_hdr(skb)); -	if (!local) { -		/* drop old route */ -		skb_dst_drop(skb); -		skb_dst_set(skb, &rt->dst); -	} else { -		ip_rt_put(rt); -		/* -		 * Some IPv4 replies get local address from routes, -		 * not from iph, so while we DNAT after routing -		 * we need this second input/output route. 
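
/*
 * Sketch, not part of this patch: the DNAT mangle sequence that
 * ip_vs_nat_xmit() above keeps after the rerouting code is removed:
 * make the header writable, ensure hard-header room, rewrite the
 * destination and refresh the checksum.  Error handling is reduced to
 * a negative return; the helper name is invented.
 */
static int sketch_dnat_mangle(struct sk_buff *skb, struct rtable *rt,
			      __be32 new_daddr)
{
	if (!skb_make_writable(skb, sizeof(struct iphdr)))
		return -1;
	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		return -1;
	ip_hdr(skb)->daddr = new_daddr;
	ip_send_check(ip_hdr(skb));
	return 0;
}
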
-		 */ -		if (!__ip_vs_reroute_locally(skb)) -			goto tx_error; -	} -  	IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");  	/* FIXME: when application helper enlarges the packet and the length @@ -660,49 +686,48 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,  	/* Another hack: avoid icmp_send in ip_fragment */  	skb->local_df = 1; -	IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); +	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); +	rcu_read_unlock();  	LeaveFunction(10); -	return NF_STOLEN; +	return rc; -  tx_error_icmp: -	dst_link_failure(skb);    tx_error:  	kfree_skb(skb); +	rcu_read_unlock();  	LeaveFunction(10);  	return NF_STOLEN; -  tx_error_put: -	ip_rt_put(rt); -	goto tx_error;  }  #ifdef CONFIG_IP_VS_IPV6  int  ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, -		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) +		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)  {  	struct rt6_info *rt;		/* Route to the other host */ -	int mtu; -	int local; +	int local, rc;  	EnterFunction(10); +	rcu_read_lock();  	/* check if it is a connection of no-client-port */ -	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !iph->fragoffs)) { +	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) {  		__be16 _pt, *p; -		p = skb_header_pointer(skb, iph->len, sizeof(_pt), &_pt); +		p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);  		if (p == NULL)  			goto tx_error;  		ip_vs_conn_fill_cport(cp, *p);  		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));  	} -	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, -					 0, (IP_VS_RT_MODE_LOCAL | -					     IP_VS_RT_MODE_NON_LOCAL | -					     IP_VS_RT_MODE_RDR)))) -		goto tx_error_icmp; -	local = __ip_vs_is_local_route6(rt); +	local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, +				      ipvsh, 0, +				      IP_VS_RT_MODE_LOCAL | +				      IP_VS_RT_MODE_NON_LOCAL | +				      IP_VS_RT_MODE_RDR); +	if (local < 0) +		goto tx_error; +	rt = (struct rt6_info *) skb_dst(skb);  	/*  	 * Avoid duplicate tuple in reply direction for NAT traffic  	 * to local address when connection is sync-ed @@ -716,7 +741,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,  			IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,  					 "ip_vs_nat_xmit_v6(): "  					 "stopping DNAT to local address"); -			goto tx_error_put; +			goto tx_error;  		}  	}  #endif @@ -727,46 +752,21 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,  		IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,  				 "ip_vs_nat_xmit_v6(): "  				 "stopping DNAT to loopback address"); -		goto tx_error_put; -	} - -	/* MTU checking */ -	mtu = dst_mtu(&rt->dst); -	if (__mtu_check_toobig_v6(skb, mtu)) { -		if (!skb->dev) { -			struct net *net = dev_net(skb_dst(skb)->dev); - -			skb->dev = net->loopback_dev; -		} -		/* only send ICMP too big on first fragment */ -		if (!iph->fragoffs) -			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); -		IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0, -				 "ip_vs_nat_xmit_v6(): frag needed for"); -		goto tx_error_put; +		goto tx_error;  	}  	/* copy-on-write the packet before mangling it */  	if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) -		goto tx_error_put; +		goto tx_error;  	if (skb_cow(skb, rt->dst.dev->hard_header_len)) -		goto tx_error_put; +		goto tx_error;  	/* mangle the packet */ -	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, iph)) +	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))  		goto tx_error;  	ipv6_hdr(skb)->daddr = 
cp->daddr.in6; -	if (!local || !skb->dev) { -		/* drop the old route when skb is not shared */ -		skb_dst_drop(skb); -		skb_dst_set(skb, &rt->dst); -	} else { -		/* destined to loopback, do we need to change route? */ -		dst_release(&rt->dst); -	} -  	IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");  	/* FIXME: when application helper enlarges the packet and the length @@ -776,20 +776,17 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,  	/* Another hack: avoid icmp_send in ip_fragment */  	skb->local_df = 1; -	IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); +	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); +	rcu_read_unlock();  	LeaveFunction(10); -	return NF_STOLEN; +	return rc; -tx_error_icmp: -	dst_link_failure(skb);  tx_error:  	LeaveFunction(10);  	kfree_skb(skb); +	rcu_read_unlock();  	return NF_STOLEN; -tx_error_put: -	dst_release(&rt->dst); -	goto tx_error;  }  #endif @@ -826,56 +823,40 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,  	__be16 df;  	struct iphdr  *iph;			/* Our new IP header */  	unsigned int max_headroom;		/* The extra header space needed */ -	int    mtu; -	int ret; +	int ret, local;  	EnterFunction(10); -	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, -				      RT_TOS(tos), IP_VS_RT_MODE_LOCAL | -						   IP_VS_RT_MODE_NON_LOCAL | -						   IP_VS_RT_MODE_CONNECT, -						   &saddr))) -		goto tx_error_icmp; -	if (rt->rt_flags & RTCF_LOCAL) { -		ip_rt_put(rt); -		IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); +	rcu_read_lock(); +	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, +				   IP_VS_RT_MODE_LOCAL | +				   IP_VS_RT_MODE_NON_LOCAL | +				   IP_VS_RT_MODE_CONNECT | +				   IP_VS_RT_MODE_TUNNEL, &saddr); +	if (local < 0) +		goto tx_error; +	if (local) { +		rcu_read_unlock(); +		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);  	} +	rt = skb_rtable(skb);  	tdev = rt->dst.dev; -	mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); -	if (mtu < 68) { -		IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); -		goto tx_error_put; -	} -	if (rt_is_output_route(skb_rtable(skb))) -		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); -  	/* Copy DF, reset fragment offset and MF */  	df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0; -	if (df && mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb)) { -		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); -		IP_VS_DBG_RL("%s(): frag needed\n", __func__); -		goto tx_error_put; -	} -  	/*  	 * Okay, now see if we can stuff it in the buffer as-is.  	 */  	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr); -	if (skb_headroom(skb) < max_headroom -	    || skb_cloned(skb) || skb_shared(skb)) { +	if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {  		struct sk_buff *new_skb =  			skb_realloc_headroom(skb, max_headroom); -		if (!new_skb) { -			ip_rt_put(rt); -			kfree_skb(skb); -			IP_VS_ERR_RL("%s(): no memory\n", __func__); -			return NF_STOLEN; -		} + +		if (!new_skb) +			goto tx_error;  		consume_skb(skb);  		skb = new_skb;  		old_iph = ip_hdr(skb); @@ -890,10 +871,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,  	skb_reset_network_header(skb);  	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); -	/* drop old route */ -	skb_dst_drop(skb); -	skb_dst_set(skb, &rt->dst); -  	/*  	 *	Push down and install the IPIP header.  	 
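
/*
 * Sketch, not part of this patch: the classic IPIP push that the tunnel
 * comment below refers to (the hunk keeps this part unchanged).  The
 * real code takes tos, ttl and df from the inner header and the IPVS
 * connection; the helper name and parameter split are invented.
 */
static void sketch_push_ipip(struct sk_buff *skb, struct dst_entry *dst,
			     __be32 saddr, __be32 daddr, __u8 tos,
			     __u8 ttl, __be16 df)
{
	struct iphdr *iph;

	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);

	iph		= ip_hdr(skb);
	iph->version	= 4;
	iph->ihl	= sizeof(struct iphdr) >> 2;
	iph->frag_off	= df;
	iph->protocol	= IPPROTO_IPIP;
	iph->tos	= tos;
	iph->daddr	= daddr;
	iph->saddr	= saddr;
	iph->ttl	= ttl;
	ip_select_ident(iph, dst, NULL);
}
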
*/ @@ -911,25 +888,22 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,  	/* Another hack: avoid icmp_send in ip_fragment */  	skb->local_df = 1; -	ret = IP_VS_XMIT_TUNNEL(skb, cp); +	ret = ip_vs_tunnel_xmit_prepare(skb, cp);  	if (ret == NF_ACCEPT)  		ip_local_out(skb);  	else if (ret == NF_DROP)  		kfree_skb(skb); +	rcu_read_unlock();  	LeaveFunction(10);  	return NF_STOLEN; -  tx_error_icmp: -	dst_link_failure(skb);    tx_error:  	kfree_skb(skb); +	rcu_read_unlock();  	LeaveFunction(10);  	return NF_STOLEN; -tx_error_put: -	ip_rt_put(rt); -	goto tx_error;  }  #ifdef CONFIG_IP_VS_IPV6 @@ -943,60 +917,37 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,  	struct ipv6hdr  *old_iph = ipv6_hdr(skb);  	struct ipv6hdr  *iph;		/* Our new IP header */  	unsigned int max_headroom;	/* The extra header space needed */ -	int    mtu; -	int ret; +	int ret, local;  	EnterFunction(10); -	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, -					 &saddr, 1, (IP_VS_RT_MODE_LOCAL | -						     IP_VS_RT_MODE_NON_LOCAL)))) -		goto tx_error_icmp; -	if (__ip_vs_is_local_route6(rt)) { -		dst_release(&rt->dst); -		IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); +	rcu_read_lock(); +	local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, +				      &saddr, ipvsh, 1, +				      IP_VS_RT_MODE_LOCAL | +				      IP_VS_RT_MODE_NON_LOCAL | +				      IP_VS_RT_MODE_TUNNEL); +	if (local < 0) +		goto tx_error; +	if (local) { +		rcu_read_unlock(); +		return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);  	} +	rt = (struct rt6_info *) skb_dst(skb);  	tdev = rt->dst.dev; -	mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); -	if (mtu < IPV6_MIN_MTU) { -		IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, -			     IPV6_MIN_MTU); -		goto tx_error_put; -	} -	if (skb_dst(skb)) -		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - -	/* MTU checking: Notice that 'mtu' have been adjusted before hand */ -	if (__mtu_check_toobig_v6(skb, mtu)) { -		if (!skb->dev) { -			struct net *net = dev_net(skb_dst(skb)->dev); - -			skb->dev = net->loopback_dev; -		} -		/* only send ICMP too big on first fragment */ -		if (!ipvsh->fragoffs) -			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); -		IP_VS_DBG_RL("%s(): frag needed\n", __func__); -		goto tx_error_put; -	} -  	/*  	 * Okay, now see if we can stuff it in the buffer as-is.  	 */  	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); -	if (skb_headroom(skb) < max_headroom -	    || skb_cloned(skb) || skb_shared(skb)) { +	if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {  		struct sk_buff *new_skb =  			skb_realloc_headroom(skb, max_headroom); -		if (!new_skb) { -			dst_release(&rt->dst); -			kfree_skb(skb); -			IP_VS_ERR_RL("%s(): no memory\n", __func__); -			return NF_STOLEN; -		} + +		if (!new_skb) +			goto tx_error;  		consume_skb(skb);  		skb = new_skb;  		old_iph = ipv6_hdr(skb); @@ -1008,10 +959,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,  	skb_reset_network_header(skb);  	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); -	/* drop old route */ -	skb_dst_drop(skb); -	skb_dst_set(skb, &rt->dst); -  	/*  	 *	Push down and install the IPIP header.  	 
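
/*
 * Sketch, not part of this patch: the headroom guard both tunnel paths
 * use above.  A cloned skb, or one without room for the outer header,
 * is replaced by a reallocated copy; the original is released with
 * consume_skb() since this is not an error path.
 */
static struct sk_buff *sketch_ensure_headroom(struct sk_buff *skb,
					      unsigned int max_headroom)
{
	if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
		struct sk_buff *nskb = skb_realloc_headroom(skb, max_headroom);

		if (!nskb)
			return NULL;	/* caller frees skb, returns NF_STOLEN */
		consume_skb(skb);
		skb = nskb;
	}
	return skb;
}
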
*/ @@ -1029,25 +976,22 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,  	/* Another hack: avoid icmp_send in ip_fragment */  	skb->local_df = 1; -	ret = IP_VS_XMIT_TUNNEL(skb, cp); +	ret = ip_vs_tunnel_xmit_prepare(skb, cp);  	if (ret == NF_ACCEPT)  		ip6_local_out(skb);  	else if (ret == NF_DROP)  		kfree_skb(skb); +	rcu_read_unlock();  	LeaveFunction(10);  	return NF_STOLEN; -tx_error_icmp: -	dst_link_failure(skb);  tx_error:  	kfree_skb(skb); +	rcu_read_unlock();  	LeaveFunction(10);  	return NF_STOLEN; -tx_error_put: -	dst_release(&rt->dst); -	goto tx_error;  }  #endif @@ -1060,59 +1004,36 @@ int  ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,  	      struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)  { -	struct rtable *rt;			/* Route to the other host */ -	struct iphdr  *iph = ip_hdr(skb); -	int    mtu; +	int local;  	EnterFunction(10); -	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, -				      RT_TOS(iph->tos), -				      IP_VS_RT_MODE_LOCAL | -				      IP_VS_RT_MODE_NON_LOCAL | -				      IP_VS_RT_MODE_KNOWN_NH, NULL))) -		goto tx_error_icmp; -	if (rt->rt_flags & RTCF_LOCAL) { -		ip_rt_put(rt); -		IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); -	} - -	/* MTU checking */ -	mtu = dst_mtu(&rt->dst); -	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu && -	    !skb_is_gso(skb)) { -		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); -		ip_rt_put(rt); -		IP_VS_DBG_RL("%s(): frag needed\n", __func__); +	rcu_read_lock(); +	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, +				   IP_VS_RT_MODE_LOCAL | +				   IP_VS_RT_MODE_NON_LOCAL | +				   IP_VS_RT_MODE_KNOWN_NH, NULL); +	if (local < 0)  		goto tx_error; +	if (local) { +		rcu_read_unlock(); +		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);  	} -	/* -	 * Call ip_send_check because we are not sure it is called -	 * after ip_defrag. Is copy-on-write needed? 
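
/*
 * Sketch, not part of this patch: the share check that the removed
 * comment below asks about.  skb_share_check() clones a shared skb so
 * later header writes stay private; NULL means the clone failed and the
 * original has already been dropped.
 */
static struct sk_buff *sketch_make_private(struct sk_buff *skb)
{
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(!skb))
		return NULL;	/* treat the packet as consumed (NF_STOLEN) */
	return skb;
}
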
-	 */ -	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) { -		ip_rt_put(rt); -		return NF_STOLEN; -	}  	ip_send_check(ip_hdr(skb)); -	/* drop old route */ -	skb_dst_drop(skb); -	skb_dst_set(skb, &rt->dst); -  	/* Another hack: avoid icmp_send in ip_fragment */  	skb->local_df = 1; -	IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); +	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); +	rcu_read_unlock();  	LeaveFunction(10);  	return NF_STOLEN; -  tx_error_icmp: -	dst_link_failure(skb);    tx_error:  	kfree_skb(skb); +	rcu_read_unlock();  	LeaveFunction(10);  	return NF_STOLEN;  } @@ -1120,64 +1041,36 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,  #ifdef CONFIG_IP_VS_IPV6  int  ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, -		 struct ip_vs_protocol *pp, struct ip_vs_iphdr *iph) +		 struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)  { -	struct rt6_info *rt;			/* Route to the other host */ -	int    mtu; +	int local;  	EnterFunction(10); -	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, -					 0, (IP_VS_RT_MODE_LOCAL | -					     IP_VS_RT_MODE_NON_LOCAL)))) -		goto tx_error_icmp; -	if (__ip_vs_is_local_route6(rt)) { -		dst_release(&rt->dst); -		IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); -	} - -	/* MTU checking */ -	mtu = dst_mtu(&rt->dst); -	if (__mtu_check_toobig_v6(skb, mtu)) { -		if (!skb->dev) { -			struct net *net = dev_net(skb_dst(skb)->dev); - -			skb->dev = net->loopback_dev; -		} -		/* only send ICMP too big on first fragment */ -		if (!iph->fragoffs) -			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); -		dst_release(&rt->dst); -		IP_VS_DBG_RL("%s(): frag needed\n", __func__); +	rcu_read_lock(); +	local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, +				      ipvsh, 0, +				      IP_VS_RT_MODE_LOCAL | +				      IP_VS_RT_MODE_NON_LOCAL); +	if (local < 0)  		goto tx_error; +	if (local) { +		rcu_read_unlock(); +		return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1);  	} -	/* -	 * Call ip_send_check because we are not sure it is called -	 * after ip_defrag. Is copy-on-write needed? -	 */ -	skb = skb_share_check(skb, GFP_ATOMIC); -	if (unlikely(skb == NULL)) { -		dst_release(&rt->dst); -		return NF_STOLEN; -	} - -	/* drop old route */ -	skb_dst_drop(skb); -	skb_dst_set(skb, &rt->dst); -  	/* Another hack: avoid icmp_send in ip_fragment */  	skb->local_df = 1; -	IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); +	ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); +	rcu_read_unlock();  	LeaveFunction(10);  	return NF_STOLEN; -tx_error_icmp: -	dst_link_failure(skb);  tx_error:  	kfree_skb(skb); +	rcu_read_unlock();  	LeaveFunction(10);  	return NF_STOLEN;  } @@ -1194,10 +1087,9 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,  		struct ip_vs_iphdr *iph)  {  	struct rtable	*rt;	/* Route to the other host */ -	int mtu;  	int rc;  	int local; -	int rt_mode; +	int rt_mode, was_input;  	EnterFunction(10); @@ -1217,16 +1109,17 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,  	/*  	 * mangle and send the packet here (only for VS/NAT)  	 */ +	was_input = rt_is_input_route(skb_rtable(skb));  	/* LOCALNODE from FORWARD hook is not supported */  	rt_mode = (hooknum != NF_INET_FORWARD) ?  		  
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |  		  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; -	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, -				      RT_TOS(ip_hdr(skb)->tos), -				      rt_mode, NULL))) -		goto tx_error_icmp; -	local = rt->rt_flags & RTCF_LOCAL; +	rcu_read_lock(); +	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL); +	if (local < 0) +		goto tx_error; +	rt = skb_rtable(skb);  	/*  	 * Avoid duplicate tuple in reply direction for NAT traffic @@ -1241,82 +1134,51 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,  			IP_VS_DBG(10, "%s(): "  				  "stopping DNAT to local address %pI4\n",  				  __func__, &cp->daddr.ip); -			goto tx_error_put; +			goto tx_error;  		}  	}  #endif  	/* From world but DNAT to loopback address? */ -	if (local && ipv4_is_loopback(cp->daddr.ip) && -	    rt_is_input_route(skb_rtable(skb))) { +	if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {  		IP_VS_DBG(1, "%s(): "  			  "stopping DNAT to loopback %pI4\n",  			  __func__, &cp->daddr.ip); -		goto tx_error_put; -	} - -	/* MTU checking */ -	mtu = dst_mtu(&rt->dst); -	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) && -	    !skb_is_gso(skb)) { -		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); -		IP_VS_DBG_RL("%s(): frag needed\n", __func__); -		goto tx_error_put; +		goto tx_error;  	}  	/* copy-on-write the packet before mangling it */  	if (!skb_make_writable(skb, offset)) -		goto tx_error_put; +		goto tx_error;  	if (skb_cow(skb, rt->dst.dev->hard_header_len)) -		goto tx_error_put; +		goto tx_error;  	ip_vs_nat_icmp(skb, pp, cp, 0); -	if (!local) { -		/* drop the old route when skb is not shared */ -		skb_dst_drop(skb); -		skb_dst_set(skb, &rt->dst); -	} else { -		ip_rt_put(rt); -		/* -		 * Some IPv4 replies get local address from routes, -		 * not from iph, so while we DNAT after routing -		 * we need this second input/output route. -		 */ -		if (!__ip_vs_reroute_locally(skb)) -			goto tx_error; -	} -  	/* Another hack: avoid icmp_send in ip_fragment */  	skb->local_df = 1; -	IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); - -	rc = NF_STOLEN; +	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); +	rcu_read_unlock();  	goto out; -  tx_error_icmp: -	dst_link_failure(skb);    tx_error: -	dev_kfree_skb(skb); +	kfree_skb(skb); +	rcu_read_unlock();  	rc = NF_STOLEN;    out:  	LeaveFunction(10);  	return rc; -  tx_error_put: -	ip_rt_put(rt); -	goto tx_error;  }  #ifdef CONFIG_IP_VS_IPV6  int  ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,  		struct ip_vs_protocol *pp, int offset, unsigned int hooknum, -		struct ip_vs_iphdr *iph) +		struct ip_vs_iphdr *ipvsh)  {  	struct rt6_info	*rt;	/* Route to the other host */ -	int mtu;  	int rc;  	int local;  	int rt_mode; @@ -1328,7 +1190,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,  	   translate address/port back */  	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {  		if (cp->packet_xmit) -			rc = cp->packet_xmit(skb, cp, pp, iph); +			rc = cp->packet_xmit(skb, cp, pp, ipvsh);  		else  			rc = NF_ACCEPT;  		/* do not touch skb anymore */ @@ -1344,11 +1206,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,  	rt_mode = (hooknum != NF_INET_FORWARD) ?  		  
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |  		  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; -	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, -					 0, rt_mode))) -		goto tx_error_icmp; - -	local = __ip_vs_is_local_route6(rt); +	rcu_read_lock(); +	local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, +				      ipvsh, 0, rt_mode); +	if (local < 0) +		goto tx_error; +	rt = (struct rt6_info *) skb_dst(skb);  	/*  	 * Avoid duplicate tuple in reply direction for NAT traffic  	 * to local address when connection is sync-ed @@ -1362,7 +1225,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,  			IP_VS_DBG(10, "%s(): "  				  "stopping DNAT to local address %pI6\n",  				  __func__, &cp->daddr.in6); -			goto tx_error_put; +			goto tx_error;  		}  	}  #endif @@ -1373,60 +1236,31 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,  		IP_VS_DBG(1, "%s(): "  			  "stopping DNAT to loopback %pI6\n",  			  __func__, &cp->daddr.in6); -		goto tx_error_put; -	} - -	/* MTU checking */ -	mtu = dst_mtu(&rt->dst); -	if (__mtu_check_toobig_v6(skb, mtu)) { -		if (!skb->dev) { -			struct net *net = dev_net(skb_dst(skb)->dev); - -			skb->dev = net->loopback_dev; -		} -		/* only send ICMP too big on first fragment */ -		if (!iph->fragoffs) -			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); -		IP_VS_DBG_RL("%s(): frag needed\n", __func__); -		goto tx_error_put; +		goto tx_error;  	}  	/* copy-on-write the packet before mangling it */  	if (!skb_make_writable(skb, offset)) -		goto tx_error_put; +		goto tx_error;  	if (skb_cow(skb, rt->dst.dev->hard_header_len)) -		goto tx_error_put; +		goto tx_error;  	ip_vs_nat_icmp_v6(skb, pp, cp, 0); -	if (!local || !skb->dev) { -		/* drop the old route when skb is not shared */ -		skb_dst_drop(skb); -		skb_dst_set(skb, &rt->dst); -	} else { -		/* destined to loopback, do we need to change route? */ -		dst_release(&rt->dst); -	} -  	/* Another hack: avoid icmp_send in ip_fragment */  	skb->local_df = 1; -	IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); - -	rc = NF_STOLEN; +	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); +	rcu_read_unlock();  	goto out; -tx_error_icmp: -	dst_link_failure(skb);  tx_error: -	dev_kfree_skb(skb); +	kfree_skb(skb); +	rcu_read_unlock();  	rc = NF_STOLEN;  out:  	LeaveFunction(10);  	return rc; -tx_error_put: -	dst_release(&rt->dst); -	goto tx_error;  }  #endif diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index dbdaa114926..b8b95f4027c 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -2,6 +2,7 @@   *   * (C) 2002 by Brian J. 
Murrell <netfilter@interlinx.bc.ca>   * based on HW's ip_conntrack_irc.c as well as other modules + * (C) 2006 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or   * modify it under the terms of the GNU General Public License diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f84965af4a4..0283baedcdf 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -5,6 +5,7 @@  /* (C) 1999-2001 Paul `Rusty' Russell   * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>   * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * (C) 2005-2012 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -48,6 +49,7 @@  #include <net/netfilter/nf_conntrack_labels.h>  #include <net/netfilter/nf_nat.h>  #include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_helper.h>  #define NF_CONNTRACK_VERSION	"0.5.0" @@ -1259,7 +1261,7 @@ void nf_ct_iterate_cleanup(struct net *net,  EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);  struct __nf_ct_flush_report { -	u32 pid; +	u32 portid;  	int report;  }; @@ -1274,7 +1276,7 @@ static int kill_report(struct nf_conn *i, void *data)  	/* If we fail to deliver the event, death_by_timeout() will retry */  	if (nf_conntrack_event_report(IPCT_DESTROY, i, -				      fr->pid, fr->report) < 0) +				      fr->portid, fr->report) < 0)  		return 1;  	/* Avoid the delivery of the destroy event in death_by_timeout(). */ @@ -1297,10 +1299,10 @@ void nf_ct_free_hashtable(void *hash, unsigned int size)  }  EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush_report(struct net *net, u32 pid, int report) +void nf_conntrack_flush_report(struct net *net, u32 portid, int report)  {  	struct __nf_ct_flush_report fr = { -		.pid 	= pid, +		.portid	= portid,  		.report = report,  	};  	nf_ct_iterate_cleanup(net, kill_report, &fr); @@ -1364,30 +1366,48 @@ void nf_conntrack_cleanup_end(void)   */  void nf_conntrack_cleanup_net(struct net *net)  { +	LIST_HEAD(single); + +	list_add(&net->exit_list, &single); +	nf_conntrack_cleanup_net_list(&single); +} + +void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list) +{ +	int busy; +	struct net *net; +  	/*  	 * This makes sure all current packets have passed through  	 *  netfilter framework.  Roll on, two-stage module  	 *  delete...  	 
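
/*
 * Sketch, not part of this patch: how a pernet exit_batch handler
 * consumes the list that nf_conntrack_cleanup_net() builds below.  Each
 * dying struct net is linked through net->exit_list, so one
 * synchronize_net() is amortised over the whole batch.
 * sketch_exit_batch() is an invented name and the per-net teardown is
 * reduced to a single call.
 */
static void sketch_exit_batch(struct list_head *net_exit_list)
{
	struct net *net;

	synchronize_net();	/* one grace period for every netns in the batch */
	list_for_each_entry(net, net_exit_list, exit_list)
		free_percpu(net->ct.stat);
}

static struct pernet_operations sketch_net_ops = {
	.exit_batch	= sketch_exit_batch,
};
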
*/  	synchronize_net(); - i_see_dead_people: -	nf_ct_iterate_cleanup(net, kill_all, NULL); -	nf_ct_release_dying_list(net); -	if (atomic_read(&net->ct.count) != 0) { +i_see_dead_people: +	busy = 0; +	list_for_each_entry(net, net_exit_list, exit_list) { +		nf_ct_iterate_cleanup(net, kill_all, NULL); +		nf_ct_release_dying_list(net); +		if (atomic_read(&net->ct.count) != 0) +			busy = 1; +	} +	if (busy) {  		schedule();  		goto i_see_dead_people;  	} -	nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); -	nf_conntrack_proto_pernet_fini(net); -	nf_conntrack_helper_pernet_fini(net); -	nf_conntrack_ecache_pernet_fini(net); -	nf_conntrack_tstamp_pernet_fini(net); -	nf_conntrack_acct_pernet_fini(net); -	nf_conntrack_expect_pernet_fini(net); -	kmem_cache_destroy(net->ct.nf_conntrack_cachep); -	kfree(net->ct.slabname); -	free_percpu(net->ct.stat); +	list_for_each_entry(net, net_exit_list, exit_list) { +		nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); +		nf_conntrack_proto_pernet_fini(net); +		nf_conntrack_helper_pernet_fini(net); +		nf_conntrack_ecache_pernet_fini(net); +		nf_conntrack_tstamp_pernet_fini(net); +		nf_conntrack_acct_pernet_fini(net); +		nf_conntrack_expect_pernet_fini(net); +		kmem_cache_destroy(net->ct.nf_conntrack_cachep); +		kfree(net->ct.slabname); +		free_percpu(net->ct.stat); +	}  }  void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index b5d2eb8bf0d..1df17614656 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -1,8 +1,10 @@  /* Event cache for netfilter. */ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> - * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> +/* + * (C) 2005 Harald Welte <laforge@gnumonks.org> + * (C) 2005 Patrick McHardy <kaber@trash.net> + * (C) 2005-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2005 USAGI/WIDE Project <http://www.linux-ipv6.org>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 8c10e3db3d9..c63b618cd61 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -3,6 +3,7 @@  /* (C) 1999-2001 Paul `Rusty' Russell   * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>   * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * (c) 2005-2012 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -40,7 +41,7 @@ static struct kmem_cache *nf_ct_expect_cachep __read_mostly;  /* nf_conntrack_expect helper functions */  void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, -				u32 pid, int report) +				u32 portid, int report)  {  	struct nf_conn_help *master_help = nfct_help(exp->master);  	struct net *net = nf_ct_exp_net(exp); @@ -54,7 +55,7 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,  	hlist_del(&exp->lnode);  	master_help->expecting[exp->class]--; -	nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report); +	nf_ct_expect_event_report(IPEXP_DESTROY, exp, portid, report);  	nf_ct_expect_put(exp);  	NF_CT_STAT_INC(net, expect_delete); @@ -412,7 +413,7 @@ out:  }  int nf_ct_expect_related_report(struct nf_conntrack_expect 
*expect,  -				u32 pid, int report) +				u32 portid, int report)  {  	int ret; @@ -425,7 +426,7 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,  	if (ret < 0)  		goto out;  	spin_unlock_bh(&nf_conntrack_lock); -	nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report); +	nf_ct_expect_event_report(IPEXP_NEW, expect, portid, report);  	return ret;  out:  	spin_unlock_bh(&nf_conntrack_lock); diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 62fb8faedb8..6b217074237 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -3,6 +3,7 @@  /* (C) 1999-2001 Paul `Rusty' Russell   * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>   * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 7df7b36d2e2..bdebd03bc8c 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -2,6 +2,7 @@   * H.323 connection tracking helper   *   * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net> + * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net>   *   * This source code is licensed under General Public License version 2.   * diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 94b4b9853f6..974a2a4adef 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -3,6 +3,7 @@  /* (C) 1999-2001 Paul `Rusty' Russell   * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>   * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -353,7 +354,7 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct,  	/* rcu_read_lock()ed by nf_hook_slow */  	helper = rcu_dereference(help->helper); -	nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, +	nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL,  		      "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf);  	va_end(args); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 70985c5d0ff..0fd2976db7e 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -1,6 +1,7 @@  /* IRC extension for IP connection tracking, Version 1.21   * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>   * based on RR's ip_conntrack_ftp.c + * (C) 2006-2012 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or   * modify it under the terms of the GNU General Public License diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9904b15f600..6d0f8a17c5b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2409,6 +2409,92 @@ out:  	return skb->len;  } +static int +ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb) +{ +	struct nf_conntrack_expect *exp, *last; +	struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); +	struct nf_conn *ct = cb->data; +	struct nf_conn_help *help = nfct_help(ct); +	u_int8_t 
l3proto = nfmsg->nfgen_family; + +	if (cb->args[0]) +		return 0; + +	rcu_read_lock(); +	last = (struct nf_conntrack_expect *)cb->args[1]; +restart: +	hlist_for_each_entry(exp, &help->expectations, lnode) { +		if (l3proto && exp->tuple.src.l3num != l3proto) +			continue; +		if (cb->args[1]) { +			if (exp != last) +				continue; +			cb->args[1] = 0; +		} +		if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).portid, +					    cb->nlh->nlmsg_seq, +					    IPCTNL_MSG_EXP_NEW, +					    exp) < 0) { +			if (!atomic_inc_not_zero(&exp->use)) +				continue; +			cb->args[1] = (unsigned long)exp; +			goto out; +		} +	} +	if (cb->args[1]) { +		cb->args[1] = 0; +		goto restart; +	} +	cb->args[0] = 1; +out: +	rcu_read_unlock(); +	if (last) +		nf_ct_expect_put(last); + +	return skb->len; +} + +static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb, +				 const struct nlmsghdr *nlh, +				 const struct nlattr * const cda[]) +{ +	int err; +	struct net *net = sock_net(ctnl); +	struct nfgenmsg *nfmsg = nlmsg_data(nlh); +	u_int8_t u3 = nfmsg->nfgen_family; +	struct nf_conntrack_tuple tuple; +	struct nf_conntrack_tuple_hash *h; +	struct nf_conn *ct; +	u16 zone = 0; +	struct netlink_dump_control c = { +		.dump = ctnetlink_exp_ct_dump_table, +		.done = ctnetlink_exp_done, +	}; + +	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); +	if (err < 0) +		return err; + +	if (cda[CTA_EXPECT_ZONE]) { +		err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); +		if (err < 0) +			return err; +	} + +	h = nf_conntrack_find_get(net, zone, &tuple); +	if (!h) +		return -ENOENT; + +	ct = nf_ct_tuplehash_to_ctrack(h); +	c.data = ct; + +	err = netlink_dump_start(ctnl, skb, nlh, &c); +	nf_ct_put(ct); + +	return err; +} +  static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {  	[CTA_EXPECT_MASTER]	= { .type = NLA_NESTED },  	[CTA_EXPECT_TUPLE]	= { .type = NLA_NESTED }, @@ -2439,11 +2525,15 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,  	int err;  	if (nlh->nlmsg_flags & NLM_F_DUMP) { -		struct netlink_dump_control c = { -			.dump = ctnetlink_exp_dump_table, -			.done = ctnetlink_exp_done, -		}; -		return netlink_dump_start(ctnl, skb, nlh, &c); +		if (cda[CTA_EXPECT_MASTER]) +			return ctnetlink_dump_exp_ct(ctnl, skb, nlh, cda); +		else { +			struct netlink_dump_control c = { +				.dump = ctnetlink_exp_dump_table, +				.done = ctnetlink_exp_done, +			}; +			return netlink_dump_start(ctnl, skb, nlh, &c); +		}  	}  	err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index e6678d2b624..7bd03decd36 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -11,6 +11,8 @@   *   * Development of this code funded by Astaro AG (http://www.astaro.com/)   * + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> + *   * Limitations:   * 	 - We blindly assume that control connections are always   * 	   established in PNS->PAC direction.  
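
/*
 * Sketch, not part of this patch: the shape of the filtered dump that
 * ctnetlink_dump_exp_ct() above sets up.  The target conntrack rides in
 * netlink_dump_control.data and reaches the .dump callback as cb->data;
 * sketch_start_exp_dump() is an invented wrapper around the calls the
 * patch adds.
 */
static int sketch_start_exp_dump(struct sock *ctnl, struct sk_buff *skb,
				 const struct nlmsghdr *nlh,
				 struct nf_conn *ct)
{
	struct netlink_dump_control c = {
		.dump = ctnetlink_exp_ct_dump_table,
		.done = ctnetlink_exp_done,
		.data = ct,
	};

	return netlink_dump_start(ctnl, skb, nlh, &c);
}
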
This is a violation diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 58ab4050830..0ab9636ac57 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -3,6 +3,7 @@  /* (C) 1999-2001 Paul `Rusty' Russell   * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>   * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index ba65b2041eb..a99b6c3427b 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -456,7 +456,8 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,  out_invalid:  	if (LOG_INVALID(net, IPPROTO_DCCP)) -		nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg); +		nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL, +			      NULL, msg);  	return false;  } @@ -542,13 +543,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,  		spin_unlock_bh(&ct->lock);  		if (LOG_INVALID(net, IPPROTO_DCCP)) -			nf_log_packet(pf, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,  				      "nf_ct_dccp: invalid packet ignored ");  		return NF_ACCEPT;  	case CT_DCCP_INVALID:  		spin_unlock_bh(&ct->lock);  		if (LOG_INVALID(net, IPPROTO_DCCP)) -			nf_log_packet(pf, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,  				      "nf_ct_dccp: invalid state transition ");  		return -NF_ACCEPT;  	} @@ -613,7 +614,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl,  out_invalid:  	if (LOG_INVALID(net, IPPROTO_DCCP)) -		nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg); +		nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, msg);  	return -NF_ACCEPT;  } diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 155ce9f8a0d..9d9c0dade60 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -21,6 +21,7 @@   *   * Development of this code funded by Astaro AG (http://www.astaro.com/)   * + * (C) 2006-2012 Patrick McHardy <kaber@trash.net>   */  #include <linux/module.h> diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index ec83536def9..1314d33f6bc 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -1,6 +1,9 @@  /*   * Connection tracking protocol helper module for SCTP.   * + * Copyright (c) 2004 Kiran Kumar Immidi <immidi_kiran@yahoo.com> + * Copyright (c) 2004-2012 Patrick McHardy <kaber@trash.net> + *   * SCTP is defined in RFC 2960. References to various sections in this code   * are to this RFC.   
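
/*
 * Sketch, not part of this patch: the calling convention after the
 * nf_log_packet() conversion shown in the hunks around here.  The
 * function now takes the struct net first so the logger is resolved per
 * namespace; sketch_log_invalid() and its prefix string are
 * illustrative.
 */
static int sketch_log_invalid(struct net *net, u_int8_t pf,
			      const struct sk_buff *skb)
{
	nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
		      "sketch: invalid packet ");
	return -NF_ACCEPT;
}
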
* diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 83876e9877f..4d4d8f1d01f 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1,5 +1,7 @@  /* (C) 1999-2001 Paul `Rusty' Russell   * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -720,7 +722,7 @@ static bool tcp_in_window(const struct nf_conn *ct,  		    tn->tcp_be_liberal)  			res = true;  		if (!res && LOG_INVALID(net, IPPROTO_TCP)) -			nf_log_packet(pf, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,  			"nf_ct_tcp: %s ",  			before(seq, sender->td_maxend + 1) ?  			after(end, sender->td_end - receiver->td_maxwin - 1) ? @@ -772,7 +774,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,  	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);  	if (th == NULL) {  		if (LOG_INVALID(net, IPPROTO_TCP)) -			nf_log_packet(pf, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,  				"nf_ct_tcp: short packet ");  		return -NF_ACCEPT;  	} @@ -780,7 +782,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,  	/* Not whole TCP header or malformed packet */  	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {  		if (LOG_INVALID(net, IPPROTO_TCP)) -			nf_log_packet(pf, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,  				"nf_ct_tcp: truncated/malformed packet ");  		return -NF_ACCEPT;  	} @@ -793,7 +795,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,  	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&  	    nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {  		if (LOG_INVALID(net, IPPROTO_TCP)) -			nf_log_packet(pf, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,  				  "nf_ct_tcp: bad TCP checksum ");  		return -NF_ACCEPT;  	} @@ -802,7 +804,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,  	tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));  	if (!tcp_valid_flags[tcpflags]) {  		if (LOG_INVALID(net, IPPROTO_TCP)) -			nf_log_packet(pf, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,  				  "nf_ct_tcp: invalid TCP flag combination ");  		return -NF_ACCEPT;  	} @@ -949,7 +951,7 @@ static int tcp_packet(struct nf_conn *ct,  		}  		spin_unlock_bh(&ct->lock);  		if (LOG_INVALID(net, IPPROTO_TCP)) -			nf_log_packet(pf, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,  				  "nf_ct_tcp: invalid packet ignored in "  				  "state %s ", tcp_conntrack_names[old_state]);  		return NF_ACCEPT; @@ -959,7 +961,7 @@ static int tcp_packet(struct nf_conn *ct,  			 dir, get_conntrack_index(th), old_state);  		spin_unlock_bh(&ct->lock);  		if (LOG_INVALID(net, IPPROTO_TCP)) -			nf_log_packet(pf, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,  				  "nf_ct_tcp: invalid state ");  		return -NF_ACCEPT;  	case TCP_CONNTRACK_CLOSE: @@ -969,8 +971,8 @@ static int tcp_packet(struct nf_conn *ct,  			/* Invalid RST  */  			spin_unlock_bh(&ct->lock);  			if (LOG_INVALID(net, IPPROTO_TCP)) -				nf_log_packet(pf, 0, skb, NULL, NULL, NULL, -					  "nf_ct_tcp: invalid RST "); +				
nf_log_packet(net, pf, 0, skb, NULL, NULL, +					      NULL, "nf_ct_tcp: invalid RST ");  			return -NF_ACCEPT;  		}  		if (index == TCP_RST_SET diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 59623cc56e8..9d7721cbce4 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -1,5 +1,6 @@  /* (C) 1999-2001 Paul `Rusty' Russell   * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -119,7 +120,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,  	hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);  	if (hdr == NULL) {  		if (LOG_INVALID(net, IPPROTO_UDP)) -			nf_log_packet(pf, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,  				      "nf_ct_udp: short packet ");  		return -NF_ACCEPT;  	} @@ -127,7 +128,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,  	/* Truncated/malformed packets */  	if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {  		if (LOG_INVALID(net, IPPROTO_UDP)) -			nf_log_packet(pf, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,  				"nf_ct_udp: truncated/malformed packet ");  		return -NF_ACCEPT;  	} @@ -143,7 +144,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,  	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&  	    nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) {  		if (LOG_INVALID(net, IPPROTO_UDP)) -			nf_log_packet(pf, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,  				"nf_ct_udp: bad UDP checksum ");  		return -NF_ACCEPT;  	} diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index ca969f6273f..2750e6c69f8 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -131,7 +131,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,  	hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);  	if (hdr == NULL) {  		if (LOG_INVALID(net, IPPROTO_UDPLITE)) -			nf_log_packet(pf, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,  				      "nf_ct_udplite: short packet ");  		return -NF_ACCEPT;  	} @@ -141,7 +141,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,  		cscov = udplen;  	else if (cscov < sizeof(*hdr) || cscov > udplen) {  		if (LOG_INVALID(net, IPPROTO_UDPLITE)) -			nf_log_packet(pf, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,  				"nf_ct_udplite: invalid checksum coverage ");  		return -NF_ACCEPT;  	} @@ -149,7 +149,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,  	/* UDPLITE mandates checksums */  	if (!hdr->check) {  		if (LOG_INVALID(net, IPPROTO_UDPLITE)) -			nf_log_packet(pf, 0, skb, NULL, NULL, NULL, +			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,  				      "nf_ct_udplite: checksum missing ");  		return -NF_ACCEPT;  	} @@ -159,7 +159,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl,  	    nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP,  	    			pf)) {  		if (LOG_INVALID(net, IPPROTO_UDPLITE)) -			nf_log_packet(pf, 0, skb, NULL, NULL, NULL, +			
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,  				      "nf_ct_udplite: bad UDPLite checksum ");  		return -NF_ACCEPT;  	} diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index fedee394366..bd700b4013c 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -1,5 +1,6 @@  /* (C) 1999-2001 Paul `Rusty' Russell   * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2005-2012 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -545,16 +546,20 @@ out_init:  	return ret;  } -static void nf_conntrack_pernet_exit(struct net *net) +static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)  { -	nf_conntrack_standalone_fini_sysctl(net); -	nf_conntrack_standalone_fini_proc(net); -	nf_conntrack_cleanup_net(net); +	struct net *net; + +	list_for_each_entry(net, net_exit_list, exit_list) { +		nf_conntrack_standalone_fini_sysctl(net); +		nf_conntrack_standalone_fini_proc(net); +	} +	nf_conntrack_cleanup_net_list(net_exit_list);  }  static struct pernet_operations nf_conntrack_net_ops = { -	.init = nf_conntrack_pernet_init, -	.exit = nf_conntrack_pernet_exit, +	.init		= nf_conntrack_pernet_init, +	.exit_batch	= nf_conntrack_pernet_exit,  };  static int __init nf_conntrack_standalone_init(void) diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index e9936c83020..e68ab4fbd71 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -1,5 +1,5 @@  /* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu> - * + * (C) 2006-2012 Patrick McHardy <kaber@trash.net>   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as   * published by the Free Software Foundation. diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 9e312695c81..388656d5a9e 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -16,7 +16,6 @@  #define NF_LOG_PREFIXLEN		128  #define NFLOGGER_NAME_LEN		64 -static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly;  static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly;  static DEFINE_MUTEX(nf_log_mutex); @@ -32,13 +31,46 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger)  	return NULL;  } +void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) +{ +	const struct nf_logger *log; + +	if (pf == NFPROTO_UNSPEC) +		return; + +	mutex_lock(&nf_log_mutex); +	log = rcu_dereference_protected(net->nf.nf_loggers[pf], +					lockdep_is_held(&nf_log_mutex)); +	if (log == NULL) +		rcu_assign_pointer(net->nf.nf_loggers[pf], logger); + +	mutex_unlock(&nf_log_mutex); +} +EXPORT_SYMBOL(nf_log_set); + +void nf_log_unset(struct net *net, const struct nf_logger *logger) +{ +	int i; +	const struct nf_logger *log; + +	mutex_lock(&nf_log_mutex); +	for (i = 0; i < NFPROTO_NUMPROTO; i++) { +		log = rcu_dereference_protected(net->nf.nf_loggers[i], +				lockdep_is_held(&nf_log_mutex)); +		if (log == logger) +			RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL); +	} +	mutex_unlock(&nf_log_mutex); +	synchronize_rcu(); +} +EXPORT_SYMBOL(nf_log_unset); +  /* return EEXIST if the same logger is registered, 0 on success. 
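
/*
 * Sketch, not part of this patch: with the per-netns logger slots added
 * above, a backend registers once and each namespace binds it
 * separately, typically from a pernet init hook.  sketch_logger and the
 * init function are invented; nf_log_set() only fills a slot that is
 * still empty.
 */
static struct nf_logger sketch_logger;	/* .name and .logfn set elsewhere */

static int __net_init sketch_log_net_init(struct net *net)
{
	nf_log_set(net, NFPROTO_IPV4, &sketch_logger);
	return 0;
}
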
*/  int nf_log_register(u_int8_t pf, struct nf_logger *logger)  { -	const struct nf_logger *llog;  	int i; -	if (pf >= ARRAY_SIZE(nf_loggers)) +	if (pf >= ARRAY_SIZE(init_net.nf.nf_loggers))  		return -EINVAL;  	for (i = 0; i < ARRAY_SIZE(logger->list); i++) @@ -52,10 +84,6 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)  	} else {  		/* register at end of list to honor first register win */  		list_add_tail(&logger->list[pf], &nf_loggers_l[pf]); -		llog = rcu_dereference_protected(nf_loggers[pf], -						 lockdep_is_held(&nf_log_mutex)); -		if (llog == NULL) -			rcu_assign_pointer(nf_loggers[pf], logger);  	}  	mutex_unlock(&nf_log_mutex); @@ -66,49 +94,43 @@ EXPORT_SYMBOL(nf_log_register);  void nf_log_unregister(struct nf_logger *logger)  { -	const struct nf_logger *c_logger;  	int i;  	mutex_lock(&nf_log_mutex); -	for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) { -		c_logger = rcu_dereference_protected(nf_loggers[i], -						     lockdep_is_held(&nf_log_mutex)); -		if (c_logger == logger) -			RCU_INIT_POINTER(nf_loggers[i], NULL); +	for (i = 0; i < NFPROTO_NUMPROTO; i++)  		list_del(&logger->list[i]); -	}  	mutex_unlock(&nf_log_mutex); - -	synchronize_rcu();  }  EXPORT_SYMBOL(nf_log_unregister); -int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) +int nf_log_bind_pf(struct net *net, u_int8_t pf, +		   const struct nf_logger *logger)  { -	if (pf >= ARRAY_SIZE(nf_loggers)) +	if (pf >= ARRAY_SIZE(net->nf.nf_loggers))  		return -EINVAL;  	mutex_lock(&nf_log_mutex);  	if (__find_logger(pf, logger->name) == NULL) {  		mutex_unlock(&nf_log_mutex);  		return -ENOENT;  	} -	rcu_assign_pointer(nf_loggers[pf], logger); +	rcu_assign_pointer(net->nf.nf_loggers[pf], logger);  	mutex_unlock(&nf_log_mutex);  	return 0;  }  EXPORT_SYMBOL(nf_log_bind_pf); -void nf_log_unbind_pf(u_int8_t pf) +void nf_log_unbind_pf(struct net *net, u_int8_t pf)  { -	if (pf >= ARRAY_SIZE(nf_loggers)) +	if (pf >= ARRAY_SIZE(net->nf.nf_loggers))  		return;  	mutex_lock(&nf_log_mutex); -	RCU_INIT_POINTER(nf_loggers[pf], NULL); +	RCU_INIT_POINTER(net->nf.nf_loggers[pf], NULL);  	mutex_unlock(&nf_log_mutex);  }  EXPORT_SYMBOL(nf_log_unbind_pf); -void nf_log_packet(u_int8_t pf, +void nf_log_packet(struct net *net, +		   u_int8_t pf,  		   unsigned int hooknum,  		   const struct sk_buff *skb,  		   const struct net_device *in, @@ -121,7 +143,7 @@ void nf_log_packet(u_int8_t pf,  	const struct nf_logger *logger;  	rcu_read_lock(); -	logger = rcu_dereference(nf_loggers[pf]); +	logger = rcu_dereference(net->nf.nf_loggers[pf]);  	if (logger) {  		va_start(args, fmt);  		vsnprintf(prefix, sizeof(prefix), fmt, args); @@ -135,9 +157,11 @@ EXPORT_SYMBOL(nf_log_packet);  #ifdef CONFIG_PROC_FS  static void *seq_start(struct seq_file *seq, loff_t *pos)  { +	struct net *net = seq_file_net(seq); +  	mutex_lock(&nf_log_mutex); -	if (*pos >= ARRAY_SIZE(nf_loggers)) +	if (*pos >= ARRAY_SIZE(net->nf.nf_loggers))  		return NULL;  	return pos; @@ -145,9 +169,11 @@ static void *seq_start(struct seq_file *seq, loff_t *pos)  static void *seq_next(struct seq_file *s, void *v, loff_t *pos)  { +	struct net *net = seq_file_net(s); +  	(*pos)++; -	if (*pos >= ARRAY_SIZE(nf_loggers)) +	if (*pos >= ARRAY_SIZE(net->nf.nf_loggers))  		return NULL;  	return pos; @@ -164,8 +190,9 @@ static int seq_show(struct seq_file *s, void *v)  	const struct nf_logger *logger;  	struct nf_logger *t;  	int ret; +	struct net *net = seq_file_net(s); -	logger = rcu_dereference_protected(nf_loggers[*pos], +	logger = 
rcu_dereference_protected(net->nf.nf_loggers[*pos],  					   lockdep_is_held(&nf_log_mutex));  	if (!logger) @@ -199,7 +226,8 @@ static const struct seq_operations nflog_seq_ops = {  static int nflog_open(struct inode *inode, struct file *file)  { -	return seq_open(file, &nflog_seq_ops); +	return seq_open_net(inode, file, &nflog_seq_ops, +			    sizeof(struct seq_net_private));  }  static const struct file_operations nflog_file_ops = { @@ -207,7 +235,7 @@ static const struct file_operations nflog_file_ops = {  	.open	 = nflog_open,  	.read	 = seq_read,  	.llseek	 = seq_lseek, -	.release = seq_release, +	.release = seq_release_net,  }; @@ -216,7 +244,6 @@ static const struct file_operations nflog_file_ops = {  #ifdef CONFIG_SYSCTL  static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3];  static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; -static struct ctl_table_header *nf_log_dir_header;  static int nf_log_proc_dostring(ctl_table *table, int write,  			 void __user *buffer, size_t *lenp, loff_t *ppos) @@ -226,6 +253,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write,  	size_t size = *lenp;  	int r = 0;  	int tindex = (unsigned long)table->extra1; +	struct net *net = current->nsproxy->net_ns;  	if (write) {  		if (size > sizeof(buf)) @@ -234,7 +262,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write,  			return -EFAULT;  		if (!strcmp(buf, "NONE")) { -			nf_log_unbind_pf(tindex); +			nf_log_unbind_pf(net, tindex);  			return 0;  		}  		mutex_lock(&nf_log_mutex); @@ -243,11 +271,11 @@ static int nf_log_proc_dostring(ctl_table *table, int write,  			mutex_unlock(&nf_log_mutex);  			return -ENOENT;  		} -		rcu_assign_pointer(nf_loggers[tindex], logger); +		rcu_assign_pointer(net->nf.nf_loggers[tindex], logger);  		mutex_unlock(&nf_log_mutex);  	} else {  		mutex_lock(&nf_log_mutex); -		logger = rcu_dereference_protected(nf_loggers[tindex], +		logger = rcu_dereference_protected(net->nf.nf_loggers[tindex],  						   lockdep_is_held(&nf_log_mutex));  		if (!logger)  			table->data = "NONE"; @@ -260,49 +288,111 @@ static int nf_log_proc_dostring(ctl_table *table, int write,  	return r;  } -static __init int netfilter_log_sysctl_init(void) +static int netfilter_log_sysctl_init(struct net *net)  {  	int i; +	struct ctl_table *table; -	for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { -		snprintf(nf_log_sysctl_fnames[i-NFPROTO_UNSPEC], 3, "%d", i); -		nf_log_sysctl_table[i].procname	= -			nf_log_sysctl_fnames[i-NFPROTO_UNSPEC]; -		nf_log_sysctl_table[i].data = NULL; -		nf_log_sysctl_table[i].maxlen = -			NFLOGGER_NAME_LEN * sizeof(char); -		nf_log_sysctl_table[i].mode = 0644; -		nf_log_sysctl_table[i].proc_handler = nf_log_proc_dostring; -		nf_log_sysctl_table[i].extra1 = (void *)(unsigned long) i; +	table = nf_log_sysctl_table; +	if (!net_eq(net, &init_net)) { +		table = kmemdup(nf_log_sysctl_table, +				 sizeof(nf_log_sysctl_table), +				 GFP_KERNEL); +		if (!table) +			goto err_alloc; +	} else { +		for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { +			snprintf(nf_log_sysctl_fnames[i], +				 3, "%d", i); +			nf_log_sysctl_table[i].procname	= +				nf_log_sysctl_fnames[i]; +			nf_log_sysctl_table[i].data = NULL; +			nf_log_sysctl_table[i].maxlen = +				NFLOGGER_NAME_LEN * sizeof(char); +			nf_log_sysctl_table[i].mode = 0644; +			nf_log_sysctl_table[i].proc_handler = +				nf_log_proc_dostring; +			nf_log_sysctl_table[i].extra1 = +				(void *)(unsigned long) i; +		}  	} -	nf_log_dir_header = register_net_sysctl(&init_net, 
"net/netfilter/nf_log", -				       nf_log_sysctl_table); -	if (!nf_log_dir_header) -		return -ENOMEM; +	net->nf.nf_log_dir_header = register_net_sysctl(net, +						"net/netfilter/nf_log", +						table); +	if (!net->nf.nf_log_dir_header) +		goto err_reg;  	return 0; + +err_reg: +	if (!net_eq(net, &init_net)) +		kfree(table); +err_alloc: +	return -ENOMEM; +} + +static void netfilter_log_sysctl_exit(struct net *net) +{ +	struct ctl_table *table; + +	table = net->nf.nf_log_dir_header->ctl_table_arg; +	unregister_net_sysctl_table(net->nf.nf_log_dir_header); +	if (!net_eq(net, &init_net)) +		kfree(table);  }  #else -static __init int netfilter_log_sysctl_init(void) +static int netfilter_log_sysctl_init(struct net *net)  {  	return 0;  } + +static void netfilter_log_sysctl_exit(struct net *net) +{ +}  #endif /* CONFIG_SYSCTL */ -int __init netfilter_log_init(void) +static int __net_init nf_log_net_init(struct net *net)  { -	int i, r; +	int ret = -ENOMEM; +  #ifdef CONFIG_PROC_FS  	if (!proc_create("nf_log", S_IRUGO, -			 proc_net_netfilter, &nflog_file_ops)) -		return -1; +			 net->nf.proc_netfilter, &nflog_file_ops)) +		return ret;  #endif +	ret = netfilter_log_sysctl_init(net); +	if (ret < 0) +		goto out_sysctl; + +	return 0; -	/* Errors will trigger panic, unroll on error is unnecessary. */ -	r = netfilter_log_sysctl_init(); -	if (r < 0) -		return r; +out_sysctl: +	/* For init_net: errors will trigger panic, don't unroll on error. */ +	if (!net_eq(net, &init_net)) +		remove_proc_entry("nf_log", net->nf.proc_netfilter); + +	return ret; +} + +static void __net_exit nf_log_net_exit(struct net *net) +{ +	netfilter_log_sysctl_exit(net); +	remove_proc_entry("nf_log", net->nf.proc_netfilter); +} + +static struct pernet_operations nf_log_net_ops = { +	.init = nf_log_net_init, +	.exit = nf_log_net_exit, +}; + +int __init netfilter_log_init(void) +{ +	int i, ret; + +	ret = register_pernet_subsys(&nf_log_net_ops); +	if (ret < 0) +		return ret;  	for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)  		INIT_LIST_HEAD(&(nf_loggers_l[i])); diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c index 3b67c9d1127..eb772380a20 100644 --- a/net/netfilter/nf_nat_amanda.c +++ b/net/netfilter/nf_nat_amanda.c @@ -1,6 +1,7 @@  /* Amanda extension for TCP NAT alteration.   * (C) 2002 by Brian J. 
Murrell <netfilter@interlinx.bc.ca>   * based on a copy of HW's ip_nat_irc.c as well as other modules + * (C) 2006-2012 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or   * modify it under the terms of the GNU General Public License diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index ad24be070e5..038eee5c8f8 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -87,9 +87,11 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)  	struct flowi fl;  	unsigned int hh_len;  	struct dst_entry *dst; +	int err; -	if (xfrm_decode_session(skb, &fl, family) < 0) -		return -1; +	err = xfrm_decode_session(skb, &fl, family); +	if (err < 0) +		return err;  	dst = skb_dst(skb);  	if (dst->xfrm) @@ -98,7 +100,7 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)  	dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0);  	if (IS_ERR(dst)) -		return -1; +		return PTR_ERR(dst);  	skb_dst_drop(skb);  	skb_dst_set(skb, dst); @@ -107,7 +109,7 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)  	hh_len = skb_dst(skb)->dev->hard_header_len;  	if (skb_headroom(skb) < hh_len &&  	    pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) -		return -1; +		return -ENOMEM;  	return 0;  }  EXPORT_SYMBOL(nf_xfrm_me_harder); diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index 23c2b38676a..5fea563afe3 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -2,6 +2,7 @@   *   * (C) 2000-2002 Harald Welte <laforge@netfilter.org>   * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2007-2012 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c index e64faa5ca89..396e55d46f9 100644 --- a/net/netfilter/nf_nat_proto_sctp.c +++ b/net/netfilter/nf_nat_proto_sctp.c @@ -36,7 +36,7 @@ sctp_manip_pkt(struct sk_buff *skb,  {  	struct sk_buff *frag;  	sctp_sctphdr_t *hdr; -	__be32 crc32; +	__u32 crc32;  	if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))  		return false; @@ -55,8 +55,7 @@ sctp_manip_pkt(struct sk_buff *skb,  	skb_walk_frags(skb, frag)  		crc32 = sctp_update_cksum((u8 *)frag->data, skb_headlen(frag),  					  crc32); -	crc32 = sctp_end_cksum(crc32); -	hdr->checksum = crc32; +	hdr->checksum = sctp_end_cksum(crc32);  	return true;  } diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index d812c1235b3..5d24b1fdb59 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -1,3 +1,8 @@ +/* + * Rusty Russell (C)2000 -- This code is GPL. + * Patrick McHardy (c) 2006-2012 + */ +  #include <linux/kernel.h>  #include <linux/slab.h>  #include <linux/init.h> @@ -40,7 +45,7 @@ void nf_unregister_queue_handler(void)  }  EXPORT_SYMBOL(nf_unregister_queue_handler); -static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) +void nf_queue_entry_release_refs(struct nf_queue_entry *entry)  {  	/* Release those devices we held, or Alexey will kill me. */  	if (entry->indev) @@ -60,12 +65,41 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)  	/* Drop reference to owner of hook which queued us. 
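 * The owner reference is the counterpart of the try_module_get() in
 * nf_queue_entry_get_refs() below: it pins the module that owns the
 * queueing hook while the skb sits in a userspace queue, so dropping
 * it here is what finally allows that module to be unloaded.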
*/  	module_put(entry->elem->owner);  } +EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs); + +/* Bump dev refs so they don't vanish while packet is out */ +bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) +{ +	if (!try_module_get(entry->elem->owner)) +		return false; + +	if (entry->indev) +		dev_hold(entry->indev); +	if (entry->outdev) +		dev_hold(entry->outdev); +#ifdef CONFIG_BRIDGE_NETFILTER +	if (entry->skb->nf_bridge) { +		struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge; +		struct net_device *physdev; + +		physdev = nf_bridge->physindev; +		if (physdev) +			dev_hold(physdev); +		physdev = nf_bridge->physoutdev; +		if (physdev) +			dev_hold(physdev); +	} +#endif + +	return true; +} +EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);  /*   * Any packet that leaves via this function must come back   * through nf_reinject().   */ -static int __nf_queue(struct sk_buff *skb, +int nf_queue(struct sk_buff *skb,  		      struct nf_hook_ops *elem,  		      u_int8_t pf, unsigned int hook,  		      struct net_device *indev, @@ -75,10 +109,6 @@ static int __nf_queue(struct sk_buff *skb,  {  	int status = -ENOENT;  	struct nf_queue_entry *entry = NULL; -#ifdef CONFIG_BRIDGE_NETFILTER -	struct net_device *physindev; -	struct net_device *physoutdev; -#endif  	const struct nf_afinfo *afinfo;  	const struct nf_queue_handler *qh; @@ -109,28 +139,13 @@ static int __nf_queue(struct sk_buff *skb,  		.indev	= indev,  		.outdev	= outdev,  		.okfn	= okfn, +		.size	= sizeof(*entry) + afinfo->route_key_size,  	}; -	/* If it's going away, ignore hook. */ -	if (!try_module_get(entry->elem->owner)) { +	if (!nf_queue_entry_get_refs(entry)) {  		status = -ECANCELED;  		goto err_unlock;  	} -	/* Bump dev refs so they don't vanish while packet is out */ -	if (indev) -		dev_hold(indev); -	if (outdev) -		dev_hold(outdev); -#ifdef CONFIG_BRIDGE_NETFILTER -	if (skb->nf_bridge) { -		physindev = skb->nf_bridge->physindev; -		if (physindev) -			dev_hold(physindev); -		physoutdev = skb->nf_bridge->physoutdev; -		if (physoutdev) -			dev_hold(physoutdev); -	} -#endif  	skb_dst_force(skb);  	afinfo->saveroute(skb, entry);  	status = qh->outfn(entry, queuenum); @@ -151,87 +166,6 @@ err:  	return status;  } -#ifdef CONFIG_BRIDGE_NETFILTER -/* When called from bridge netfilter, skb->data must point to MAC header - * before calling skb_gso_segment(). Else, original MAC header is lost - * and segmented skbs will be sent to wrong destination. 
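 * (Note: this helper pair and the GSO segmentation loop removed below
 * are not dropped from the tree; they reappear in
 * nfnetlink_queue_core.c later in this patch, where segmentation is
 * performed per queue instance instead of in the generic nf_queue
 * path.)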
- */ -static void nf_bridge_adjust_skb_data(struct sk_buff *skb) -{ -	if (skb->nf_bridge) -		__skb_push(skb, skb->network_header - skb->mac_header); -} - -static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) -{ -	if (skb->nf_bridge) -		__skb_pull(skb, skb->network_header - skb->mac_header); -} -#else -#define nf_bridge_adjust_skb_data(s) do {} while (0) -#define nf_bridge_adjust_segmented_data(s) do {} while (0) -#endif - -int nf_queue(struct sk_buff *skb, -	     struct nf_hook_ops *elem, -	     u_int8_t pf, unsigned int hook, -	     struct net_device *indev, -	     struct net_device *outdev, -	     int (*okfn)(struct sk_buff *), -	     unsigned int queuenum) -{ -	struct sk_buff *segs; -	int err = -EINVAL; -	unsigned int queued; - -	if (!skb_is_gso(skb)) -		return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, -				  queuenum); - -	switch (pf) { -	case NFPROTO_IPV4: -		skb->protocol = htons(ETH_P_IP); -		break; -	case NFPROTO_IPV6: -		skb->protocol = htons(ETH_P_IPV6); -		break; -	} - -	nf_bridge_adjust_skb_data(skb); -	segs = skb_gso_segment(skb, 0); -	/* Does not use PTR_ERR to limit the number of error codes that can be -	 * returned by nf_queue.  For instance, callers rely on -ECANCELED to mean -	 * 'ignore this hook'. -	 */ -	if (IS_ERR(segs)) -		goto out_err; -	queued = 0; -	err = 0; -	do { -		struct sk_buff *nskb = segs->next; - -		segs->next = NULL; -		if (err == 0) { -			nf_bridge_adjust_segmented_data(segs); -			err = __nf_queue(segs, elem, pf, hook, indev, -					   outdev, okfn, queuenum); -		} -		if (err == 0) -			queued++; -		else -			kfree_skb(segs); -		segs = nskb; -	} while (segs); - -	if (queued) { -		kfree_skb(skb); -		return 0; -	} -  out_err: -	nf_bridge_adjust_segmented_data(skb); -	return err; -} -  void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)  {  	struct sk_buff *skb = entry->skb; @@ -271,9 +205,9 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)  		local_bh_enable();  		break;  	case NF_QUEUE: -		err = __nf_queue(skb, elem, entry->pf, entry->hook, -				 entry->indev, entry->outdev, entry->okfn, -				 verdict >> NF_VERDICT_QBITS); +		err = nf_queue(skb, elem, entry->pf, entry->hook, +				entry->indev, entry->outdev, entry->okfn, +				verdict >> NF_VERDICT_QBITS);  		if (err < 0) {  			if (err == -ECANCELED)  				goto next_hook; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 0b1b32cda30..572d87dc116 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -24,10 +24,9 @@  #include <linux/skbuff.h>  #include <asm/uaccess.h>  #include <net/sock.h> -#include <net/netlink.h>  #include <linux/init.h> -#include <linux/netlink.h> +#include <net/netlink.h>  #include <linux/netfilter/nfnetlink.h>  MODULE_LICENSE("GPL"); @@ -113,22 +112,30 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group)  }  EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, +struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size, +				    u32 dst_portid, gfp_t gfp_mask) +{ +	return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask); +} +EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb); + +int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid,  		   unsigned int group, int echo, gfp_t flags)  { -	return nlmsg_notify(net->nfnl, skb, pid, group, echo, flags); +	return nlmsg_notify(net->nfnl, skb, portid, group, echo, flags);  }  EXPORT_SYMBOL_GPL(nfnetlink_send); -int nfnetlink_set_err(struct net *net, 
u32 pid, u32 group, int error) +int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error)  { -	return netlink_set_err(net->nfnl, pid, group, error); +	return netlink_set_err(net->nfnl, portid, group, error);  }  EXPORT_SYMBOL_GPL(nfnetlink_set_err); -int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u_int32_t pid, int flags) +int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, +		      int flags)  { -	return netlink_unicast(net->nfnl, skb, pid, flags); +	return netlink_unicast(net->nfnl, skb, portid, flags);  }  EXPORT_SYMBOL_GPL(nfnetlink_unicast); @@ -144,7 +151,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)  		return -EPERM;  	/* All the messages must at least contain nfgenmsg */ -	if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct nfgenmsg))) +	if (nlmsg_len(nlh) < sizeof(struct nfgenmsg))  		return 0;  	type = nlh->nlmsg_type; @@ -172,7 +179,7 @@ replay:  	}  	{ -		int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); +		int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));  		u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);  		struct nlattr *cda[ss->cb[cb_id].attr_count + 1];  		struct nlattr *attr = (void *)nlh + min_len; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index f248db57297..faf1e9300d8 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -3,6 +3,7 @@   * nfetlink.   *   * (C) 2005 by Harald Welte <laforge@netfilter.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net>   *   * Based on the old ipv4-only ipt_ULOG.c:   * (C) 2000-2004 by Harald Welte <laforge@netfilter.org> @@ -19,7 +20,7 @@  #include <linux/ipv6.h>  #include <linux/netdevice.h>  #include <linux/netfilter.h> -#include <linux/netlink.h> +#include <net/netlink.h>  #include <linux/netfilter/nfnetlink.h>  #include <linux/netfilter/nfnetlink_log.h>  #include <linux/spinlock.h> @@ -32,6 +33,7 @@  #include <linux/slab.h>  #include <net/sock.h>  #include <net/netfilter/nf_log.h> +#include <net/netns/generic.h>  #include <net/netfilter/nfnetlink_log.h>  #include <linux/atomic.h> @@ -56,6 +58,7 @@ struct nfulnl_instance {  	unsigned int qlen;		/* number of nlmsgs in skb */  	struct sk_buff *skb;		/* pre-allocatd skb */  	struct timer_list timer; +	struct net *net;  	struct user_namespace *peer_user_ns;	/* User namespace of the peer process */  	int peer_portid;			/* PORTID of the peer process */ @@ -71,25 +74,34 @@ struct nfulnl_instance {  	struct rcu_head rcu;  }; -static DEFINE_SPINLOCK(instances_lock); -static atomic_t global_seq; -  #define INSTANCE_BUCKETS	16 -static struct hlist_head instance_table[INSTANCE_BUCKETS];  static unsigned int hash_init; +static int nfnl_log_net_id __read_mostly; + +struct nfnl_log_net { +	spinlock_t instances_lock; +	struct hlist_head instance_table[INSTANCE_BUCKETS]; +	atomic_t global_seq; +}; + +static struct nfnl_log_net *nfnl_log_pernet(struct net *net) +{ +	return net_generic(net, nfnl_log_net_id); +} +  static inline u_int8_t instance_hashfn(u_int16_t group_num)  {  	return ((group_num & 0xff) % INSTANCE_BUCKETS);  }  static struct nfulnl_instance * -__instance_lookup(u_int16_t group_num) +__instance_lookup(struct nfnl_log_net *log, u_int16_t group_num)  {  	struct hlist_head *head;  	struct nfulnl_instance *inst; -	head = &instance_table[instance_hashfn(group_num)]; +	head = &log->instance_table[instance_hashfn(group_num)];  	hlist_for_each_entry_rcu(inst, head, hlist) {  		if (inst->group_num == group_num)  			return inst; @@ -104,12 +116,12 
@@ instance_get(struct nfulnl_instance *inst)  }  static struct nfulnl_instance * -instance_lookup_get(u_int16_t group_num) +instance_lookup_get(struct nfnl_log_net *log, u_int16_t group_num)  {  	struct nfulnl_instance *inst;  	rcu_read_lock_bh(); -	inst = __instance_lookup(group_num); +	inst = __instance_lookup(log, group_num);  	if (inst && !atomic_inc_not_zero(&inst->use))  		inst = NULL;  	rcu_read_unlock_bh(); @@ -119,7 +131,11 @@ instance_lookup_get(u_int16_t group_num)  static void nfulnl_instance_free_rcu(struct rcu_head *head)  { -	kfree(container_of(head, struct nfulnl_instance, rcu)); +	struct nfulnl_instance *inst = +		container_of(head, struct nfulnl_instance, rcu); + +	put_net(inst->net); +	kfree(inst);  	module_put(THIS_MODULE);  } @@ -133,13 +149,15 @@ instance_put(struct nfulnl_instance *inst)  static void nfulnl_timer(unsigned long data);  static struct nfulnl_instance * -instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns) +instance_create(struct net *net, u_int16_t group_num, +		int portid, struct user_namespace *user_ns)  {  	struct nfulnl_instance *inst; +	struct nfnl_log_net *log = nfnl_log_pernet(net);  	int err; -	spin_lock_bh(&instances_lock); -	if (__instance_lookup(group_num)) { +	spin_lock_bh(&log->instances_lock); +	if (__instance_lookup(log, group_num)) {  		err = -EEXIST;  		goto out_unlock;  	} @@ -163,6 +181,7 @@ instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns)  	setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst); +	inst->net = get_net(net);  	inst->peer_user_ns = user_ns;  	inst->peer_portid = portid;  	inst->group_num = group_num; @@ -174,14 +193,15 @@ instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns)  	inst->copy_range 	= NFULNL_COPY_RANGE_MAX;  	hlist_add_head_rcu(&inst->hlist, -		       &instance_table[instance_hashfn(group_num)]); +		       &log->instance_table[instance_hashfn(group_num)]); + -	spin_unlock_bh(&instances_lock); +	spin_unlock_bh(&log->instances_lock);  	return inst;  out_unlock: -	spin_unlock_bh(&instances_lock); +	spin_unlock_bh(&log->instances_lock);  	return ERR_PTR(err);  } @@ -210,11 +230,12 @@ __instance_destroy(struct nfulnl_instance *inst)  }  static inline void -instance_destroy(struct nfulnl_instance *inst) +instance_destroy(struct nfnl_log_net *log, +		 struct nfulnl_instance *inst)  { -	spin_lock_bh(&instances_lock); +	spin_lock_bh(&log->instances_lock);  	__instance_destroy(inst); -	spin_unlock_bh(&instances_lock); +	spin_unlock_bh(&log->instances_lock);  }  static int @@ -298,7 +319,7 @@ nfulnl_set_flags(struct nfulnl_instance *inst, u_int16_t flags)  }  static struct sk_buff * -nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size) +nfulnl_alloc_skb(u32 peer_portid, unsigned int inst_size, unsigned int pkt_size)  {  	struct sk_buff *skb;  	unsigned int n; @@ -307,13 +328,14 @@ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size)  	 * message.  
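 * The batch size (inst->nlbufsiz) is set from userspace via
 * NFULA_CFG_NLBUFSIZ, which is why the code below first tries the
 * larger of the two sizes and falls back to a single-packet skb only
 * if that allocation fails.
 *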
WARNING: has to be <= 128k due to slab restrictions */  	n = max(inst_size, pkt_size); -	skb = alloc_skb(n, GFP_ATOMIC); +	skb = nfnetlink_alloc_skb(&init_net, n, peer_portid, GFP_ATOMIC);  	if (!skb) {  		if (n > pkt_size) {  			/* try to allocate only as much as we need for current  			 * packet */ -			skb = alloc_skb(pkt_size, GFP_ATOMIC); +			skb = nfnetlink_alloc_skb(&init_net, pkt_size, +						  peer_portid, GFP_ATOMIC);  			if (!skb)  				pr_err("nfnetlink_log: can't even alloc %u bytes\n",  				       pkt_size); @@ -336,7 +358,7 @@ __nfulnl_send(struct nfulnl_instance *inst)  		if (!nlh)  			goto out;  	} -	status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_portid, +	status = nfnetlink_unicast(inst->skb, inst->net, inst->peer_portid,  				   MSG_DONTWAIT);  	inst->qlen = 0; @@ -370,7 +392,8 @@ nfulnl_timer(unsigned long data)  /* This is an inline function, we don't really care about a long   * list of arguments */  static inline int -__build_packet_message(struct nfulnl_instance *inst, +__build_packet_message(struct nfnl_log_net *log, +			struct nfulnl_instance *inst,  			const struct sk_buff *skb,  			unsigned int data_len,  			u_int8_t pf, @@ -536,7 +559,7 @@ __build_packet_message(struct nfulnl_instance *inst,  	/* global sequence number */  	if ((inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) &&  	    nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL, -			 htonl(atomic_inc_return(&global_seq)))) +			 htonl(atomic_inc_return(&log->global_seq))))  		goto nla_put_failure;  	if (data_len) { @@ -592,13 +615,15 @@ nfulnl_log_packet(u_int8_t pf,  	const struct nf_loginfo *li;  	unsigned int qthreshold;  	unsigned int plen; +	struct net *net = dev_net(in ? in : out); +	struct nfnl_log_net *log = nfnl_log_pernet(net);  	if (li_user && li_user->type == NF_LOG_TYPE_ULOG)  		li = li_user;  	else  		li = &default_loginfo; -	inst = instance_lookup_get(li->u.ulog.group); +	inst = instance_lookup_get(log, li->u.ulog.group);  	if (!inst)  		return; @@ -609,7 +634,7 @@ nfulnl_log_packet(u_int8_t pf,  	/* FIXME: do we want to make the size calculation conditional based on  	 * what is actually present?  way more branches and checks, but more  	 * memory efficient... 
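 * As a back-of-the-envelope check: each nla_total_size() term below
 * already includes the 4-byte attribute header plus alignment padding,
 * e.g. nla_total_size(sizeof(u_int32_t)) == NLA_ALIGN(4 + 4) == 8, so
 * the sum is a safe upper bound and the fixed-size attributes can
 * never overrun the skb's tailroom.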
*/ -	size =    NLMSG_SPACE(sizeof(struct nfgenmsg)) +	size =    nlmsg_total_size(sizeof(struct nfgenmsg))  		+ nla_total_size(sizeof(struct nfulnl_msg_packet_hdr))  		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */  		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */ @@ -673,14 +698,15 @@ nfulnl_log_packet(u_int8_t pf,  	}  	if (!inst->skb) { -		inst->skb = nfulnl_alloc_skb(inst->nlbufsiz, size); +		inst->skb = nfulnl_alloc_skb(inst->peer_portid, inst->nlbufsiz, +					     size);  		if (!inst->skb)  			goto alloc_failure;  	}  	inst->qlen++; -	__build_packet_message(inst, skb, data_len, pf, +	__build_packet_message(log, inst, skb, data_len, pf,  				hooknum, in, out, prefix, plen);  	if (inst->qlen >= qthreshold) @@ -709,24 +735,24 @@ nfulnl_rcv_nl_event(struct notifier_block *this,  		   unsigned long event, void *ptr)  {  	struct netlink_notify *n = ptr; +	struct nfnl_log_net *log = nfnl_log_pernet(n->net);  	if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {  		int i;  		/* destroy all instances for this portid */ -		spin_lock_bh(&instances_lock); +		spin_lock_bh(&log->instances_lock);  		for  (i = 0; i < INSTANCE_BUCKETS; i++) {  			struct hlist_node *t2;  			struct nfulnl_instance *inst; -			struct hlist_head *head = &instance_table[i]; +			struct hlist_head *head = &log->instance_table[i];  			hlist_for_each_entry_safe(inst, t2, head, hlist) { -				if ((net_eq(n->net, &init_net)) && -				    (n->portid == inst->peer_portid)) +				if (n->portid == inst->peer_portid)  					__instance_destroy(inst);  			}  		} -		spin_unlock_bh(&instances_lock); +		spin_unlock_bh(&log->instances_lock);  	}  	return NOTIFY_DONE;  } @@ -767,6 +793,8 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,  	u_int16_t group_num = ntohs(nfmsg->res_id);  	struct nfulnl_instance *inst;  	struct nfulnl_msg_config_cmd *cmd = NULL; +	struct net *net = sock_net(ctnl); +	struct nfnl_log_net *log = nfnl_log_pernet(net);  	int ret = 0;  	if (nfula[NFULA_CFG_CMD]) { @@ -776,14 +804,14 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,  		/* Commands without queue context */  		switch (cmd->command) {  		case NFULNL_CFG_CMD_PF_BIND: -			return nf_log_bind_pf(pf, &nfulnl_logger); +			return nf_log_bind_pf(net, pf, &nfulnl_logger);  		case NFULNL_CFG_CMD_PF_UNBIND: -			nf_log_unbind_pf(pf); +			nf_log_unbind_pf(net, pf);  			return 0;  		}  	} -	inst = instance_lookup_get(group_num); +	inst = instance_lookup_get(log, group_num);  	if (inst && inst->peer_portid != NETLINK_CB(skb).portid) {  		ret = -EPERM;  		goto out_put; @@ -797,9 +825,9 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,  				goto out_put;  			} -			inst = instance_create(group_num, +			inst = instance_create(net, group_num,  					       NETLINK_CB(skb).portid, -					       sk_user_ns(NETLINK_CB(skb).ssk)); +					       sk_user_ns(NETLINK_CB(skb).sk));  			if (IS_ERR(inst)) {  				ret = PTR_ERR(inst);  				goto out; @@ -811,7 +839,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,  				goto out;  			} -			instance_destroy(inst); +			instance_destroy(log, inst);  			goto out_put;  		default:  			ret = -ENOTSUPP; @@ -894,55 +922,68 @@ static const struct nfnetlink_subsystem nfulnl_subsys = {  #ifdef CONFIG_PROC_FS  struct iter_state { +	struct seq_net_private p;  	unsigned int bucket;  }; -static struct hlist_node *get_first(struct iter_state *st) +static struct hlist_node *get_first(struct net *net, struct iter_state *st)  { +	struct nfnl_log_net *log;  	if (!st)  		return NULL; +	log = 
nfnl_log_pernet(net); +  	for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { -		if (!hlist_empty(&instance_table[st->bucket])) -			return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); +		struct hlist_head *head = &log->instance_table[st->bucket]; + +		if (!hlist_empty(head)) +			return rcu_dereference_bh(hlist_first_rcu(head));  	}  	return NULL;  } -static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h) +static struct hlist_node *get_next(struct net *net, struct iter_state *st, +				   struct hlist_node *h)  {  	h = rcu_dereference_bh(hlist_next_rcu(h));  	while (!h) { +		struct nfnl_log_net *log; +		struct hlist_head *head; +  		if (++st->bucket >= INSTANCE_BUCKETS)  			return NULL; -		h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); +		log = nfnl_log_pernet(net); +		head = &log->instance_table[st->bucket]; +		h = rcu_dereference_bh(hlist_first_rcu(head));  	}  	return h;  } -static struct hlist_node *get_idx(struct iter_state *st, loff_t pos) +static struct hlist_node *get_idx(struct net *net, struct iter_state *st, +				  loff_t pos)  {  	struct hlist_node *head; -	head = get_first(st); +	head = get_first(net, st);  	if (head) -		while (pos && (head = get_next(st, head))) +		while (pos && (head = get_next(net, st, head)))  			pos--;  	return pos ? NULL : head;  } -static void *seq_start(struct seq_file *seq, loff_t *pos) +static void *seq_start(struct seq_file *s, loff_t *pos)  	__acquires(rcu_bh)  {  	rcu_read_lock_bh(); -	return get_idx(seq->private, *pos); +	return get_idx(seq_file_net(s), s->private, *pos);  }  static void *seq_next(struct seq_file *s, void *v, loff_t *pos)  {  	(*pos)++; -	return get_next(s->private, v); +	return get_next(seq_file_net(s), s->private, v);  }  static void seq_stop(struct seq_file *s, void *v) @@ -971,8 +1012,8 @@ static const struct seq_operations nful_seq_ops = {  static int nful_open(struct inode *inode, struct file *file)  { -	return seq_open_private(file, &nful_seq_ops, -			sizeof(struct iter_state)); +	return seq_open_net(inode, file, &nful_seq_ops, +			    sizeof(struct iter_state));  }  static const struct file_operations nful_file_ops = { @@ -980,17 +1021,43 @@ static const struct file_operations nful_file_ops = {  	.open	 = nful_open,  	.read	 = seq_read,  	.llseek	 = seq_lseek, -	.release = seq_release_private, +	.release = seq_release_net,  };  #endif /* PROC_FS */ -static int __init nfnetlink_log_init(void) +static int __net_init nfnl_log_net_init(struct net *net)  { -	int i, status = -ENOMEM; +	unsigned int i; +	struct nfnl_log_net *log = nfnl_log_pernet(net);  	for (i = 0; i < INSTANCE_BUCKETS; i++) -		INIT_HLIST_HEAD(&instance_table[i]); +		INIT_HLIST_HEAD(&log->instance_table[i]); +	spin_lock_init(&log->instances_lock); + +#ifdef CONFIG_PROC_FS +	if (!proc_create("nfnetlink_log", 0440, +			 net->nf.proc_netfilter, &nful_file_ops)) +		return -ENOMEM; +#endif +	return 0; +} + +static void __net_exit nfnl_log_net_exit(struct net *net) +{ +	remove_proc_entry("nfnetlink_log", net->nf.proc_netfilter); +} + +static struct pernet_operations nfnl_log_net_ops = { +	.init	= nfnl_log_net_init, +	.exit	= nfnl_log_net_exit, +	.id	= &nfnl_log_net_id, +	.size	= sizeof(struct nfnl_log_net), +}; + +static int __init nfnetlink_log_init(void) +{ +	int status = -ENOMEM;  	/* it's not really all that important to have a random value, so  	 * we can do this from the init function, even if there hasn't @@ -1000,29 +1067,25 @@ static int __init nfnetlink_log_init(void)  	
 * been that much entropy yet */
 	get_random_bytes(&hash_init, sizeof(hash_init));
netlink_register_notifier(&nfulnl_rtnl_notifier);  	status = nfnetlink_subsys_register(&nfulnl_subsys);  	if (status < 0) { -		printk(KERN_ERR "log: failed to create netlink socket\n"); +		pr_err("log: failed to create netlink socket\n");  		goto cleanup_netlink_notifier;  	}  	status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger);  	if (status < 0) { -		printk(KERN_ERR "log: failed to register logger\n"); +		pr_err("log: failed to register logger\n");  		goto cleanup_subsys;  	} -#ifdef CONFIG_PROC_FS -	if (!proc_create("nfnetlink_log", 0440, -			 proc_net_netfilter, &nful_file_ops)) { -		status = -ENOMEM; +	status = register_pernet_subsys(&nfnl_log_net_ops); +	if (status < 0) { +		pr_err("log: failed to register pernet ops\n");  		goto cleanup_logger;  	} -#endif  	return status; -#ifdef CONFIG_PROC_FS  cleanup_logger:  	nf_log_unregister(&nfulnl_logger); -#endif  cleanup_subsys:  	nfnetlink_subsys_unregister(&nfulnl_subsys);  cleanup_netlink_notifier: @@ -1032,10 +1095,8 @@ cleanup_netlink_notifier:  static void __exit nfnetlink_log_fini(void)  { +	unregister_pernet_subsys(&nfnl_log_net_ops);  	nf_log_unregister(&nfulnl_logger); -#ifdef CONFIG_PROC_FS -	remove_proc_entry("nfnetlink_log", proc_net_netfilter); -#endif  	nfnetlink_subsys_unregister(&nfulnl_subsys);  	netlink_unregister_notifier(&nfulnl_rtnl_notifier);  } diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 42680b2baa1..2e0e835baf7 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -30,6 +30,7 @@  #include <linux/list.h>  #include <net/sock.h>  #include <net/netfilter/nf_queue.h> +#include <net/netns/generic.h>  #include <net/netfilter/nfnetlink_queue.h>  #include <linux/atomic.h> @@ -66,23 +67,31 @@ struct nfqnl_instance {  typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long); -static DEFINE_SPINLOCK(instances_lock); +static int nfnl_queue_net_id __read_mostly;  #define INSTANCE_BUCKETS	16 -static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly; +struct nfnl_queue_net { +	spinlock_t instances_lock; +	struct hlist_head instance_table[INSTANCE_BUCKETS]; +}; + +static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net) +{ +	return net_generic(net, nfnl_queue_net_id); +}  static inline u_int8_t instance_hashfn(u_int16_t queue_num)  { -	return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS; +	return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;  }  static struct nfqnl_instance * -instance_lookup(u_int16_t queue_num) +instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)  {  	struct hlist_head *head;  	struct nfqnl_instance *inst; -	head = &instance_table[instance_hashfn(queue_num)]; +	head = &q->instance_table[instance_hashfn(queue_num)];  	hlist_for_each_entry_rcu(inst, head, hlist) {  		if (inst->queue_num == queue_num)  			return inst; @@ -91,14 +100,15 @@ instance_lookup(u_int16_t queue_num)  }  static struct nfqnl_instance * -instance_create(u_int16_t queue_num, int portid) +instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, +		int portid)  {  	struct nfqnl_instance *inst;  	unsigned int h;  	int err; -	spin_lock(&instances_lock); -	if (instance_lookup(queue_num)) { +	spin_lock(&q->instances_lock); +	if (instance_lookup(q, queue_num)) {  		err = -EEXIST;  		goto out_unlock;  	} @@ -123,16 +133,16 @@ instance_create(u_int16_t queue_num, int portid)  	}  	h = instance_hashfn(queue_num); -	hlist_add_head_rcu(&inst->hlist, &instance_table[h]); +	
hlist_add_head_rcu(&inst->hlist, &q->instance_table[h]); -	spin_unlock(&instances_lock); +	spin_unlock(&q->instances_lock);  	return inst;  out_free:  	kfree(inst);  out_unlock: -	spin_unlock(&instances_lock); +	spin_unlock(&q->instances_lock);  	return ERR_PTR(err);  } @@ -158,11 +168,11 @@ __instance_destroy(struct nfqnl_instance *inst)  }  static void -instance_destroy(struct nfqnl_instance *inst) +instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst)  { -	spin_lock(&instances_lock); +	spin_lock(&q->instances_lock);  	__instance_destroy(inst); -	spin_unlock(&instances_lock); +	spin_unlock(&q->instances_lock);  }  static inline void @@ -217,14 +227,71 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)  	spin_unlock_bh(&queue->lock);  } +static void +nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen) +{ +	int i, j = 0; +	int plen = 0; /* length of skb->head fragment */ +	struct page *page; +	unsigned int offset; + +	/* dont bother with small payloads */ +	if (len <= skb_tailroom(to)) { +		skb_copy_bits(from, 0, skb_put(to, len), len); +		return; +	} + +	if (hlen) { +		skb_copy_bits(from, 0, skb_put(to, hlen), hlen); +		len -= hlen; +	} else { +		plen = min_t(int, skb_headlen(from), len); +		if (plen) { +			page = virt_to_head_page(from->head); +			offset = from->data - (unsigned char *)page_address(page); +			__skb_fill_page_desc(to, 0, page, offset, plen); +			get_page(page); +			j = 1; +			len -= plen; +		} +	} + +	to->truesize += len + plen; +	to->len += len + plen; +	to->data_len += len + plen; + +	for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { +		if (!len) +			break; +		skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; +		skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); +		len -= skb_shinfo(to)->frags[j].size; +		skb_frag_ref(to, j); +		j++; +	} +	skb_shinfo(to)->nr_frags = j; +} + +static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet) +{ +	__u32 flags = 0; + +	if (packet->ip_summed == CHECKSUM_PARTIAL) +		flags = NFQA_SKB_CSUMNOTREADY; +	if (skb_is_gso(packet)) +		flags |= NFQA_SKB_GSO; + +	return flags ? 
nla_put_be32(nlskb, NFQA_SKB_INFO, htonl(flags)) : 0; +} +  static struct sk_buff *  nfqnl_build_packet_message(struct nfqnl_instance *queue,  			   struct nf_queue_entry *entry,  			   __be32 **packet_id_ptr)  { -	sk_buff_data_t old_tail;  	size_t size;  	size_t data_len = 0, cap_len = 0; +	int hlen = 0;  	struct sk_buff *skb;  	struct nlattr *nla;  	struct nfqnl_msg_packet_hdr *pmsg; @@ -236,7 +303,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,  	struct nf_conn *ct = NULL;  	enum ip_conntrack_info uninitialized_var(ctinfo); -	size =    NLMSG_SPACE(sizeof(struct nfgenmsg)) +	size =    nlmsg_total_size(sizeof(struct nfgenmsg))  		+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))  		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */  		+ nla_total_size(sizeof(u_int32_t))	/* ifindex */ @@ -246,8 +313,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,  #endif  		+ nla_total_size(sizeof(u_int32_t))	/* mark */  		+ nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) -		+ nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp) -		+ nla_total_size(sizeof(u_int32_t)));	/* cap_len */ +		+ nla_total_size(sizeof(u_int32_t))	/* skbinfo */ +		+ nla_total_size(sizeof(u_int32_t));	/* cap_len */ + +	if (entskb->tstamp.tv64) +		size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));  	outdev = entry->outdev; @@ -257,7 +327,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,  		break;  	case NFQNL_COPY_PACKET: -		if (entskb->ip_summed == CHECKSUM_PARTIAL && +		if (!(queue->flags & NFQA_CFG_F_GSO) && +		    entskb->ip_summed == CHECKSUM_PARTIAL &&  		    skb_checksum_help(entskb))  			return NULL; @@ -265,7 +336,16 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,  		if (data_len == 0 || data_len > entskb->len)  			data_len = entskb->len; -		size += nla_total_size(data_len); + +		if (!entskb->head_frag || +		    skb_headlen(entskb) < L1_CACHE_BYTES || +		    skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS) +			hlen = skb_headlen(entskb); + +		if (skb_has_frag_list(entskb)) +			hlen = entskb->len; +		hlen = min_t(int, data_len, hlen); +		size += sizeof(struct nlattr) + hlen;  		cap_len = entskb->len;  		break;  	} @@ -273,11 +353,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,  	if (queue->flags & NFQA_CFG_F_CONNTRACK)  		ct = nfqnl_ct_get(entskb, &size, &ctinfo); -	skb = alloc_skb(size, GFP_ATOMIC); +	skb = nfnetlink_alloc_skb(&init_net, size, queue->peer_portid, +				  GFP_ATOMIC);  	if (!skb)  		return NULL; -	old_tail = skb->tail;  	nlh = nlmsg_put(skb, 0, 0,  			NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,  			sizeof(struct nfgenmsg), 0); @@ -382,31 +462,29 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,  			goto nla_put_failure;  	} +	if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) +		goto nla_put_failure; + +	if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) +		goto nla_put_failure; + +	if (nfqnl_put_packet_info(skb, entskb)) +		goto nla_put_failure; +  	if (data_len) {  		struct nlattr *nla; -		int sz = nla_attr_size(data_len); -		if (skb_tailroom(skb) < nla_total_size(data_len)) { -			printk(KERN_WARNING "nf_queue: no tailroom!\n"); -			kfree_skb(skb); -			return NULL; -		} +		if (skb_tailroom(skb) < sizeof(*nla) + hlen) +			goto nla_put_failure; -		nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len)); +		nla = (struct nlattr *)skb_put(skb, sizeof(*nla));  		nla->nla_type = NFQA_PAYLOAD; -		nla->nla_len = sz; +		nla->nla_len = nla_attr_size(data_len); -		if (skb_copy_bits(entskb, 0, 
nla_data(nla), data_len)) -			BUG(); +		nfqnl_zcopy(skb, entskb, data_len, hlen);  	} -	if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) -		goto nla_put_failure; - -	if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) -		goto nla_put_failure; - -	nlh->nlmsg_len = skb->tail - old_tail; +	nlh->nlmsg_len = skb->len;  	return skb;  nla_put_failure: @@ -416,26 +494,14 @@ nla_put_failure:  }  static int -nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) +__nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, +			struct nf_queue_entry *entry)  {  	struct sk_buff *nskb; -	struct nfqnl_instance *queue;  	int err = -ENOBUFS;  	__be32 *packet_id_ptr;  	int failopen = 0; -	/* rcu_read_lock()ed by nf_hook_slow() */ -	queue = instance_lookup(queuenum); -	if (!queue) { -		err = -ESRCH; -		goto err_out; -	} - -	if (queue->copy_mode == NFQNL_COPY_NONE) { -		err = -EINVAL; -		goto err_out; -	} -  	nskb = nfqnl_build_packet_message(queue, entry, &packet_id_ptr);  	if (nskb == NULL) {  		err = -ENOMEM; @@ -462,7 +528,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)  	*packet_id_ptr = htonl(entry->id);  	/* nfnetlink_unicast will either free the nskb or add it to a socket */ -	err = nfnetlink_unicast(nskb, &init_net, queue->peer_portid, MSG_DONTWAIT); +	err = nfnetlink_unicast(nskb, net, queue->peer_portid, MSG_DONTWAIT);  	if (err < 0) {  		queue->queue_user_dropped++;  		goto err_out_unlock; @@ -483,6 +549,141 @@ err_out:  	return err;  } +static struct nf_queue_entry * +nf_queue_entry_dup(struct nf_queue_entry *e) +{ +	struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC); +	if (entry) { +		if (nf_queue_entry_get_refs(entry)) +			return entry; +		kfree(entry); +	} +	return NULL; +} + +#ifdef CONFIG_BRIDGE_NETFILTER +/* When called from bridge netfilter, skb->data must point to MAC header + * before calling skb_gso_segment(). Else, original MAC header is lost + * and segmented skbs will be sent to wrong destination. + */ +static void nf_bridge_adjust_skb_data(struct sk_buff *skb) +{ +	if (skb->nf_bridge) +		__skb_push(skb, skb->network_header - skb->mac_header); +} + +static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) +{ +	if (skb->nf_bridge) +		__skb_pull(skb, skb->network_header - skb->mac_header); +} +#else +#define nf_bridge_adjust_skb_data(s) do {} while (0) +#define nf_bridge_adjust_segmented_data(s) do {} while (0) +#endif + +static void free_entry(struct nf_queue_entry *entry) +{ +	nf_queue_entry_release_refs(entry); +	kfree(entry); +} + +static int +__nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue, +			   struct sk_buff *skb, struct nf_queue_entry *entry) +{ +	int ret = -ENOMEM; +	struct nf_queue_entry *entry_seg; + +	nf_bridge_adjust_segmented_data(skb); + +	if (skb->next == NULL) { /* last packet, no need to copy entry */ +		struct sk_buff *gso_skb = entry->skb; +		entry->skb = skb; +		ret = __nfqnl_enqueue_packet(net, queue, entry); +		if (ret) +			entry->skb = gso_skb; +		return ret; +	} + +	skb->next = NULL; + +	entry_seg = nf_queue_entry_dup(entry); +	if (entry_seg) { +		entry_seg->skb = skb; +		ret = __nfqnl_enqueue_packet(net, queue, entry_seg); +		if (ret) +			free_entry(entry_seg); +	} +	return ret; +} + +static int +nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) +{ +	unsigned int queued; +	struct nfqnl_instance *queue; +	struct sk_buff *skb, *segs; +	int err = -ENOBUFS; +	struct net *net = dev_net(entry->indev ? 
+				  entry->indev : entry->outdev); +	struct nfnl_queue_net *q = nfnl_queue_pernet(net); + +	/* rcu_read_lock()ed by nf_hook_slow() */ +	queue = instance_lookup(q, queuenum); +	if (!queue) +		return -ESRCH; + +	if (queue->copy_mode == NFQNL_COPY_NONE) +		return -EINVAL; + +	if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(entry->skb)) +		return __nfqnl_enqueue_packet(net, queue, entry); + +	skb = entry->skb; + +	switch (entry->pf) { +	case NFPROTO_IPV4: +		skb->protocol = htons(ETH_P_IP); +		break; +	case NFPROTO_IPV6: +		skb->protocol = htons(ETH_P_IPV6); +		break; +	} + +	nf_bridge_adjust_skb_data(skb); +	segs = skb_gso_segment(skb, 0); +	/* Does not use PTR_ERR to limit the number of error codes that can be +	 * returned by nf_queue.  For instance, callers rely on -ECANCELED to +	 * mean 'ignore this hook'. +	 */ +	if (IS_ERR(segs)) +		goto out_err; +	queued = 0; +	err = 0; +	do { +		struct sk_buff *nskb = segs->next; +		if (err == 0) +			err = __nfqnl_enqueue_packet_gso(net, queue, +							segs, entry); +		if (err == 0) +			queued++; +		else +			kfree_skb(segs); +		segs = nskb; +	} while (segs); + +	if (queued) { +		if (err) /* some segments are already queued */ +			free_entry(entry); +		kfree_skb(skb); +		return 0; +	} + out_err: +	nf_bridge_adjust_segmented_data(skb); +	return err; +} +  static int  nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)  { @@ -575,15 +776,16 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)  /* drop all packets with either indev or outdev == ifindex from all queue   * instances */  static void -nfqnl_dev_drop(int ifindex) +nfqnl_dev_drop(struct net *net, int ifindex)  {  	int i; +	struct nfnl_queue_net *q = nfnl_queue_pernet(net);  	rcu_read_lock();  	for (i = 0; i < INSTANCE_BUCKETS; i++) {  		struct nfqnl_instance *inst; -		struct hlist_head *head = &instance_table[i]; +		struct hlist_head *head = &q->instance_table[i];  		hlist_for_each_entry_rcu(inst, head, hlist)  			nfqnl_flush(inst, dev_cmp, ifindex); @@ -600,12 +802,9 @@ nfqnl_rcv_dev_event(struct notifier_block *this,  {  	struct net_device *dev = ptr; -	if (!net_eq(dev_net(dev), &init_net)) -		return NOTIFY_DONE; -  	/* Drop any packets associated with the downed device */  	if (event == NETDEV_DOWN) -		nfqnl_dev_drop(dev->ifindex); +		nfqnl_dev_drop(dev_net(dev), dev->ifindex);  	return NOTIFY_DONE;  } @@ -618,24 +817,24 @@ nfqnl_rcv_nl_event(struct notifier_block *this,  		   unsigned long event, void *ptr)  {  	struct netlink_notify *n = ptr; +	struct nfnl_queue_net *q = nfnl_queue_pernet(n->net);  	if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {  		int i;  		/* destroy all instances for this portid */ -		spin_lock(&instances_lock); +		spin_lock(&q->instances_lock);  		for (i = 0; i < INSTANCE_BUCKETS; i++) {  			struct hlist_node *t2;  			struct nfqnl_instance *inst; -			struct hlist_head *head = &instance_table[i]; +			struct hlist_head *head = &q->instance_table[i];  			hlist_for_each_entry_safe(inst, t2, head, hlist) { -				if ((n->net == &init_net) && -				    (n->portid == inst->peer_portid)) +				if (n->portid == inst->peer_portid)  					__instance_destroy(inst);  			}  		} -		spin_unlock(&instances_lock); +		spin_unlock(&q->instances_lock);  	}  	return NOTIFY_DONE;  } @@ -656,11 +855,12 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {  	[NFQA_MARK]		= { .type = NLA_U32 },  }; -static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlportid) +static struct nfqnl_instance * 
+verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, int nlportid)  {  	struct nfqnl_instance *queue; -	queue = instance_lookup(queue_num); +	queue = instance_lookup(q, queue_num);  	if (!queue)  		return ERR_PTR(-ENODEV); @@ -704,7 +904,11 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,  	LIST_HEAD(batch_list);  	u16 queue_num = ntohs(nfmsg->res_id); -	queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); +	struct net *net = sock_net(ctnl); +	struct nfnl_queue_net *q = nfnl_queue_pernet(net); + +	queue = verdict_instance_lookup(q, queue_num, +					NETLINK_CB(skb).portid);  	if (IS_ERR(queue))  		return PTR_ERR(queue); @@ -752,10 +956,13 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,  	enum ip_conntrack_info uninitialized_var(ctinfo);  	struct nf_conn *ct = NULL; -	queue = instance_lookup(queue_num); -	if (!queue) +	struct net *net = sock_net(ctnl); +	struct nfnl_queue_net *q = nfnl_queue_pernet(net); -	queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); +	queue = instance_lookup(q, queue_num); +	if (!queue) +		queue = verdict_instance_lookup(q, queue_num, +						NETLINK_CB(skb).portid);  	if (IS_ERR(queue))  		return PTR_ERR(queue); @@ -819,6 +1026,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,  	u_int16_t queue_num = ntohs(nfmsg->res_id);  	struct nfqnl_instance *queue;  	struct nfqnl_msg_config_cmd *cmd = NULL; +	struct net *net = sock_net(ctnl); +	struct nfnl_queue_net *q = nfnl_queue_pernet(net);  	int ret = 0;  	if (nfqa[NFQA_CFG_CMD]) { @@ -832,7 +1041,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,  	}  	rcu_read_lock(); -	queue = instance_lookup(queue_num); +	queue = instance_lookup(q, queue_num);  	if (queue && queue->peer_portid != NETLINK_CB(skb).portid) {  		ret = -EPERM;  		goto err_out_unlock; @@ -845,7 +1054,8 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,  				ret = -EBUSY;  				goto err_out_unlock;  			} -			queue = instance_create(queue_num, NETLINK_CB(skb).portid); +			queue = instance_create(q, queue_num, +						NETLINK_CB(skb).portid);  			if (IS_ERR(queue)) {  				ret = PTR_ERR(queue);  				goto err_out_unlock; @@ -856,7 +1066,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,  				ret = -ENODEV;  				goto err_out_unlock;  			} -			instance_destroy(queue); +			instance_destroy(q, queue);  			break;  		case NFQNL_CFG_CMD_PF_BIND:  		case NFQNL_CFG_CMD_PF_UNBIND: @@ -950,19 +1160,24 @@ static const struct nfnetlink_subsystem nfqnl_subsys = {  #ifdef CONFIG_PROC_FS  struct iter_state { +	struct seq_net_private p;  	unsigned int bucket;  };  static struct hlist_node *get_first(struct seq_file *seq)  {  	struct iter_state *st = seq->private; +	struct net *net; +	struct nfnl_queue_net *q;  	if (!st)  		return NULL; +	net = seq_file_net(seq); +	q = nfnl_queue_pernet(net);  	for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { -		if (!hlist_empty(&instance_table[st->bucket])) -			return instance_table[st->bucket].first; +		if (!hlist_empty(&q->instance_table[st->bucket])) +			return q->instance_table[st->bucket].first;  	}  	return NULL;  } @@ -970,13 +1185,17 @@ static struct hlist_node *get_first(struct seq_file *seq)  static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)  {  	struct iter_state *st = seq->private; +	struct net *net = seq_file_net(seq);  	h = h->next;  	while (!h) { +		struct nfnl_queue_net *q; +  		if (++st->bucket >= INSTANCE_BUCKETS)  			return NULL; -		h = 
instance_table[st->bucket].first; +		q = nfnl_queue_pernet(net); +		h = q->instance_table[st->bucket].first;  	}  	return h;  } @@ -992,11 +1211,11 @@ static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)  	return pos ? NULL : head;  } -static void *seq_start(struct seq_file *seq, loff_t *pos) -	__acquires(instances_lock) +static void *seq_start(struct seq_file *s, loff_t *pos) +	__acquires(nfnl_queue_pernet(seq_file_net(s))->instances_lock)  { -	spin_lock(&instances_lock); -	return get_idx(seq, *pos); +	spin_lock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock); +	return get_idx(s, *pos);  }  static void *seq_next(struct seq_file *s, void *v, loff_t *pos) @@ -1006,9 +1225,9 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos)  }  static void seq_stop(struct seq_file *s, void *v) -	__releases(instances_lock) +	__releases(nfnl_queue_pernet(seq_file_net(s))->instances_lock)  { -	spin_unlock(&instances_lock); +	spin_unlock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);  }  static int seq_show(struct seq_file *s, void *v) @@ -1032,7 +1251,7 @@ static const struct seq_operations nfqnl_seq_ops = {  static int nfqnl_open(struct inode *inode, struct file *file)  { -	return seq_open_private(file, &nfqnl_seq_ops, +	return seq_open_net(inode, file, &nfqnl_seq_ops,  			sizeof(struct iter_state));  } @@ -1041,41 +1260,63 @@ static const struct file_operations nfqnl_file_ops = {  	.open	 = nfqnl_open,  	.read	 = seq_read,  	.llseek	 = seq_lseek, -	.release = seq_release_private, +	.release = seq_release_net,  };  #endif /* PROC_FS */ -static int __init nfnetlink_queue_init(void) +static int __net_init nfnl_queue_net_init(struct net *net)  { -	int i, status = -ENOMEM; +	unsigned int i; +	struct nfnl_queue_net *q = nfnl_queue_pernet(net);  	for (i = 0; i < INSTANCE_BUCKETS; i++) -		INIT_HLIST_HEAD(&instance_table[i]); +		INIT_HLIST_HEAD(&q->instance_table[i]); + +	spin_lock_init(&q->instances_lock); + +#ifdef CONFIG_PROC_FS +	if (!proc_create("nfnetlink_queue", 0440, +			 net->nf.proc_netfilter, &nfqnl_file_ops)) +		return -ENOMEM; +#endif +	return 0; +} + +static void __net_exit nfnl_queue_net_exit(struct net *net) +{ +	remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter); +} + +static struct pernet_operations nfnl_queue_net_ops = { +	.init	= nfnl_queue_net_init, +	.exit	= nfnl_queue_net_exit, +	.id	= &nfnl_queue_net_id, +	.size	= sizeof(struct nfnl_queue_net), +}; + +static int __init nfnetlink_queue_init(void) +{ +	int status = -ENOMEM;  	netlink_register_notifier(&nfqnl_rtnl_notifier);  	status = nfnetlink_subsys_register(&nfqnl_subsys);  	if (status < 0) { -		printk(KERN_ERR "nf_queue: failed to create netlink socket\n"); +		pr_err("nf_queue: failed to create netlink socket\n");  		goto cleanup_netlink_notifier;  	} -#ifdef CONFIG_PROC_FS -	if (!proc_create("nfnetlink_queue", 0440, -			 proc_net_netfilter, &nfqnl_file_ops)) { -		status = -ENOMEM; +	status = register_pernet_subsys(&nfnl_queue_net_ops); +	if (status < 0) { +		pr_err("nf_queue: failed to register pernet ops\n");  		goto cleanup_subsys;  	} -#endif -  	register_netdevice_notifier(&nfqnl_dev_notifier);  	nf_register_queue_handler(&nfqh);  	return status; -#ifdef CONFIG_PROC_FS  cleanup_subsys:  	nfnetlink_subsys_unregister(&nfqnl_subsys); -#endif  cleanup_netlink_notifier:  	netlink_unregister_notifier(&nfqnl_rtnl_notifier);  	return status; @@ -1085,9 +1326,7 @@ static void __exit nfnetlink_queue_fini(void)  {  	nf_unregister_queue_handler();  	
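	/* Teardown mirrors nfnetlink_queue_init() in reverse: stop the
	 * queue handler first so no new packets can be enqueued, then let
	 * the pernet exit path below remove each namespace's
	 * /proc/net/netfilter/nfnetlink_queue entry. */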
unregister_netdevice_notifier(&nfqnl_dev_notifier); -#ifdef CONFIG_PROC_FS -	remove_proc_entry("nfnetlink_queue", proc_net_netfilter); -#endif +	unregister_pernet_subsys(&nfnl_queue_net_ops);  	nfnetlink_subsys_unregister(&nfqnl_subsys);  	netlink_unregister_notifier(&nfqnl_rtnl_notifier); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 686c7715d77..1a73b18683b 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -2,6 +2,7 @@   * x_tables core - Backend for {ip,ip6,arp}_tables   *   * Copyright (C) 2006-2006 Harald Welte <laforge@netfilter.org> + * Copyright (C) 2006-2012 Patrick McHardy <kaber@trash.net>   *   * Based on existing ip_tables code which is   *   Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c index fa40096940a..fe573f6c9e9 100644 --- a/net/netfilter/xt_LOG.c +++ b/net/netfilter/xt_LOG.c @@ -474,7 +474,14 @@ ipt_log_packet(u_int8_t pf,  	       const struct nf_loginfo *loginfo,  	       const char *prefix)  { -	struct sbuff *m = sb_open(); +	struct sbuff *m; +	struct net *net = dev_net(in ? in : out); + +	/* FIXME: Disabled from containers until syslog ns is supported */ +	if (!net_eq(net, &init_net)) +		return; + +	m = sb_open();  	if (!loginfo)  		loginfo = &default_loginfo; @@ -798,7 +805,14 @@ ip6t_log_packet(u_int8_t pf,  		const struct nf_loginfo *loginfo,  		const char *prefix)  { -	struct sbuff *m = sb_open(); +	struct sbuff *m; +	struct net *net = dev_net(in ? in : out); + +	/* FIXME: Disabled from containers until syslog ns is supported */ +	if (!net_eq(net, &init_net)) +		return; + +	m = sb_open();  	if (!loginfo)  		loginfo = &default_loginfo; @@ -893,23 +907,55 @@ static struct nf_logger ip6t_log_logger __read_mostly = {  };  #endif +static int __net_init log_net_init(struct net *net) +{ +	nf_log_set(net, NFPROTO_IPV4, &ipt_log_logger); +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +	nf_log_set(net, NFPROTO_IPV6, &ip6t_log_logger); +#endif +	return 0; +} + +static void __net_exit log_net_exit(struct net *net) +{ +	nf_log_unset(net, &ipt_log_logger); +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +	nf_log_unset(net, &ip6t_log_logger); +#endif +} + +static struct pernet_operations log_net_ops = { +	.init = log_net_init, +	.exit = log_net_exit, +}; +  static int __init log_tg_init(void)  {  	int ret; +	ret = register_pernet_subsys(&log_net_ops); +	if (ret < 0) +		goto err_pernet; +  	ret = xt_register_targets(log_tg_regs, ARRAY_SIZE(log_tg_regs));  	if (ret < 0) -		return ret; +		goto err_target;  	nf_log_register(NFPROTO_IPV4, &ipt_log_logger);  #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  	nf_log_register(NFPROTO_IPV6, &ip6t_log_logger);  #endif  	return 0; + +err_target: +	unregister_pernet_subsys(&log_net_ops); +err_pernet: +	return ret;  }  static void __exit log_tg_exit(void)  { +	unregister_pernet_subsys(&log_net_ops);  	nf_log_unregister(&ipt_log_logger);  #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  	nf_log_unregister(&ip6t_log_logger); diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 817f9e9f2b1..1e2fae32f81 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -76,22 +76,31 @@ static u32 hash_v6(const struct sk_buff *skb)  }  #endif -static unsigned int -nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) +static u32 +nfqueue_hash(const struct sk_buff *skb, const struct xt_action_param *par)  {  	const struct xt_NFQ_info_v1 *info = par->targinfo;  	u32 queue = info->queuenum; -	if 
(info->queues_total > 1) { -		if (par->family == NFPROTO_IPV4) -			queue = (((u64) hash_v4(skb) * info->queues_total) >> -				 32) + queue; +	if (par->family == NFPROTO_IPV4) +		queue += ((u64) hash_v4(skb) * info->queues_total) >> 32;  #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) -		else if (par->family == NFPROTO_IPV6) -			queue = (((u64) hash_v6(skb) * info->queues_total) >> -				 32) + queue; +	else if (par->family == NFPROTO_IPV6) +		queue += ((u64) hash_v6(skb) * info->queues_total) >> 32;  #endif -	} + +	return queue; +} + +static unsigned int +nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par) +{ +	const struct xt_NFQ_info_v1 *info = par->targinfo; +	u32 queue = info->queuenum; + +	if (info->queues_total > 1) +		queue = nfqueue_hash(skb, par); +  	return NF_QUEUE_NR(queue);  } @@ -108,7 +117,7 @@ nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)  static int nfqueue_tg_check(const struct xt_tgchk_param *par)  { -	const struct xt_NFQ_info_v2 *info = par->targinfo; +	const struct xt_NFQ_info_v3 *info = par->targinfo;  	u32 maxid;  	if (unlikely(!rnd_inited)) { @@ -125,11 +134,32 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par)  		       info->queues_total, maxid);  		return -ERANGE;  	} -	if (par->target->revision == 2 && info->bypass > 1) +	if (par->target->revision == 2 && info->flags > 1)  		return -EINVAL; +	if (par->target->revision == 3 && info->flags & ~NFQ_FLAG_MASK) +		return -EINVAL; +  	return 0;  } +static unsigned int +nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par) +{ +	const struct xt_NFQ_info_v3 *info = par->targinfo; +	u32 queue = info->queuenum; + +	if (info->queues_total > 1) { +		if (info->flags & NFQ_FLAG_CPU_FANOUT) { +			int cpu = smp_processor_id(); + +			queue = info->queuenum + cpu % info->queues_total; +		} else +			queue = nfqueue_hash(skb, par); +	} + +	return NF_QUEUE_NR(queue); +} +  static struct xt_target nfqueue_tg_reg[] __read_mostly = {  	{  		.name		= "NFQUEUE", @@ -156,6 +186,15 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {  		.targetsize	= sizeof(struct xt_NFQ_info_v2),  		.me		= THIS_MODULE,  	}, +	{ +		.name		= "NFQUEUE", +		.revision	= 3, +		.family		= NFPROTO_UNSPEC, +		.checkentry	= nfqueue_tg_check, +		.target		= nfqueue_tg_v3, +		.targetsize	= sizeof(struct xt_NFQ_info_v3), +		.me		= THIS_MODULE, +	},  };  static int __init nfqueue_tg_init(void) diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 71a266de5fb..a75240f0d42 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -2,6 +2,7 @@   * This is a module which is used for setting the MSS option in TCP packets.   *   * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca> + * Copyright (C) 2007 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 61805d7b38a..188404b9b00 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -3,6 +3,7 @@   *	information. (Superset of Rusty's minimalistic state match.)   *   *	(C) 2001  Marc Boucher (marc@mbsi.ca). 
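 *
 *	Editorial note on the xt_NFQUEUE revision 3 hunk above, not part of
 *	this header: with NFQ_FLAG_CPU_FANOUT the queue is picked as
 *	queuenum + smp_processor_id() % queues_total, so CPU 5 with
 *	queuenum 0 and queues_total 4 always lands in queue 1; without the
 *	flag, flows are spread across queues as (hash * queues_total) >> 32.
 *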
+ *	(C) 2006-2012 Patrick McHardy <kaber@trash.net>   *	Copyright © CC Computer Consultants GmbH, 2007 - 2008   *   *	This program is free software; you can redistribute it and/or modify diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index f330e8beaf6..0199e7bb8f8 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -3,6 +3,7 @@   *	separately for each hashbucket (sourceip/sourceport/dstip/dstport)   *   *	(C) 2003-2004 by Harald Welte <laforge@netfilter.org> + *	(C) 2006-2012 Patrick McHardy <kaber@trash.net>   *	Copyright © CC Computer Consultants GmbH, 2007 - 2008   *   * Development of this code was funded by Astaro AG, http://www.astaro.com/ diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index a4c1e4528ca..bef85059655 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -1,5 +1,6 @@  /* (C) 1999 Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr>   * (C) 1999 Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index a5e673d32bd..647d989a01e 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -201,6 +201,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)  	unsigned char opts[MAX_IPOPTLEN];  	const struct xt_osf_finger *kf;  	const struct xt_osf_user_finger *f; +	struct net *net = dev_net(p->in ? p->in : p->out);  	if (!info)  		return false; @@ -325,7 +326,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)  			fcount++;  			if (info->flags & XT_OSF_LOG) -				nf_log_packet(p->family, p->hooknum, skb, +				nf_log_packet(net, p->family, p->hooknum, skb,  					p->in, p->out, NULL,  					"%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n",  					f->genre, f->version, f->subtype, @@ -341,7 +342,8 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)  	rcu_read_unlock();  	if (!fcount && (info->flags & XT_OSF_LOG)) -		nf_log_packet(p->family, p->hooknum, skb, p->in, p->out, NULL, +		nf_log_packet(net, p->family, p->hooknum, skb, p->in, +			      p->out, NULL,  			"Remote OS is not known: %pI4:%u -> %pI4:%u\n",  				&ip->saddr, ntohs(tcp->source),  				&ip->daddr, ntohs(tcp->dest)); diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 865a9e54f3a..31790e789e2 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -1,7 +1,7 @@  /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>   *                         Patrick Schaaf <bof@bof.de>   *                         Martin Josefsson <gandalf@wlug.westbo.se> - * Copyright (C) 2003-2011 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>   *   * This program is free software; you can redistribute it and/or modify   * it under the terms of the GNU General Public License version 2 as @@ -30,7 +30,7 @@ MODULE_ALIAS("ip6t_SET");  static inline int  match_set(ip_set_id_t index, const struct sk_buff *skb,  	  const struct xt_action_param *par, -	  const struct ip_set_adt_opt *opt, int inv) +	  struct ip_set_adt_opt *opt, int inv)  {  	if (ip_set_test(index, skb, par, opt))  		inv = !inv; @@ -38,20 +38,12 @@ match_set(ip_set_id_t index, const struct sk_buff *skb,  }  #define ADT_OPT(n, f, d, fs, cfs, t)	\ -const struct 
ip_set_adt_opt n = {	\ -	.family	= f,			\ -	.dim = d,			\ -	.flags = fs,			\ -	.cmdflags = cfs,		\ -	.timeout = t,			\ -} -#define ADT_MOPT(n, f, d, fs, cfs, t)	\  struct ip_set_adt_opt n = {		\  	.family	= f,			\  	.dim = d,			\  	.flags = fs,			\  	.cmdflags = cfs,		\ -	.timeout = t,			\ +	.ext.timeout = t,		\  }  /* Revision 0 interface: backward compatible with netfilter/iptables */ @@ -197,6 +189,9 @@ set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)  	ADT_OPT(opt, par->family, info->match_set.dim,  		info->match_set.flags, 0, UINT_MAX); +	if (opt.flags & IPSET_RETURN_NOMATCH) +		opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH; +  	return match_set(info->match_set.index, skb, par, &opt,  			 info->match_set.flags & IPSET_INV_MATCH);  } @@ -305,15 +300,15 @@ static unsigned int  set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)  {  	const struct xt_set_info_target_v2 *info = par->targinfo; -	ADT_MOPT(add_opt, par->family, info->add_set.dim, -		 info->add_set.flags, info->flags, info->timeout); +	ADT_OPT(add_opt, par->family, info->add_set.dim, +		info->add_set.flags, info->flags, info->timeout);  	ADT_OPT(del_opt, par->family, info->del_set.dim,  		info->del_set.flags, 0, UINT_MAX);  	/* Normalize to fit into jiffies */ -	if (add_opt.timeout != IPSET_NO_TIMEOUT && -	    add_opt.timeout > UINT_MAX/MSEC_PER_SEC) -		add_opt.timeout = UINT_MAX/MSEC_PER_SEC; +	if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && +	    add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC) +		add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC;  	if (info->add_set.index != IPSET_INVALID_ID)  		ip_set_add(info->add_set.index, skb, par, &add_opt);  	if (info->del_set.index != IPSET_INVALID_ID) @@ -325,6 +320,52 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)  #define set_target_v2_checkentry	set_target_v1_checkentry  #define set_target_v2_destroy		set_target_v1_destroy +/* Revision 3 match */ + +static bool +match_counter(u64 counter, const struct ip_set_counter_match *info) +{ +	switch (info->op) { +	case IPSET_COUNTER_NONE: +		return true; +	case IPSET_COUNTER_EQ: +		return counter == info->value; +	case IPSET_COUNTER_NE: +		return counter != info->value; +	case IPSET_COUNTER_LT: +		return counter < info->value; +	case IPSET_COUNTER_GT: +		return counter > info->value; +	} +	return false; +} + +static bool +set_match_v3(const struct sk_buff *skb, struct xt_action_param *par) +{ +	const struct xt_set_info_match_v3 *info = par->matchinfo; +	ADT_OPT(opt, par->family, info->match_set.dim, +		info->match_set.flags, info->flags, UINT_MAX); +	int ret; + +	if (info->packets.op != IPSET_COUNTER_NONE || +	    info->bytes.op != IPSET_COUNTER_NONE) +		opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS; + +	ret = match_set(info->match_set.index, skb, par, &opt, +			info->match_set.flags & IPSET_INV_MATCH); + +	if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS)) +		return ret; + +	if (!match_counter(opt.ext.packets, &info->packets)) +		return 0; +	return match_counter(opt.ext.bytes, &info->bytes); +} + +#define set_match_v3_checkentry	set_match_v1_checkentry +#define set_match_v3_destroy	set_match_v1_destroy +  static struct xt_match set_matches[] __read_mostly = {  	{  		.name		= "set", @@ -377,6 +418,27 @@ static struct xt_match set_matches[] __read_mostly = {  		.destroy	= set_match_v1_destroy,  		.me		= THIS_MODULE  	}, +	/* counters support: update, match */ +	{ +		.name		= "set", +		.family		= NFPROTO_IPV4, +		.revision	= 3, +		.match		= set_match_v3, +		.matchsize	= sizeof(struct 
xt_set_info_match_v3), +		.checkentry	= set_match_v3_checkentry, +		.destroy	= set_match_v3_destroy, +		.me		= THIS_MODULE +	}, +	{ +		.name		= "set", +		.family		= NFPROTO_IPV6, +		.revision	= 3, +		.match		= set_match_v3, +		.matchsize	= sizeof(struct xt_set_info_match_v3), +		.checkentry	= set_match_v3_checkentry, +		.destroy	= set_match_v3_destroy, +		.me		= THIS_MODULE +	},  };  static struct xt_target set_targets[] __read_mostly = { diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig new file mode 100644 index 00000000000..2c5e95e9bfb --- /dev/null +++ b/net/netlink/Kconfig @@ -0,0 +1,19 @@ +# +# Netlink Sockets +# + +config NETLINK_MMAP +	bool "NETLINK: mmaped IO" +	---help--- +	  This option enables support for memory mapped netlink IO. This +	  reduces overhead by avoiding copying data between kernel- and +	  userspace. + +	  If unsure, say N. + +config NETLINK_DIAG +	tristate "NETLINK: socket monitoring interface" +	default n +	---help--- +	  Support for NETLINK socket monitoring interface used by the ss tool. +	  If unsure, say Y. diff --git a/net/netlink/Makefile b/net/netlink/Makefile index bdd6ddf4e95..e837917f6c0 100644 --- a/net/netlink/Makefile +++ b/net/netlink/Makefile @@ -3,3 +3,6 @@  #  obj-y  				:= af_netlink.o genetlink.o + +obj-$(CONFIG_NETLINK_DIAG)	+= netlink_diag.o +netlink_diag-y			:= diag.o diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 1e3fd5bfcd8..12ac6b47a35 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -3,6 +3,7 @@   *   * 		Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>   * 				Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> + * 				Patrick McHardy <kaber@trash.net>   *   *		This program is free software; you can redistribute it and/or   *		modify it under the terms of the GNU General Public License @@ -55,87 +56,45 @@  #include <linux/types.h>  #include <linux/audit.h>  #include <linux/mutex.h> +#include <linux/vmalloc.h> +#include <asm/cacheflush.h>  #include <net/net_namespace.h>  #include <net/sock.h>  #include <net/scm.h>  #include <net/netlink.h> -#define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8) -#define NLGRPLONGS(x)	(NLGRPSZ(x)/sizeof(unsigned long)) - -struct netlink_sock { -	/* struct sock has to be the first member of netlink_sock */ -	struct sock		sk; -	u32			portid; -	u32			dst_portid; -	u32			dst_group; -	u32			flags; -	u32			subscriptions; -	u32			ngroups; -	unsigned long		*groups; -	unsigned long		state; -	wait_queue_head_t	wait; -	struct netlink_callback	*cb; -	struct mutex		*cb_mutex; -	struct mutex		cb_def_mutex; -	void			(*netlink_rcv)(struct sk_buff *skb); -	void			(*netlink_bind)(int group); -	struct module		*module; -}; +#include "af_netlink.h"  struct listeners {  	struct rcu_head		rcu;  	unsigned long		masks[0];  }; +/* state bits */ +#define NETLINK_CONGESTED	0x0 + +/* flags */  #define NETLINK_KERNEL_SOCKET	0x1  #define NETLINK_RECV_PKTINFO	0x2  #define NETLINK_BROADCAST_SEND_ERROR	0x4  #define NETLINK_RECV_NO_ENOBUFS	0x8 -static inline struct netlink_sock *nlk_sk(struct sock *sk) -{ -	return container_of(sk, struct netlink_sock, sk); -} -  static inline int netlink_is_kernel(struct sock *sk)  {  	return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;  } -struct nl_portid_hash { -	struct hlist_head	*table; -	unsigned long		rehash_time; - -	unsigned int		mask; -	unsigned int		shift; - -	unsigned int		entries; -	unsigned int		max_shift; - -	u32			rnd; -}; - -struct netlink_table { -	struct nl_portid_hash	hash; -	struct hlist_head	mc_list; -	struct listeners __rcu	*listeners; -	
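	/* Editorial note, not part of this change: the structures being
	 * removed here reappear in the new net/netlink/af_netlink.h further
	 * below (netlink_sock additionally gains the mmap ring fields there),
	 * which is what lets the netlink_diag module introduced by this
	 * series walk nl_table. */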
unsigned int		flags; -	unsigned int		groups; -	struct mutex		*cb_mutex; -	struct module		*module; -	void			(*bind)(int group); -	int			registered; -}; - -static struct netlink_table *nl_table; +struct netlink_table *nl_table; +EXPORT_SYMBOL_GPL(nl_table);  static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);  static int netlink_dump(struct sock *sk); +static void netlink_skb_destructor(struct sk_buff *skb); -static DEFINE_RWLOCK(nl_table_lock); +DEFINE_RWLOCK(nl_table_lock); +EXPORT_SYMBOL_GPL(nl_table_lock);  static atomic_t nl_table_users = ATOMIC_INIT(0);  #define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock)); @@ -152,6 +111,599 @@ static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u  	return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];  } +static void netlink_overrun(struct sock *sk) +{ +	struct netlink_sock *nlk = nlk_sk(sk); + +	if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) { +		if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) { +			sk->sk_err = ENOBUFS; +			sk->sk_error_report(sk); +		} +	} +	atomic_inc(&sk->sk_drops); +} + +static void netlink_rcv_wake(struct sock *sk) +{ +	struct netlink_sock *nlk = nlk_sk(sk); + +	if (skb_queue_empty(&sk->sk_receive_queue)) +		clear_bit(NETLINK_CONGESTED, &nlk->state); +	if (!test_bit(NETLINK_CONGESTED, &nlk->state)) +		wake_up_interruptible(&nlk->wait); +} + +#ifdef CONFIG_NETLINK_MMAP +static bool netlink_skb_is_mmaped(const struct sk_buff *skb) +{ +	return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED; +} + +static bool netlink_rx_is_mmaped(struct sock *sk) +{ +	return nlk_sk(sk)->rx_ring.pg_vec != NULL; +} + +static bool netlink_tx_is_mmaped(struct sock *sk) +{ +	return nlk_sk(sk)->tx_ring.pg_vec != NULL; +} + +static __pure struct page *pgvec_to_page(const void *addr) +{ +	if (is_vmalloc_addr(addr)) +		return vmalloc_to_page(addr); +	else +		return virt_to_page(addr); +} + +static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len) +{ +	unsigned int i; + +	for (i = 0; i < len; i++) { +		if (pg_vec[i] != NULL) { +			if (is_vmalloc_addr(pg_vec[i])) +				vfree(pg_vec[i]); +			else +				free_pages((unsigned long)pg_vec[i], order); +		} +	} +	kfree(pg_vec); +} + +static void *alloc_one_pg_vec_page(unsigned long order) +{ +	void *buffer; +	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | +			  __GFP_NOWARN | __GFP_NORETRY; + +	buffer = (void *)__get_free_pages(gfp_flags, order); +	if (buffer != NULL) +		return buffer; + +	buffer = vzalloc((1 << order) * PAGE_SIZE); +	if (buffer != NULL) +		return buffer; + +	gfp_flags &= ~__GFP_NORETRY; +	return (void *)__get_free_pages(gfp_flags, order); +} + +static void **alloc_pg_vec(struct netlink_sock *nlk, +			   struct nl_mmap_req *req, unsigned int order) +{ +	unsigned int block_nr = req->nm_block_nr; +	unsigned int i; +	void **pg_vec, *ptr; + +	pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL); +	if (pg_vec == NULL) +		return NULL; + +	for (i = 0; i < block_nr; i++) { +		pg_vec[i] = ptr = alloc_one_pg_vec_page(order); +		if (pg_vec[i] == NULL) +			goto err1; +	} + +	return pg_vec; +err1: +	free_pg_vec(pg_vec, order, block_nr); +	return NULL; +} + +static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, +			    bool closing, bool tx_ring) +{ +	struct netlink_sock *nlk = nlk_sk(sk); +	struct netlink_ring *ring; +	struct sk_buff_head *queue; +	void **pg_vec = NULL; +	unsigned int order = 0; +	int err; + +	ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring; +	queue = tx_ring ? 
&sk->sk_write_queue : &sk->sk_receive_queue;
+
+	if (!closing) {
+		if (atomic_read(&nlk->mapped))
+			return -EBUSY;
+		if (atomic_read(&ring->pending))
+			return -EBUSY;
+	}
+
+	if (req->nm_block_nr) {
+		if (ring->pg_vec != NULL)
+			return -EBUSY;
+
+		if ((int)req->nm_block_size <= 0)
+			return -EINVAL;
+		if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE))
+			return -EINVAL;
+		if (req->nm_frame_size < NL_MMAP_HDRLEN)
+			return -EINVAL;
+		if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
+			return -EINVAL;
+
+		ring->frames_per_block = req->nm_block_size /
+					 req->nm_frame_size;
+		if (ring->frames_per_block == 0)
+			return -EINVAL;
+		if (ring->frames_per_block * req->nm_block_nr !=
+		    req->nm_frame_nr)
+			return -EINVAL;
+
+		order = get_order(req->nm_block_size);
+		pg_vec = alloc_pg_vec(nlk, req, order);
+		if (pg_vec == NULL)
+			return -ENOMEM;
+	} else {
+		if (req->nm_frame_nr)
+			return -EINVAL;
+	}
+
+	err = -EBUSY;
+	mutex_lock(&nlk->pg_vec_lock);
+	if (closing || atomic_read(&nlk->mapped) == 0) {
+		err = 0;
+		spin_lock_bh(&queue->lock);
+
+		ring->frame_max		= req->nm_frame_nr - 1;
+		ring->head		= 0;
+		ring->frame_size	= req->nm_frame_size;
+		ring->pg_vec_pages	= req->nm_block_size / PAGE_SIZE;
+
+		swap(ring->pg_vec_len, req->nm_block_nr);
+		swap(ring->pg_vec_order, order);
+		swap(ring->pg_vec, pg_vec);
+
+		__skb_queue_purge(queue);
+		spin_unlock_bh(&queue->lock);
+
+		WARN_ON(atomic_read(&nlk->mapped));
+	}
+	mutex_unlock(&nlk->pg_vec_lock);
+
+	if (pg_vec)
+		free_pg_vec(pg_vec, order, req->nm_block_nr);
+	return err;
+}
+
+static void netlink_mm_open(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+	struct socket *sock = file->private_data;
+	struct sock *sk = sock->sk;
+
+	if (sk)
+		atomic_inc(&nlk_sk(sk)->mapped);
+}
+
+static void netlink_mm_close(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+	struct socket *sock = file->private_data;
+	struct sock *sk = sock->sk;
+
+	if (sk)
+		atomic_dec(&nlk_sk(sk)->mapped);
+}
+
+static const struct vm_operations_struct netlink_mmap_ops = {
+	.open	= netlink_mm_open,
+	.close	= netlink_mm_close,
+};
+
+static int netlink_mmap(struct file *file, struct socket *sock,
+			struct vm_area_struct *vma)
+{
+	struct sock *sk = sock->sk;
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct netlink_ring *ring;
+	unsigned long start, size, expected;
+	unsigned int i;
+	int err = -EINVAL;
+
+	if (vma->vm_pgoff)
+		return -EINVAL;
+
+	mutex_lock(&nlk->pg_vec_lock);
+
+	expected = 0;
+	for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
+		if (ring->pg_vec == NULL)
+			continue;
+		expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
+	}
+
+	if (expected == 0)
+		goto out;
+
+	size = vma->vm_end - vma->vm_start;
+	if (size != expected)
+		goto out;
+
+	start = vma->vm_start;
+	for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
+		if (ring->pg_vec == NULL)
+			continue;
+
+		for (i = 0; i < ring->pg_vec_len; i++) {
+			struct page *page;
+			void *kaddr = ring->pg_vec[i];
+			unsigned int pg_num;
+
+			for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
+				page = pgvec_to_page(kaddr);
+				err = vm_insert_page(vma, start, page);
+				if (err < 0)
+					goto out;
+				start += PAGE_SIZE;
+				kaddr += PAGE_SIZE;
+			}
+		}
+	}
+
+	atomic_inc(&nlk->mapped);
+	vma->vm_ops = &netlink_mmap_ops;
+	err = 0;
+out:
+	mutex_unlock(&nlk->pg_vec_lock);
+	return err;
+}
+
+static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
+{
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
+	struct page *p_start, *p_end;
+
+	/* First page is flushed through netlink_{get,set}_status */
+	p_start = pgvec_to_page((void *)hdr + PAGE_SIZE);
+	p_end   = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1);
+	while (p_start <= p_end) {
+		flush_dcache_page(p_start);
+		p_start++;
+	}
+#endif
+}
+
+static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
+{
+	smp_rmb();
+	flush_dcache_page(pgvec_to_page(hdr));
+	return hdr->nm_status;
+}
+
+static void netlink_set_status(struct nl_mmap_hdr *hdr,
+			       enum nl_mmap_status status)
+{
+	hdr->nm_status = status;
+	flush_dcache_page(pgvec_to_page(hdr));
+	smp_wmb();
+}
+
+static struct nl_mmap_hdr *
+__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
+{
+	unsigned int pg_vec_pos, frame_off;
+
+	pg_vec_pos = pos / ring->frames_per_block;
+	frame_off  = pos % ring->frames_per_block;
+
+	return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
+}
+
+static struct nl_mmap_hdr *
+netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
+		     enum nl_mmap_status status)
+{
+	struct nl_mmap_hdr *hdr;
+
+	hdr = __netlink_lookup_frame(ring, pos);
+	if (netlink_get_status(hdr) != status)
+		return NULL;
+
+	return hdr;
+}
+
+static struct nl_mmap_hdr *
+netlink_current_frame(const struct netlink_ring *ring,
+		      enum nl_mmap_status status)
+{
+	return netlink_lookup_frame(ring, ring->head, status);
+}
+
+static struct nl_mmap_hdr *
+netlink_previous_frame(const struct netlink_ring *ring,
+		       enum nl_mmap_status status)
+{
+	unsigned int prev;
+
+	prev = ring->head ? ring->head - 1 : ring->frame_max;
+	return netlink_lookup_frame(ring, prev, status);
+}
+
+static void netlink_increment_head(struct netlink_ring *ring)
+{
+	ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
+}
+
+static void netlink_forward_ring(struct netlink_ring *ring)
+{
+	unsigned int head = ring->head, pos = head;
+	const struct nl_mmap_hdr *hdr;
+
+	do {
+		hdr = __netlink_lookup_frame(ring, pos);
+		if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
+			break;
+		if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
+			break;
+		netlink_increment_head(ring);
+	} while (ring->head != head);
+}
+
+static bool netlink_dump_space(struct netlink_sock *nlk)
+{
+	struct netlink_ring *ring = &nlk->rx_ring;
+	struct nl_mmap_hdr *hdr;
+	unsigned int n;
+
+	hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
+	if (hdr == NULL)
+		return false;
+
+	n = ring->head + ring->frame_max / 2;
+	if (n > ring->frame_max)
+		n -= ring->frame_max;
+
+	hdr = __netlink_lookup_frame(ring, n);
+
+	return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
+}
+
+static unsigned int netlink_poll(struct file *file, struct socket *sock,
+				 poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	struct netlink_sock *nlk = nlk_sk(sk);
+	unsigned int mask;
+	int err;
+
+	if (nlk->rx_ring.pg_vec != NULL) {
+		/* Memory mapped sockets don't call recvmsg(), so flow control
+		 * for dumps is performed here. A dump is allowed to continue
+		 * if at least half the ring is unused.
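+		 * Editorial aside, an illustration rather than commit text:
+		 * with an 8-frame ring (frame_max = 7) and head = 6,
+		 * netlink_dump_space() probes frame 6 + 7/2 = 9, which wraps
+		 * to frame 2, a slot about half a ring ahead; the dump
+		 * resumes only while that slot is still NL_MMAP_STATUS_UNUSED.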
+		 */ +		while (nlk->cb != NULL && netlink_dump_space(nlk)) { +			err = netlink_dump(sk); +			if (err < 0) { +				sk->sk_err = err; +				sk->sk_error_report(sk); +				break; +			} +		} +		netlink_rcv_wake(sk); +	} + +	mask = datagram_poll(file, sock, wait); + +	spin_lock_bh(&sk->sk_receive_queue.lock); +	if (nlk->rx_ring.pg_vec) { +		netlink_forward_ring(&nlk->rx_ring); +		if (!netlink_previous_frame(&nlk->rx_ring, NL_MMAP_STATUS_UNUSED)) +			mask |= POLLIN | POLLRDNORM; +	} +	spin_unlock_bh(&sk->sk_receive_queue.lock); + +	spin_lock_bh(&sk->sk_write_queue.lock); +	if (nlk->tx_ring.pg_vec) { +		if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED)) +			mask |= POLLOUT | POLLWRNORM; +	} +	spin_unlock_bh(&sk->sk_write_queue.lock); + +	return mask; +} + +static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb) +{ +	return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN); +} + +static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, +				   struct netlink_ring *ring, +				   struct nl_mmap_hdr *hdr) +{ +	unsigned int size; +	void *data; + +	size = ring->frame_size - NL_MMAP_HDRLEN; +	data = (void *)hdr + NL_MMAP_HDRLEN; + +	skb->head	= data; +	skb->data	= data; +	skb_reset_tail_pointer(skb); +	skb->end	= skb->tail + size; +	skb->len	= 0; + +	skb->destructor	= netlink_skb_destructor; +	NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED; +	NETLINK_CB(skb).sk = sk; +} + +static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, +				u32 dst_portid, u32 dst_group, +				struct sock_iocb *siocb) +{ +	struct netlink_sock *nlk = nlk_sk(sk); +	struct netlink_ring *ring; +	struct nl_mmap_hdr *hdr; +	struct sk_buff *skb; +	unsigned int maxlen; +	bool excl = true; +	int err = 0, len = 0; + +	/* Netlink messages are validated by the receiver before processing. +	 * In order to avoid userspace changing the contents of the message +	 * after validation, the socket and the ring may only be used by a +	 * single process, otherwise we fall back to copying. 
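+	 * Editorial reading, an inference rather than commit text: the
+	 * f_count > 2 check below takes one reference from the fd table and
+	 * one held by the ring mapping itself as the single-process baseline;
+	 * any extra descriptor or mapping clears excl and selects the copy
+	 * path instead of the zero-copy one.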
+	 */ +	if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 || +	    atomic_read(&nlk->mapped) > 1) +		excl = false; + +	mutex_lock(&nlk->pg_vec_lock); + +	ring   = &nlk->tx_ring; +	maxlen = ring->frame_size - NL_MMAP_HDRLEN; + +	do { +		hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID); +		if (hdr == NULL) { +			if (!(msg->msg_flags & MSG_DONTWAIT) && +			    atomic_read(&nlk->tx_ring.pending)) +				schedule(); +			continue; +		} +		if (hdr->nm_len > maxlen) { +			err = -EINVAL; +			goto out; +		} + +		netlink_frame_flush_dcache(hdr); + +		if (likely(dst_portid == 0 && dst_group == 0 && excl)) { +			skb = alloc_skb_head(GFP_KERNEL); +			if (skb == NULL) { +				err = -ENOBUFS; +				goto out; +			} +			sock_hold(sk); +			netlink_ring_setup_skb(skb, sk, ring, hdr); +			NETLINK_CB(skb).flags |= NETLINK_SKB_TX; +			__skb_put(skb, hdr->nm_len); +			netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); +			atomic_inc(&ring->pending); +		} else { +			skb = alloc_skb(hdr->nm_len, GFP_KERNEL); +			if (skb == NULL) { +				err = -ENOBUFS; +				goto out; +			} +			__skb_put(skb, hdr->nm_len); +			memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len); +			netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); +		} + +		netlink_increment_head(ring); + +		NETLINK_CB(skb).portid	  = nlk->portid; +		NETLINK_CB(skb).dst_group = dst_group; +		NETLINK_CB(skb).creds	  = siocb->scm->creds; + +		err = security_netlink_send(sk, skb); +		if (err) { +			kfree_skb(skb); +			goto out; +		} + +		if (unlikely(dst_group)) { +			atomic_inc(&skb->users); +			netlink_broadcast(sk, skb, dst_portid, dst_group, +					  GFP_KERNEL); +		} +		err = netlink_unicast(sk, skb, dst_portid, +				      msg->msg_flags & MSG_DONTWAIT); +		if (err < 0) +			goto out; +		len += err; + +	} while (hdr != NULL || +		 (!(msg->msg_flags & MSG_DONTWAIT) && +		  atomic_read(&nlk->tx_ring.pending))); + +	if (len > 0) +		err = len; +out: +	mutex_unlock(&nlk->pg_vec_lock); +	return err; +} + +static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb) +{ +	struct nl_mmap_hdr *hdr; + +	hdr = netlink_mmap_hdr(skb); +	hdr->nm_len	= skb->len; +	hdr->nm_group	= NETLINK_CB(skb).dst_group; +	hdr->nm_pid	= NETLINK_CB(skb).creds.pid; +	hdr->nm_uid	= from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); +	hdr->nm_gid	= from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); +	netlink_frame_flush_dcache(hdr); +	netlink_set_status(hdr, NL_MMAP_STATUS_VALID); + +	NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED; +	kfree_skb(skb); +} + +static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) +{ +	struct netlink_sock *nlk = nlk_sk(sk); +	struct netlink_ring *ring = &nlk->rx_ring; +	struct nl_mmap_hdr *hdr; + +	spin_lock_bh(&sk->sk_receive_queue.lock); +	hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); +	if (hdr == NULL) { +		spin_unlock_bh(&sk->sk_receive_queue.lock); +		kfree_skb(skb); +		netlink_overrun(sk); +		return; +	} +	netlink_increment_head(ring); +	__skb_queue_tail(&sk->sk_receive_queue, skb); +	spin_unlock_bh(&sk->sk_receive_queue.lock); + +	hdr->nm_len	= skb->len; +	hdr->nm_group	= NETLINK_CB(skb).dst_group; +	hdr->nm_pid	= NETLINK_CB(skb).creds.pid; +	hdr->nm_uid	= from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid); +	hdr->nm_gid	= from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid); +	netlink_set_status(hdr, NL_MMAP_STATUS_COPY); +} + +#else /* CONFIG_NETLINK_MMAP */ +#define netlink_skb_is_mmaped(skb)	false +#define netlink_rx_is_mmaped(sk)	false +#define netlink_tx_is_mmaped(sk)	false +#define 
netlink_mmap			sock_no_mmap +#define netlink_poll			datagram_poll +#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb)	0 +#endif /* CONFIG_NETLINK_MMAP */ +  static void netlink_destroy_callback(struct netlink_callback *cb)  {  	kfree_skb(cb->skb); @@ -164,6 +716,53 @@ static void netlink_consume_callback(struct netlink_callback *cb)  	kfree(cb);  } +static void netlink_skb_destructor(struct sk_buff *skb) +{ +#ifdef CONFIG_NETLINK_MMAP +	struct nl_mmap_hdr *hdr; +	struct netlink_ring *ring; +	struct sock *sk; + +	/* If a packet from the kernel to userspace was freed because of an +	 * error without being delivered to userspace, the kernel must reset +	 * the status. In the direction userspace to kernel, the status is +	 * always reset here after the packet was processed and freed. +	 */ +	if (netlink_skb_is_mmaped(skb)) { +		hdr = netlink_mmap_hdr(skb); +		sk = NETLINK_CB(skb).sk; + +		if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) { +			netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED); +			ring = &nlk_sk(sk)->tx_ring; +		} else { +			if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) { +				hdr->nm_len = 0; +				netlink_set_status(hdr, NL_MMAP_STATUS_VALID); +			} +			ring = &nlk_sk(sk)->rx_ring; +		} + +		WARN_ON(atomic_read(&ring->pending) == 0); +		atomic_dec(&ring->pending); +		sock_put(sk); + +		skb->data = NULL; +	} +#endif +	if (skb->sk != NULL) +		sock_rfree(skb); +} + +static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) +{ +	WARN_ON(skb->sk != NULL); +	skb->sk = sk; +	skb->destructor = netlink_skb_destructor; +	atomic_add(skb->truesize, &sk->sk_rmem_alloc); +	sk_mem_charge(sk, skb->truesize); +} +  static void netlink_sock_destruct(struct sock *sk)  {  	struct netlink_sock *nlk = nlk_sk(sk); @@ -177,6 +776,18 @@ static void netlink_sock_destruct(struct sock *sk)  	}  	skb_queue_purge(&sk->sk_receive_queue); +#ifdef CONFIG_NETLINK_MMAP +	if (1) { +		struct nl_mmap_req req; + +		memset(&req, 0, sizeof(req)); +		if (nlk->rx_ring.pg_vec) +			netlink_set_ring(sk, &req, true, false); +		memset(&req, 0, sizeof(req)); +		if (nlk->tx_ring.pg_vec) +			netlink_set_ring(sk, &req, true, true); +	} +#endif /* CONFIG_NETLINK_MMAP */  	if (!sock_flag(sk, SOCK_DEAD)) {  		printk(KERN_ERR "Freeing alive netlink socket %p\n", sk); @@ -440,6 +1051,9 @@ static int __netlink_create(struct net *net, struct socket *sock,  		mutex_init(nlk->cb_mutex);  	}  	init_waitqueue_head(&nlk->wait); +#ifdef CONFIG_NETLINK_MMAP +	mutex_init(&nlk->pg_vec_lock); +#endif  	sk->sk_destruct = netlink_sock_destruct;  	sk->sk_protocol = protocol; @@ -771,19 +1385,6 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,  	return 0;  } -static void netlink_overrun(struct sock *sk) -{ -	struct netlink_sock *nlk = nlk_sk(sk); - -	if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) { -		if (!test_and_set_bit(0, &nlk_sk(sk)->state)) { -			sk->sk_err = ENOBUFS; -			sk->sk_error_report(sk); -		} -	} -	atomic_inc(&sk->sk_drops); -} -  static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)  {  	struct sock *sock; @@ -836,8 +1437,9 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,  	nlk = nlk_sk(sk); -	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || -	    test_bit(0, &nlk->state)) { +	if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || +	     test_bit(NETLINK_CONGESTED, &nlk->state)) && +	    !netlink_skb_is_mmaped(skb)) {  		DECLARE_WAITQUEUE(wait, current);  		if (!*timeo) {  			if (!ssk || netlink_is_kernel(ssk)) @@ -851,7 +1453,7 @@ int 
netlink_attachskb(struct sock *sk, struct sk_buff *skb,  		add_wait_queue(&nlk->wait, &wait);  		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || -		     test_bit(0, &nlk->state)) && +		     test_bit(NETLINK_CONGESTED, &nlk->state)) &&  		    !sock_flag(sk, SOCK_DEAD))  			*timeo = schedule_timeout(*timeo); @@ -865,7 +1467,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,  		}  		return 1;  	} -	skb_set_owner_r(skb, sk); +	netlink_skb_set_owner_r(skb, sk);  	return 0;  } @@ -873,7 +1475,14 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)  {  	int len = skb->len; -	skb_queue_tail(&sk->sk_receive_queue, skb); +#ifdef CONFIG_NETLINK_MMAP +	if (netlink_skb_is_mmaped(skb)) +		netlink_queue_mmaped_skb(sk, skb); +	else if (netlink_rx_is_mmaped(sk)) +		netlink_ring_set_copied(sk, skb); +	else +#endif /* CONFIG_NETLINK_MMAP */ +		skb_queue_tail(&sk->sk_receive_queue, skb);  	sk->sk_data_ready(sk, len);  	return len;  } @@ -896,7 +1505,9 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)  {  	int delta; -	skb_orphan(skb); +	WARN_ON(skb->sk != NULL); +	if (netlink_skb_is_mmaped(skb)) +		return skb;  	delta = skb->end - skb->tail;  	if (delta * 2 < skb->truesize) @@ -916,16 +1527,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)  	return skb;  } -static void netlink_rcv_wake(struct sock *sk) -{ -	struct netlink_sock *nlk = nlk_sk(sk); - -	if (skb_queue_empty(&sk->sk_receive_queue)) -		clear_bit(0, &nlk->state); -	if (!test_bit(0, &nlk->state)) -		wake_up_interruptible(&nlk->wait); -} -  static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,  				  struct sock *ssk)  { @@ -935,8 +1536,8 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,  	ret = -ECONNREFUSED;  	if (nlk->netlink_rcv != NULL) {  		ret = skb->len; -		skb_set_owner_r(skb, sk); -		NETLINK_CB(skb).ssk = ssk; +		netlink_skb_set_owner_r(skb, sk); +		NETLINK_CB(skb).sk = ssk;  		nlk->netlink_rcv(skb);  		consume_skb(skb);  	} else { @@ -982,6 +1583,69 @@ retry:  }  EXPORT_SYMBOL(netlink_unicast); +struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size, +				  u32 dst_portid, gfp_t gfp_mask) +{ +#ifdef CONFIG_NETLINK_MMAP +	struct sock *sk = NULL; +	struct sk_buff *skb; +	struct netlink_ring *ring; +	struct nl_mmap_hdr *hdr; +	unsigned int maxlen; + +	sk = netlink_getsockbyportid(ssk, dst_portid); +	if (IS_ERR(sk)) +		goto out; + +	ring = &nlk_sk(sk)->rx_ring; +	/* fast-path without atomic ops for common case: non-mmaped receiver */ +	if (ring->pg_vec == NULL) +		goto out_put; + +	skb = alloc_skb_head(gfp_mask); +	if (skb == NULL) +		goto err1; + +	spin_lock_bh(&sk->sk_receive_queue.lock); +	/* check again under lock */ +	if (ring->pg_vec == NULL) +		goto out_free; + +	maxlen = ring->frame_size - NL_MMAP_HDRLEN; +	if (maxlen < size) +		goto out_free; + +	netlink_forward_ring(ring); +	hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED); +	if (hdr == NULL) +		goto err2; +	netlink_ring_setup_skb(skb, sk, ring, hdr); +	netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED); +	atomic_inc(&ring->pending); +	netlink_increment_head(ring); + +	spin_unlock_bh(&sk->sk_receive_queue.lock); +	return skb; + +err2: +	kfree_skb(skb); +	spin_unlock_bh(&sk->sk_receive_queue.lock); +	netlink_overrun(sk); +err1: +	sock_put(sk); +	return NULL; + +out_free: +	kfree_skb(skb); +	spin_unlock_bh(&sk->sk_receive_queue.lock); +out_put: +	sock_put(sk); +out: +#endif +	return alloc_skb(size, gfp_mask); +} 
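+
+/* Editorial sketch of a hypothetical caller, not part of this commit:
+ * kernel-side senders can adopt the allocator transparently, because it
+ * falls back to a plain alloc_skb() whenever the receiver has no RX ring
+ * mapped:
+ *
+ *	skb = netlink_alloc_skb(sk, nlmsg_total_size(payload),
+ *				dst_portid, GFP_KERNEL);
+ *	if (skb == NULL)
+ *		return -ENOBUFS;
+ *
+ * The netlink_ack() conversion further down follows this pattern.
+ */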
+EXPORT_SYMBOL_GPL(netlink_alloc_skb); +  int netlink_has_listeners(struct sock *sk, unsigned int group)  {  	int res = 0; @@ -1006,8 +1670,8 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)  	struct netlink_sock *nlk = nlk_sk(sk);  	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && -	    !test_bit(0, &nlk->state)) { -		skb_set_owner_r(skb, sk); +	    !test_bit(NETLINK_CONGESTED, &nlk->state)) { +		netlink_skb_set_owner_r(skb, sk);  		__netlink_sendskb(sk, skb);  		return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);  	} @@ -1242,7 +1906,8 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,  	if (level != SOL_NETLINK)  		return -ENOPROTOOPT; -	if (optlen >= sizeof(int) && +	if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING && +	    optlen >= sizeof(int) &&  	    get_user(val, (unsigned int __user *)optval))  		return -EFAULT; @@ -1284,13 +1949,32 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,  	case NETLINK_NO_ENOBUFS:  		if (val) {  			nlk->flags |= NETLINK_RECV_NO_ENOBUFS; -			clear_bit(0, &nlk->state); +			clear_bit(NETLINK_CONGESTED, &nlk->state);  			wake_up_interruptible(&nlk->wait);  		} else {  			nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;  		}  		err = 0;  		break; +#ifdef CONFIG_NETLINK_MMAP +	case NETLINK_RX_RING: +	case NETLINK_TX_RING: { +		struct nl_mmap_req req; + +		/* Rings might consume more memory than queue limits, require +		 * CAP_NET_ADMIN. +		 */ +		if (!capable(CAP_NET_ADMIN)) +			return -EPERM; +		if (optlen < sizeof(req)) +			return -EINVAL; +		if (copy_from_user(&req, optval, sizeof(req))) +			return -EFAULT; +		err = netlink_set_ring(sk, &req, false, +				       optname == NETLINK_TX_RING); +		break; +	} +#endif /* CONFIG_NETLINK_MMAP */  	default:  		err = -ENOPROTOOPT;  	} @@ -1401,6 +2085,13 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,  			goto out;  	} +	if (netlink_tx_is_mmaped(sk) && +	    msg->msg_iov->iov_base == NULL) { +		err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, +					   siocb); +		goto out; +	} +  	err = -EMSGSIZE;  	if (len > sk->sk_sndbuf - 32)  		goto out; @@ -1695,7 +2386,7 @@ struct nlmsghdr *  __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)  {  	struct nlmsghdr *nlh; -	int size = NLMSG_LENGTH(len); +	int size = nlmsg_msg_size(len);  	nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));  	nlh->nlmsg_type = type; @@ -1704,7 +2395,7 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla  	nlh->nlmsg_pid = portid;  	nlh->nlmsg_seq = seq;  	if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) -		memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); +		memset(nlmsg_data(nlh) + len, 0, NLMSG_ALIGN(size) - size);  	return nlh;  }  EXPORT_SYMBOL(__nlmsg_put); @@ -1733,9 +2424,13 @@ static int netlink_dump(struct sock *sk)  	alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); -	skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL); +	if (!netlink_rx_is_mmaped(sk) && +	    atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) +		goto errout_skb; +	skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL);  	if (!skb)  		goto errout_skb; +	netlink_skb_set_owner_r(skb, sk);  	len = cb->dump(skb, cb); @@ -1790,13 +2485,25 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,  	if (cb == NULL)  		return -ENOBUFS; +	/* Memory mapped dump requests need to be copied to avoid looping +	 * on the pending 
state in netlink_mmap_sendmsg() while the CB hold +	 * a reference to the skb. +	 */ +	if (netlink_skb_is_mmaped(skb)) { +		skb = skb_copy(skb, GFP_KERNEL); +		if (skb == NULL) { +			kfree(cb); +			return -ENOBUFS; +		} +	} else +		atomic_inc(&skb->users); +  	cb->dump = control->dump;  	cb->done = control->done;  	cb->nlh = nlh;  	cb->data = control->data;  	cb->module = control->module;  	cb->min_dump_alloc = control->min_dump_alloc; -	atomic_inc(&skb->users);  	cb->skb = skb;  	sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); @@ -1850,7 +2557,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)  	if (err)  		payload += nlmsg_len(nlh); -	skb = nlmsg_new(payload, GFP_KERNEL); +	skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload), +				NETLINK_CB(in_skb).portid, GFP_KERNEL);  	if (!skb) {  		struct sock *sk; @@ -2116,7 +2824,7 @@ static const struct proto_ops netlink_ops = {  	.socketpair =	sock_no_socketpair,  	.accept =	sock_no_accept,  	.getname =	netlink_getname, -	.poll =		datagram_poll, +	.poll =		netlink_poll,  	.ioctl =	sock_no_ioctl,  	.listen =	sock_no_listen,  	.shutdown =	sock_no_shutdown, @@ -2124,7 +2832,7 @@ static const struct proto_ops netlink_ops = {  	.getsockopt =	netlink_getsockopt,  	.sendmsg =	netlink_sendmsg,  	.recvmsg =	netlink_recvmsg, -	.mmap =		sock_no_mmap, +	.mmap =		netlink_mmap,  	.sendpage =	sock_no_sendpage,  }; diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h new file mode 100644 index 00000000000..ed8522265f4 --- /dev/null +++ b/net/netlink/af_netlink.h @@ -0,0 +1,82 @@ +#ifndef _AF_NETLINK_H +#define _AF_NETLINK_H + +#include <net/sock.h> + +#define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8) +#define NLGRPLONGS(x)	(NLGRPSZ(x)/sizeof(unsigned long)) + +struct netlink_ring { +	void			**pg_vec; +	unsigned int		head; +	unsigned int		frames_per_block; +	unsigned int		frame_size; +	unsigned int		frame_max; + +	unsigned int		pg_vec_order; +	unsigned int		pg_vec_pages; +	unsigned int		pg_vec_len; + +	atomic_t		pending; +}; + +struct netlink_sock { +	/* struct sock has to be the first member of netlink_sock */ +	struct sock		sk; +	u32			portid; +	u32			dst_portid; +	u32			dst_group; +	u32			flags; +	u32			subscriptions; +	u32			ngroups; +	unsigned long		*groups; +	unsigned long		state; +	wait_queue_head_t	wait; +	struct netlink_callback	*cb; +	struct mutex		*cb_mutex; +	struct mutex		cb_def_mutex; +	void			(*netlink_rcv)(struct sk_buff *skb); +	void			(*netlink_bind)(int group); +	struct module		*module; +#ifdef CONFIG_NETLINK_MMAP +	struct mutex		pg_vec_lock; +	struct netlink_ring	rx_ring; +	struct netlink_ring	tx_ring; +	atomic_t		mapped; +#endif /* CONFIG_NETLINK_MMAP */ +}; + +static inline struct netlink_sock *nlk_sk(struct sock *sk) +{ +	return container_of(sk, struct netlink_sock, sk); +} + +struct nl_portid_hash { +	struct hlist_head	*table; +	unsigned long		rehash_time; + +	unsigned int		mask; +	unsigned int		shift; + +	unsigned int		entries; +	unsigned int		max_shift; + +	u32			rnd; +}; + +struct netlink_table { +	struct nl_portid_hash	hash; +	struct hlist_head	mc_list; +	struct listeners __rcu	*listeners; +	unsigned int		flags; +	unsigned int		groups; +	struct mutex		*cb_mutex; +	struct module		*module; +	void			(*bind)(int group); +	int			registered; +}; + +extern struct netlink_table *nl_table; +extern rwlock_t nl_table_lock; + +#endif diff --git a/net/netlink/diag.c b/net/netlink/diag.c new file mode 100644 index 00000000000..1af29624b92 --- /dev/null +++ 
b/net/netlink/diag.c @@ -0,0 +1,227 @@ +#include <linux/module.h> + +#include <net/sock.h> +#include <linux/netlink.h> +#include <linux/sock_diag.h> +#include <linux/netlink_diag.h> + +#include "af_netlink.h" + +#ifdef CONFIG_NETLINK_MMAP +static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type, +			    struct sk_buff *nlskb) +{ +	struct netlink_diag_ring ndr; + +	ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT; +	ndr.ndr_block_nr   = ring->pg_vec_len; +	ndr.ndr_frame_size = ring->frame_size; +	ndr.ndr_frame_nr   = ring->frame_max + 1; + +	return nla_put(nlskb, nl_type, sizeof(ndr), &ndr); +} + +static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) +{ +	struct netlink_sock *nlk = nlk_sk(sk); +	int ret; + +	mutex_lock(&nlk->pg_vec_lock); +	ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb); +	if (!ret) +		ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING, +				       nlskb); +	mutex_unlock(&nlk->pg_vec_lock); + +	return ret; +} +#else +static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb) +{ +	return 0; +} +#endif + +static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb) +{ +	struct netlink_sock *nlk = nlk_sk(sk); + +	if (nlk->groups == NULL) +		return 0; + +	return nla_put(nlskb, NETLINK_DIAG_GROUPS, NLGRPSZ(nlk->ngroups), +		       nlk->groups); +} + +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, +			struct netlink_diag_req *req, +			u32 portid, u32 seq, u32 flags, int sk_ino) +{ +	struct nlmsghdr *nlh; +	struct netlink_diag_msg *rep; +	struct netlink_sock *nlk = nlk_sk(sk); + +	nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), +			flags); +	if (!nlh) +		return -EMSGSIZE; + +	rep = nlmsg_data(nlh); +	rep->ndiag_family	= AF_NETLINK; +	rep->ndiag_type		= sk->sk_type; +	rep->ndiag_protocol	= sk->sk_protocol; +	rep->ndiag_state	= sk->sk_state; + +	rep->ndiag_ino		= sk_ino; +	rep->ndiag_portid	= nlk->portid; +	rep->ndiag_dst_portid	= nlk->dst_portid; +	rep->ndiag_dst_group	= nlk->dst_group; +	sock_diag_save_cookie(sk, rep->ndiag_cookie); + +	if ((req->ndiag_show & NDIAG_SHOW_GROUPS) && +	    sk_diag_dump_groups(sk, skb)) +		goto out_nlmsg_trim; + +	if ((req->ndiag_show & NDIAG_SHOW_MEMINFO) && +	    sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO)) +		goto out_nlmsg_trim; + +	if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) && +	    sk_diag_put_rings_cfg(sk, skb)) +		goto out_nlmsg_trim; + +	return nlmsg_end(skb, nlh); + +out_nlmsg_trim: +	nlmsg_cancel(skb, nlh); +	return -EMSGSIZE; +} + +static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, +				int protocol, int s_num) +{ +	struct netlink_table *tbl = &nl_table[protocol]; +	struct nl_portid_hash *hash = &tbl->hash; +	struct net *net = sock_net(skb->sk); +	struct netlink_diag_req *req; +	struct sock *sk; +	int ret = 0, num = 0, i; + +	req = nlmsg_data(cb->nlh); + +	for (i = 0; i <= hash->mask; i++) { +		sk_for_each(sk, &hash->table[i]) { +			if (!net_eq(sock_net(sk), net)) +				continue; +			if (num < s_num) { +				num++; +				continue; +			} + +			if (sk_diag_fill(sk, skb, req, +					 NETLINK_CB(cb->skb).portid, +					 cb->nlh->nlmsg_seq, +					 NLM_F_MULTI, +					 sock_i_ino(sk)) < 0) { +				ret = 1; +				goto done; +			} + +			num++; +		} +	} + +	sk_for_each_bound(sk, &tbl->mc_list) { +		if (sk_hashed(sk)) +			continue; +		if (!net_eq(sock_net(sk), net)) +			continue; +		if (num < s_num) { +			num++; +			continue; +		} + +		if (sk_diag_fill(sk, skb, req, +				 
NETLINK_CB(cb->skb).portid, +				 cb->nlh->nlmsg_seq, +				 NLM_F_MULTI, +				 sock_i_ino(sk)) < 0) { +			ret = 1; +			goto done; +		} +		num++; +	} +done: +	cb->args[0] = num; +	cb->args[1] = protocol; + +	return ret; +} + +static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ +	struct netlink_diag_req *req; +	int s_num = cb->args[0]; + +	req = nlmsg_data(cb->nlh); + +	read_lock(&nl_table_lock); + +	if (req->sdiag_protocol == NDIAG_PROTO_ALL) { +		int i; + +		for (i = cb->args[1]; i < MAX_LINKS; i++) { +			if (__netlink_diag_dump(skb, cb, i, s_num)) +				break; +			s_num = 0; +		} +	} else { +		if (req->sdiag_protocol >= MAX_LINKS) { +			read_unlock(&nl_table_lock); +			return -ENOENT; +		} + +		__netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num); +	} + +	read_unlock(&nl_table_lock); + +	return skb->len; +} + +static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +{ +	int hdrlen = sizeof(struct netlink_diag_req); +	struct net *net = sock_net(skb->sk); + +	if (nlmsg_len(h) < hdrlen) +		return -EINVAL; + +	if (h->nlmsg_flags & NLM_F_DUMP) { +		struct netlink_dump_control c = { +			.dump = netlink_diag_dump, +		}; +		return netlink_dump_start(net->diag_nlsk, skb, h, &c); +	} else +		return -EOPNOTSUPP; +} + +static const struct sock_diag_handler netlink_diag_handler = { +	.family = AF_NETLINK, +	.dump = netlink_diag_handler_dump, +}; + +static int __init netlink_diag_init(void) +{ +	return sock_diag_register(&netlink_diag_handler); +} + +static void __exit netlink_diag_exit(void) +{ +	sock_diag_unregister(&netlink_diag_handler); +} + +module_init(netlink_diag_init); +module_exit(netlink_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 16 /* AF_NETLINK */); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 5a55be3f17a..2fd6dbea327 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -16,10 +16,12 @@  #include <linux/skbuff.h>  #include <linux/mutex.h>  #include <linux/bitmap.h> +#include <linux/rwsem.h>  #include <net/sock.h>  #include <net/genetlink.h>  static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */ +static DECLARE_RWSEM(cb_lock);  void genl_lock(void)  { @@ -41,6 +43,18 @@ int lockdep_genl_is_held(void)  EXPORT_SYMBOL(lockdep_genl_is_held);  #endif +static void genl_lock_all(void) +{ +	down_write(&cb_lock); +	genl_lock(); +} + +static void genl_unlock_all(void) +{ +	genl_unlock(); +	up_write(&cb_lock); +} +  #define GENL_FAM_TAB_SIZE	16  #define GENL_FAM_TAB_MASK	(GENL_FAM_TAB_SIZE - 1) @@ -144,7 +158,7 @@ int genl_register_mc_group(struct genl_family *family,  	BUG_ON(grp->name[0] == '\0');  	BUG_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL); -	genl_lock(); +	genl_lock_all();  	/* special-case our own group */  	if (grp == ¬ify_grp) @@ -213,7 +227,7 @@ int genl_register_mc_group(struct genl_family *family,  	genl_ctrl_event(CTRL_CMD_NEWMCAST_GRP, grp);   out: -	genl_unlock(); +	genl_unlock_all();  	return err;  }  EXPORT_SYMBOL(genl_register_mc_group); @@ -255,9 +269,9 @@ static void __genl_unregister_mc_group(struct genl_family *family,  void genl_unregister_mc_group(struct genl_family *family,  			      struct genl_multicast_group *grp)  { -	genl_lock(); +	genl_lock_all();  	__genl_unregister_mc_group(family, grp); -	genl_unlock(); +	genl_unlock_all();  }  EXPORT_SYMBOL(genl_unregister_mc_group); @@ -303,9 +317,9 @@ int genl_register_ops(struct genl_family *family, struct genl_ops *ops)  	if (ops->policy)  		
ops->flags |= GENL_CMD_CAP_HASPOL; -	genl_lock(); +	genl_lock_all();  	list_add_tail(&ops->ops_list, &family->ops_list); -	genl_unlock(); +	genl_unlock_all();  	genl_ctrl_event(CTRL_CMD_NEWOPS, ops);  	err = 0; @@ -334,16 +348,16 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops)  {  	struct genl_ops *rc; -	genl_lock(); +	genl_lock_all();  	list_for_each_entry(rc, &family->ops_list, ops_list) {  		if (rc == ops) {  			list_del(&ops->ops_list); -			genl_unlock(); +			genl_unlock_all();  			genl_ctrl_event(CTRL_CMD_DELOPS, ops);  			return 0;  		}  	} -	genl_unlock(); +	genl_unlock_all();  	return -ENOENT;  } @@ -373,7 +387,7 @@ int genl_register_family(struct genl_family *family)  	INIT_LIST_HEAD(&family->ops_list);  	INIT_LIST_HEAD(&family->mcast_groups); -	genl_lock(); +	genl_lock_all();  	if (genl_family_find_byname(family->name)) {  		err = -EEXIST; @@ -394,7 +408,7 @@ int genl_register_family(struct genl_family *family)  		goto errout_locked;  	} -	if (family->maxattr) { +	if (family->maxattr && !family->parallel_ops) {  		family->attrbuf = kmalloc((family->maxattr+1) *  					sizeof(struct nlattr *), GFP_KERNEL);  		if (family->attrbuf == NULL) { @@ -405,14 +419,14 @@ int genl_register_family(struct genl_family *family)  		family->attrbuf = NULL;  	list_add_tail(&family->family_list, genl_family_chain(family->id)); -	genl_unlock(); +	genl_unlock_all();  	genl_ctrl_event(CTRL_CMD_NEWFAMILY, family);  	return 0;  errout_locked: -	genl_unlock(); +	genl_unlock_all();  errout:  	return err;  } @@ -476,7 +490,7 @@ int genl_unregister_family(struct genl_family *family)  {  	struct genl_family *rc; -	genl_lock(); +	genl_lock_all();  	genl_unregister_mc_groups(family); @@ -486,14 +500,14 @@ int genl_unregister_family(struct genl_family *family)  		list_del(&rc->family_list);  		INIT_LIST_HEAD(&family->ops_list); -		genl_unlock(); +		genl_unlock_all();  		kfree(family->attrbuf);  		genl_ctrl_event(CTRL_CMD_DELFAMILY, family);  		return 0;  	} -	genl_unlock(); +	genl_unlock_all();  	return -ENOENT;  } @@ -530,19 +544,17 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,  }  EXPORT_SYMBOL(genlmsg_put); -static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int genl_family_rcv_msg(struct genl_family *family, +			       struct sk_buff *skb, +			       struct nlmsghdr *nlh)  {  	struct genl_ops *ops; -	struct genl_family *family;  	struct net *net = sock_net(skb->sk);  	struct genl_info info;  	struct genlmsghdr *hdr = nlmsg_data(nlh); +	struct nlattr **attrbuf;  	int hdrlen, err; -	family = genl_family_find_byid(nlh->nlmsg_type); -	if (family == NULL) -		return -ENOENT; -  	/* this family doesn't exist in this netns */  	if (!family->netnsok && !net_eq(net, &init_net))  		return -ENOENT; @@ -560,29 +572,33 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)  		return -EPERM;  	if (nlh->nlmsg_flags & NLM_F_DUMP) { +		struct netlink_dump_control c = { +			.dump = ops->dumpit, +			.done = ops->done, +		}; +  		if (ops->dumpit == NULL)  			return -EOPNOTSUPP; -		genl_unlock(); -		{ -			struct netlink_dump_control c = { -				.dump = ops->dumpit, -				.done = ops->done, -			}; -			err = netlink_dump_start(net->genl_sock, skb, nlh, &c); -		} -		genl_lock(); -		return err; +		return netlink_dump_start(net->genl_sock, skb, nlh, &c);  	}  	if (ops->doit == NULL)  		return -EOPNOTSUPP; -	if (family->attrbuf) { -		err = nlmsg_parse(nlh, hdrlen, family->attrbuf, family->maxattr, +	if (family->maxattr && family->parallel_ops) { +		
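+		/* Editorial note, not commit text: parallel families allocate
+		 * a private attribute buffer per message, since several
+		 * messages may now be parsed concurrently; non-parallel
+		 * families keep the single family->attrbuf allocated at
+		 * registration time and remain serialized by genl_lock(). */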
attrbuf = kmalloc((family->maxattr+1) * +					sizeof(struct nlattr *), GFP_KERNEL); +		if (attrbuf == NULL) +			return -ENOMEM; +	} else +		attrbuf = family->attrbuf; + +	if (attrbuf) { +		err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,  				  ops->policy);  		if (err < 0) -			return err; +			goto out;  	}  	info.snd_seq = nlh->nlmsg_seq; @@ -590,14 +606,14 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)  	info.nlhdr = nlh;  	info.genlhdr = nlmsg_data(nlh);  	info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; -	info.attrs = family->attrbuf; +	info.attrs = attrbuf;  	genl_info_net_set(&info, net);  	memset(&info.user_ptr, 0, sizeof(info.user_ptr));  	if (family->pre_doit) {  		err = family->pre_doit(ops, skb, &info);  		if (err) -			return err; +			goto out;  	}  	err = ops->doit(skb, &info); @@ -605,14 +621,38 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)  	if (family->post_doit)  		family->post_doit(ops, skb, &info); +out: +	if (family->parallel_ops) +		kfree(attrbuf); + +	return err; +} + +static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +{ +	struct genl_family *family; +	int err; + +	family = genl_family_find_byid(nlh->nlmsg_type); +	if (family == NULL) +		return -ENOENT; + +	if (!family->parallel_ops) +		genl_lock(); + +	err = genl_family_rcv_msg(family, skb, nlh); + +	if (!family->parallel_ops) +		genl_unlock(); +  	return err;  }  static void genl_rcv(struct sk_buff *skb)  { -	genl_lock(); +	down_read(&cb_lock);  	netlink_rcv_skb(skb, &genl_rcv_msg); -	genl_unlock(); +	up_read(&cb_lock);  }  /************************************************************************** @@ -918,7 +958,6 @@ static int __net_init genl_pernet_init(struct net *net)  {  	struct netlink_kernel_cfg cfg = {  		.input		= genl_rcv, -		.cb_mutex	= &genl_mutex,  		.flags		= NL_CFG_F_NONROOT_RECV,  	}; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 103bd704b5f..ec0c80fde69 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -834,6 +834,8 @@ static int nr_getname(struct socket *sock, struct sockaddr *uaddr,  	struct sock *sk = sock->sk;  	struct nr_sock *nr = nr_sk(sk); +	memset(&sax->fsa_ax25, 0, sizeof(struct sockaddr_ax25)); +  	lock_sock(sk);  	if (peer != 0) {  		if (sk->sk_state != TCP_ESTABLISHED) { diff --git a/net/nfc/Kconfig b/net/nfc/Kconfig index 60c3bbb63e8..5948b2fc72f 100644 --- a/net/nfc/Kconfig +++ b/net/nfc/Kconfig @@ -4,6 +4,7 @@  menuconfig NFC  	depends on NET +	depends on RFKILL || !RFKILL  	tristate "NFC subsystem support"  	default n  	help @@ -15,6 +16,5 @@ menuconfig NFC  source "net/nfc/nci/Kconfig"  source "net/nfc/hci/Kconfig" -source "net/nfc/llcp/Kconfig"  source "drivers/nfc/Kconfig" diff --git a/net/nfc/Makefile b/net/nfc/Makefile index d1a117c2c40..fb799deaed4 100644 --- a/net/nfc/Makefile +++ b/net/nfc/Makefile @@ -5,6 +5,8 @@  obj-$(CONFIG_NFC) += nfc.o  obj-$(CONFIG_NFC_NCI) += nci/  obj-$(CONFIG_NFC_HCI) += hci/ +#obj-$(CONFIG_NFC_LLCP) += llcp/ + +nfc-objs := core.o netlink.o af_nfc.o rawsock.o llcp_core.o llcp_commands.o \ +		llcp_sock.o -nfc-objs := core.o netlink.o af_nfc.o rawsock.o -nfc-$(CONFIG_NFC_LLCP)	+= llcp/llcp.o llcp/commands.o llcp/sock.o diff --git a/net/nfc/core.c b/net/nfc/core.c index 6ceee8e181c..40d2527693d 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -27,6 +27,7 @@  #include <linux/kernel.h>  #include <linux/module.h>  #include <linux/slab.h> +#include <linux/rfkill.h>  #include <linux/nfc.h>  #include <net/genetlink.h> @@ -58,6 +59,11 @@ int 
nfc_dev_up(struct nfc_dev *dev)  	device_lock(&dev->dev); +	if (dev->rfkill && rfkill_blocked(dev->rfkill)) { +		rc = -ERFKILL; +		goto error; +	} +  	if (!device_is_registered(&dev->dev)) {  		rc = -ENODEV;  		goto error; @@ -117,6 +123,24 @@ error:  	return rc;  } +static int nfc_rfkill_set_block(void *data, bool blocked) +{ +	struct nfc_dev *dev = data; + +	pr_debug("%s blocked %d", dev_name(&dev->dev), blocked); + +	if (!blocked) +		return 0; + +	nfc_dev_down(dev); + +	return 0; +} + +static const struct rfkill_ops nfc_rfkill_ops = { +	.set_block = nfc_rfkill_set_block, +}; +  /**   * nfc_start_poll - start polling for nfc targets   * @@ -143,6 +167,11 @@ int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols)  		goto error;  	} +	if (!dev->dev_up) { +		rc = -ENODEV; +		goto error; +	} +  	if (dev->polling) {  		rc = -EBUSY;  		goto error; @@ -835,6 +864,15 @@ int nfc_register_device(struct nfc_dev *dev)  		pr_debug("The userspace won't be notified that the device %s was added\n",  			 dev_name(&dev->dev)); +	dev->rfkill = rfkill_alloc(dev_name(&dev->dev), &dev->dev, +				   RFKILL_TYPE_NFC, &nfc_rfkill_ops, dev); +	if (dev->rfkill) { +		if (rfkill_register(dev->rfkill) < 0) { +			rfkill_destroy(dev->rfkill); +			dev->rfkill = NULL; +		} +	} +  	return 0;  }  EXPORT_SYMBOL(nfc_register_device); @@ -852,6 +890,11 @@ void nfc_unregister_device(struct nfc_dev *dev)  	id = dev->idx; +	if (dev->rfkill) { +		rfkill_unregister(dev->rfkill); +		rfkill_destroy(dev->rfkill); +	} +  	if (dev->ops->check_presence) {  		device_lock(&dev->dev);  		dev->shutting_down = true; diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp.h index 0eae5c50950..ff8c434f7df 100644 --- a/net/nfc/llcp/llcp.h +++ b/net/nfc/llcp.h @@ -31,6 +31,7 @@ enum llcp_state {  #define LLCP_MAX_LTO  0xff  #define LLCP_MAX_RW   15  #define LLCP_MAX_MIUX 0x7ff +#define LLCP_MAX_MIU (LLCP_MAX_MIUX + 128)  #define LLCP_WKS_NUM_SAP   16  #define LLCP_SDP_NUM_SAP   16 @@ -46,6 +47,19 @@ struct llcp_sock_list {  	rwlock_t          lock;  }; +struct nfc_llcp_sdp_tlv { +	u8 *tlv; +	u8 tlv_len; + +	char *uri; +	u8 tid; +	u8 sap; + +	unsigned long time; + +	struct hlist_node node; +}; +  struct nfc_llcp_local {  	struct list_head list;  	struct nfc_dev *dev; @@ -86,6 +100,12 @@ struct nfc_llcp_local {  	u8  remote_opt;  	u16 remote_wks; +	struct mutex sdreq_lock; +	struct hlist_head pending_sdreqs; +	struct timer_list sdreq_timer; +	struct work_struct sdreq_timeout_work; +	u8 sdreq_next_tid; +  	/* sockets array */  	struct llcp_sock_list sockets;  	struct llcp_sock_list connecting_sockets; @@ -105,7 +125,12 @@ struct nfc_llcp_sock {  	char *service_name;  	size_t service_name_len;  	u8 rw; -	u16 miu; +	__be16 miux; + + +	/* Remote link parameters */ +	u8 remote_rw; +	u16 remote_miu;  	/* Link variables */  	u8 send_n; @@ -138,6 +163,7 @@ struct nfc_llcp_ui_cb {  #define LLCP_HEADER_SIZE   2  #define LLCP_SEQUENCE_SIZE 1 +#define LLCP_AGF_PDU_HEADER_SIZE 2  /* LLCP versions: 1.1 is 1.0 plus SDP */  #define LLCP_VERSION_10 0x10 @@ -186,6 +212,7 @@ struct nfc_llcp_ui_cb {  void nfc_llcp_sock_link(struct llcp_sock_list *l, struct sock *s);  void nfc_llcp_sock_unlink(struct llcp_sock_list *l, struct sock *s); +void nfc_llcp_socket_remote_param_init(struct nfc_llcp_sock *sock);  struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev);  struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local);  int nfc_llcp_local_put(struct nfc_llcp_local *local); @@ -213,12 +240,20 @@ int nfc_llcp_parse_connection_tlv(struct 
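
The rfkill hunks above follow the stock pattern: allocate a switch bound to the device, register it best-effort, and tear it down on unregister; nfc_dev_up() then refuses a blocked adapter with -ERFKILL. A condensed sketch of that life cycle (the demo_* names are illustrative, and a failed registration is deliberately non-fatal):

	static int demo_set_block(void *data, bool blocked)
	{
		if (blocked)
			demo_power_down(data);	/* hypothetical helper */
		return 0;
	}

	static const struct rfkill_ops demo_rfkill_ops = {
		.set_block = demo_set_block,
	};

	priv->rfkill = rfkill_alloc(dev_name(dev), dev, RFKILL_TYPE_NFC,
				    &demo_rfkill_ops, priv);
	if (priv->rfkill && rfkill_register(priv->rfkill) < 0) {
		rfkill_destroy(priv->rfkill);
		priv->rfkill = NULL;	/* keep working without RF-kill */
	}
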
nfc_llcp_sock *sock,  /* Commands API */  void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);  u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length); +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap); +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, +						  size_t uri_len); +void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); +void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head);  void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);  int nfc_llcp_disconnect(struct nfc_llcp_sock *sock);  int nfc_llcp_send_symm(struct nfc_dev *dev);  int nfc_llcp_send_connect(struct nfc_llcp_sock *sock);  int nfc_llcp_send_cc(struct nfc_llcp_sock *sock); -int nfc_llcp_send_snl(struct nfc_llcp_local *local, u8 tid, u8 sap); +int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local, +			    struct hlist_head *tlv_list, size_t tlvs_len); +int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local, +			    struct hlist_head *tlv_list, size_t tlvs_len);  int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason);  int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock);  int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock, diff --git a/net/nfc/llcp/Kconfig b/net/nfc/llcp/Kconfig deleted file mode 100644 index a1a41cd6825..00000000000 --- a/net/nfc/llcp/Kconfig +++ /dev/null @@ -1,7 +0,0 @@ -config NFC_LLCP -       depends on NFC -       bool "NFC LLCP support" -       default n -       help -	 Say Y here if you want to build support for a kernel NFC LLCP -	 implementation.
\ No newline at end of file diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp_commands.c index c6bc3bd9505..c1b23eef83c 100644 --- a/net/nfc/llcp/commands.c +++ b/net/nfc/llcp_commands.c @@ -26,7 +26,7 @@  #include <net/nfc/nfc.h> -#include "../nfc.h" +#include "nfc.h"  #include "llcp.h"  static u8 llcp_tlv_length[LLCP_TLV_MAX] = { @@ -117,6 +117,88 @@ u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length)  	return tlv;  } +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap) +{ +	struct nfc_llcp_sdp_tlv *sdres; +	u8 value[2]; + +	sdres = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL); +	if (sdres == NULL) +		return NULL; + +	value[0] = tid; +	value[1] = sap; + +	sdres->tlv = nfc_llcp_build_tlv(LLCP_TLV_SDRES, value, 2, +					&sdres->tlv_len); +	if (sdres->tlv == NULL) { +		kfree(sdres); +		return NULL; +	} + +	sdres->tid = tid; +	sdres->sap = sap; + +	INIT_HLIST_NODE(&sdres->node); + +	return sdres; +} + +struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri, +						  size_t uri_len) +{ +	struct nfc_llcp_sdp_tlv *sdreq; + +	pr_debug("uri: %s, len: %zu\n", uri, uri_len); + +	sdreq = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL); +	if (sdreq == NULL) +		return NULL; + +	sdreq->tlv_len = uri_len + 3; + +	if (uri[uri_len - 1] == 0) +		sdreq->tlv_len--; + +	sdreq->tlv = kzalloc(sdreq->tlv_len + 1, GFP_KERNEL); +	if (sdreq->tlv == NULL) { +		kfree(sdreq); +		return NULL; +	} + +	sdreq->tlv[0] = LLCP_TLV_SDREQ; +	sdreq->tlv[1] = sdreq->tlv_len - 2; +	sdreq->tlv[2] = tid; + +	sdreq->tid = tid; +	sdreq->uri = sdreq->tlv + 3; +	memcpy(sdreq->uri, uri, uri_len); + +	sdreq->time = jiffies; + +	INIT_HLIST_NODE(&sdreq->node); + +	return sdreq; +} + +void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp) +{ +	kfree(sdp->tlv); +	kfree(sdp); +} + +void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head) +{ +	struct nfc_llcp_sdp_tlv *sdp; +	struct hlist_node *n; + +	hlist_for_each_entry_safe(sdp, n, head, node) { +		hlist_del(&sdp->node); + +		nfc_llcp_free_sdp_tlv(sdp); +	} +} +  int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local,  			  u8 *tlv_array, u16 tlv_array_len)  { @@ -184,10 +266,10 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,  		switch (type) {  		case LLCP_TLV_MIUX: -			sock->miu = llcp_tlv_miux(tlv) + 128; +			sock->remote_miu = llcp_tlv_miux(tlv) + 128;  			break;  		case LLCP_TLV_RW: -			sock->rw = llcp_tlv_rw(tlv); +			sock->remote_rw = llcp_tlv_rw(tlv);  			break;  		case LLCP_TLV_SN:  			break; @@ -200,7 +282,8 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,  		tlv += length + 2;  	} -	pr_debug("sock %p rw %d miu %d\n", sock, sock->rw, sock->miu); +	pr_debug("sock %p rw %d miu %d\n", sock, +		 sock->remote_rw, sock->remote_miu);  	return 0;  } @@ -318,9 +401,9 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)  	struct sk_buff *skb;  	u8 *service_name_tlv = NULL, service_name_tlv_length;  	u8 *miux_tlv = NULL, miux_tlv_length; -	u8 *rw_tlv = NULL, rw_tlv_length; +	u8 *rw_tlv = NULL, rw_tlv_length, rw;  	int err; -	u16 size = 0; +	u16 size = 0, miux;  	pr_debug("Sending CONNECT\n"); @@ -336,11 +419,16 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)  		size += service_name_tlv_length;  	} -	miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, +	/* If the socket parameters are not set, use the local ones */ +	miux = be16_to_cpu(sock->miux) > LLCP_MAX_MIUX ? +		local->miux : sock->miux; +	rw = sock->rw > LLCP_MAX_RW ? 
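
For orientation, nfc_llcp_build_sdreq_tlv() above emits the SNL service-discovery request TLV: one type byte, one length byte, the transaction id, then the URI with any trailing NUL stripped. Worked out for an example URI (TLV type 0x08 per the LLCP 1.1 TLV table; the URI choice is arbitrary):

	/* SDREQ TLV for tid 0x01 and "urn:nfc:sn:snep" (15 chars):
	 *   tlv[0] = 0x08        type (LLCP_TLV_SDREQ)
	 *   tlv[1] = 0x10        length = tid (1) + uri_len (15)
	 *   tlv[2] = 0x01        transaction id
	 *   tlv[3..17]           the URI, no NUL on the wire
	 * i.e. tlv_len = uri_len + 3 = 18 bytes, as computed above.
	 */
	static const u8 example_sdreq[18] = {
		0x08, 0x10, 0x01,
		'u', 'r', 'n', ':', 'n', 'f', 'c', ':',
		's', 'n', ':', 's', 'n', 'e', 'p',
	};
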
local->rw : sock->rw; + +	miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0,  				      &miux_tlv_length);  	size += miux_tlv_length; -	rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &local->rw, 0, &rw_tlv_length); +	rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length);  	size += rw_tlv_length;  	pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len); @@ -377,9 +465,9 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock)  	struct nfc_llcp_local *local;  	struct sk_buff *skb;  	u8 *miux_tlv = NULL, miux_tlv_length; -	u8 *rw_tlv = NULL, rw_tlv_length; +	u8 *rw_tlv = NULL, rw_tlv_length, rw;  	int err; -	u16 size = 0; +	u16 size = 0, miux;  	pr_debug("Sending CC\n"); @@ -387,11 +475,16 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock)  	if (local == NULL)  		return -ENODEV; -	miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, +	/* If the socket parameters are not set, use the local ones */ +	miux = be16_to_cpu(sock->miux) > LLCP_MAX_MIUX ? +		local->miux : sock->miux; +	rw = sock->rw > LLCP_MAX_RW ? local->rw : sock->rw; + +	miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0,  				      &miux_tlv_length);  	size += miux_tlv_length; -	rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &local->rw, 0, &rw_tlv_length); +	rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length);  	size += rw_tlv_length;  	skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size); @@ -416,48 +509,90 @@ error_tlv:  	return err;  } -int nfc_llcp_send_snl(struct nfc_llcp_local *local, u8 tid, u8 sap) +static struct sk_buff *nfc_llcp_allocate_snl(struct nfc_llcp_local *local, +					     size_t tlv_length)  {  	struct sk_buff *skb;  	struct nfc_dev *dev; -	u8 *sdres_tlv = NULL, sdres_tlv_length, sdres[2];  	u16 size = 0; -	pr_debug("Sending SNL tid 0x%x sap 0x%x\n", tid, sap); -  	if (local == NULL) -		return -ENODEV; +		return ERR_PTR(-ENODEV);  	dev = local->dev;  	if (dev == NULL) -		return -ENODEV; - -	sdres[0] = tid; -	sdres[1] = sap; -	sdres_tlv = nfc_llcp_build_tlv(LLCP_TLV_SDRES, sdres, 0, -				       &sdres_tlv_length); -	if (sdres_tlv == NULL) -		return -ENOMEM; +		return ERR_PTR(-ENODEV);  	size += LLCP_HEADER_SIZE;  	size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE; -	size += sdres_tlv_length; +	size += tlv_length;  	skb = alloc_skb(size, GFP_KERNEL); -	if (skb == NULL) { -		kfree(sdres_tlv); -		return -ENOMEM; -	} +	if (skb == NULL) +		return ERR_PTR(-ENOMEM);  	skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE);  	skb = llcp_add_header(skb, LLCP_SAP_SDP, LLCP_SAP_SDP, LLCP_PDU_SNL); -	memcpy(skb_put(skb, sdres_tlv_length), sdres_tlv, sdres_tlv_length); +	return skb; +} + +int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local, +			    struct hlist_head *tlv_list, size_t tlvs_len) +{ +	struct nfc_llcp_sdp_tlv *sdp; +	struct hlist_node *n; +	struct sk_buff *skb; + +	skb = nfc_llcp_allocate_snl(local, tlvs_len); +	if (IS_ERR(skb)) +		return PTR_ERR(skb); + +	hlist_for_each_entry_safe(sdp, n, tlv_list, node) { +		memcpy(skb_put(skb, sdp->tlv_len), sdp->tlv, sdp->tlv_len); + +		hlist_del(&sdp->node); + +		nfc_llcp_free_sdp_tlv(sdp); +	}  	skb_queue_tail(&local->tx_queue, skb); -	kfree(sdres_tlv); +	return 0; +} + +int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local, +			    struct hlist_head *tlv_list, size_t tlvs_len) +{ +	struct nfc_llcp_sdp_tlv *sdreq; +	struct hlist_node *n; +	struct sk_buff *skb; + +	skb = nfc_llcp_allocate_snl(local, tlvs_len); +	if (IS_ERR(skb)) +		return PTR_ERR(skb); + +	mutex_lock(&local->sdreq_lock); + +	if 
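
The ternaries above implement "per-socket parameter if set, local default otherwise": sockets are created with rw = LLCP_MAX_RW + 1 and miux = LLCP_MAX_MIUX + 1 (see the llcp_sock changes later in this patch), so any out-of-range value means "unset". Distilled, ignoring the big-endian storage of MIUX:

	static u8 effective_rw(u8 sock_rw, u8 local_rw)
	{
		/* sockets start at LLCP_MAX_RW + 1, i.e. unset */
		return sock_rw > LLCP_MAX_RW ? local_rw : sock_rw;
	}

	static u16 effective_miux(u16 sock_miux, u16 local_miux)
	{
		/* sockets start at LLCP_MAX_MIUX + 1, i.e. unset */
		return sock_miux > LLCP_MAX_MIUX ? local_miux : sock_miux;
	}
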
(hlist_empty(&local->pending_sdreqs)) +		mod_timer(&local->sdreq_timer, +			  jiffies + msecs_to_jiffies(3 * local->remote_lto)); + +	hlist_for_each_entry_safe(sdreq, n, tlv_list, node) { +		pr_debug("tid %d for %s\n", sdreq->tid, sdreq->uri); + +		memcpy(skb_put(skb, sdreq->tlv_len), sdreq->tlv, +		       sdreq->tlv_len); + +		hlist_del(&sdreq->node); + +		hlist_add_head(&sdreq->node, &local->pending_sdreqs); +	} + +	mutex_unlock(&local->sdreq_lock); + +	skb_queue_tail(&local->tx_queue, skb);  	return 0;  } @@ -523,6 +658,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,  	struct nfc_llcp_local *local;  	size_t frag_len = 0, remaining_len;  	u8 *msg_data, *msg_ptr; +	u16 remote_miu;  	pr_debug("Send I frame len %zd\n", len); @@ -532,8 +668,8 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,  	/* Remote is ready but has not acknowledged our frames */  	if((sock->remote_ready && -	    skb_queue_len(&sock->tx_pending_queue) >= sock->rw && -	    skb_queue_len(&sock->tx_queue) >= 2 * sock->rw)) { +	    skb_queue_len(&sock->tx_pending_queue) >= sock->remote_rw && +	    skb_queue_len(&sock->tx_queue) >= 2 * sock->remote_rw)) {  		pr_err("Pending queue is full %d frames\n",  		       skb_queue_len(&sock->tx_pending_queue));  		return -ENOBUFS; @@ -541,7 +677,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,  	/* Remote is not ready and we've been queueing enough frames */  	if ((!sock->remote_ready && -	     skb_queue_len(&sock->tx_queue) >= 2 * sock->rw)) { +	     skb_queue_len(&sock->tx_queue) >= 2 * sock->remote_rw)) {  		pr_err("Tx queue is full %d frames\n",  		       skb_queue_len(&sock->tx_queue));  		return -ENOBUFS; @@ -559,9 +695,11 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,  	remaining_len = len;  	msg_ptr = msg_data; -	while (remaining_len > 0) { +	do { +		remote_miu = sock->remote_miu > LLCP_MAX_MIU ? +				local->remote_miu : sock->remote_miu; -		frag_len = min_t(size_t, sock->miu, remaining_len); +		frag_len = min_t(size_t, remote_miu, remaining_len);  		pr_debug("Fragment %zd bytes remaining %zd",  			 frag_len, remaining_len); @@ -573,7 +711,8 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,  		skb_put(pdu, LLCP_SEQUENCE_SIZE); -		memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len); +		if (likely(frag_len > 0)) +			memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len);  		skb_queue_tail(&sock->tx_queue, pdu); @@ -585,7 +724,7 @@ int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,  		remaining_len -= frag_len;  		msg_ptr += frag_len; -	} +	} while (remaining_len > 0);  	kfree(msg_data); @@ -599,6 +738,7 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap,  	struct nfc_llcp_local *local;  	size_t frag_len = 0, remaining_len;  	u8 *msg_ptr, *msg_data; +	u16 remote_miu;  	int err;  	pr_debug("Send UI frame len %zd\n", len); @@ -619,9 +759,11 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap,  	remaining_len = len;  	msg_ptr = msg_data; -	while (remaining_len > 0) { +	do { +		remote_miu = sock->remote_miu > LLCP_MAX_MIU ? 
+				local->remote_miu : sock->remote_miu; -		frag_len = min_t(size_t, sock->miu, remaining_len); +		frag_len = min_t(size_t, remote_miu, remaining_len);  		pr_debug("Fragment %zd bytes remaining %zd",  			 frag_len, remaining_len); @@ -635,14 +777,15 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap,  		pdu = llcp_add_header(pdu, dsap, ssap, LLCP_PDU_UI); -		memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len); +		if (likely(frag_len > 0)) +			memcpy(skb_put(pdu, frag_len), msg_ptr, frag_len);  		/* No need to check for the peer RW for UI frames */  		skb_queue_tail(&local->tx_queue, pdu);  		remaining_len -= frag_len;  		msg_ptr += frag_len; -	} +	} while (remaining_len > 0);  	kfree(msg_data); diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp_core.c index ee25f25f0cd..158bdbf668c 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp_core.c @@ -24,13 +24,15 @@  #include <linux/list.h>  #include <linux/nfc.h> -#include "../nfc.h" +#include "nfc.h"  #include "llcp.h"  static u8 llcp_magic[3] = {0x46, 0x66, 0x6d};  static struct list_head llcp_devices; +static void nfc_llcp_rx_skb(struct nfc_llcp_local *local, struct sk_buff *skb); +  void nfc_llcp_sock_link(struct llcp_sock_list *l, struct sock *sk)  {  	write_lock(&l->lock); @@ -45,6 +47,12 @@ void nfc_llcp_sock_unlink(struct llcp_sock_list *l, struct sock *sk)  	write_unlock(&l->lock);  } +void nfc_llcp_socket_remote_param_init(struct nfc_llcp_sock *sock) +{ +	sock->remote_rw = LLCP_DEFAULT_RW; +	sock->remote_miu = LLCP_MAX_MIU + 1; +} +  static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock)  {  	struct nfc_llcp_local *local = sock->local; @@ -68,7 +76,7 @@ static void nfc_llcp_socket_purge(struct nfc_llcp_sock *sock)  	}  } -static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen, +static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool device,  				    int err)  {  	struct sock *sk; @@ -108,21 +116,6 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen,  				bh_unlock_sock(accept_sk);  			} - -			if (listen == true) { -				bh_unlock_sock(sk); -				continue; -			} -		} - -		/* -		 * If we have a connection less socket bound, we keep it alive -		 * if the device is still present. -		 */ -		if (sk->sk_state == LLCP_BOUND && sk->sk_type == SOCK_DGRAM && -		    listen == true) { -			bh_unlock_sock(sk); -			continue;  		}  		if (err) @@ -137,11 +130,8 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool listen,  	write_unlock(&local->sockets.lock); -	/* -	 * If we want to keep the listening sockets alive, -	 * we don't touch the RAW ones. 
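
Two behavioural points in the I/UI send paths above are easy to miss: fragmentation is now bounded by the peer's MIU (falling back to the link-level remote_miu when the socket never learned one), and the while loop became do/while so a zero-length send still emits exactly one PDU, with the memcpy skipped for the empty fragment. The fragment count, distilled:

	/* Sketch: len bytes split at the MIU, minimum one PDU. */
	static unsigned int num_fragments(size_t len, u16 miu)
	{
		unsigned int n = 0;
		size_t remaining = len;

		do {
			size_t frag = min_t(size_t, miu, remaining);

			remaining -= frag;
			n++;	/* len == 0 still yields one (empty) PDU */
		} while (remaining > 0);

		return n;
	}
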
-	 */ -	if (listen == true) +	/* If we still have a device, we keep the RAW sockets alive */ +	if (device == true)  		return;  	write_lock(&local->raw_sockets.lock); @@ -173,15 +163,18 @@ struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local)  	return local;  } -static void local_cleanup(struct nfc_llcp_local *local, bool listen) +static void local_cleanup(struct nfc_llcp_local *local)  { -	nfc_llcp_socket_release(local, listen, ENXIO); +	nfc_llcp_socket_release(local, false, ENXIO);  	del_timer_sync(&local->link_timer);  	skb_queue_purge(&local->tx_queue);  	cancel_work_sync(&local->tx_work);  	cancel_work_sync(&local->rx_work);  	cancel_work_sync(&local->timeout_work);  	kfree_skb(local->rx_pending); +	del_timer_sync(&local->sdreq_timer); +	cancel_work_sync(&local->sdreq_timeout_work); +	nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs);  }  static void local_release(struct kref *ref) @@ -191,7 +184,7 @@ static void local_release(struct kref *ref)  	local = container_of(ref, struct nfc_llcp_local, ref);  	list_del(&local->list); -	local_cleanup(local, false); +	local_cleanup(local);  	kfree(local);  } @@ -259,6 +252,47 @@ static void nfc_llcp_symm_timer(unsigned long data)  	schedule_work(&local->timeout_work);  } +static void nfc_llcp_sdreq_timeout_work(struct work_struct *work) +{ +	unsigned long time; +	HLIST_HEAD(nl_sdres_list); +	struct hlist_node *n; +	struct nfc_llcp_sdp_tlv *sdp; +	struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local, +						    sdreq_timeout_work); + +	mutex_lock(&local->sdreq_lock); + +	time = jiffies - msecs_to_jiffies(3 * local->remote_lto); + +	hlist_for_each_entry_safe(sdp, n, &local->pending_sdreqs, node) { +		if (time_after(sdp->time, time)) +			continue; + +		sdp->sap = LLCP_SDP_UNBOUND; + +		hlist_del(&sdp->node); + +		hlist_add_head(&sdp->node, &nl_sdres_list); +	} + +	if (!hlist_empty(&local->pending_sdreqs)) +		mod_timer(&local->sdreq_timer, +			  jiffies + msecs_to_jiffies(3 * local->remote_lto)); + +	mutex_unlock(&local->sdreq_lock); + +	if (!hlist_empty(&nl_sdres_list)) +		nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list); +} + +static void nfc_llcp_sdreq_timer(unsigned long data) +{ +	struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; + +	schedule_work(&local->sdreq_timeout_work); +} +  struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev)  {  	struct nfc_llcp_local *local, *n; @@ -802,8 +836,6 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local,  	ui_cb->dsap = dsap;  	ui_cb->ssap = ssap; -	printk("%s %d %d\n", __func__, dsap, ssap); -  	pr_debug("%d %d\n", dsap, ssap);  	/* We're looking for a bound socket, not a client one */ @@ -900,7 +932,9 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,  	new_sock = nfc_llcp_sock(new_sk);  	new_sock->dev = local->dev;  	new_sock->local = nfc_llcp_local_get(local); -	new_sock->miu = local->remote_miu; +	new_sock->rw = sock->rw; +	new_sock->miux = sock->miux; +	new_sock->remote_miu = local->remote_miu;  	new_sock->nfc_protocol = sock->nfc_protocol;  	new_sock->dsap = ssap;  	new_sock->target_idx = local->target_idx; @@ -954,11 +988,11 @@ int nfc_llcp_queue_i_frames(struct nfc_llcp_sock *sock)  	pr_debug("Remote ready %d tx queue len %d remote rw %d",  		 sock->remote_ready, skb_queue_len(&sock->tx_pending_queue), -		 sock->rw); +		 sock->remote_rw);  	/* Try to queue some I frames for transmission */  	while (sock->remote_ready && -	       skb_queue_len(&sock->tx_pending_queue) < sock->rw) { +	       
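
The sdreq timeout machinery above ages pending service-discovery requests against three times the remote link timeout: the work item sweeps entries older than that, marks them unbound, reports them to user space, and re-arms the timer while anything is still pending. The aging test, spelled out:

	/* An SDREQ is stale once it has waited > 3 * remote LTO ms. */
	static bool sdreq_expired(const struct nfc_llcp_sdp_tlv *sdp,
				  u8 remote_lto)
	{
		unsigned long cutoff =
			jiffies - msecs_to_jiffies(3 * remote_lto);

		/* time_after(sdp->time, cutoff): still inside the window */
		return !time_after(sdp->time, cutoff);
	}
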
skb_queue_len(&sock->tx_pending_queue) < sock->remote_rw) {  		struct sk_buff *pdu;  		pdu = skb_dequeue(&sock->tx_queue); @@ -1072,6 +1106,12 @@ static void nfc_llcp_recv_disc(struct nfc_llcp_local *local,  	dsap = nfc_llcp_dsap(skb);  	ssap = nfc_llcp_ssap(skb); +	if ((dsap == 0) && (ssap == 0)) { +		pr_debug("Connection termination"); +		nfc_dep_link_down(local->dev); +		return; +	} +  	llcp_sock = nfc_llcp_sock_get(local, dsap, ssap);  	if (llcp_sock == NULL) {  		nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_NOCONN); @@ -1178,6 +1218,10 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,  	u16 tlv_len, offset;  	char *service_name;  	size_t service_name_len; +	struct nfc_llcp_sdp_tlv *sdp; +	HLIST_HEAD(llc_sdres_list); +	size_t sdres_tlvs_len; +	HLIST_HEAD(nl_sdres_list);  	dsap = nfc_llcp_dsap(skb);  	ssap = nfc_llcp_ssap(skb); @@ -1192,6 +1236,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,  	tlv = &skb->data[LLCP_HEADER_SIZE];  	tlv_len = skb->len - LLCP_HEADER_SIZE;  	offset = 0; +	sdres_tlvs_len = 0;  	while (offset < tlv_len) {  		type = tlv[0]; @@ -1209,14 +1254,14 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,  			    !strncmp(service_name, "urn:nfc:sn:sdp",  				     service_name_len)) {  				sap = 1; -				goto send_snl; +				goto add_snl;  			}  			llcp_sock = nfc_llcp_sock_from_sn(local, service_name,  							  service_name_len);  			if (!llcp_sock) {  				sap = 0; -				goto send_snl; +				goto add_snl;  			}  			/* @@ -1233,7 +1278,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,  				if (sap == LLCP_SAP_MAX) {  					sap = 0; -					goto send_snl; +					goto add_snl;  				}  				client_count = @@ -1250,8 +1295,37 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,  			pr_debug("%p %d\n", llcp_sock, sap); -send_snl: -			nfc_llcp_send_snl(local, tid, sap); +add_snl: +			sdp = nfc_llcp_build_sdres_tlv(tid, sap); +			if (sdp == NULL) +				goto exit; + +			sdres_tlvs_len += sdp->tlv_len; +			hlist_add_head(&sdp->node, &llc_sdres_list); +			break; + +		case LLCP_TLV_SDRES: +			mutex_lock(&local->sdreq_lock); + +			pr_debug("LLCP_TLV_SDRES: searching tid %d\n", tlv[2]); + +			hlist_for_each_entry(sdp, &local->pending_sdreqs, node) { +				if (sdp->tid != tlv[2]) +					continue; + +				sdp->sap = tlv[3]; + +				pr_debug("Found: uri=%s, sap=%d\n", +					 sdp->uri, sdp->sap); + +				hlist_del(&sdp->node); + +				hlist_add_head(&sdp->node, &nl_sdres_list); + +				break; +			} + +			mutex_unlock(&local->sdreq_lock);  			break;  		default: @@ -1262,21 +1336,63 @@ send_snl:  		offset += length + 2;  		tlv += length + 2;  	} + +exit: +	if (!hlist_empty(&nl_sdres_list)) +		nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list); + +	if (!hlist_empty(&llc_sdres_list)) +		nfc_llcp_send_snl_sdres(local, &llc_sdres_list, sdres_tlvs_len);  } -static void nfc_llcp_rx_work(struct work_struct *work) +static void nfc_llcp_recv_agf(struct nfc_llcp_local *local, struct sk_buff *skb)  { -	struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local, -						    rx_work); -	u8 dsap, ssap, ptype; -	struct sk_buff *skb; +	u8 ptype; +	u16 pdu_len; +	struct sk_buff *new_skb; -	skb = local->rx_pending; -	if (skb == NULL) { -		pr_debug("No pending SKB\n"); +	if (skb->len <= LLCP_HEADER_SIZE) { +		pr_err("Malformed AGF PDU\n");  		return;  	} +	skb_pull(skb, LLCP_HEADER_SIZE); + +	while (skb->len > LLCP_AGF_PDU_HEADER_SIZE) { +		pdu_len = skb->data[0] << 8 | skb->data[1]; + +		skb_pull(skb, LLCP_AGF_PDU_HEADER_SIZE); + +		if (pdu_len < 
LLCP_HEADER_SIZE || pdu_len > skb->len) { +			pr_err("Malformed AGF PDU\n"); +			return; +		} + +		ptype = nfc_llcp_ptype(skb); + +		if (ptype == LLCP_PDU_SYMM || ptype == LLCP_PDU_AGF) +			goto next; + +		new_skb = nfc_alloc_recv_skb(pdu_len, GFP_KERNEL); +		if (new_skb == NULL) { +			pr_err("Could not allocate PDU\n"); +			return; +		} + +		memcpy(skb_put(new_skb, pdu_len), skb->data, pdu_len); + +		nfc_llcp_rx_skb(local, new_skb); + +		kfree_skb(new_skb); +next: +		skb_pull(skb, pdu_len); +	} +} + +static void nfc_llcp_rx_skb(struct nfc_llcp_local *local, struct sk_buff *skb) +{ +	u8 dsap, ssap, ptype; +  	ptype = nfc_llcp_ptype(skb);  	dsap = nfc_llcp_dsap(skb);  	ssap = nfc_llcp_ssap(skb); @@ -1287,10 +1403,6 @@ static void nfc_llcp_rx_work(struct work_struct *work)  		print_hex_dump(KERN_DEBUG, "LLCP Rx: ", DUMP_PREFIX_OFFSET,  			       16, 1, skb->data, skb->len, true); -	__net_timestamp(skb); - -	nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX); -  	switch (ptype) {  	case LLCP_PDU_SYMM:  		pr_debug("SYMM\n"); @@ -1333,8 +1445,31 @@ static void nfc_llcp_rx_work(struct work_struct *work)  		nfc_llcp_recv_hdlc(local, skb);  		break; +	case LLCP_PDU_AGF: +		pr_debug("AGF frame\n"); +		nfc_llcp_recv_agf(local, skb); +		break; +	} +} + +static void nfc_llcp_rx_work(struct work_struct *work) +{ +	struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local, +						    rx_work); +	struct sk_buff *skb; + +	skb = local->rx_pending; +	if (skb == NULL) { +		pr_debug("No pending SKB\n"); +		return;  	} +	__net_timestamp(skb); + +	nfc_llcp_send_to_raw_sock(local, skb, NFC_LLCP_DIRECTION_RX); + +	nfc_llcp_rx_skb(local, skb); +  	schedule_work(&local->tx_work);  	kfree_skb(local->rx_pending);  	local->rx_pending = NULL; @@ -1381,6 +1516,9 @@ void nfc_llcp_mac_is_down(struct nfc_dev *dev)  	if (local == NULL)  		return; +	local->remote_miu = LLCP_DEFAULT_MIU; +	local->remote_lto = LLCP_DEFAULT_LTO; +  	/* Close and purge all existing sockets */  	nfc_llcp_socket_release(local, true, 0);  } @@ -1447,6 +1585,13 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)  	local->remote_miu = LLCP_DEFAULT_MIU;  	local->remote_lto = LLCP_DEFAULT_LTO; +	mutex_init(&local->sdreq_lock); +	INIT_HLIST_HEAD(&local->pending_sdreqs); +	init_timer(&local->sdreq_timer); +	local->sdreq_timer.data = (unsigned long) local; +	local->sdreq_timer.function = nfc_llcp_sdreq_timer; +	INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work); +  	list_add(&local->list, &llcp_devices);  	return 0; @@ -1461,7 +1606,7 @@ void nfc_llcp_unregister_device(struct nfc_dev *dev)  		return;  	} -	local_cleanup(local, false); +	local_cleanup(local);  	nfc_llcp_local_put(local);  } diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp_sock.c index 6c94447ec41..380253eccb7 100644 --- a/net/nfc/llcp/sock.c +++ b/net/nfc/llcp_sock.c @@ -24,7 +24,7 @@  #include <linux/module.h>  #include <linux/nfc.h> -#include "../nfc.h" +#include "nfc.h"  #include "llcp.h"  static int sock_wait_state(struct sock *sk, int state, unsigned long timeo) @@ -223,6 +223,156 @@ error:  	return ret;  } +static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname, +			       char __user *optval, unsigned int optlen) +{ +	struct sock *sk = sock->sk; +	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); +	u32 opt; +	int err = 0; + +	pr_debug("%p optname %d\n", sk, optname); + +	if (level != SOL_NFC) +		return -ENOPROTOOPT; + +	lock_sock(sk); + +	switch (optname) { +	case NFC_LLCP_RW: +		if (sk->sk_state == LLCP_CONNECTED || +		
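
nfc_llcp_recv_agf() above unpacks an aggregated frame: after the 2-byte LLCP header, the payload is a series of [2-byte big-endian length][sub-PDU] records, each sub-PDU copied into a fresh skb and re-run through nfc_llcp_rx_skb(). The layout and sanity checks, summarized:

	/* AGF payload:
	 *   [len_hi][len_lo][ first encapsulated PDU  ]
	 *   [len_hi][len_lo][ second encapsulated PDU ]
	 *   ...
	 * recovered with:
	 *   pdu_len = skb->data[0] << 8 | skb->data[1];
	 * A pdu_len below LLCP_HEADER_SIZE or past the end of the skb is
	 * rejected as malformed; encapsulated SYMM and AGF PDUs are skipped.
	 */
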
    sk->sk_state == LLCP_BOUND || +		    sk->sk_state == LLCP_LISTEN) { +			err = -EINVAL; +			break; +		} + +		if (get_user(opt, (u32 __user *) optval)) { +			err = -EFAULT; +			break; +		} + +		if (opt > LLCP_MAX_RW) { +			err = -EINVAL; +			break; +		} + +		llcp_sock->rw = (u8) opt; + +		break; + +	case NFC_LLCP_MIUX: +		if (sk->sk_state == LLCP_CONNECTED || +		    sk->sk_state == LLCP_BOUND || +		    sk->sk_state == LLCP_LISTEN) { +			err = -EINVAL; +			break; +		} + +		if (get_user(opt, (u32 __user *) optval)) { +			err = -EFAULT; +			break; +		} + +		if (opt > LLCP_MAX_MIUX) { +			err = -EINVAL; +			break; +		} + +		llcp_sock->miux = cpu_to_be16((u16) opt); + +		break; + +	default: +		err = -ENOPROTOOPT; +		break; +	} + +	release_sock(sk); + +	pr_debug("%p rw %d miux %d\n", llcp_sock, +		 llcp_sock->rw, llcp_sock->miux); + +	return err; +} + +static int nfc_llcp_getsockopt(struct socket *sock, int level, int optname, +			       char __user *optval, int __user *optlen) +{ +	struct nfc_llcp_local *local; +	struct sock *sk = sock->sk; +	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); +	int len, err = 0; +	u16 miux, remote_miu; +	u8 rw; + +	pr_debug("%p optname %d\n", sk, optname); + +	if (level != SOL_NFC) +		return -ENOPROTOOPT; + +	if (get_user(len, optlen)) +		return -EFAULT; + +	local = llcp_sock->local; +	if (!local) +		return -ENODEV; + +	len = min_t(u32, len, sizeof(u32)); + +	lock_sock(sk); + +	switch (optname) { +	case NFC_LLCP_RW: +		rw = llcp_sock->rw > LLCP_MAX_RW ? local->rw : llcp_sock->rw; +		if (put_user(rw, (u32 __user *) optval)) +			err = -EFAULT; + +		break; + +	case NFC_LLCP_MIUX: +		miux = be16_to_cpu(llcp_sock->miux) > LLCP_MAX_MIUX ? +			be16_to_cpu(local->miux) : be16_to_cpu(llcp_sock->miux); + +		if (put_user(miux, (u32 __user *) optval)) +			err = -EFAULT; + +		break; + +	case NFC_LLCP_REMOTE_MIU: +		remote_miu = llcp_sock->remote_miu > LLCP_MAX_MIU ? +				local->remote_miu : llcp_sock->remote_miu; + +		if (put_user(remote_miu, (u32 __user *) optval)) +			err = -EFAULT; + +		break; + +	case NFC_LLCP_REMOTE_LTO: +		if (put_user(local->remote_lto / 10, (u32 __user *) optval)) +			err = -EFAULT; + +		break; + +	case NFC_LLCP_REMOTE_RW: +		if (put_user(llcp_sock->remote_rw, (u32 __user *) optval)) +			err = -EFAULT; + +		break; + +	default: +		err = -ENOPROTOOPT; +		break; +	} + +	release_sock(sk); + +	if (put_user(len, optlen)) +		return -EFAULT; + +	return err; +} +  void nfc_llcp_accept_unlink(struct sock *sk)  {  	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk); @@ -358,12 +508,13 @@ static int llcp_sock_getname(struct socket *sock, struct sockaddr *uaddr,  	pr_debug("%p %d %d %d\n", sk, llcp_sock->target_idx,  		 llcp_sock->dsap, llcp_sock->ssap); -	uaddr->sa_family = AF_NFC; - +	memset(llcp_addr, 0, sizeof(*llcp_addr));  	*len = sizeof(struct sockaddr_nfc_llcp); +	llcp_addr->sa_family = AF_NFC;  	llcp_addr->dev_idx = llcp_sock->dev->idx;  	llcp_addr->target_idx = llcp_sock->target_idx; +	llcp_addr->nfc_protocol = llcp_sock->nfc_protocol;  	llcp_addr->dsap = llcp_sock->dsap;  	llcp_addr->ssap = llcp_sock->ssap;  	llcp_addr->service_name_len = llcp_sock->service_name_len; @@ -405,7 +556,8 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock,  		return llcp_accept_poll(sk);  	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) -		mask |= POLLERR; +		mask |= POLLERR | +			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
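
With setsockopt()/getsockopt() wired up above, user space can tune RW and MIUX before a connection exists and read the remote link parameters back afterwards. A hypothetical user-space fragment (constants from <linux/nfc.h>, error handling omitted):

	#include <stdint.h>
	#include <sys/socket.h>
	#include <linux/nfc.h>

	int fd = socket(AF_NFC, SOCK_STREAM, NFC_SOCKPROTO_LLCP);
	uint32_t rw = 4, miux = 0x480, remote_miu = 0;
	socklen_t optlen = sizeof(remote_miu);

	/* Only legal while unbound/unconnected; -EINVAL afterwards. */
	setsockopt(fd, SOL_NFC, NFC_LLCP_RW, &rw, sizeof(rw));
	setsockopt(fd, SOL_NFC, NFC_LLCP_MIUX, &miux, sizeof(miux));

	/* ... bind()/connect() ... */

	getsockopt(fd, SOL_NFC, NFC_LLCP_REMOTE_MIU, &remote_miu, &optlen);
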
POLLPRI : 0);  	if (!skb_queue_empty(&sk->sk_receive_queue))  		mask |= POLLIN | POLLRDNORM; @@ -543,7 +695,7 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,  	llcp_sock->dev = dev;  	llcp_sock->local = nfc_llcp_local_get(local); -	llcp_sock->miu = llcp_sock->local->remote_miu; +	llcp_sock->remote_miu = llcp_sock->local->remote_miu;  	llcp_sock->ssap = nfc_llcp_get_local_ssap(local);  	if (llcp_sock->ssap == LLCP_SAP_MAX) {  		ret = -ENOMEM; @@ -740,8 +892,8 @@ static const struct proto_ops llcp_sock_ops = {  	.ioctl          = sock_no_ioctl,  	.listen         = llcp_sock_listen,  	.shutdown       = sock_no_shutdown, -	.setsockopt     = sock_no_setsockopt, -	.getsockopt     = sock_no_getsockopt, +	.setsockopt     = nfc_llcp_setsockopt, +	.getsockopt     = nfc_llcp_getsockopt,  	.sendmsg        = llcp_sock_sendmsg,  	.recvmsg        = llcp_sock_recvmsg,  	.mmap           = sock_no_mmap, @@ -805,12 +957,13 @@ struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp)  	llcp_sock->ssap = 0;  	llcp_sock->dsap = LLCP_SAP_SDP; -	llcp_sock->rw = LLCP_DEFAULT_RW; -	llcp_sock->miu = LLCP_DEFAULT_MIU; +	llcp_sock->rw = LLCP_MAX_RW + 1; +	llcp_sock->miux = cpu_to_be16(LLCP_MAX_MIUX + 1);  	llcp_sock->send_n = llcp_sock->send_ack_n = 0;  	llcp_sock->recv_n = llcp_sock->recv_ack_n = 0;  	llcp_sock->remote_ready = 1;  	llcp_sock->reserved_ssap = LLCP_SAP_MAX; +	nfc_llcp_socket_remote_param_init(llcp_sock);  	skb_queue_head_init(&llcp_sock->tx_queue);  	skb_queue_head_init(&llcp_sock->tx_pending_queue);  	INIT_LIST_HEAD(&llcp_sock->accept_queue); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 504b883439f..f0c4d61f37c 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -28,8 +28,7 @@  #include <linux/slab.h>  #include "nfc.h" - -#include "llcp/llcp.h" +#include "llcp.h"  static struct genl_multicast_group nfc_genl_event_mcgrp = {  	.name = NFC_GENL_MCAST_EVENT_NAME, @@ -53,6 +52,15 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {  	[NFC_ATTR_DEVICE_POWERED] = { .type = NLA_U8 },  	[NFC_ATTR_IM_PROTOCOLS] = { .type = NLA_U32 },  	[NFC_ATTR_TM_PROTOCOLS] = { .type = NLA_U32 }, +	[NFC_ATTR_LLC_PARAM_LTO] = { .type = NLA_U8 }, +	[NFC_ATTR_LLC_PARAM_RW] = { .type = NLA_U8 }, +	[NFC_ATTR_LLC_PARAM_MIUX] = { .type = NLA_U16 }, +	[NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED }, +}; + +static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = { +	[NFC_SDP_ATTR_URI] = { .type = NLA_STRING }, +	[NFC_SDP_ATTR_SAP] = { .type = NLA_U8 },  };  static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, @@ -348,6 +356,74 @@ free_msg:  	return -EMSGSIZE;  } +int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list) +{ +	struct sk_buff *msg; +	struct nlattr *sdp_attr, *uri_attr; +	struct nfc_llcp_sdp_tlv *sdres; +	struct hlist_node *n; +	void *hdr; +	int rc = -EMSGSIZE; +	int i; + +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); +	if (!msg) +		return -ENOMEM; + +	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, +			  NFC_EVENT_LLC_SDRES); +	if (!hdr) +		goto free_msg; + +	if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx)) +		goto nla_put_failure; + +	sdp_attr = nla_nest_start(msg, NFC_ATTR_LLC_SDP); +	if (sdp_attr == NULL) { +		rc = -ENOMEM; +		goto nla_put_failure; +	} + +	i = 1; +	hlist_for_each_entry_safe(sdres, n, sdres_list, node) { +		pr_debug("uri: %s, sap: %d\n", sdres->uri, sdres->sap); + +		uri_attr = nla_nest_start(msg, i++); +		if (uri_attr == NULL) { +			rc = 
-ENOMEM; +			goto nla_put_failure; +		} + +		if (nla_put_u8(msg, NFC_SDP_ATTR_SAP, sdres->sap)) +			goto nla_put_failure; + +		if (nla_put_string(msg, NFC_SDP_ATTR_URI, sdres->uri)) +			goto nla_put_failure; + +		nla_nest_end(msg, uri_attr); + +		hlist_del(&sdres->node); + +		nfc_llcp_free_sdp_tlv(sdres); +	} + +	nla_nest_end(msg, sdp_attr); + +	genlmsg_end(msg, hdr); + +	return genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_ATOMIC); + +nla_put_failure: +	genlmsg_cancel(msg, hdr); + +free_msg: +	nlmsg_free(msg); + +	nfc_llcp_free_sdp_tlv_list(sdres_list); + +	return rc; +} +  static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,  				u32 portid, u32 seq,  				struct netlink_callback *cb, @@ -859,6 +935,96 @@ exit:  	return rc;  } +static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) +{ +	struct nfc_dev *dev; +	struct nfc_llcp_local *local; +	struct nlattr *attr, *sdp_attrs[NFC_SDP_ATTR_MAX+1]; +	u32 idx; +	u8 tid; +	char *uri; +	int rc = 0, rem; +	size_t uri_len, tlvs_len; +	struct hlist_head sdreq_list; +	struct nfc_llcp_sdp_tlv *sdreq; + +	if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || +	    !info->attrs[NFC_ATTR_LLC_SDP]) +		return -EINVAL; + +	idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + +	dev = nfc_get_device(idx); +	if (!dev) { +		rc = -ENODEV; +		goto exit; +	} + +	device_lock(&dev->dev); + +	if (dev->dep_link_up == false) { +		rc = -ENOLINK; +		goto exit; +	} + +	local = nfc_llcp_find_local(dev); +	if (!local) { +		nfc_put_device(dev); +		rc = -ENODEV; +		goto exit; +	} + +	INIT_HLIST_HEAD(&sdreq_list); + +	tlvs_len = 0; + +	nla_for_each_nested(attr, info->attrs[NFC_ATTR_LLC_SDP], rem) { +		rc = nla_parse_nested(sdp_attrs, NFC_SDP_ATTR_MAX, attr, +				      nfc_sdp_genl_policy); + +		if (rc != 0) { +			rc = -EINVAL; +			goto exit; +		} + +		if (!sdp_attrs[NFC_SDP_ATTR_URI]) +			continue; + +		uri_len = nla_len(sdp_attrs[NFC_SDP_ATTR_URI]); +		if (uri_len == 0) +			continue; + +		uri = nla_data(sdp_attrs[NFC_SDP_ATTR_URI]); +		if (uri == NULL || *uri == 0) +			continue; + +		tid = local->sdreq_next_tid++; + +		sdreq = nfc_llcp_build_sdreq_tlv(tid, uri, uri_len); +		if (sdreq == NULL) { +			rc = -ENOMEM; +			goto exit; +		} + +		tlvs_len += sdreq->tlv_len; + +		hlist_add_head(&sdreq->node, &sdreq_list); +	} + +	if (hlist_empty(&sdreq_list)) { +		rc = -EINVAL; +		goto exit; +	} + +	rc = nfc_llcp_send_snl_sdreq(local, &sdreq_list, tlvs_len); +exit: +	device_unlock(&dev->dev); + +	nfc_put_device(dev); + +	return rc; +} +  static struct genl_ops nfc_genl_ops[] = {  	{  		.cmd = NFC_CMD_GET_DEVICE, @@ -913,6 +1079,11 @@ static struct genl_ops nfc_genl_ops[] = {  		.doit = nfc_genl_llc_set_params,  		.policy = nfc_genl_policy,  	}, +	{ +		.cmd = NFC_CMD_LLC_SDREQ, +		.doit = nfc_genl_llc_sdreq, +		.policy = nfc_genl_policy, +	},  }; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 87d914d2876..afa1f84ba04 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -46,7 +46,7 @@ struct nfc_rawsock {  #define to_rawsock_sk(_tx_work) \  	((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work)) -#ifdef CONFIG_NFC_LLCP +struct nfc_llcp_sdp_tlv;  void nfc_llcp_mac_is_down(struct nfc_dev *dev);  void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, @@ -59,60 +59,8 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb);  struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev);  int __init nfc_llcp_init(void);  void nfc_llcp_exit(void); - -#else - -static inline void nfc_llcp_mac_is_down(struct nfc_dev 
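
nfc_genl_llc_send_sdres() above builds a two-level attribute tree for the NFC_EVENT_LLC_SDRES multicast, the nested containers keyed by a running 1..n counter; NFC_CMD_LLC_SDREQ requests use the same shape with URI entries. A libnl-3-style consumer might walk the event like this (assuming attrs[] was filled by a prior genlmsg_parse(); illustrative only):

	struct nlattr *pos;
	int rem;

	nla_for_each_nested(pos, attrs[NFC_ATTR_LLC_SDP], rem) {
		struct nlattr *tb[NFC_SDP_ATTR_MAX + 1];

		nla_parse_nested(tb, NFC_SDP_ATTR_MAX, pos, NULL);
		if (tb[NFC_SDP_ATTR_URI] && tb[NFC_SDP_ATTR_SAP])
			printf("%s -> sap %u\n",
			       nla_get_string(tb[NFC_SDP_ATTR_URI]),
			       nla_get_u8(tb[NFC_SDP_ATTR_SAP]));
	}
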
*dev) -{ -} - -static inline void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx, -				      u8 comm_mode, u8 rf_mode) -{ -} - -static inline int nfc_llcp_register_device(struct nfc_dev *dev) -{ -	return 0; -} - -static inline void nfc_llcp_unregister_device(struct nfc_dev *dev) -{ -} - -static inline int nfc_llcp_set_remote_gb(struct nfc_dev *dev, -					 u8 *gb, u8 gb_len) -{ -	return 0; -} - -static inline u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *gb_len) -{ -	*gb_len = 0; -	return NULL; -} - -static inline int nfc_llcp_data_received(struct nfc_dev *dev, -					 struct sk_buff *skb) -{ -	return 0; -} - -static inline struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev) -{ -	return NULL; -} - -static inline int nfc_llcp_init(void) -{ -	return 0; -} - -static inline void nfc_llcp_exit(void) -{ -} - -#endif +void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp); +void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head);  int __init rawsock_init(void);  void rawsock_exit(void); @@ -144,6 +92,8 @@ int nfc_genl_dep_link_down_event(struct nfc_dev *dev);  int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol);  int nfc_genl_tm_deactivated(struct nfc_dev *dev); +int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list); +  struct nfc_dev *nfc_get_device(unsigned int idx);  static inline void nfc_put_device(struct nfc_dev *dev) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index d4d5363c7ba..894b6cbdd92 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -98,7 +98,7 @@ static int pop_vlan(struct sk_buff *skb)  	if (unlikely(err))  		return err; -	__vlan_hwaccel_put_tag(skb, ntohs(tci)); +	__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(tci));  	return 0;  } @@ -110,7 +110,7 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla  		/* push down current VLAN tag */  		current_tag = vlan_tx_tag_get(skb); -		if (!__vlan_put_tag(skb, current_tag)) +		if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))  			return -ENOMEM;  		if (skb->ip_summed == CHECKSUM_COMPLETE) @@ -118,7 +118,7 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla  					+ (2 * ETH_ALEN), VLAN_HLEN, 0));  	} -	__vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); +	__vlan_hwaccel_put_tag(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);  	return 0;  } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6980c3e6f06..d12d6b8b5e8 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -44,6 +44,7 @@  #include <linux/netfilter_ipv4.h>  #include <linux/inetdevice.h>  #include <linux/list.h> +#include <linux/lockdep.h>  #include <linux/openvswitch.h>  #include <linux/rculist.h>  #include <linux/dmi.h> @@ -55,39 +56,61 @@  #include "datapath.h"  #include "flow.h"  #include "vport-internal_dev.h" +#include "vport-netdev.h" -/** - * struct ovs_net - Per net-namespace data for ovs. - * @dps: List of datapaths to enable dumping them all out. - * Protected by genl_mutex. 
- */
-struct ovs_net {
-	struct list_head dps;
-};
-
-static int ovs_net_id __read_mostly;
 
 #define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
 static void rehash_flow_table(struct work_struct *work);
 static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
 
+int ovs_net_id __read_mostly;
+
+static void ovs_notify(struct sk_buff *skb, struct genl_info *info,
+		       struct genl_multicast_group *grp)
+{
+	genl_notify(skb, genl_info_net(info), info->snd_portid,
+		    grp->id, info->nlhdr, GFP_KERNEL);
+}
+
 /**
  * DOC: Locking:
  *
- * Writes to device state (add/remove datapath, port, set operations on vports,
- * etc.) are protected by RTNL.
- *
- * Writes to other state (flow table modifications, set miscellaneous datapath
- * parameters, etc.) are protected by genl_mutex.  The RTNL lock nests inside
- * genl_mutex.
+ * All writes, e.g. writes to device state (add/remove datapath, port, set
+ * operations on vports, etc.) and writes to other state (flow table
+ * modifications, setting miscellaneous datapath parameters, etc.), are
+ * protected by ovs_lock.
  *
  * Reads are protected by RCU.
  *
  * There are a few special cases (mostly stats) that have their own
  * synchronization but they nest under all of above and don't interact with
  * each other.
+ *
+ * The RTNL lock nests inside ovs_mutex.
  */
+static DEFINE_MUTEX(ovs_mutex);
+
+void ovs_lock(void)
+{
+	mutex_lock(&ovs_mutex);
+}
+
+void ovs_unlock(void)
+{
+	mutex_unlock(&ovs_mutex);
+}
+
+#ifdef CONFIG_LOCKDEP
+int lockdep_ovsl_is_held(void)
+{
+	if (debug_locks)
+		return lockdep_is_held(&ovs_mutex);
+	else
+		return 1;
+}
+#endif
+
 static struct vport *new_vport(const struct vport_parms *);
 static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
 			     const struct dp_upcall_info *);
@@ -95,7 +118,7 @@ static int queue_userspace_packet(struct net *, int dp_ifindex,
 				  struct sk_buff *,
 				  const struct dp_upcall_info *);
 
-/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
+/* Must be called with rcu_read_lock or ovs_mutex. */
 static struct datapath *get_dp(struct net *net, int dp_ifindex)
 {
 	struct datapath *dp = NULL;
@@ -113,10 +136,10 @@ static struct datapath *get_dp(struct net *net, int dp_ifindex)
 	return dp;
 }
 
-/* Must be called with rcu_read_lock or RTNL lock. */
+/* Must be called with rcu_read_lock or ovs_mutex. */
 const char *ovs_dp_name(const struct datapath *dp)
 {
-	struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL);
+	struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
 
 	return vport->ops->get_name(vport);
 }
@@ -129,7 +152,7 @@ static int get_dpifindex(struct datapath *dp)
 	local = ovs_vport_rcu(dp, OVSP_LOCAL);
 	if (local)
-		ifindex = local->ops->get_ifindex(local);
+		ifindex = netdev_vport_priv(local)->dev->ifindex;
 	else
 		ifindex = 0;
 
@@ -168,7 +191,7 @@ struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
 	return NULL;
 }
 
-/* Called with RTNL lock and genl_lock. */
+/* Called with ovs_mutex. */
 static struct vport *new_vport(const struct vport_parms *parms)
 {
 	struct vport *vport;
@@ -180,14 +203,12 @@ static struct vport *new_vport(const struct vport_parms *parms)
 		hlist_add_head_rcu(&vport->dp_hash_node, head);
 	}
-
 	return vport;
 }
 
-/* Called with RTNL lock. */
 void ovs_dp_detach_port(struct vport *p)
 {
-	ASSERT_RTNL();
+	ASSERT_OVSL();
 
 	/* First drop references to device.
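
The ovsl_* accessors used throughout the rest of this patch (ovsl_dereference, ovs_vport_ovsl, ASSERT_OVSL) are not shown in these hunks; they are presumably thin wrappers over the lockdep hook defined above, along these lines:

	/* Presumed definitions in datapath.h: */
	#define ovsl_dereference(p) \
		rcu_dereference_protected(p, lockdep_ovsl_is_held())

	#define ASSERT_OVSL() \
		WARN_ON(unlikely(!lockdep_ovsl_is_held()))

so ovsl_dereference() satisfies RCU/lockdep checking exactly when ovs_mutex is held, replacing the old genl_dereference() calls.
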
*/  	hlist_del_rcu(&p->dp_hash_node); @@ -250,7 +271,8 @@ static struct genl_family dp_packet_genl_family = {  	.name = OVS_PACKET_FAMILY,  	.version = OVS_PACKET_VERSION,  	.maxattr = OVS_PACKET_ATTR_MAX, -	.netnsok = true +	.netnsok = true, +	.parallel_ops = true,  };  int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, @@ -337,6 +359,35 @@ static int queue_gso_packets(struct net *net, int dp_ifindex,  	return err;  } +static size_t key_attr_size(void) +{ +	return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */ +		+ nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */ +		+ nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */ +		+ nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */ +		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */ +		+ nla_total_size(4)   /* OVS_KEY_ATTR_8021Q */ +		+ nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */ +		+ nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */ +		+ nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */ +		+ nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */ +		+ nla_total_size(28); /* OVS_KEY_ATTR_ND */ +} + +static size_t upcall_msg_size(const struct sk_buff *skb, +			      const struct nlattr *userdata) +{ +	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) +		+ nla_total_size(skb->len) /* OVS_PACKET_ATTR_PACKET */ +		+ nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */ + +	/* OVS_PACKET_ATTR_USERDATA */ +	if (userdata) +		size += NLA_ALIGN(userdata->nla_len); + +	return size; +} +  static int queue_userspace_packet(struct net *net, int dp_ifindex,  				  struct sk_buff *skb,  				  const struct dp_upcall_info *upcall_info) @@ -345,7 +396,6 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,  	struct sk_buff *nskb = NULL;  	struct sk_buff *user_skb; /* to be queued to userspace */  	struct nlattr *nla; -	unsigned int len;  	int err;  	if (vlan_tx_tag_present(skb)) { @@ -353,7 +403,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,  		if (!nskb)  			return -ENOMEM; -		nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb)); +		nskb = __vlan_put_tag(nskb, nskb->vlan_proto, vlan_tx_tag_get(nskb));  		if (!nskb)  			return -ENOMEM; @@ -366,13 +416,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,  		goto out;  	} -	len = sizeof(struct ovs_header); -	len += nla_total_size(skb->len); -	len += nla_total_size(FLOW_BUFSIZE); -	if (upcall_info->cmd == OVS_PACKET_CMD_ACTION) -		len += nla_total_size(8); - -	user_skb = genlmsg_new(len, GFP_ATOMIC); +	user_skb = genlmsg_new(upcall_msg_size(skb, upcall_info->userdata), GFP_ATOMIC);  	if (!user_skb) {  		err = -ENOMEM;  		goto out; @@ -387,8 +431,9 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex,  	nla_nest_end(user_skb, nla);  	if (upcall_info->userdata) -		nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA, -			    nla_get_u64(upcall_info->userdata)); +		__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, +			  nla_len(upcall_info->userdata), +			  nla_data(upcall_info->userdata));  	nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len); @@ -402,13 +447,13 @@ out:  	return err;  } -/* Called with genl_mutex. */ +/* Called with ovs_mutex. 
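
upcall_msg_size() above sizes the genl allocation from what will actually be put, instead of the old fixed FLOW_BUFSIZE guess. By my arithmetic, with NLA_HDRLEN == 4 and nla_total_size(n) == NLA_ALIGN(4 + n):

	/* key_attr_size() sums to 152 bytes, so for a 60-byte packet
	 * with no userdata:
	 *   NLMSG_ALIGN(sizeof(struct ovs_header))       =   4
	 *   nla_total_size(60)   OVS_PACKET_ATTR_PACKET  =  64
	 *   nla_total_size(152)  OVS_PACKET_ATTR_KEY     = 156
	 *                                         total  = 224 bytes
	 */
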
*/  static int flush_flows(struct datapath *dp)  {  	struct flow_table *old_table;  	struct flow_table *new_table; -	old_table = genl_dereference(dp->table); +	old_table = ovsl_dereference(dp->table);  	new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);  	if (!new_table)  		return -ENOMEM; @@ -544,7 +589,7 @@ static int validate_userspace(const struct nlattr *attr)  {  	static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] =	{  		[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 }, -		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 }, +		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },  	};  	struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];  	int error; @@ -661,8 +706,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)  	err = -EINVAL;  	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] || -	    !a[OVS_PACKET_ATTR_ACTIONS] || -	    nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN) +	    !a[OVS_PACKET_ATTR_ACTIONS])  		goto err;  	len = nla_len(a[OVS_PACKET_ATTR_PACKET]); @@ -672,7 +716,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)  		goto err;  	skb_reserve(packet, NET_IP_ALIGN); -	memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len); +	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);  	skb_reset_mac_header(packet);  	eth = eth_hdr(packet); @@ -680,7 +724,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)  	/* Normally, setting the skb 'protocol' field would be handled by a  	 * call to eth_type_trans(), but it assumes there's a sending  	 * device, which we may not have. */ -	if (ntohs(eth->h_proto) >= 1536) +	if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)  		packet->protocol = eth->h_proto;  	else  		packet->protocol = htons(ETH_P_802_2); @@ -743,7 +787,7 @@ err:  }  static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { -	[OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC }, +	[OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },  	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },  	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },  }; @@ -759,7 +803,7 @@ static struct genl_ops dp_packet_genl_ops[] = {  static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)  {  	int i; -	struct flow_table *table = genl_dereference(dp->table); +	struct flow_table *table = ovsl_dereference(dp->table);  	stats->n_flows = ovs_flow_tbl_count(table); @@ -794,14 +838,25 @@ static struct genl_family dp_flow_genl_family = {  	.name = OVS_FLOW_FAMILY,  	.version = OVS_FLOW_VERSION,  	.maxattr = OVS_FLOW_ATTR_MAX, -	.netnsok = true +	.netnsok = true, +	.parallel_ops = true,  };  static struct genl_multicast_group ovs_dp_flow_multicast_group = {  	.name = OVS_FLOW_MCGROUP  }; -/* Called with genl_lock. */ +static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) +{ +	return NLMSG_ALIGN(sizeof(struct ovs_header)) +		+ nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */ +		+ nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ +		+ nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ +		+ nla_total_size(8) /* OVS_FLOW_ATTR_USED */ +		+ nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ +} + +/* Called with ovs_mutex. 
*/  static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,  				  struct sk_buff *skb, u32 portid,  				  u32 seq, u32 flags, u8 cmd) @@ -815,8 +870,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,  	u8 tcp_flags;  	int err; -	sf_acts = rcu_dereference_protected(flow->sf_acts, -					    lockdep_genl_is_held()); +	sf_acts = ovsl_dereference(flow->sf_acts);  	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);  	if (!ovs_header) @@ -879,25 +933,10 @@ error:  static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)  {  	const struct sw_flow_actions *sf_acts; -	int len; -	sf_acts = rcu_dereference_protected(flow->sf_acts, -					    lockdep_genl_is_held()); +	sf_acts = ovsl_dereference(flow->sf_acts); -	/* OVS_FLOW_ATTR_KEY */ -	len = nla_total_size(FLOW_BUFSIZE); -	/* OVS_FLOW_ATTR_ACTIONS */ -	len += nla_total_size(sf_acts->actions_len); -	/* OVS_FLOW_ATTR_STATS */ -	len += nla_total_size(sizeof(struct ovs_flow_stats)); -	/* OVS_FLOW_ATTR_TCP_FLAGS */ -	len += nla_total_size(1); -	/* OVS_FLOW_ATTR_USED */ -	len += nla_total_size(8); - -	len += NLMSG_ALIGN(sizeof(struct ovs_header)); - -	return genlmsg_new(len, GFP_KERNEL); +	return genlmsg_new(ovs_flow_cmd_msg_size(sf_acts), GFP_KERNEL);  }  static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, @@ -946,12 +985,13 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)  		goto error;  	} +	ovs_lock();  	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);  	error = -ENODEV;  	if (!dp) -		goto error; +		goto err_unlock_ovs; -	table = genl_dereference(dp->table); +	table = ovsl_dereference(dp->table);  	flow = ovs_flow_tbl_lookup(table, &key, key_len);  	if (!flow) {  		struct sw_flow_actions *acts; @@ -959,7 +999,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)  		/* Bail out if we're not allowed to create a new flow. */  		error = -ENOENT;  		if (info->genlhdr->cmd == OVS_FLOW_CMD_SET) -			goto error; +			goto err_unlock_ovs;  		/* Expand table, if necessary, to make room. */  		if (ovs_flow_tbl_need_to_expand(table)) { @@ -969,7 +1009,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)  			if (!IS_ERR(new_table)) {  				rcu_assign_pointer(dp->table, new_table);  				ovs_flow_tbl_deferred_destroy(table); -				table = genl_dereference(dp->table); +				table = ovsl_dereference(dp->table);  			}  		} @@ -977,7 +1017,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)  		flow = ovs_flow_alloc();  		if (IS_ERR(flow)) {  			error = PTR_ERR(flow); -			goto error; +			goto err_unlock_ovs;  		}  		flow->key = key;  		clear_stats(flow); @@ -1010,11 +1050,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)  		error = -EEXIST;  		if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&  		    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) -			goto error; +			goto err_unlock_ovs;  		/* Update actions. 
*/ -		old_acts = rcu_dereference_protected(flow->sf_acts, -						     lockdep_genl_is_held()); +		old_acts = ovsl_dereference(flow->sf_acts);  		acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];  		if (acts_attrs &&  		   (old_acts->actions_len != nla_len(acts_attrs) || @@ -1025,7 +1064,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)  			new_acts = ovs_flow_actions_alloc(acts_attrs);  			error = PTR_ERR(new_acts);  			if (IS_ERR(new_acts)) -				goto error; +				goto err_unlock_ovs;  			rcu_assign_pointer(flow->sf_acts, new_acts);  			ovs_flow_deferred_free_acts(old_acts); @@ -1041,11 +1080,10 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)  			spin_unlock_bh(&flow->lock);  		}  	} +	ovs_unlock();  	if (!IS_ERR(reply)) -		genl_notify(reply, genl_info_net(info), info->snd_portid, -			   ovs_dp_flow_multicast_group.id, info->nlhdr, -			   GFP_KERNEL); +		ovs_notify(reply, info, &ovs_dp_flow_multicast_group);  	else  		netlink_set_err(sock_net(skb->sk)->genl_sock, 0,  				ovs_dp_flow_multicast_group.id, PTR_ERR(reply)); @@ -1053,6 +1091,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)  error_free_flow:  	ovs_flow_free(flow); +err_unlock_ovs: +	ovs_unlock();  error:  	return error;  } @@ -1075,21 +1115,32 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)  	if (err)  		return err; +	ovs_lock();  	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); -	if (!dp) -		return -ENODEV; +	if (!dp) { +		err = -ENODEV; +		goto unlock; +	} -	table = genl_dereference(dp->table); +	table = ovsl_dereference(dp->table);  	flow = ovs_flow_tbl_lookup(table, &key, key_len); -	if (!flow) -		return -ENOENT; +	if (!flow) { +		err = -ENOENT; +		goto unlock; +	}  	reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,  					info->snd_seq, OVS_FLOW_CMD_NEW); -	if (IS_ERR(reply)) -		return PTR_ERR(reply); +	if (IS_ERR(reply)) { +		err = PTR_ERR(reply); +		goto unlock; +	} +	ovs_unlock();  	return genlmsg_reply(reply, info); +unlock: +	ovs_unlock(); +	return err;  }  static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) @@ -1104,25 +1155,33 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)  	int err;  	int key_len; +	ovs_lock();  	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); -	if (!dp) -		return -ENODEV; - -	if (!a[OVS_FLOW_ATTR_KEY]) -		return flush_flows(dp); +	if (!dp) { +		err = -ENODEV; +		goto unlock; +	} +	if (!a[OVS_FLOW_ATTR_KEY]) { +		err = flush_flows(dp); +		goto unlock; +	}  	err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);  	if (err) -		return err; +		goto unlock; -	table = genl_dereference(dp->table); +	table = ovsl_dereference(dp->table);  	flow = ovs_flow_tbl_lookup(table, &key, key_len); -	if (!flow) -		return -ENOENT; +	if (!flow) { +		err = -ENOENT; +		goto unlock; +	}  	reply = ovs_flow_cmd_alloc_info(flow); -	if (!reply) -		return -ENOMEM; +	if (!reply) { +		err = -ENOMEM; +		goto unlock; +	}  	ovs_flow_tbl_remove(table, flow); @@ -1131,10 +1190,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)  	BUG_ON(err < 0);  	ovs_flow_deferred_free(flow); +	ovs_unlock(); -	genl_notify(reply, genl_info_net(info), info->snd_portid, -		    ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); +	ovs_notify(reply, info, &ovs_dp_flow_multicast_group);  	return 0; +unlock: +	ovs_unlock(); +	return err;  }  static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) 
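
Every converted handler in this file now follows the same shape: take ovs_lock(), look up the datapath, and funnel every failure through a single unlock label, since with parallel_ops there is no genl_mutex to hide behind. The recurring skeleton (demo_cmd_doit is illustrative):

	static int demo_cmd_doit(struct sk_buff *skb, struct genl_info *info)
	{
		struct ovs_header *ovs_header = info->userhdr;
		struct datapath *dp;
		int err;

		ovs_lock();
		dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
		if (!dp) {
			err = -ENODEV;
			goto unlock;
		}

		/* ... mutate datapath state under ovs_mutex ... */

		ovs_unlock();
		return 0;

	unlock:
		ovs_unlock();
		return err;
	}
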
@@ -1143,11 +1205,14 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)  	struct datapath *dp;  	struct flow_table *table; +	ovs_lock();  	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); -	if (!dp) +	if (!dp) { +		ovs_unlock();  		return -ENODEV; +	} -	table = genl_dereference(dp->table); +	table = ovsl_dereference(dp->table);  	for (;;) {  		struct sw_flow *flow; @@ -1168,6 +1233,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)  		cb->args[0] = bucket;  		cb->args[1] = obj;  	} +	ovs_unlock();  	return skb->len;  } @@ -1206,13 +1272,24 @@ static struct genl_family dp_datapath_genl_family = {  	.name = OVS_DATAPATH_FAMILY,  	.version = OVS_DATAPATH_VERSION,  	.maxattr = OVS_DP_ATTR_MAX, -	.netnsok = true +	.netnsok = true, +	.parallel_ops = true,  };  static struct genl_multicast_group ovs_dp_datapath_multicast_group = {  	.name = OVS_DATAPATH_MCGROUP  }; +static size_t ovs_dp_cmd_msg_size(void) +{ +	size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header)); + +	msgsize += nla_total_size(IFNAMSIZ); +	msgsize += nla_total_size(sizeof(struct ovs_dp_stats)); + +	return msgsize; +} +  static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,  				u32 portid, u32 seq, u32 flags, u8 cmd)  { @@ -1251,7 +1328,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,  	struct sk_buff *skb;  	int retval; -	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); +	skb = genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);  	if (!skb)  		return ERR_PTR(-ENOMEM); @@ -1263,7 +1340,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,  	return skb;  } -/* Called with genl_mutex and optionally with RTNL lock also. */ +/* Called with ovs_mutex. */  static struct datapath *lookup_datapath(struct net *net,  					struct ovs_header *ovs_header,  					struct nlattr *a[OVS_DP_ATTR_MAX + 1]) @@ -1297,12 +1374,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)  	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])  		goto err; -	rtnl_lock(); +	ovs_lock();  	err = -ENOMEM;  	dp = kzalloc(sizeof(*dp), GFP_KERNEL);  	if (dp == NULL) -		goto err_unlock_rtnl; +		goto err_unlock_ovs;  	ovs_dp_set_net(dp, hold_net(sock_net(skb->sk))); @@ -1353,37 +1430,34 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)  	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);  	list_add_tail(&dp->list_node, &ovs_net->dps); -	rtnl_unlock(); -	genl_notify(reply, genl_info_net(info), info->snd_portid, -		    ovs_dp_datapath_multicast_group.id, info->nlhdr, -		    GFP_KERNEL); +	ovs_unlock(); + +	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);  	return 0;  err_destroy_local_port: -	ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); +	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));  err_destroy_ports_array:  	kfree(dp->ports);  err_destroy_percpu:  	free_percpu(dp->stats_percpu);  err_destroy_table: -	ovs_flow_tbl_destroy(genl_dereference(dp->table)); +	ovs_flow_tbl_destroy(ovsl_dereference(dp->table));  err_free_dp:  	release_net(ovs_dp_get_net(dp));  	kfree(dp); -err_unlock_rtnl: -	rtnl_unlock(); +err_unlock_ovs: +	ovs_unlock();  err:  	return err;  } -/* Called with genl_mutex. */ +/* Called with ovs_mutex. 
*/  static void __dp_destroy(struct datapath *dp)  {  	int i; -	rtnl_lock(); -  	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {  		struct vport *vport;  		struct hlist_node *n; @@ -1394,14 +1468,11 @@ static void __dp_destroy(struct datapath *dp)  	}  	list_del(&dp->list_node); -	ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL)); -	/* rtnl_unlock() will wait until all the references to devices that -	 * are pending unregistration have been dropped.  We do it here to -	 * ensure that any internal devices (which contain DP pointers) are -	 * fully destroyed before freeing the datapath. +	/* OVSP_LOCAL is the datapath's internal port. We need to make sure +	 * that all ports in the datapath are destroyed before freeing it.  	 */ -	rtnl_unlock(); +	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));  	call_rcu(&dp->rcu, destroy_dp_rcu);  } @@ -1412,24 +1483,27 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)  	struct datapath *dp;  	int err; +	ovs_lock();  	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);  	err = PTR_ERR(dp);  	if (IS_ERR(dp)) -		return err; +		goto unlock;  	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,  				      info->snd_seq, OVS_DP_CMD_DEL);  	err = PTR_ERR(reply);  	if (IS_ERR(reply)) -		return err; +		goto unlock;  	__dp_destroy(dp); +	ovs_unlock(); -	genl_notify(reply, genl_info_net(info), info->snd_portid, -		    ovs_dp_datapath_multicast_group.id, info->nlhdr, -		    GFP_KERNEL); +	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);  	return 0; +unlock: +	ovs_unlock(); +	return err;  }  static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) @@ -1438,9 +1512,11 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)  	struct datapath *dp;  	int err; +	ovs_lock();  	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); +	err = PTR_ERR(dp);  	if (IS_ERR(dp)) -		return PTR_ERR(dp); +		goto unlock;  	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,  				      info->snd_seq, OVS_DP_CMD_NEW); @@ -1448,31 +1524,45 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)  		err = PTR_ERR(reply);  		netlink_set_err(sock_net(skb->sk)->genl_sock, 0,  				ovs_dp_datapath_multicast_group.id, err); -		return 0; +		err = 0; +		goto unlock;  	} -	genl_notify(reply, genl_info_net(info), info->snd_portid, -		    ovs_dp_datapath_multicast_group.id, info->nlhdr, -		    GFP_KERNEL); +	ovs_unlock(); +	ovs_notify(reply, info, &ovs_dp_datapath_multicast_group);  	return 0; +unlock: +	ovs_unlock(); +	return err;  }  static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)  {  	struct sk_buff *reply;  	struct datapath *dp; +	int err; +	ovs_lock();  	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs); -	if (IS_ERR(dp)) -		return PTR_ERR(dp); +	if (IS_ERR(dp)) { +		err = PTR_ERR(dp); +		goto unlock; +	}  	reply = ovs_dp_cmd_build_info(dp, info->snd_portid,  				      info->snd_seq, OVS_DP_CMD_NEW); -	if (IS_ERR(reply)) -		return PTR_ERR(reply); +	if (IS_ERR(reply)) { +		err = PTR_ERR(reply); +		goto unlock; +	} +	ovs_unlock();  	return genlmsg_reply(reply, info); + +unlock: +	ovs_unlock(); +	return err;  }  static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -1482,6 +1572,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)  	int skip = cb->args[0];  	int i = 0; +	ovs_lock();  	list_for_each_entry(dp, &ovs_net->dps, list_node) {  		if (i >= skip &&  		    ovs_dp_cmd_fill_info(dp, 
skb, NETLINK_CB(cb->skb).portid, @@ -1490,6 +1581,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)  			break;  		i++;  	} +	ovs_unlock();  	cb->args[0] = i; @@ -1535,14 +1627,15 @@ static struct genl_family dp_vport_genl_family = {  	.name = OVS_VPORT_FAMILY,  	.version = OVS_VPORT_VERSION,  	.maxattr = OVS_VPORT_ATTR_MAX, -	.netnsok = true +	.netnsok = true, +	.parallel_ops = true,  };  struct genl_multicast_group ovs_dp_vport_multicast_group = {  	.name = OVS_VPORT_MCGROUP  }; -/* Called with RTNL lock or RCU read lock. */ +/* Called with ovs_mutex or RCU read lock. */  static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,  				   u32 portid, u32 seq, u32 flags, u8 cmd)  { @@ -1581,7 +1674,7 @@ error:  	return err;  } -/* Called with RTNL lock or RCU read lock. */ +/* Called with ovs_mutex or RCU read lock. */  struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,  					 u32 seq, u8 cmd)  { @@ -1598,7 +1691,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,  	return skb;  } -/* Called with RTNL lock or RCU read lock. */ +/* Called with ovs_mutex or RCU read lock. */  static struct vport *lookup_vport(struct net *net,  				  struct ovs_header *ovs_header,  				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1]) @@ -1624,9 +1717,9 @@ static struct vport *lookup_vport(struct net *net,  		if (!dp)  			return ERR_PTR(-ENODEV); -		vport = ovs_vport_rtnl_rcu(dp, port_no); +		vport = ovs_vport_ovsl_rcu(dp, port_no);  		if (!vport) -			return ERR_PTR(-ENOENT); +			return ERR_PTR(-ENODEV);  		return vport;  	} else  		return ERR_PTR(-EINVAL); @@ -1648,7 +1741,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)  	    !a[OVS_VPORT_ATTR_UPCALL_PID])  		goto exit; -	rtnl_lock(); +	ovs_lock();  	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);  	err = -ENODEV;  	if (!dp) @@ -1661,7 +1754,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)  		if (port_no >= DP_MAX_PORTS)  			goto exit_unlock; -		vport = ovs_vport_rtnl_rcu(dp, port_no); +		vport = ovs_vport_ovsl(dp, port_no);  		err = -EBUSY;  		if (vport)  			goto exit_unlock; @@ -1671,7 +1764,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)  				err = -EFBIG;  				goto exit_unlock;  			} -			vport = ovs_vport_rtnl(dp, port_no); +			vport = ovs_vport_ovsl(dp, port_no);  			if (!vport)  				break;  		} @@ -1697,11 +1790,11 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)  		ovs_dp_detach_port(vport);  		goto exit_unlock;  	} -	genl_notify(reply, genl_info_net(info), info->snd_portid, -		    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); + +	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);  exit_unlock: -	rtnl_unlock(); +	ovs_unlock();  exit:  	return err;  } @@ -1713,7 +1806,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)  	struct vport *vport;  	int err; -	rtnl_lock(); +	ovs_lock();  	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);  	err = PTR_ERR(vport);  	if (IS_ERR(vport)) @@ -1742,8 +1835,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)  				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);  	BUG_ON(err < 0); -	genl_notify(reply, genl_info_net(info), info->snd_portid, -		    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); +	ovs_unlock(); +	ovs_notify(reply, info, &ovs_dp_vport_multicast_group); +	return 0; -	rtnl_unlock(); -	return 0; @@ 
-1751,7 +1845,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)  exit_free:  	kfree_skb(reply);  exit_unlock: -	rtnl_unlock(); +	ovs_unlock();  	return err;  } @@ -1762,7 +1856,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)  	struct vport *vport;  	int err; -	rtnl_lock(); +	ovs_lock();  	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);  	err = PTR_ERR(vport);  	if (IS_ERR(vport)) @@ -1782,11 +1876,10 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)  	err = 0;  	ovs_dp_detach_port(vport); -	genl_notify(reply, genl_info_net(info), info->snd_portid, -		    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); +	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);  exit_unlock: -	rtnl_unlock(); +	ovs_unlock();  	return err;  } @@ -1946,13 +2039,13 @@ static void rehash_flow_table(struct work_struct *work)  	struct datapath *dp;  	struct net *net; -	genl_lock(); +	ovs_lock();  	rtnl_lock();  	for_each_net(net) {  		struct ovs_net *ovs_net = net_generic(net, ovs_net_id);  		list_for_each_entry(dp, &ovs_net->dps, list_node) { -			struct flow_table *old_table = genl_dereference(dp->table); +			struct flow_table *old_table = ovsl_dereference(dp->table);  			struct flow_table *new_table;  			new_table = ovs_flow_tbl_rehash(old_table); @@ -1963,8 +2056,7 @@ static void rehash_flow_table(struct work_struct *work)  		}  	}  	rtnl_unlock(); -	genl_unlock(); - +	ovs_unlock();  	schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);  } @@ -1973,18 +2065,21 @@ static int __net_init ovs_init_net(struct net *net)  	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);  	INIT_LIST_HEAD(&ovs_net->dps); +	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);  	return 0;  }  static void __net_exit ovs_exit_net(struct net *net)  { -	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);  	struct datapath *dp, *dp_next; +	struct ovs_net *ovs_net = net_generic(net, ovs_net_id); -	genl_lock(); +	ovs_lock();  	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)  		__dp_destroy(dp); -	genl_unlock(); +	ovs_unlock(); + +	cancel_work_sync(&ovs_net->dp_notify_work);  }  static struct pernet_operations ovs_net_ops = { diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 031dfbf37c9..16b84069521 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -57,10 +57,9 @@ struct dp_stats_percpu {   * struct datapath - datapath for flow-based packet switching   * @rcu: RCU callback head for deferred destruction.   * @list_node: Element in global 'dps' list. - * @n_flows: Number of flows currently in flow table. - * @table: Current flow table.  Protected by genl_lock and RCU. + * @table: Current flow table.  Protected by ovs_mutex and RCU.   * @ports: Hash table for ports.  %OVSP_LOCAL port always exists.  Protected by - * RTNL and RCU. + * ovs_mutex and RCU.   * @stats_percpu: Per-CPU datapath statistics.   * @net: Reference to net namespace.   
* @@ -86,26 +85,6 @@ struct datapath {  #endif  }; -struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no); - -static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no) -{ -	WARN_ON_ONCE(!rcu_read_lock_held()); -	return ovs_lookup_vport(dp, port_no); -} - -static inline struct vport *ovs_vport_rtnl_rcu(const struct datapath *dp, int port_no) -{ -	WARN_ON_ONCE(!rcu_read_lock_held() && !rtnl_is_locked()); -	return ovs_lookup_vport(dp, port_no); -} - -static inline struct vport *ovs_vport_rtnl(const struct datapath *dp, int port_no) -{ -	ASSERT_RTNL(); -	return ovs_lookup_vport(dp, port_no); -} -  /**   * struct ovs_skb_cb - OVS data in skb CB   * @flow: The flow associated with this packet.  May be %NULL if no flow. @@ -119,7 +98,7 @@ struct ovs_skb_cb {   * struct dp_upcall - metadata to include with a packet to send to userspace   * @cmd: One of %OVS_PACKET_CMD_*.   * @key: Becomes %OVS_PACKET_ATTR_KEY.  Must be nonnull. - * @userdata: If nonnull, its u64 value is extracted and passed to userspace as + * @userdata: If nonnull, its variable-length value is passed to userspace as   * %OVS_PACKET_ATTR_USERDATA.   * @pid: Netlink PID to which packet should be sent.  If @pid is 0 then no   * packet is sent and the packet is accounted in the datapath's @n_lost @@ -132,6 +111,30 @@ struct dp_upcall_info {  	u32 portid;  }; +/** + * struct ovs_net - Per net-namespace data for ovs. + * @dps: List of datapaths to enable dumping them all out. + * Protected by ovs_mutex. + */ +struct ovs_net { +	struct list_head dps; +	struct work_struct dp_notify_work; +}; + +extern int ovs_net_id; +void ovs_lock(void); +void ovs_unlock(void); + +#ifdef CONFIG_LOCKDEP +int lockdep_ovsl_is_held(void); +#else +#define lockdep_ovsl_is_held()	1 +#endif + +#define ASSERT_OVSL()		WARN_ON(unlikely(!lockdep_ovsl_is_held())) +#define ovsl_dereference(p)					\ +	rcu_dereference_protected(p, lockdep_ovsl_is_held()) +  static inline struct net *ovs_dp_get_net(struct datapath *dp)  {  	return read_pnet(&dp->net); @@ -142,6 +145,26 @@ static inline void ovs_dp_set_net(struct datapath *dp, struct net *net)  	write_pnet(&dp->net, net);  } +struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no); + +static inline struct vport *ovs_vport_rcu(const struct datapath *dp, int port_no) +{ +	WARN_ON_ONCE(!rcu_read_lock_held()); +	return ovs_lookup_vport(dp, port_no); +} + +static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no) +{ +	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held()); +	return ovs_lookup_vport(dp, port_no); +} + +static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no) +{ +	ASSERT_OVSL(); +	return ovs_lookup_vport(dp, port_no); +} +  extern struct notifier_block ovs_dp_device_notifier;  extern struct genl_multicast_group ovs_dp_vport_multicast_group; @@ -155,4 +178,5 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,  					 u8 cmd);  int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb); +void ovs_dp_notify_wq(struct work_struct *work);  #endif /* datapath.h */ diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index 5558350e0d3..ef4feec6cd8 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -18,46 +18,78 @@  #include <linux/netdevice.h>  #include <net/genetlink.h> +#include <net/netns/generic.h>  #include "datapath.h"  #include "vport-internal_dev.h"  #include "vport-netdev.h" +static void 
dp_detach_port_notify(struct vport *vport) +{ +	struct sk_buff *notify; +	struct datapath *dp; + +	dp = vport->dp; +	notify = ovs_vport_cmd_build_info(vport, 0, 0, +					  OVS_VPORT_CMD_DEL); +	ovs_dp_detach_port(vport); +	if (IS_ERR(notify)) { +		netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0, +				ovs_dp_vport_multicast_group.id, +				PTR_ERR(notify)); +		return; +	} + +	genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0, +				ovs_dp_vport_multicast_group.id, +				GFP_KERNEL); +} + +void ovs_dp_notify_wq(struct work_struct *work) +{ +	struct ovs_net *ovs_net = container_of(work, struct ovs_net, dp_notify_work); +	struct datapath *dp; + +	ovs_lock(); +	list_for_each_entry(dp, &ovs_net->dps, list_node) { +		int i; + +		for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) { +			struct vport *vport; +			struct hlist_node *n; + +			hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) { +				struct netdev_vport *netdev_vport; + +				if (vport->ops->type != OVS_VPORT_TYPE_NETDEV) +					continue; + +				netdev_vport = netdev_vport_priv(vport); +				if (netdev_vport->dev->reg_state == NETREG_UNREGISTERED || +				    netdev_vport->dev->reg_state == NETREG_UNREGISTERING) +					dp_detach_port_notify(vport); +			} +		} +	} +	ovs_unlock(); +} +  static int dp_device_event(struct notifier_block *unused, unsigned long event,  			   void *ptr)  { +	struct ovs_net *ovs_net;  	struct net_device *dev = ptr; -	struct vport *vport; +	struct vport *vport = NULL; -	if (ovs_is_internal_dev(dev)) -		vport = ovs_internal_dev_get_vport(dev); -	else +	if (!ovs_is_internal_dev(dev))  		vport = ovs_netdev_get_vport(dev);  	if (!vport)  		return NOTIFY_DONE; -	switch (event) { -	case NETDEV_UNREGISTER: -		if (!ovs_is_internal_dev(dev)) { -			struct sk_buff *notify; -			struct datapath *dp = vport->dp; - -			notify = ovs_vport_cmd_build_info(vport, 0, 0, -							  OVS_VPORT_CMD_DEL); -			ovs_dp_detach_port(vport); -			if (IS_ERR(notify)) { -				netlink_set_err(ovs_dp_get_net(dp)->genl_sock, 0, -						ovs_dp_vport_multicast_group.id, -						PTR_ERR(notify)); -				break; -			} - -			genlmsg_multicast_netns(ovs_dp_get_net(dp), notify, 0, -						ovs_dp_vport_multicast_group.id, -						GFP_KERNEL); -		} -		break; +	if (event == NETDEV_UNREGISTER) { +		ovs_net = net_generic(dev_net(dev), ovs_net_id); +		queue_work(system_wq, &ovs_net->dp_notify_work);  	}  	return NOTIFY_DONE; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 67a2b783fe7..b15321a2228 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -211,7 +211,7 @@ struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions)  		return ERR_PTR(-ENOMEM);  	sfa->actions_len = actions_len; -	memcpy(sfa->actions, nla_data(actions), actions_len); +	nla_memcpy(sfa->actions, actions, actions_len);  	return sfa;  } @@ -466,7 +466,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)  	proto = *(__be16 *) skb->data;  	__skb_pull(skb, sizeof(__be16)); -	if (ntohs(proto) >= 1536) +	if (ntohs(proto) >= ETH_P_802_3_MIN)  		return proto;  	if (skb->len < sizeof(struct llc_snap_hdr)) @@ -483,7 +483,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)  	__skb_pull(skb, sizeof(struct llc_snap_hdr)); -	if (ntohs(llc->ethertype) >= 1536) +	if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN)  		return llc->ethertype;  	return htons(ETH_P_802_2); @@ -1038,7 +1038,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,  	if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {  		swkey->eth.type = 
nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); -		if (ntohs(swkey->eth.type) < 1536) +		if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN)  			return -EINVAL;  		attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);  	} else { diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index a7bb60ff3b5..0875fde65b9 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -138,27 +138,6 @@ int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,  void ovs_flow_used(struct sw_flow *, struct sk_buff *);  u64 ovs_flow_used_time(unsigned long flow_jiffies); -/* Upper bound on the length of a nlattr-formatted flow key.  The longest - * nlattr-formatted flow key would be: - * - *                         struct  pad  nl hdr  total - *                         ------  ---  ------  ----- - *  OVS_KEY_ATTR_PRIORITY      4    --     4      8 - *  OVS_KEY_ATTR_IN_PORT       4    --     4      8 - *  OVS_KEY_ATTR_SKB_MARK      4    --     4      8 - *  OVS_KEY_ATTR_ETHERNET     12    --     4     16 - *  OVS_KEY_ATTR_ETHERTYPE     2     2     4      8  (outer VLAN ethertype) - *  OVS_KEY_ATTR_8021Q         4    --     4      8 - *  OVS_KEY_ATTR_ENCAP         0    --     4      4  (VLAN encapsulation) - *  OVS_KEY_ATTR_ETHERTYPE     2     2     4      8  (inner VLAN ethertype) - *  OVS_KEY_ATTR_IPV6         40    --     4     44 - *  OVS_KEY_ATTR_ICMPV6        2     2     4      8 - *  OVS_KEY_ATTR_ND           28    --     4     32 - *  ------------------------------------------------- - *  total                                       152 - */ -#define FLOW_BUFSIZE 152 -  int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);  int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,  		      const struct nlattr *); diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 0531de6c7a4..84e0a037918 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -63,16 +63,6 @@ static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netde  	return stats;  } -static int internal_dev_mac_addr(struct net_device *dev, void *p) -{ -	struct sockaddr *addr = p; - -	if (!is_valid_ether_addr(addr->sa_data)) -		return -EADDRNOTAVAIL; -	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); -	return 0; -} -  /* Called with rcu_read_lock_bh. 
*/  static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)  { @@ -126,7 +116,7 @@ static const struct net_device_ops internal_dev_netdev_ops = {  	.ndo_open = internal_dev_open,  	.ndo_stop = internal_dev_stop,  	.ndo_start_xmit = internal_dev_xmit, -	.ndo_set_mac_address = internal_dev_mac_addr, +	.ndo_set_mac_address = eth_mac_addr,  	.ndo_change_mtu = internal_dev_change_mtu,  	.ndo_get_stats64 = internal_dev_get_stats,  }; @@ -138,6 +128,7 @@ static void do_setup(struct net_device *netdev)  	netdev->netdev_ops = &internal_dev_netdev_ops;  	netdev->priv_flags &= ~IFF_TX_SKB_SHARING; +	netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;  	netdev->destructor = internal_dev_destructor;  	SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops);  	netdev->tx_queue_len = 0; @@ -146,7 +137,7 @@ static void do_setup(struct net_device *netdev)  			   NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO;  	netdev->vlan_features = netdev->features; -	netdev->features |= NETIF_F_HW_VLAN_TX; +	netdev->features |= NETIF_F_HW_VLAN_CTAG_TX;  	netdev->hw_features = netdev->features & ~NETIF_F_LLTX;  	eth_hw_addr_random(netdev);  } @@ -182,16 +173,19 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)  	if (vport->port_no == OVSP_LOCAL)  		netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL; +	rtnl_lock();  	err = register_netdevice(netdev_vport->dev);  	if (err)  		goto error_free_netdev;  	dev_set_promiscuity(netdev_vport->dev, 1); +	rtnl_unlock();  	netif_start_queue(netdev_vport->dev);  	return vport;  error_free_netdev: +	rtnl_unlock();  	free_netdev(netdev_vport->dev);  error_free_vport:  	ovs_vport_free(vport); @@ -204,10 +198,13 @@ static void internal_dev_destroy(struct vport *vport)  	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);  	netif_stop_queue(netdev_vport->dev); +	rtnl_lock();  	dev_set_promiscuity(netdev_vport->dev, -1);  	/* unregister_netdevice() waits for an RCU grace period. 
*/  	unregister_netdevice(netdev_vport->dev); + +	rtnl_unlock();  }  static int internal_dev_recv(struct vport *vport, struct sk_buff *skb) @@ -235,7 +232,6 @@ const struct vport_ops ovs_internal_vport_ops = {  	.create		= internal_dev_create,  	.destroy	= internal_dev_destroy,  	.get_name	= ovs_netdev_get_name, -	.get_ifindex	= ovs_netdev_get_ifindex,  	.send		= internal_dev_recv,  }; diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 2130d61c384..4f01c6d2ffa 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -100,16 +100,20 @@ static struct vport *netdev_create(const struct vport_parms *parms)  		goto error_put;  	} +	rtnl_lock();  	err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,  					 vport);  	if (err) -		goto error_put; +		goto error_unlock;  	dev_set_promiscuity(netdev_vport->dev, 1);  	netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH; +	rtnl_unlock();  	return vport; +error_unlock: +	rtnl_unlock();  error_put:  	dev_put(netdev_vport->dev);  error_free_vport: @@ -131,9 +135,11 @@ static void netdev_destroy(struct vport *vport)  {  	struct netdev_vport *netdev_vport = netdev_vport_priv(vport); +	rtnl_lock();  	netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;  	netdev_rx_handler_unregister(netdev_vport->dev);  	dev_set_promiscuity(netdev_vport->dev, -1); +	rtnl_unlock();  	call_rcu(&netdev_vport->rcu, free_port_rcu);  } @@ -144,12 +150,6 @@ const char *ovs_netdev_get_name(const struct vport *vport)  	return netdev_vport->dev->name;  } -int ovs_netdev_get_ifindex(const struct vport *vport) -{ -	const struct netdev_vport *netdev_vport = netdev_vport_priv(vport); -	return netdev_vport->dev->ifindex; -} -  static unsigned int packet_length(const struct sk_buff *skb)  {  	unsigned int length = skb->len - ETH_HLEN; @@ -200,6 +200,5 @@ const struct vport_ops ovs_netdev_vport_ops = {  	.create		= netdev_create,  	.destroy	= netdev_destroy,  	.get_name	= ovs_netdev_get_name, -	.get_ifindex	= ovs_netdev_get_ifindex,  	.send		= netdev_send,  }; diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h index 6478079b341..a3cb3a32cd7 100644 --- a/net/openvswitch/vport-netdev.h +++ b/net/openvswitch/vport-netdev.h @@ -40,6 +40,5 @@ netdev_vport_priv(const struct vport *vport)  const char *ovs_netdev_get_name(const struct vport *);  const char *ovs_netdev_get_config(const struct vport *); -int ovs_netdev_get_ifindex(const struct vport *);  #endif /* vport_netdev.h */ diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index f6b8132ce4c..720623190ea 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -40,7 +40,7 @@ static const struct vport_ops *vport_ops_list[] = {  	&ovs_internal_vport_ops,  }; -/* Protected by RCU read lock for reading, RTNL lock for writing. */ +/* Protected by RCU read lock for reading, ovs_mutex for writing. */  static struct hlist_head *dev_table;  #define VPORT_HASH_BUCKETS 1024 @@ -80,7 +80,7 @@ static struct hlist_head *hash_bucket(struct net *net, const char *name)   *   * @name: name of port to find   * - * Must be called with RTNL or RCU read lock. + * Must be called with ovs or RCU read lock.   
*/  struct vport *ovs_vport_locate(struct net *net, const char *name)  { @@ -128,7 +128,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,  	vport->ops = ops;  	INIT_HLIST_NODE(&vport->dp_hash_node); -	vport->percpu_stats = alloc_percpu(struct vport_percpu_stats); +	vport->percpu_stats = alloc_percpu(struct pcpu_tstats);  	if (!vport->percpu_stats) {  		kfree(vport);  		return ERR_PTR(-ENOMEM); @@ -161,7 +161,7 @@ void ovs_vport_free(struct vport *vport)   * @parms: Information about new vport.   *   * Creates a new vport with the specified configuration (which is dependent on - * device type).  RTNL lock must be held. + * device type).  ovs_mutex must be held.   */  struct vport *ovs_vport_add(const struct vport_parms *parms)  { @@ -169,8 +169,6 @@ struct vport *ovs_vport_add(const struct vport_parms *parms)  	int err = 0;  	int i; -	ASSERT_RTNL(); -  	for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {  		if (vport_ops_list[i]->type == parms->type) {  			struct hlist_head *bucket; @@ -201,12 +199,10 @@ out:   * @port: New configuration.   *   * Modifies an existing device with the specified configuration (which is - * dependent on device type).  RTNL lock must be held. + * dependent on device type).  ovs_mutex must be held.   */  int ovs_vport_set_options(struct vport *vport, struct nlattr *options)  { -	ASSERT_RTNL(); -  	if (!vport->ops->set_options)  		return -EOPNOTSUPP;  	return vport->ops->set_options(vport, options); @@ -218,11 +214,11 @@ int ovs_vport_set_options(struct vport *vport, struct nlattr *options)   * @vport: vport to delete.   *   * Detaches @vport from its datapath and destroys it.  It is possible to fail - * for reasons such as lack of memory.  RTNL lock must be held. + * for reasons such as lack of memory.  ovs_mutex must be held.   */  void ovs_vport_del(struct vport *vport)  { -	ASSERT_RTNL(); +	ASSERT_OVSL();  	hlist_del_rcu(&vport->hash_node); @@ -237,7 +233,7 @@ void ovs_vport_del(struct vport *vport)   *   * Retrieves transmit, receive, and error stats for the given device.   * - * Must be called with RTNL lock or rcu_read_lock. + * Must be called with ovs_mutex or rcu_read_lock.   */  void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)  { @@ -264,16 +260,16 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)  	spin_unlock_bh(&vport->stats_lock);  	for_each_possible_cpu(i) { -		const struct vport_percpu_stats *percpu_stats; -		struct vport_percpu_stats local_stats; +		const struct pcpu_tstats *percpu_stats; +		struct pcpu_tstats local_stats;  		unsigned int start;  		percpu_stats = per_cpu_ptr(vport->percpu_stats, i);  		do { -			start = u64_stats_fetch_begin_bh(&percpu_stats->sync); +			start = u64_stats_fetch_begin_bh(&percpu_stats->syncp);  			local_stats = *percpu_stats; -		} while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start)); +		} while (u64_stats_fetch_retry_bh(&percpu_stats->syncp, start));  		stats->rx_bytes		+= local_stats.rx_bytes;  		stats->rx_packets	+= local_stats.rx_packets; @@ -296,22 +292,24 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)   * negative error code if a real error occurred.  If an error occurs, @skb is   * left unmodified.   * - * Must be called with RTNL lock or rcu_read_lock. + * Must be called with ovs_mutex or rcu_read_lock.   
*/  int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)  {  	struct nlattr *nla; +	int err; + +	if (!vport->ops->get_options) +		return 0;  	nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS);  	if (!nla)  		return -EMSGSIZE; -	if (vport->ops->get_options) { -		int err = vport->ops->get_options(vport, skb); -		if (err) { -			nla_nest_cancel(skb, nla); -			return err; -		} +	err = vport->ops->get_options(vport, skb); +	if (err) { +		nla_nest_cancel(skb, nla); +		return err;  	}  	nla_nest_end(skb, nla); @@ -329,13 +327,13 @@ int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)   */  void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)  { -	struct vport_percpu_stats *stats; +	struct pcpu_tstats *stats;  	stats = this_cpu_ptr(vport->percpu_stats); -	u64_stats_update_begin(&stats->sync); +	u64_stats_update_begin(&stats->syncp);  	stats->rx_packets++;  	stats->rx_bytes += skb->len; -	u64_stats_update_end(&stats->sync); +	u64_stats_update_end(&stats->syncp);  	ovs_dp_process_received_packet(vport, skb);  } @@ -346,7 +344,7 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)   * @vport: vport on which to send the packet   * @skb: skb to send   * - * Sends the given packet and returns the length of data sent.  Either RTNL + * Sends the given packet and returns the length of data sent.  Either ovs   * lock or rcu_read_lock must be held.   */  int ovs_vport_send(struct vport *vport, struct sk_buff *skb) @@ -354,14 +352,14 @@ int ovs_vport_send(struct vport *vport, struct sk_buff *skb)  	int sent = vport->ops->send(vport, skb);  	if (likely(sent)) { -		struct vport_percpu_stats *stats; +		struct pcpu_tstats *stats;  		stats = this_cpu_ptr(vport->percpu_stats); -		u64_stats_update_begin(&stats->sync); +		u64_stats_update_begin(&stats->syncp);  		stats->tx_packets++;  		stats->tx_bytes += sent; -		u64_stats_update_end(&stats->sync); +		u64_stats_update_end(&stats->syncp);  	}  	return sent;  } diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 3f7961ea3c5..68a377bc084 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -19,6 +19,7 @@  #ifndef VPORT_H  #define VPORT_H 1 +#include <linux/if_tunnel.h>  #include <linux/list.h>  #include <linux/netlink.h>  #include <linux/openvswitch.h> @@ -50,14 +51,6 @@ int ovs_vport_send(struct vport *, struct sk_buff *);  /* The following definitions are for implementers of vport devices: */ -struct vport_percpu_stats { -	u64 rx_bytes; -	u64 rx_packets; -	u64 tx_bytes; -	u64 tx_packets; -	struct u64_stats_sync sync; -}; -  struct vport_err_stats {  	u64 rx_dropped;  	u64 rx_errors; @@ -68,10 +61,10 @@ struct vport_err_stats {  /**   * struct vport - one port within a datapath   * @rcu: RCU callback head for deferred destruction. - * @port_no: Index into @dp's @ports array.   * @dp: Datapath to which this port belongs.   * @upcall_portid: The Netlink port to use for packets received on this port that   * miss the flow table. + * @port_no: Index into @dp's @ports array.   * @hash_node: Element in @dev_table hash table in vport.c.   * @dp_hash_node: Element in @datapath->ports hash table in datapath.c.   * @ops: Class structure. 
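
Replacing the private vport_percpu_stats with the generic struct pcpu_tstats (the reason for the linux/if_tunnel.h include added above) keeps the usual per-CPU u64_stats discipline: the hot path bumps counters on the local CPU inside a u64_stats_update_begin()/u64_stats_update_end() pair, and a stats reader retries each CPU's snapshot until the sequence count is stable, which matters on 32-bit SMP where 64-bit loads are not atomic. A self-contained sketch of that pattern, with hypothetical example_* helpers standing in for the ovs_vport_receive()/ovs_vport_get_stats() code in this patch:

#include <linux/if_tunnel.h>		/* struct pcpu_tstats in this kernel */
#include <linux/percpu.h>
#include <linux/u64_stats_sync.h>

/* Writer side: runs on the local CPU (e.g. from softirq), lock-free. */
static void example_rx_account(struct pcpu_tstats __percpu *tstats,
			       unsigned int len)
{
	struct pcpu_tstats *stats = this_cpu_ptr(tstats);

	u64_stats_update_begin(&stats->syncp);
	stats->rx_packets++;
	stats->rx_bytes += len;
	u64_stats_update_end(&stats->syncp);
}

/* Reader side: sums all CPUs, retrying any CPU whose sequence count
 * changed while the counters were being copied. */
static u64 example_total_rx_bytes(struct pcpu_tstats __percpu *tstats)
{
	u64 total = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		const struct pcpu_tstats *stats = per_cpu_ptr(tstats, cpu);
		unsigned int start;
		u64 bytes;

		do {
			start = u64_stats_fetch_begin_bh(&stats->syncp);
			bytes = stats->rx_bytes;
		} while (u64_stats_fetch_retry_bh(&stats->syncp, start));

		total += bytes;
	}
	return total;
}
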
@@ -81,15 +74,15 @@ struct vport_err_stats {   */  struct vport {  	struct rcu_head rcu; -	u16 port_no;  	struct datapath	*dp;  	u32 upcall_portid; +	u16 port_no;  	struct hlist_node hash_node;  	struct hlist_node dp_hash_node;  	const struct vport_ops *ops; -	struct vport_percpu_stats __percpu *percpu_stats; +	struct pcpu_tstats __percpu *percpu_stats;  	spinlock_t stats_lock;  	struct vport_err_stats err_stats; @@ -131,24 +124,22 @@ struct vport_parms {   * have any configuration.   * @get_name: Get the device's name.   * @get_config: Get the device's configuration. - * @get_ifindex: Get the system interface index associated with the device.   * May be null if the device does not have an ifindex.   * @send: Send a packet on the device.  Returns the length of the packet sent.   */  struct vport_ops {  	enum ovs_vport_type type; -	/* Called with RTNL lock. */ +	/* Called with ovs_mutex. */  	struct vport *(*create)(const struct vport_parms *);  	void (*destroy)(struct vport *);  	int (*set_options)(struct vport *, struct nlattr *);  	int (*get_options)(const struct vport *, struct sk_buff *); -	/* Called with rcu_read_lock or RTNL lock. */ +	/* Called with rcu_read_lock or ovs_mutex. */  	const char *(*get_name)(const struct vport *);  	void (*get_config)(const struct vport *, void *); -	int (*get_ifindex)(const struct vport *);  	int (*send)(struct vport *, struct sk_buff *);  }; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1d6793dbfba..dd5cd49b0e0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -158,10 +158,16 @@ struct packet_mreq_max {  	unsigned char	mr_address[MAX_ADDR_LEN];  }; +union tpacket_uhdr { +	struct tpacket_hdr  *h1; +	struct tpacket2_hdr *h2; +	struct tpacket3_hdr *h3; +	void *raw; +}; +  static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,  		int closing, int tx_ring); -  #define V3_ALIGNMENT	(8)  #define BLK_HDR_LEN	(ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT)) @@ -181,6 +187,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,  struct packet_sock;  static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); +static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, +		       struct packet_type *pt, struct net_device *orig_dev);  static void *packet_previous_frame(struct packet_sock *po,  		struct packet_ring_buffer *rb, @@ -288,11 +296,7 @@ static inline __pure struct page *pgv_to_page(void *addr)  static void __packet_set_status(struct packet_sock *po, void *frame, int status)  { -	union { -		struct tpacket_hdr *h1; -		struct tpacket2_hdr *h2; -		void *raw; -	} h; +	union tpacket_uhdr h;  	h.raw = frame;  	switch (po->tp_version) { @@ -315,11 +319,7 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)  static int __packet_get_status(struct packet_sock *po, void *frame)  { -	union { -		struct tpacket_hdr *h1; -		struct tpacket2_hdr *h2; -		void *raw; -	} h; +	union tpacket_uhdr h;  	smp_rmb(); @@ -339,17 +339,66 @@ static int __packet_get_status(struct packet_sock *po, void *frame)  	}  } +static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts, +				   unsigned int flags) +{ +	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); + +	if (shhwtstamps) { +		if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) && +		    ktime_to_timespec_cond(shhwtstamps->syststamp, ts)) +			return TP_STATUS_TS_SYS_HARDWARE; +		if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) && +		    ktime_to_timespec_cond(shhwtstamps->hwtstamp, 
ts)) +			return TP_STATUS_TS_RAW_HARDWARE; +	} + +	if (ktime_to_timespec_cond(skb->tstamp, ts)) +		return TP_STATUS_TS_SOFTWARE; + +	return 0; +} + +static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame, +				    struct sk_buff *skb) +{ +	union tpacket_uhdr h; +	struct timespec ts; +	__u32 ts_status; + +	if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) +		return 0; + +	h.raw = frame; +	switch (po->tp_version) { +	case TPACKET_V1: +		h.h1->tp_sec = ts.tv_sec; +		h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC; +		break; +	case TPACKET_V2: +		h.h2->tp_sec = ts.tv_sec; +		h.h2->tp_nsec = ts.tv_nsec; +		break; +	case TPACKET_V3: +	default: +		WARN(1, "TPACKET version not supported.\n"); +		BUG(); +	} + +	/* one flush is safe, as both fields always lie on the same cacheline */ +	flush_dcache_page(pgv_to_page(&h.h1->tp_sec)); +	smp_wmb(); + +	return ts_status; +} +  static void *packet_lookup_frame(struct packet_sock *po,  		struct packet_ring_buffer *rb,  		unsigned int position,  		int status)  {  	unsigned int pg_vec_pos, frame_offset; -	union { -		struct tpacket_hdr *h1; -		struct tpacket2_hdr *h2; -		void *raw; -	} h; +	union tpacket_uhdr h;  	pg_vec_pos = position / rb->frames_per_block;  	frame_offset = position % rb->frames_per_block; @@ -479,7 +528,7 @@ static void init_prb_bdqc(struct packet_sock *po,  	p1->hdrlen = po->tp_hdrlen;  	p1->version = po->tp_version;  	p1->last_kactive_blk_num = 0; -	po->stats_u.stats3.tp_freeze_q_cnt = 0; +	po->stats.stats3.tp_freeze_q_cnt = 0;  	if (req_u->req3.tp_retire_blk_tov)  		p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;  	else @@ -647,7 +696,7 @@ static void prb_close_block(struct tpacket_kbdq_core *pkc1,  	struct tpacket3_hdr *last_pkt;  	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; -	if (po->stats.tp_drops) +	if (po->stats.stats3.tp_drops)  		status |= TP_STATUS_LOSING;  	last_pkt = (struct tpacket3_hdr *)pkc1->prev; @@ -752,7 +801,7 @@ static void prb_freeze_queue(struct tpacket_kbdq_core *pkc,  				  struct packet_sock *po)  {  	pkc->reset_pending_on_curr_blk = 1; -	po->stats_u.stats3.tp_freeze_q_cnt++; +	po->stats.stats3.tp_freeze_q_cnt++;  }  #define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT)) @@ -973,11 +1022,11 @@ static void *packet_current_rx_frame(struct packet_sock *po,  static void *prb_lookup_block(struct packet_sock *po,  				     struct packet_ring_buffer *rb, -				     unsigned int previous, +				     unsigned int idx,  				     int status)  {  	struct tpacket_kbdq_core *pkc  = GET_PBDQC_FROM_RB(rb); -	struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous); +	struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);  	if (status != BLOCK_STATUS(pbd))  		return NULL; @@ -1041,6 +1090,29 @@ static void packet_increment_head(struct packet_ring_buffer *buff)  	buff->head = buff->head != buff->frame_max ? 
buff->head+1 : 0;  } +static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) +{ +	struct sock *sk = &po->sk; +	bool has_room; + +	if (po->prot_hook.func != tpacket_rcv) +		return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize) +			<= sk->sk_rcvbuf; + +	spin_lock(&sk->sk_receive_queue.lock); +	if (po->tp_version == TPACKET_V3) +		has_room = prb_lookup_block(po, &po->rx_ring, +					    po->rx_ring.prb_bdqc.kactive_blk_num, +					    TP_STATUS_KERNEL); +	else +		has_room = packet_lookup_frame(po, &po->rx_ring, +					       po->rx_ring.head, +					       TP_STATUS_KERNEL); +	spin_unlock(&sk->sk_receive_queue.lock); + +	return has_room; +} +  static void packet_sock_destruct(struct sock *sk)  {  	skb_queue_purge(&sk->sk_error_queue); @@ -1066,16 +1138,16 @@ static int fanout_rr_next(struct packet_fanout *f, unsigned int num)  	return x;  } -static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +static unsigned int fanout_demux_hash(struct packet_fanout *f, +				      struct sk_buff *skb, +				      unsigned int num)  { -	u32 idx, hash = skb->rxhash; - -	idx = ((u64)hash * num) >> 32; - -	return f->arr[idx]; +	return (((u64)skb->rxhash) * num) >> 32;  } -static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +static unsigned int fanout_demux_lb(struct packet_fanout *f, +				    struct sk_buff *skb, +				    unsigned int num)  {  	int cur, old; @@ -1083,14 +1155,40 @@ static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb  	while ((old = atomic_cmpxchg(&f->rr_cur, cur,  				     fanout_rr_next(f, num))) != cur)  		cur = old; -	return f->arr[cur]; +	return cur;  } -static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) +static unsigned int fanout_demux_cpu(struct packet_fanout *f, +				     struct sk_buff *skb, +				     unsigned int num)  { -	unsigned int cpu = smp_processor_id(); +	return smp_processor_id() % num; +} -	return f->arr[cpu % num]; +static unsigned int fanout_demux_rollover(struct packet_fanout *f, +					  struct sk_buff *skb, +					  unsigned int idx, unsigned int skip, +					  unsigned int num) +{ +	unsigned int i, j; + +	i = j = min_t(int, f->next[idx], num - 1); +	do { +		if (i != skip && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) { +			if (i != j) +				f->next[idx] = i; +			return i; +		} +		if (++i == num) +			i = 0; +	} while (i != j); + +	return idx; +} + +static bool fanout_has_flag(struct packet_fanout *f, u16 flag) +{ +	return f->flags & (flag >> 8);  }  static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, @@ -1099,7 +1197,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,  	struct packet_fanout *f = pt->af_packet_priv;  	unsigned int num = f->num_members;  	struct packet_sock *po; -	struct sock *sk; +	unsigned int idx;  	if (!net_eq(dev_net(dev), read_pnet(&f->net)) ||  	    !num) { @@ -1110,23 +1208,31 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,  	switch (f->type) {  	case PACKET_FANOUT_HASH:  	default: -		if (f->defrag) { +		if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {  			skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET);  			if (!skb)  				return 0;  		}  		skb_get_rxhash(skb); -		sk = fanout_demux_hash(f, skb, num); +		idx = fanout_demux_hash(f, skb, num);  		break;  	case PACKET_FANOUT_LB: -		sk = fanout_demux_lb(f, skb, num); +		idx = fanout_demux_lb(f, skb, num);  		break;  	case 
PACKET_FANOUT_CPU: -		sk = fanout_demux_cpu(f, skb, num); +		idx = fanout_demux_cpu(f, skb, num); +		break; +	case PACKET_FANOUT_ROLLOVER: +		idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num);  		break;  	} -	po = pkt_sk(sk); +	po = pkt_sk(f->arr[idx]); +	if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER) && +	    unlikely(!packet_rcv_has_room(po, skb))) { +		idx = fanout_demux_rollover(f, skb, idx, idx, num); +		po = pkt_sk(f->arr[idx]); +	}  	return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);  } @@ -1175,10 +1281,13 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)  	struct packet_sock *po = pkt_sk(sk);  	struct packet_fanout *f, *match;  	u8 type = type_flags & 0xff; -	u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0; +	u8 flags = type_flags >> 8;  	int err;  	switch (type) { +	case PACKET_FANOUT_ROLLOVER: +		if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER) +			return -EINVAL;  	case PACKET_FANOUT_HASH:  	case PACKET_FANOUT_LB:  	case PACKET_FANOUT_CPU: @@ -1203,7 +1312,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)  		}  	}  	err = -EINVAL; -	if (match && match->defrag != defrag) +	if (match && match->flags != flags)  		goto out;  	if (!match) {  		err = -ENOMEM; @@ -1213,7 +1322,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)  		write_pnet(&match->net, sock_net(sk));  		match->id = id;  		match->type = type; -		match->defrag = defrag; +		match->flags = flags;  		atomic_set(&match->rr_cur, 0);  		INIT_LIST_HEAD(&match->list);  		spin_lock_init(&match->lock); @@ -1443,13 +1552,14 @@ retry:  	skb->dev = dev;  	skb->priority = sk->sk_priority;  	skb->mark = sk->sk_mark; -	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); -	if (err < 0) -		goto out_unlock; + +	sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);  	if (unlikely(extra_len == 4))  		skb->no_fcs = 1; +	skb_probe_transport_header(skb, 0); +  	dev_queue_xmit(skb);  	rcu_read_unlock();  	return len; @@ -1577,7 +1687,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,  	nf_reset(skb);  	spin_lock(&sk->sk_receive_queue.lock); -	po->stats.tp_packets++; +	po->stats.stats1.tp_packets++;  	skb->dropcount = atomic_read(&sk->sk_drops);  	__skb_queue_tail(&sk->sk_receive_queue, skb);  	spin_unlock(&sk->sk_receive_queue.lock); @@ -1586,7 +1696,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,  drop_n_acct:  	spin_lock(&sk->sk_receive_queue.lock); -	po->stats.tp_drops++; +	po->stats.stats1.tp_drops++;  	atomic_inc(&sk->sk_drops);  	spin_unlock(&sk->sk_receive_queue.lock); @@ -1606,21 +1716,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  	struct sock *sk;  	struct packet_sock *po;  	struct sockaddr_ll *sll; -	union { -		struct tpacket_hdr *h1; -		struct tpacket2_hdr *h2; -		struct tpacket3_hdr *h3; -		void *raw; -	} h; +	union tpacket_uhdr h;  	u8 *skb_head = skb->data;  	int skb_len = skb->len;  	unsigned int snaplen, res;  	unsigned long status = TP_STATUS_USER;  	unsigned short macoff, netoff, hdrlen;  	struct sk_buff *copy_skb = NULL; -	struct timeval tv;  	struct timespec ts; -	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); +	__u32 ts_status;  	if (skb->pkt_type == PACKET_LOOPBACK)  		goto drop; @@ -1692,10 +1796,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  	 * Anyways, moving it for V1/V2 only as V3 doesn't need this  	 * at packet level.  	 
*/ -		if (po->stats.tp_drops) +		if (po->stats.stats1.tp_drops)  			status |= TP_STATUS_LOSING;  	} -	po->stats.tp_packets++; +	po->stats.stats1.tp_packets++;  	if (copy_skb) {  		status |= TP_STATUS_COPY;  		__skb_queue_tail(&sk->sk_receive_queue, copy_skb); @@ -1704,24 +1808,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  	skb_copy_bits(skb, 0, h.raw + macoff, snaplen); +	if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) +		getnstimeofday(&ts); + +	status |= ts_status; +  	switch (po->tp_version) {  	case TPACKET_V1:  		h.h1->tp_len = skb->len;  		h.h1->tp_snaplen = snaplen;  		h.h1->tp_mac = macoff;  		h.h1->tp_net = netoff; -		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) -				&& shhwtstamps->syststamp.tv64) -			tv = ktime_to_timeval(shhwtstamps->syststamp); -		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) -				&& shhwtstamps->hwtstamp.tv64) -			tv = ktime_to_timeval(shhwtstamps->hwtstamp); -		else if (skb->tstamp.tv64) -			tv = ktime_to_timeval(skb->tstamp); -		else -			do_gettimeofday(&tv); -		h.h1->tp_sec = tv.tv_sec; -		h.h1->tp_usec = tv.tv_usec; +		h.h1->tp_sec = ts.tv_sec; +		h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;  		hdrlen = sizeof(*h.h1);  		break;  	case TPACKET_V2: @@ -1729,16 +1828,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  		h.h2->tp_snaplen = snaplen;  		h.h2->tp_mac = macoff;  		h.h2->tp_net = netoff; -		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) -				&& shhwtstamps->syststamp.tv64) -			ts = ktime_to_timespec(shhwtstamps->syststamp); -		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) -				&& shhwtstamps->hwtstamp.tv64) -			ts = ktime_to_timespec(shhwtstamps->hwtstamp); -		else if (skb->tstamp.tv64) -			ts = ktime_to_timespec(skb->tstamp); -		else -			getnstimeofday(&ts);  		h.h2->tp_sec = ts.tv_sec;  		h.h2->tp_nsec = ts.tv_nsec;  		if (vlan_tx_tag_present(skb)) { @@ -1759,16 +1848,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  		h.h3->tp_snaplen = snaplen;  		h.h3->tp_mac = macoff;  		h.h3->tp_net = netoff; -		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE) -				&& shhwtstamps->syststamp.tv64) -			ts = ktime_to_timespec(shhwtstamps->syststamp); -		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE) -				&& shhwtstamps->hwtstamp.tv64) -			ts = ktime_to_timespec(shhwtstamps->hwtstamp); -		else if (skb->tstamp.tv64) -			ts = ktime_to_timespec(skb->tstamp); -		else -			getnstimeofday(&ts);  		h.h3->tp_sec  = ts.tv_sec;  		h.h3->tp_nsec = ts.tv_nsec;  		hdrlen = sizeof(*h.h3); @@ -1819,7 +1898,7 @@ drop:  	return 0;  ring_is_full: -	po->stats.tp_drops++; +	po->stats.stats1.tp_drops++;  	spin_unlock(&sk->sk_receive_queue.lock);  	sk->sk_data_ready(sk, 0); @@ -1833,10 +1912,14 @@ static void tpacket_destruct_skb(struct sk_buff *skb)  	void *ph;  	if (likely(po->tx_ring.pg_vec)) { +		__u32 ts; +  		ph = skb_shinfo(skb)->destructor_arg;  		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);  		atomic_dec(&po->tx_ring.pending); -		__packet_set_status(po, ph, TP_STATUS_AVAILABLE); + +		ts = __packet_set_timestamp(po, ph, skb); +		__packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);  	}  	sock_wfree(skb); @@ -1846,11 +1929,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,  		void *frame, struct net_device *dev, int size_max,  		__be16 proto, unsigned char *addr, int hlen)  { -	union { -		struct tpacket_hdr *h1; -		struct tpacket2_hdr *h2; -		void *raw; -	} ph; +	union tpacket_uhdr ph;  	int to_write, offset, 
len, tp_len, nr_frags, len_max;  	struct socket *sock = po->sk.sk_socket;  	struct page *page; @@ -1863,6 +1942,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,  	skb->dev = dev;  	skb->priority = po->sk.sk_priority;  	skb->mark = po->sk.sk_mark; +	sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags);  	skb_shinfo(skb)->destructor_arg = ph.raw;  	switch (po->tp_version) { @@ -1880,6 +1960,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,  	skb_reserve(skb, hlen);  	skb_reset_network_header(skb); +	skb_probe_transport_header(skb, 0);  	if (po->tp_tx_has_off) {  		int off_min, off_max, off; @@ -2247,9 +2328,8 @@ static int packet_snd(struct socket *sock,  	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);  	if (err)  		goto out_free; -	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); -	if (err < 0) -		goto out_free; + +	sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);  	if (!gso_type && (len > dev->mtu + reserve + extra_len)) {  		/* Earlier code assumed this would be a VLAN pkt, @@ -2289,6 +2369,8 @@ static int packet_snd(struct socket *sock,  		len += vnet_hdr_len;  	} +	skb_probe_transport_header(skb, reserve); +  	if (unlikely(extra_len == 4))  		skb->no_fcs = 1; @@ -3165,8 +3247,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,  	struct sock *sk = sock->sk;  	struct packet_sock *po = pkt_sk(sk);  	void *data = &val; -	struct tpacket_stats st; -	union tpacket_stats_u st_u; +	union tpacket_stats_u st;  	if (level != SOL_PACKET)  		return -ENOPROTOOPT; @@ -3180,22 +3261,18 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,  	switch (optname) {  	case PACKET_STATISTICS:  		spin_lock_bh(&sk->sk_receive_queue.lock); +		memcpy(&st, &po->stats, sizeof(st)); +		memset(&po->stats, 0, sizeof(po->stats)); +		spin_unlock_bh(&sk->sk_receive_queue.lock); +  		if (po->tp_version == TPACKET_V3) {  			lv = sizeof(struct tpacket_stats_v3); -			memcpy(&st_u.stats3, &po->stats, -			       sizeof(struct tpacket_stats)); -			st_u.stats3.tp_freeze_q_cnt = -					po->stats_u.stats3.tp_freeze_q_cnt; -			st_u.stats3.tp_packets += po->stats.tp_drops; -			data = &st_u.stats3; +			data = &st.stats3;  		} else {  			lv = sizeof(struct tpacket_stats); -			st = po->stats; -			st.tp_packets += st.tp_drops; -			data = &st; +			data = &st.stats1;  		} -		memset(&po->stats, 0, sizeof(st)); -		spin_unlock_bh(&sk->sk_receive_queue.lock); +  		break;  	case PACKET_AUXDATA:  		val = po->auxdata; @@ -3240,7 +3317,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,  	case PACKET_FANOUT:  		val = (po->fanout ?  		       
((u32)po->fanout->id | -			((u32)po->fanout->type << 16)) : +			((u32)po->fanout->type << 16) | +			((u32)po->fanout->flags << 24)) :  		       0);  		break;  	case PACKET_TX_HAS_OFF: diff --git a/net/packet/diag.c b/net/packet/diag.c index d3fcd1ebef7..a9584a2f6d6 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -125,8 +125,10 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb)  	return ret;  } -static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, -		u32 portid, u32 seq, u32 flags, int sk_ino) +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, +			struct packet_diag_req *req, +			struct user_namespace *user_ns, +			u32 portid, u32 seq, u32 flags, int sk_ino)  {  	struct nlmsghdr *nlh;  	struct packet_diag_msg *rp; @@ -147,6 +149,11 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag  			pdiag_put_info(po, skb))  		goto out_nlmsg_trim; +	if ((req->pdiag_show & PACKET_SHOW_INFO) && +	    nla_put_u32(skb, PACKET_DIAG_UID, +			from_kuid_munged(user_ns, sock_i_uid(sk)))) +		goto out_nlmsg_trim; +  	if ((req->pdiag_show & PACKET_SHOW_MCLIST) &&  			pdiag_put_mclist(po, skb))  		goto out_nlmsg_trim; @@ -159,6 +166,14 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag  			pdiag_put_fanout(po, skb))  		goto out_nlmsg_trim; +	if ((req->pdiag_show & PACKET_SHOW_MEMINFO) && +	    sock_diag_put_meminfo(sk, skb, PACKET_DIAG_MEMINFO)) +		goto out_nlmsg_trim; + +	if ((req->pdiag_show & PACKET_SHOW_FILTER) && +	    sock_diag_put_filterinfo(user_ns, sk, skb, PACKET_DIAG_FILTER)) +		goto out_nlmsg_trim; +  	return nlmsg_end(skb, nlh);  out_nlmsg_trim: @@ -183,9 +198,11 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)  		if (num < s_num)  			goto next; -		if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).portid, -					cb->nlh->nlmsg_seq, NLM_F_MULTI, -					sock_i_ino(sk)) < 0) +		if (sk_diag_fill(sk, skb, req, +				 sk_user_ns(NETLINK_CB(cb->skb).sk), +				 NETLINK_CB(cb->skb).portid, +				 cb->nlh->nlmsg_seq, NLM_F_MULTI, +				 sock_i_ino(sk)) < 0)  			goto done;  next:  		num++; diff --git a/net/packet/internal.h b/net/packet/internal.h index e84cab8cb7a..c4e4b456120 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -54,6 +54,7 @@ struct pgv {  struct packet_ring_buffer {  	struct pgv		*pg_vec; +  	unsigned int		head;  	unsigned int		frames_per_block;  	unsigned int		frame_size; @@ -63,8 +64,9 @@ struct packet_ring_buffer {  	unsigned int		pg_vec_pages;  	unsigned int		pg_vec_len; -	struct tpacket_kbdq_core	prb_bdqc;  	atomic_t		pending; + +	struct tpacket_kbdq_core	prb_bdqc;  };  extern struct mutex fanout_mutex; @@ -77,10 +79,11 @@ struct packet_fanout {  	unsigned int		num_members;  	u16			id;  	u8			type; -	u8			defrag; +	u8			flags;  	atomic_t		rr_cur;  	struct list_head	list;  	struct sock		*arr[PACKET_FANOUT_MAX]; +	int			next[PACKET_FANOUT_MAX];  	spinlock_t		lock;  	atomic_t		sk_ref;  	struct packet_type	prot_hook ____cacheline_aligned_in_smp; @@ -90,8 +93,7 @@ struct packet_sock {  	/* struct sock has to be the first member of packet_sock */  	struct sock		sk;  	struct packet_fanout	*fanout; -	struct tpacket_stats	stats; -	union  tpacket_stats_u	stats_u; +	union  tpacket_stats_u	stats;  	struct packet_ring_buffer	rx_ring;  	struct packet_ring_buffer	tx_ring;  	int			copy_thresh; diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 0193630d306..dc15f430080 100644 --- 
a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -61,7 +61,7 @@ static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = {  	[IFA_LOCAL] = { .type = NLA_U8 },  }; -static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) +static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct nlattr *tb[IFA_MAX+1]; @@ -224,7 +224,7 @@ static const struct nla_policy rtm_phonet_policy[RTA_MAX+1] = {  	[RTA_OIF] = { .type = NLA_U32 },  }; -static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) +static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(skb->sk);  	struct nlattr *tb[RTA_MAX+1]; diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 9b9be5279f5..1cec5e4f3a5 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -587,7 +587,7 @@ static ssize_t rfkill_name_show(struct device *dev,  static const char *rfkill_get_type_str(enum rfkill_type type)  { -	BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_FM + 1); +	BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_NFC + 1);  	switch (type) {  	case RFKILL_TYPE_WLAN: @@ -604,6 +604,8 @@ static const char *rfkill_get_type_str(enum rfkill_type type)  		return "gps";  	case RFKILL_TYPE_FM:  		return "fm"; +	case RFKILL_TYPE_NFC: +		return "nfc";  	default:  		BUG();  	} diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 78fc0937948..fb076cd6f80 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -131,6 +131,7 @@ static int rfkill_gpio_probe(struct platform_device *pdev)  		rfkill->pwr_clk = clk_get(&pdev->dev, pdata->power_clk_name);  		if (IS_ERR(rfkill->pwr_clk)) {  			pr_warn("%s: can't find pwr_clk.\n", __func__); +			ret = PTR_ERR(rfkill->pwr_clk);  			goto fail_shutdown_name;  		}  	} @@ -152,9 +153,11 @@ static int rfkill_gpio_probe(struct platform_device *pdev)  	}  	rfkill->rfkill_dev = rfkill_alloc(pdata->name, &pdev->dev, pdata->type, -				&rfkill_gpio_ops, rfkill); -	if (!rfkill->rfkill_dev) +					  &rfkill_gpio_ops, rfkill); +	if (!rfkill->rfkill_dev) { +		ret = -ENOMEM;  		goto fail_shutdown; +	}  	ret = rfkill_register(rfkill->rfkill_dev);  	if (ret < 0) diff --git a/net/rfkill/rfkill-regulator.c b/net/rfkill/rfkill-regulator.c index 4b5ab21ecb2..d11ac79246e 100644 --- a/net/rfkill/rfkill-regulator.c +++ b/net/rfkill/rfkill-regulator.c @@ -51,7 +51,7 @@ static int rfkill_regulator_set_block(void *data, bool blocked)  	return 0;  } -struct rfkill_ops rfkill_regulator_ops = { +static struct rfkill_ops rfkill_regulator_ops = {  	.set_block = rfkill_regulator_set_block,  }; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 8579c4bb20c..fd7072827a4 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -982,7 +982,7 @@ done:  	return ret;  } -static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)  {  	struct net *net = sock_net(skb->sk);  	struct nlattr *tca[TCA_ACT_MAX + 1]; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 08fa1e8a4ca..3a4c0caa1f7 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -166,15 +166,17 @@ static int tcf_csum_ipv4_igmp(struct sk_buff *skb,  	return 1;  } -static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h, +static int tcf_csum_ipv6_icmp(struct sk_buff *skb,  			      unsigned int ihl, unsigned int ipl)  {  	struct icmp6hdr *icmp6h; +	const struct ipv6hdr *ip6h;  	icmp6h = 
tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h));  	if (icmp6h == NULL)  		return 0; +	ip6h = ipv6_hdr(skb);  	icmp6h->icmp6_cksum = 0;  	skb->csum = csum_partial(icmp6h, ipl - ihl, 0);  	icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, @@ -186,15 +188,17 @@ static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h,  	return 1;  } -static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph, +static int tcf_csum_ipv4_tcp(struct sk_buff *skb,  			     unsigned int ihl, unsigned int ipl)  {  	struct tcphdr *tcph; +	const struct iphdr *iph;  	tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));  	if (tcph == NULL)  		return 0; +	iph = ip_hdr(skb);  	tcph->check = 0;  	skb->csum = csum_partial(tcph, ipl - ihl, 0);  	tcph->check = tcp_v4_check(ipl - ihl, @@ -205,15 +209,17 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph,  	return 1;  } -static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h, +static int tcf_csum_ipv6_tcp(struct sk_buff *skb,  			     unsigned int ihl, unsigned int ipl)  {  	struct tcphdr *tcph; +	const struct ipv6hdr *ip6h;  	tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));  	if (tcph == NULL)  		return 0; +	ip6h = ipv6_hdr(skb);  	tcph->check = 0;  	skb->csum = csum_partial(tcph, ipl - ihl, 0);  	tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, @@ -225,10 +231,11 @@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h,  	return 1;  } -static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph, +static int tcf_csum_ipv4_udp(struct sk_buff *skb,  			     unsigned int ihl, unsigned int ipl, int udplite)  {  	struct udphdr *udph; +	const struct iphdr *iph;  	u16 ul;  	/* @@ -242,6 +249,7 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph,  	if (udph == NULL)  		return 0; +	iph = ip_hdr(skb);  	ul = ntohs(udph->len);  	if (udplite || udph->check) { @@ -276,10 +284,11 @@ ignore_obscure_skb:  	return 1;  } -static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h, +static int tcf_csum_ipv6_udp(struct sk_buff *skb,  			     unsigned int ihl, unsigned int ipl, int udplite)  {  	struct udphdr *udph; +	const struct ipv6hdr *ip6h;  	u16 ul;  	/* @@ -293,6 +302,7 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h,  	if (udph == NULL)  		return 0; +	ip6h = ipv6_hdr(skb);  	ul = ntohs(udph->len);  	udph->check = 0; @@ -328,7 +338,7 @@ ignore_obscure_skb:  static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)  { -	struct iphdr *iph; +	const struct iphdr *iph;  	int ntkoff;  	ntkoff = skb_network_offset(skb); @@ -353,19 +363,19 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)  		break;  	case IPPROTO_TCP:  		if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) -			if (!tcf_csum_ipv4_tcp(skb, iph, iph->ihl * 4, +			if (!tcf_csum_ipv4_tcp(skb, iph->ihl * 4,  					       ntohs(iph->tot_len)))  				goto fail;  		break;  	case IPPROTO_UDP:  		if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) -			if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4, +			if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4,  					       ntohs(iph->tot_len), 0))  				goto fail;  		break;  	case IPPROTO_UDPLITE:  		if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) -			if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4, +			if (!tcf_csum_ipv4_udp(skb, iph->ihl * 4,  					       ntohs(iph->tot_len), 1))  				goto fail;  		break; @@ -377,7 +387,7 @@ static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)  		    
pskb_expand_head(skb, 0, 0, GFP_ATOMIC))  			goto fail; -		ip_send_check(iph); +		ip_send_check(ip_hdr(skb));  	}  	return 1; @@ -456,6 +466,7 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)  			ixhl = ipv6_optlen(ip6xh);  			if (!pskb_may_pull(skb, hl + ixhl + ntkoff))  				goto fail; +			ip6xh = (void *)(skb_network_header(skb) + hl);  			if ((nexthdr == NEXTHDR_HOP) &&  			    !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl)))  				goto fail; @@ -464,25 +475,25 @@ static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)  			break;  		case IPPROTO_ICMPV6:  			if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) -				if (!tcf_csum_ipv6_icmp(skb, ip6h, +				if (!tcf_csum_ipv6_icmp(skb,  							hl, pl + sizeof(*ip6h)))  					goto fail;  			goto done;  		case IPPROTO_TCP:  			if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) -				if (!tcf_csum_ipv6_tcp(skb, ip6h, +				if (!tcf_csum_ipv6_tcp(skb,  						       hl, pl + sizeof(*ip6h)))  					goto fail;  			goto done;  		case IPPROTO_UDP:  			if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) -				if (!tcf_csum_ipv6_udp(skb, ip6h, hl, +				if (!tcf_csum_ipv6_udp(skb, hl,  						       pl + sizeof(*ip6h), 0))  					goto fail;  			goto done;  		case IPPROTO_UDPLITE:  			if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) -				if (!tcf_csum_ipv6_udp(skb, ip6h, hl, +				if (!tcf_csum_ipv6_udp(skb, hl,  						       pl + sizeof(*ip6h), 1))  					goto fail;  			goto done; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index e0f6de64afe..60d88b6b956 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -8,7 +8,7 @@   *		as published by the Free Software Foundation; either version   *		2 of the License, or (at your option) any later version.   * - * Copyright:	Jamal Hadi Salim (2002-4) + * Copyright:	Jamal Hadi Salim (2002-13)   */  #include <linux/types.h> @@ -303,17 +303,44 @@ static struct tc_action_ops act_ipt_ops = {  	.walk		=	tcf_generic_walker  }; -MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); +static struct tc_action_ops act_xt_ops = { +	.kind		=	"xt", +	.hinfo		=	&ipt_hash_info, +	.type		=	TCA_ACT_IPT, +	.capab		=	TCA_CAP_NONE, +	.owner		=	THIS_MODULE, +	.act		=	tcf_ipt, +	.dump		=	tcf_ipt_dump, +	.cleanup	=	tcf_ipt_cleanup, +	.lookup		=	tcf_hash_search, +	.init		=	tcf_ipt_init, +	.walk		=	tcf_generic_walker +}; + +MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");  MODULE_DESCRIPTION("Iptables target actions");  MODULE_LICENSE("GPL"); +MODULE_ALIAS("act_xt");  static int __init ipt_init_module(void)  { -	return tcf_register_action(&act_ipt_ops); +	int ret1, ret2; +	ret1 = tcf_register_action(&act_xt_ops); +	if (ret1 < 0) +		printk("Failed to load xt action\n"); +	ret2 = tcf_register_action(&act_ipt_ops); +	if (ret2 < 0) +		printk("Failed to load ipt action\n"); + +	if (ret1 < 0 && ret2 < 0) +		return ret1; +	else +		return 0;  }  static void __exit ipt_cleanup_module(void)  { +	tcf_unregister_action(&act_xt_ops);  	tcf_unregister_action(&act_ipt_ops);  } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 964f5e4f4b8..8e118af9097 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -22,7 +22,6 @@  #include <linux/skbuff.h>  #include <linux/init.h>  #include <linux/kmod.h> -#include <linux/netlink.h>  #include <linux/err.h>  #include <linux/slab.h>  #include <net/net_namespace.h> @@ -118,7 +117,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)  /* Add/change/delete/get a filter node */ -static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_ctl_tfilter(struct sk_buff *skb, 
struct nlmsghdr *n)  {  	struct net *net = sock_net(skb->sk);  	struct nlattr *tca[TCA_MAX + 1]; @@ -141,7 +140,12 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)  	if ((n->nlmsg_type != RTM_GETTFILTER) && !capable(CAP_NET_ADMIN))  		return -EPERM; +  replay: +	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); +	if (err < 0) +		return err; +  	t = nlmsg_data(n);  	protocol = TC_H_MIN(t->tcm_info);  	prio = TC_H_MAJ(t->tcm_info); @@ -164,10 +168,6 @@ replay:  	if (dev == NULL)  		return -ENODEV; -	err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL); -	if (err < 0) -		return err; -  	/* Find qdisc */  	if (!parent) {  		q = dev->qdisc; @@ -427,7 +427,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)  	const struct Qdisc_class_ops *cops;  	struct tcf_dump_args arg; -	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) +	if (nlmsg_len(cb->nlh) < sizeof(*tcm))  		return skb->len;  	dev = __dev_get_by_index(net, tcm->tcm_ifindex);  	if (!dev) diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index aa36a8c8b33..7881e2fccbc 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -393,7 +393,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,  			return -EOPNOTSUPP;  		if ((keymask & (FLOW_KEY_SKUID|FLOW_KEY_SKGID)) && -		    sk_user_ns(NETLINK_CB(in_skb).ssk) != &init_user_ns) +		    sk_user_ns(NETLINK_CB(in_skb).sk) != &init_user_ns)  			return -EOPNOTSUPP;  	} diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index 3130320997e..938b7cbf562 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -83,7 +83,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,  	opt.dim = set->dim;  	opt.flags = set->flags;  	opt.cmdflags = 0; -	opt.timeout = ~0u; +	opt.ext.timeout = ~0u;  	network_offset = skb_network_offset(skb);  	skb_pull(skb, network_offset); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index c297e2a8e2a..2b935e7cfe7 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -971,13 +971,13 @@ check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)   * Delete/get qdisc.   */ -static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)  {  	struct net *net = sock_net(skb->sk);  	struct tcmsg *tcm = nlmsg_data(n);  	struct nlattr *tca[TCA_MAX + 1];  	struct net_device *dev; -	u32 clid = tcm->tcm_parent; +	u32 clid;  	struct Qdisc *q = NULL;  	struct Qdisc *p = NULL;  	int err; @@ -985,14 +985,15 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)  	if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN))  		return -EPERM; -	dev = __dev_get_by_index(net, tcm->tcm_ifindex); -	if (!dev) -		return -ENODEV; -  	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);  	if (err < 0)  		return err; +	dev = __dev_get_by_index(net, tcm->tcm_ifindex); +	if (!dev) +		return -ENODEV; + +	clid = tcm->tcm_parent;  	if (clid) {  		if (clid != TC_H_ROOT) {  			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { @@ -1038,7 +1039,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)   * Create/change qdisc.   
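 * Note on ordering: the attribute table is rebuilt at the top of every
 * replay pass, before the device lookup or any other use of the tcm header.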
*/ -static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)  {  	struct net *net = sock_net(skb->sk);  	struct tcmsg *tcm; @@ -1053,6 +1054,10 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)  replay:  	/* Reinit, just in case something touches this. */ +	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); +	if (err < 0) +		return err; +  	tcm = nlmsg_data(n);  	clid = tcm->tcm_parent;  	q = p = NULL; @@ -1061,9 +1066,6 @@ replay:  	if (!dev)  		return -ENODEV; -	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); -	if (err < 0) -		return err;  	if (clid) {  		if (clid != TC_H_ROOT) { @@ -1372,7 +1374,7 @@ done: -static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) +static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)  {  	struct net *net = sock_net(skb->sk);  	struct tcmsg *tcm = nlmsg_data(n); @@ -1382,22 +1384,22 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)  	const struct Qdisc_class_ops *cops;  	unsigned long cl = 0;  	unsigned long new_cl; -	u32 portid = tcm->tcm_parent; -	u32 clid = tcm->tcm_handle; -	u32 qid = TC_H_MAJ(clid); +	u32 portid; +	u32 clid; +	u32 qid;  	int err;  	if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN))  		return -EPERM; -	dev = __dev_get_by_index(net, tcm->tcm_ifindex); -	if (!dev) -		return -ENODEV; -  	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);  	if (err < 0)  		return err; +	dev = __dev_get_by_index(net, tcm->tcm_ifindex); +	if (!dev) +		return -ENODEV; +  	/*  	   parent == TC_H_UNSPEC - unspecified parent.  	   parent == TC_H_ROOT   - class is root, which has no parent. @@ -1413,6 +1415,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)  	/* Step 1. 
Determine qdisc handle X:0 */ +	portid = tcm->tcm_parent; +	clid = tcm->tcm_handle; +	qid = TC_H_MAJ(clid); +  	if (portid != TC_H_ROOT) {  		u32 qid1 = TC_H_MAJ(portid); @@ -1636,7 +1642,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)  	struct net_device *dev;  	int t, s_t; -	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) +	if (nlmsg_len(cb->nlh) < sizeof(*tcm))  		return 0;  	dev = dev_get_by_index(net, tcm->tcm_ifindex);  	if (!dev) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 571f1d211f4..79b1876b6cd 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -981,6 +981,7 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {  	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },  	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },  	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, +	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },  };  static void htb_work_func(struct work_struct *work) @@ -994,7 +995,7 @@ static void htb_work_func(struct work_struct *work)  static int htb_init(struct Qdisc *sch, struct nlattr *opt)  {  	struct htb_sched *q = qdisc_priv(sch); -	struct nlattr *tb[TCA_HTB_INIT + 1]; +	struct nlattr *tb[TCA_HTB_MAX + 1];  	struct tc_htb_glob *gopt;  	int err;  	int i; @@ -1002,20 +1003,16 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)  	if (!opt)  		return -EINVAL; -	err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy); +	err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);  	if (err < 0)  		return err; -	if (tb[TCA_HTB_INIT] == NULL) { -		pr_err("HTB: hey probably you have bad tc tool ?\n"); +	if (!tb[TCA_HTB_INIT])  		return -EINVAL; -	} +  	gopt = nla_data(tb[TCA_HTB_INIT]); -	if (gopt->version != HTB_VER >> 16) { -		pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n", -		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version); +	if (gopt->version != HTB_VER >> 16)  		return -EINVAL; -	}  	err = qdisc_class_hash_init(&q->clhash);  	if (err < 0) @@ -1027,10 +1024,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)  	INIT_WORK(&q->work, htb_work_func);  	skb_queue_head_init(&q->direct_queue); -	q->direct_qlen = qdisc_dev(sch)->tx_queue_len; -	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */ -		q->direct_qlen = 2; - +	if (tb[TCA_HTB_DIRECT_QLEN]) +		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]); +	else { +		q->direct_qlen = qdisc_dev(sch)->tx_queue_len; +		if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */ +			q->direct_qlen = 2; +	}  	if ((q->rate2quantum = gopt->rate2quantum) < 1)  		q->rate2quantum = 1;  	q->defcls = gopt->defcls; @@ -1056,7 +1056,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)  	nest = nla_nest_start(skb, TCA_OPTIONS);  	if (nest == NULL)  		goto nla_put_failure; -	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt)) +	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) || +	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))  		goto nla_put_failure;  	nla_nest_end(skb, nest); @@ -1311,7 +1312,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,  	struct htb_sched *q = qdisc_priv(sch);  	struct htb_class *cl = (struct htb_class *)*arg, *parent;  	struct nlattr *opt = tca[TCA_OPTIONS]; -	struct nlattr *tb[__TCA_HTB_MAX]; +	struct nlattr *tb[TCA_HTB_MAX + 1];  	struct tc_htb_opt *hopt;  	/* extract all subattrs from opt attr */ diff --git a/net/sctp/associola.c b/net/sctp/associola.c index fa261a3594c..91cfd8f94a1 100644 --- 
a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -97,8 +97,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a  	/* Initialize the object handling fields.  */  	atomic_set(&asoc->base.refcnt, 1); -	asoc->base.dead = 0; -	asoc->base.malloced = 0; +	asoc->base.dead = false;  	/* Initialize the bind addr area.  */  	sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); @@ -364,7 +363,6 @@ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep,  	if (!sctp_association_init(asoc, ep, sk, scope, gfp))  		goto fail_init; -	asoc->base.malloced = 1;  	SCTP_DBG_OBJCNT_INC(assoc);  	SCTP_DEBUG_PRINTK("Created asoc %p\n", asoc); @@ -402,7 +400,7 @@ void sctp_association_free(struct sctp_association *asoc)  	/* Mark as dead, so other users can know this structure is  	 * going away.  	 */ -	asoc->base.dead = 1; +	asoc->base.dead = true;  	/* Dispose of any data lying around in the outqueue. */  	sctp_outq_free(&asoc->outqueue); @@ -477,10 +475,8 @@ static void sctp_association_destroy(struct sctp_association *asoc)  	WARN_ON(atomic_read(&asoc->rmem_alloc)); -	if (asoc->base.malloced) { -		kfree(asoc); -		SCTP_DBG_OBJCNT_DEC(assoc); -	} +	kfree(asoc); +	SCTP_DBG_OBJCNT_DEC(assoc);  }  /* Change the primary destination address for the peer. */ diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index d886b3bf84f..41145fe3181 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -131,8 +131,6 @@ int sctp_bind_addr_dup(struct sctp_bind_addr *dest,   */  void sctp_bind_addr_init(struct sctp_bind_addr *bp, __u16 port)  { -	bp->malloced = 0; -  	INIT_LIST_HEAD(&bp->address_list);  	bp->port = port;  } @@ -155,11 +153,6 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp)  {  	/* Empty the bind address list. */  	sctp_bind_addr_clean(bp); - -	if (bp->malloced) { -		kfree(bp); -		SCTP_DBG_OBJCNT_DEC(bind_addr); -	}  }  /* Add an address to the bind address list in the SCTP_bind_addr structure. */ diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 12ed45dbe75..5fbd7bc6bb1 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -121,8 +121,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,  	/* Initialize the basic object fields. */  	atomic_set(&ep->base.refcnt, 1); -	ep->base.dead = 0; -	ep->base.malloced = 1; +	ep->base.dead = false;  	/* Create an input queue.  */  	sctp_inq_init(&ep->base.inqueue); @@ -198,7 +197,7 @@ struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, gfp_t gfp)  		goto fail;  	if (!sctp_endpoint_init(ep, sk, gfp))  		goto fail_init; -	ep->base.malloced = 1; +  	SCTP_DBG_OBJCNT_INC(ep);  	return ep; @@ -234,7 +233,7 @@ void sctp_endpoint_add_asoc(struct sctp_endpoint *ep,   */  void sctp_endpoint_free(struct sctp_endpoint *ep)  { -	ep->base.dead = 1; +	ep->base.dead = true;  	ep->base.sk->sk_state = SCTP_SS_CLOSED; @@ -279,11 +278,8 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)  	if (ep->base.sk)  		sock_put(ep->base.sk); -	/* Finally, free up our memory. */ -	if (ep->base.malloced) { -		kfree(ep); -		SCTP_DBG_OBJCNT_DEC(ep); -	} +	kfree(ep); +	SCTP_DBG_OBJCNT_DEC(ep);  }  /* Hold a reference to an endpoint. */ diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 2d5ad280de3..3221d073448 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -58,8 +58,6 @@ void sctp_inq_init(struct sctp_inq *queue)  	/* Create a task for delivering data.  
*/  	INIT_WORK(&queue->immediate, NULL); - -	queue->malloced = 0;  }  /* Release the memory associated with an SCTP inqueue.  */ @@ -80,11 +78,6 @@ void sctp_inq_free(struct sctp_inq *queue)  		sctp_chunk_free(queue->in_progress);  		queue->in_progress = NULL;  	} - -	if (queue->malloced) { -		/* Dump the master memory segment.  */ -		kfree(queue); -	}  }  /* Put a new packet in an SCTP inqueue. diff --git a/net/sctp/output.c b/net/sctp/output.c index f5200a2ad85..bbef4a7a9b5 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -136,7 +136,7 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet,  	packet->overhead = overhead;  	sctp_packet_reset(packet);  	packet->vtag = 0; -	packet->malloced = 0; +  	return packet;  } @@ -151,9 +151,6 @@ void sctp_packet_free(struct sctp_packet *packet)  		list_del_init(&chunk->list);  		sctp_chunk_free(chunk);  	} - -	if (packet->malloced) -		kfree(packet);  }  /* This routine tries to append the chunk to the offered packet. If adding diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 01dca753db1..32a4625fef7 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -217,8 +217,6 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)  	q->outstanding_bytes = 0;  	q->empty = 1;  	q->cork  = 0; - -	q->malloced = 0;  	q->out_qlen = 0;  } @@ -295,10 +293,6 @@ void sctp_outq_free(struct sctp_outq *q)  {  	/* Throw away leftover chunks. */  	__sctp_outq_teardown(q); - -	/* If we were kmalloc()'d, free the memory.  */ -	if (q->malloced) -		kfree(q);  }  /* Put a new chunk in an sctp_outq.  */ @@ -707,11 +701,10 @@ redo:  /* Cork the outqueue so queued chunks are really queued. */  int sctp_outq_uncork(struct sctp_outq *q)  { -	int error = 0;  	if (q->cork)  		q->cork = 0; -	error = sctp_outq_flush(q, 0); -	return error; + +	return sctp_outq_flush(q, 0);  } diff --git a/net/sctp/probe.c b/net/sctp/probe.c index ad0dba87034..e62c22535be 100644 --- a/net/sctp/probe.c +++ b/net/sctp/probe.c @@ -63,7 +63,7 @@ static struct {  	struct timespec	  tstart;  } sctpw; -static void printl(const char *fmt, ...) +static __printf(1, 2) void printl(const char *fmt, ...)  
{  	va_list args;  	int len; diff --git a/net/sctp/proc.c b/net/sctp/proc.c index ab3bba8cb0a..4e45ee35d0d 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -295,7 +295,8 @@ static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos)  		seq_printf(seq, " ASSOC     SOCK   STY SST ST HBKT "  				"ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT "  				"RPORT LADDRS <-> RADDRS " -				"HBINT INS OUTS MAXRT T1X T2X RTXC\n"); +				"HBINT INS OUTS MAXRT T1X T2X RTXC " +				"wmema wmemq sndbuf rcvbuf\n");  	return (void *)pos;  } @@ -349,11 +350,16 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)  		sctp_seq_dump_local_addrs(seq, epb);  		seq_printf(seq, "<-> ");  		sctp_seq_dump_remote_addrs(seq, assoc); -		seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d ", +		seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d " +			   "%8d %8d %8d %8d",  			assoc->hbinterval, assoc->c.sinit_max_instreams,  			assoc->c.sinit_num_ostreams, assoc->max_retrans,  			assoc->init_retries, assoc->shutdown_retries, -			assoc->rtx_data_chunks); +			assoc->rtx_data_chunks, +			atomic_read(&sk->sk_wmem_alloc), +			sk->sk_wmem_queued, +			sk->sk_sndbuf, +			sk->sk_rcvbuf);  		seq_printf(seq, "\n");  	}  	read_unlock(&head->lock); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b9070736b8d..f631c5ff4db 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1119,9 +1119,10 @@ static int __sctp_connect(struct sock* sk,  		/* Make sure the destination port is correctly set  		 * in all addresses.  		 */ -		if (asoc && asoc->peer.port && asoc->peer.port != port) +		if (asoc && asoc->peer.port && asoc->peer.port != port) { +			err = -EINVAL;  			goto out_free; - +		}  		/* Check if there already is a matching association on the  		 * endpoint (other than the one created here). @@ -6185,7 +6186,8 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)  	/* Are there any exceptional events?  */  	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) -		mask |= POLLERR; +		mask |= POLLERR | +			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);  	if (sk->sk_shutdown & RCV_SHUTDOWN)  		mask |= POLLRDHUP | POLLIN | POLLRDNORM;  	if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c index 825ea94415b..da860352380 100644 --- a/net/sctp/ssnmap.c +++ b/net/sctp/ssnmap.c @@ -74,7 +74,6 @@ struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out,  	if (!sctp_ssnmap_init(retval, in, out))  		goto fail_map; -	retval->malloced = 1;  	SCTP_DBG_OBJCNT_INC(ssnmap);  	return retval; @@ -118,14 +117,16 @@ void sctp_ssnmap_clear(struct sctp_ssnmap *map)  /* Dispose of a ssnmap.  
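 * Small maps were obtained with kmalloc(); larger ones came straight from
 * the page allocator, so the free path below has to match the allocation.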
*/  void sctp_ssnmap_free(struct sctp_ssnmap *map)  { -	if (map && map->malloced) { -		int size; +	int size; + +	if (unlikely(!map)) +		return; + +	size = sctp_ssnmap_size(map->in.len, map->out.len); +	if (size <= KMALLOC_MAX_SIZE) +		kfree(map); +	else +		free_pages((unsigned long)map, get_order(size)); -		size = sctp_ssnmap_size(map->in.len, map->out.len); -		if (size <= KMALLOC_MAX_SIZE) -			kfree(map); -		else -			free_pages((unsigned long)map, get_order(size)); -		SCTP_DBG_OBJCNT_DEC(ssnmap); -	} +	SCTP_DBG_OBJCNT_DEC(ssnmap);  } diff --git a/net/sctp/transport.c b/net/sctp/transport.c index fafd2a461ba..098f1d5f769 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -123,7 +123,6 @@ struct sctp_transport *sctp_transport_new(struct net *net,  	if (!sctp_transport_init(net, transport, addr, gfp))  		goto fail_init; -	transport->malloced = 1;  	SCTP_DBG_OBJCNT_INC(transport);  	return transport; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 0fd5b3d2df0..04e3d470f87 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -68,7 +68,6 @@ struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *ulpq,  	skb_queue_head_init(&ulpq->reasm);  	skb_queue_head_init(&ulpq->lobby);  	ulpq->pd_mode  = 0; -	ulpq->malloced = 0;  	return ulpq;  } @@ -96,8 +95,6 @@ void sctp_ulpq_flush(struct sctp_ulpq *ulpq)  void sctp_ulpq_free(struct sctp_ulpq *ulpq)  {  	sctp_ulpq_flush(ulpq); -	if (ulpq->malloced) -		kfree(ulpq);  }  /* Process an incoming DATA chunk.  */ diff --git a/net/socket.c b/net/socket.c index 88f759adf3a..280283f03cc 100644 --- a/net/socket.c +++ b/net/socket.c @@ -600,7 +600,7 @@ void sock_release(struct socket *sock)  }  EXPORT_SYMBOL(sock_release); -int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) +void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)  {  	*tx_flags = 0;  	if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) @@ -609,7 +609,6 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)  		*tx_flags |= SKBTX_SW_TSTAMP;  	if (sock_flag(sk, SOCK_WIFI_STATUS))  		*tx_flags |= SKBTX_WIFI_STATUS; -	return 0;  }  EXPORT_SYMBOL(sock_tx_timestamp); @@ -682,16 +681,6 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,  }  EXPORT_SYMBOL(kernel_sendmsg); -static int ktime2ts(ktime_t kt, struct timespec *ts) -{ -	if (kt.tv64) { -		*ts = ktime_to_timespec(kt); -		return 1; -	} else { -		return 0; -	} -} -  /*   * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)   */ @@ -724,17 +713,15 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,  	memset(ts, 0, sizeof(ts)); -	if (skb->tstamp.tv64 && -	    sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) { -		skb_get_timestampns(skb, ts + 0); +	if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) && +	    ktime_to_timespec_cond(skb->tstamp, ts + 0))  		empty = 0; -	}  	if (shhwtstamps) {  		if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && -		    ktime2ts(shhwtstamps->syststamp, ts + 1)) +		    ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1))  			empty = 0;  		if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && -		    ktime2ts(shhwtstamps->hwtstamp, ts + 2)) +		    ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2))  			empty = 0;  	}  	if (!empty) diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index 4f99600a5fe..c890848f9d5 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -31,3 +31,10 @@ config TIPC_PORTS  	  Setting this to a smaller value saves some memory,  	  setting it to higher allows for more ports. 
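The InfiniBand media type added below is wired in through the usual pattern for optional kernel subsystems: a bool Kconfig symbol pulls ib_media.o into the build, and the bearer header provides static inline no-op stubs when the option is off, so the shared start/stop paths need no #ifdefs of their own (see the bearer.h hunk further below). A minimal sketch of that stub pattern, using a hypothetical "foo" media type in place of the real identifiers:

#ifdef CONFIG_TIPC_MEDIA_FOO
int  tipc_foo_media_start(void);
void tipc_foo_media_stop(void);
#else
/* Option disabled: start trivially succeeds and stop is a no-op, so
 * tipc_core_start_net()-style callers can invoke both unconditionally. */
static inline int tipc_foo_media_start(void) { return 0; }
static inline void tipc_foo_media_stop(void) { return; }
#endif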
+ +config TIPC_MEDIA_IB +	bool "InfiniBand media type support" +	depends on TIPC && INFINIBAND_IPOIB +	help +	  Saying Y here will enable support for running TIPC on +	  IP-over-InfiniBand devices. diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 6cd55d671d3..4df8e02d900 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -9,3 +9,5 @@ tipc-y	+= addr.o bcast.o bearer.o config.o \  	   name_distr.o  subscr.o name_table.o net.o  \  	   netlink.o node.o node_subscr.o port.o ref.o  \  	   socket.o log.o eth_media.o + +tipc-$(CONFIG_TIPC_MEDIA_IB)	+= ib_media.o diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 2655c9f4eca..25e159c2feb 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -620,10 +620,10 @@ static int tipc_bcbearer_send(struct sk_buff *buf,  			continue;	/* bearer pair doesn't add anything */  		if (!tipc_bearer_blocked(p)) -			tipc_bearer_send(p, buf, &p->media->bcast_addr); +			tipc_bearer_send(p, buf, &p->bcast_addr);  		else if (s && !tipc_bearer_blocked(s))  			/* unable to send on primary bearer */ -			tipc_bearer_send(s, buf, &s->media->bcast_addr); +			tipc_bearer_send(s, buf, &s->bcast_addr);  		else  			/* unable to send on either bearer */  			continue; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index aa62f93a912..cb29ef7ba2f 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -39,7 +39,7 @@  #include "bearer.h"  #include "discover.h" -#define MAX_ADDR_STR 32 +#define MAX_ADDR_STR 60  static struct tipc_media *media_list[MAX_MEDIA];  static u32 media_count; @@ -89,9 +89,6 @@ int tipc_register_media(struct tipc_media *m_ptr)  	if ((strlen(m_ptr->name) + 1) > TIPC_MAX_MEDIA_NAME)  		goto exit; -	if ((m_ptr->bcast_addr.media_id != m_ptr->type_id) || -	    !m_ptr->bcast_addr.broadcast) -		goto exit;  	if (m_ptr->priority > TIPC_MAX_LINK_PRI)  		goto exit;  	if ((m_ptr->tolerance < TIPC_MIN_LINK_TOL) || @@ -407,7 +404,7 @@ restart:  	INIT_LIST_HEAD(&b_ptr->links);  	spin_lock_init(&b_ptr->lock); -	res = tipc_disc_create(b_ptr, &m_ptr->bcast_addr, disc_domain); +	res = tipc_disc_create(b_ptr, &b_ptr->bcast_addr, disc_domain);  	if (res) {  		bearer_disable(b_ptr);  		pr_warn("Bearer <%s> rejected, discovery object creation failed\n", diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 39f1192d04b..09c869adcfc 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -56,6 +56,7 @@   * Identifiers of supported TIPC media types   */  #define TIPC_MEDIA_TYPE_ETH	1 +#define TIPC_MEDIA_TYPE_IB	2  /**   * struct tipc_media_addr - destination address used by TIPC bearers @@ -77,7 +78,6 @@ struct tipc_bearer;   * @enable_bearer: routine which enables a bearer   * @disable_bearer: routine which disables a bearer   * @addr2str: routine which converts media address to string - * @str2addr: routine which converts media address from string   * @addr2msg: routine which converts media address to protocol message area   * @msg2addr: routine which converts media address from protocol message area   * @bcast_addr: media address used in broadcasting @@ -94,10 +94,9 @@ struct tipc_media {  	int (*enable_bearer)(struct tipc_bearer *b_ptr);  	void (*disable_bearer)(struct tipc_bearer *b_ptr);  	int (*addr2str)(struct tipc_media_addr *a, char *str_buf, int str_size); -	int (*str2addr)(struct tipc_media_addr *a, char *str_buf);  	int (*addr2msg)(struct tipc_media_addr *a, char *msg_area); -	int (*msg2addr)(struct tipc_media_addr *a, char *msg_area); -	struct tipc_media_addr bcast_addr; +	int (*msg2addr)(const struct tipc_bearer *b_ptr, +			struct 
tipc_media_addr *a, char *msg_area);  	u32 priority;  	u32 tolerance;  	u32 window; @@ -136,6 +135,7 @@ struct tipc_bearer {  	char name[TIPC_MAX_BEARER_NAME];  	spinlock_t lock;  	struct tipc_media *media; +	struct tipc_media_addr bcast_addr;  	u32 priority;  	u32 window;  	u32 tolerance; @@ -175,6 +175,14 @@ int tipc_disable_bearer(const char *name);  int  tipc_eth_media_start(void);  void tipc_eth_media_stop(void); +#ifdef CONFIG_TIPC_MEDIA_IB +int  tipc_ib_media_start(void); +void tipc_ib_media_stop(void); +#else +static inline int tipc_ib_media_start(void) { return 0; } +static inline void tipc_ib_media_stop(void) { return; } +#endif +  int tipc_media_set_priority(const char *name, u32 new_value);  int tipc_media_set_window(const char *name, u32 new_value);  void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a); diff --git a/net/tipc/core.c b/net/tipc/core.c index fc05cecd748..7ec2c1eb94f 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -82,6 +82,7 @@ static void tipc_core_stop_net(void)  {  	tipc_net_stop();  	tipc_eth_media_stop(); +	tipc_ib_media_stop();  }  /** @@ -93,8 +94,15 @@ int tipc_core_start_net(unsigned long addr)  	tipc_net_start(addr);  	res = tipc_eth_media_start(); -	if (res) -		tipc_core_stop_net(); +	if (res < 0) +		goto err; +	res = tipc_ib_media_start(); +	if (res < 0) +		goto err; +	return res; + +err: +	tipc_core_stop_net();  	return res;  } diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 1074b9587e8..eedff58d038 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -129,7 +129,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct tipc_bearer *b_ptr)  	int link_fully_up;  	media_addr.broadcast = 1; -	b_ptr->media->msg2addr(&media_addr, msg_media_addr(msg)); +	b_ptr->media->msg2addr(b_ptr, &media_addr, msg_media_addr(msg));  	kfree_skb(buf);  	/* Ensure message from node is valid and communication is permitted */ diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 2132c1ef295..120a676a336 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -77,12 +77,13 @@ static struct notifier_block notifier = {   * Media-dependent "value" field stores MAC address in first 6 bytes   * and zeroes out the remaining bytes.   
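 * The bearer argument is what lets the broadcast flag be derived from the
 * per-bearer broadcast address instead of a single media-wide constant.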
*/ -static void eth_media_addr_set(struct tipc_media_addr *a, char *mac) +static void eth_media_addr_set(const struct tipc_bearer *tb_ptr, +			       struct tipc_media_addr *a, char *mac)  {  	memcpy(a->value, mac, ETH_ALEN);  	memset(a->value + ETH_ALEN, 0, sizeof(a->value) - ETH_ALEN);  	a->media_id = TIPC_MEDIA_TYPE_ETH; -	a->broadcast = !memcmp(mac, eth_media_info.bcast_addr.value, ETH_ALEN); +	a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, ETH_ALEN);  }  /** @@ -110,6 +111,7 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,  	skb_reset_network_header(clone);  	clone->dev = dev; +	clone->protocol = htons(ETH_P_TIPC);  	dev_hard_header(clone, dev, ETH_P_TIPC, dest->value,  			dev->dev_addr, clone->len);  	dev_queue_xmit(clone); @@ -201,9 +203,13 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)  	/* Associate TIPC bearer with Ethernet bearer */  	eb_ptr->bearer = tb_ptr;  	tb_ptr->usr_handle = (void *)eb_ptr; +	memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value)); +	memcpy(tb_ptr->bcast_addr.value, dev->broadcast, ETH_ALEN); +	tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_ETH; +	tb_ptr->bcast_addr.broadcast = 1;  	tb_ptr->mtu = dev->mtu;  	tb_ptr->blocked = 0; -	eth_media_addr_set(&tb_ptr->addr, (char *)dev->dev_addr); +	eth_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr);  	return 0;  } @@ -302,25 +308,6 @@ static int eth_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size)  }  /** - * eth_str2addr - convert string to Ethernet address - */ -static int eth_str2addr(struct tipc_media_addr *a, char *str_buf) -{ -	char mac[ETH_ALEN]; -	int r; - -	r = sscanf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x", -		       (u32 *)&mac[0], (u32 *)&mac[1], (u32 *)&mac[2], -		       (u32 *)&mac[3], (u32 *)&mac[4], (u32 *)&mac[5]); - -	if (r != ETH_ALEN) -		return 1; - -	eth_media_addr_set(a, mac); -	return 0; -} - -/**   * eth_addr2msg - convert Ethernet address format to message header format   */  static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area) @@ -334,12 +321,13 @@ static int eth_addr2msg(struct tipc_media_addr *a, char *msg_area)  /**   * eth_msg2addr - convert message header address format to Ethernet format   */ -static int eth_msg2addr(struct tipc_media_addr *a, char *msg_area) +static int eth_msg2addr(const struct tipc_bearer *tb_ptr, +			struct tipc_media_addr *a, char *msg_area)  {  	if (msg_area[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_ETH)  		return 1; -	eth_media_addr_set(a, msg_area + ETH_ADDR_OFFSET); +	eth_media_addr_set(tb_ptr, a, msg_area + ETH_ADDR_OFFSET);  	return 0;  } @@ -351,11 +339,8 @@ static struct tipc_media eth_media_info = {  	.enable_bearer	= enable_bearer,  	.disable_bearer	= disable_bearer,  	.addr2str	= eth_addr2str, -	.str2addr	= eth_str2addr,  	.addr2msg	= eth_addr2msg,  	.msg2addr	= eth_msg2addr, -	.bcast_addr	= { { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, -			    TIPC_MEDIA_TYPE_ETH, 1 },  	.priority	= TIPC_DEF_LINK_PRI,  	.tolerance	= TIPC_DEF_LINK_TOL,  	.window		= TIPC_DEF_LINK_WIN, diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c new file mode 100644 index 00000000000..2a2864c25e1 --- /dev/null +++ b/net/tipc/ib_media.c @@ -0,0 +1,387 @@ +/* + * net/tipc/ib_media.c: InfiniBand bearer support for TIPC + * + * Copyright (c) 2013 Patrick McHardy <kaber@trash.net> + * + * Based on eth_media.c, which carries the following copyright notice: + * + * Copyright (c) 2001-2007, Ericsson AB + * Copyright (c) 2005-2008, 2011, Wind River Systems + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + *    contributors may be used to endorse or promote products derived from + *    this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/if_infiniband.h> +#include "core.h" +#include "bearer.h" + +#define MAX_IB_BEARERS		MAX_BEARERS + +/** + * struct ib_bearer - InfiniBand bearer data structure + * @bearer: ptr to associated "generic" bearer structure + * @dev: ptr to associated InfiniBand network device + * @tipc_packet_type: used in binding TIPC to InfiniBand driver + * @setup: work item used when enabling bearer + * @cleanup: work item used when disabling bearer + */ + +struct ib_bearer { +	struct tipc_bearer *bearer; +	struct net_device *dev; +	struct packet_type tipc_packet_type; +	struct work_struct setup; +	struct work_struct cleanup; +}; + +static struct tipc_media ib_media_info; +static struct ib_bearer ib_bearers[MAX_IB_BEARERS]; +static int ib_started; + +/** + * ib_media_addr_set - initialize InfiniBand media address structure + * + * Media-dependent "value" field stores the full 20-byte InfiniBand address + * (INFINIBAND_ALEN), so no zero padding is needed. 
+ */ +static void ib_media_addr_set(const struct tipc_bearer *tb_ptr, +			      struct tipc_media_addr *a, char *mac) +{ +	BUILD_BUG_ON(sizeof(a->value) < INFINIBAND_ALEN); +	memcpy(a->value, mac, INFINIBAND_ALEN); +	a->media_id = TIPC_MEDIA_TYPE_IB; +	a->broadcast = !memcmp(mac, tb_ptr->bcast_addr.value, INFINIBAND_ALEN); +} + +/** + * send_msg - send a TIPC message out over an InfiniBand interface + */ +static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr, +		    struct tipc_media_addr *dest) +{ +	struct sk_buff *clone; +	struct net_device *dev; +	int delta; + +	clone = skb_clone(buf, GFP_ATOMIC); +	if (!clone) +		return 0; + +	dev = ((struct ib_bearer *)(tb_ptr->usr_handle))->dev; +	delta = dev->hard_header_len - skb_headroom(buf); + +	if ((delta > 0) && +	    pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { +		kfree_skb(clone); +		return 0; +	} + +	skb_reset_network_header(clone); +	clone->dev = dev; +	clone->protocol = htons(ETH_P_TIPC); +	dev_hard_header(clone, dev, ETH_P_TIPC, dest->value, +			dev->dev_addr, clone->len); +	dev_queue_xmit(clone); +	return 0; +} + +/** + * recv_msg - handle incoming TIPC message from an InfiniBand interface + * + * Accept only packets explicitly sent to this node, or broadcast packets; + * ignores packets sent using InfiniBand multicast, and traffic sent to other + * nodes (which can happen if interface is running in promiscuous mode). + */ +static int recv_msg(struct sk_buff *buf, struct net_device *dev, +		    struct packet_type *pt, struct net_device *orig_dev) +{ +	struct ib_bearer *ib_ptr = (struct ib_bearer *)pt->af_packet_priv; + +	if (!net_eq(dev_net(dev), &init_net)) { +		kfree_skb(buf); +		return 0; +	} + +	if (likely(ib_ptr->bearer)) { +		if (likely(buf->pkt_type <= PACKET_BROADCAST)) { +			buf->next = NULL; +			tipc_recv_msg(buf, ib_ptr->bearer); +			return 0; +		} +	} +	kfree_skb(buf); +	return 0; +} + +/** + * setup_bearer - setup association between InfiniBand bearer and interface + */ +static void setup_bearer(struct work_struct *work) +{ +	struct ib_bearer *ib_ptr = +		container_of(work, struct ib_bearer, setup); + +	dev_add_pack(&ib_ptr->tipc_packet_type); +} + +/** + * enable_bearer - attach TIPC bearer to an InfiniBand interface + */ +static int enable_bearer(struct tipc_bearer *tb_ptr) +{ +	struct net_device *dev = NULL; +	struct net_device *pdev = NULL; +	struct ib_bearer *ib_ptr = &ib_bearers[0]; +	struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; +	char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; +	int pending_dev = 0; + +	/* Find unused InfiniBand bearer structure */ +	while (ib_ptr->dev) { +		if (!ib_ptr->bearer) +			pending_dev++; +		if (++ib_ptr == stop) +			return pending_dev ? 
-EAGAIN : -EDQUOT; +	} + +	/* Find device with specified name */ +	read_lock(&dev_base_lock); +	for_each_netdev(&init_net, pdev) { +		if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) { +			dev = pdev; +			dev_hold(dev); +			break; +		} +	} +	read_unlock(&dev_base_lock); +	if (!dev) +		return -ENODEV; + +	/* Create InfiniBand bearer for device */ +	ib_ptr->dev = dev; +	ib_ptr->tipc_packet_type.type = htons(ETH_P_TIPC); +	ib_ptr->tipc_packet_type.dev = dev; +	ib_ptr->tipc_packet_type.func = recv_msg; +	ib_ptr->tipc_packet_type.af_packet_priv = ib_ptr; +	INIT_LIST_HEAD(&(ib_ptr->tipc_packet_type.list)); +	INIT_WORK(&ib_ptr->setup, setup_bearer); +	schedule_work(&ib_ptr->setup); + +	/* Associate TIPC bearer with InfiniBand bearer */ +	ib_ptr->bearer = tb_ptr; +	tb_ptr->usr_handle = (void *)ib_ptr; +	memset(tb_ptr->bcast_addr.value, 0, sizeof(tb_ptr->bcast_addr.value)); +	memcpy(tb_ptr->bcast_addr.value, dev->broadcast, INFINIBAND_ALEN); +	tb_ptr->bcast_addr.media_id = TIPC_MEDIA_TYPE_IB; +	tb_ptr->bcast_addr.broadcast = 1; +	tb_ptr->mtu = dev->mtu; +	tb_ptr->blocked = 0; +	ib_media_addr_set(tb_ptr, &tb_ptr->addr, (char *)dev->dev_addr); +	return 0; +} + +/** + * cleanup_bearer - break association between InfiniBand bearer and interface + * + * This routine must be invoked from a work queue because it can sleep. + */ +static void cleanup_bearer(struct work_struct *work) +{ +	struct ib_bearer *ib_ptr = +		container_of(work, struct ib_bearer, cleanup); + +	dev_remove_pack(&ib_ptr->tipc_packet_type); +	dev_put(ib_ptr->dev); +	ib_ptr->dev = NULL; +} + +/** + * disable_bearer - detach TIPC bearer from an InfiniBand interface + * + * Mark InfiniBand bearer as inactive so that incoming buffers are thrown away, + * then get worker thread to complete bearer cleanup.  (Can't do cleanup + * here because cleanup code needs to sleep and caller holds spinlocks.) + */ +static void disable_bearer(struct tipc_bearer *tb_ptr) +{ +	struct ib_bearer *ib_ptr = (struct ib_bearer *)tb_ptr->usr_handle; + +	ib_ptr->bearer = NULL; +	INIT_WORK(&ib_ptr->cleanup, cleanup_bearer); +	schedule_work(&ib_ptr->cleanup); +} + +/** + * recv_notification - handle device updates from OS + * + * Change the state of the InfiniBand bearer (if any) associated with the + * specified device. 
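+ * Carrier changes toggle the bearer to match link state, MTU and address
+ * changes block and re-enable it, and unregister/rename disable it.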
+ */ +static int recv_notification(struct notifier_block *nb, unsigned long evt, +			     void *dv) +{ +	struct net_device *dev = (struct net_device *)dv; +	struct ib_bearer *ib_ptr = &ib_bearers[0]; +	struct ib_bearer *stop = &ib_bearers[MAX_IB_BEARERS]; + +	if (!net_eq(dev_net(dev), &init_net)) +		return NOTIFY_DONE; + +	while ((ib_ptr->dev != dev)) { +		if (++ib_ptr == stop) +			return NOTIFY_DONE;	/* couldn't find device */ +	} +	if (!ib_ptr->bearer) +		return NOTIFY_DONE;		/* bearer had been disabled */ + +	ib_ptr->bearer->mtu = dev->mtu; + +	switch (evt) { +	case NETDEV_CHANGE: +		if (netif_carrier_ok(dev)) +			tipc_continue(ib_ptr->bearer); +		else +			tipc_block_bearer(ib_ptr->bearer->name); +		break; +	case NETDEV_UP: +		tipc_continue(ib_ptr->bearer); +		break; +	case NETDEV_DOWN: +		tipc_block_bearer(ib_ptr->bearer->name); +		break; +	case NETDEV_CHANGEMTU: +	case NETDEV_CHANGEADDR: +		tipc_block_bearer(ib_ptr->bearer->name); +		tipc_continue(ib_ptr->bearer); +		break; +	case NETDEV_UNREGISTER: +	case NETDEV_CHANGENAME: +		tipc_disable_bearer(ib_ptr->bearer->name); +		break; +	} +	return NOTIFY_OK; +} + +static struct notifier_block notifier = { +	.notifier_call	= recv_notification, +	.priority	= 0, +}; + +/** + * ib_addr2str - convert InfiniBand address to string + */ +static int ib_addr2str(struct tipc_media_addr *a, char *str_buf, int str_size) +{ +	if (str_size < 60)	/* 60 = 19 * strlen("xx:") + strlen("xx\0") */ +		return 1; + +	sprintf(str_buf, "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:" +			 "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", +		a->value[0], a->value[1], a->value[2], a->value[3], +		a->value[4], a->value[5], a->value[6], a->value[7], +		a->value[8], a->value[9], a->value[10], a->value[11], +		a->value[12], a->value[13], a->value[14], a->value[15], +		a->value[16], a->value[17], a->value[18], a->value[19]); + +	return 0; +} + +/** + * ib_addr2msg - convert InfiniBand address format to message header format + */ +static int ib_addr2msg(struct tipc_media_addr *a, char *msg_area) +{ +	memset(msg_area, 0, TIPC_MEDIA_ADDR_SIZE); +	msg_area[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_IB; +	memcpy(msg_area, a->value, INFINIBAND_ALEN); +	return 0; +} + +/** + * ib_msg2addr - convert message header address format to InfiniBand format + */ +static int ib_msg2addr(const struct tipc_bearer *tb_ptr, +		       struct tipc_media_addr *a, char *msg_area) +{ +	ib_media_addr_set(tb_ptr, a, msg_area); +	return 0; +} + +/* + * InfiniBand media registration info + */ +static struct tipc_media ib_media_info = { +	.send_msg	= send_msg, +	.enable_bearer	= enable_bearer, +	.disable_bearer	= disable_bearer, +	.addr2str	= ib_addr2str, +	.addr2msg	= ib_addr2msg, +	.msg2addr	= ib_msg2addr, +	.priority	= TIPC_DEF_LINK_PRI, +	.tolerance	= TIPC_DEF_LINK_TOL, +	.window		= TIPC_DEF_LINK_WIN, +	.type_id	= TIPC_MEDIA_TYPE_IB, +	.name		= "ib" +}; + +/** + * tipc_ib_media_start - activate InfiniBand bearer support + * + * Register InfiniBand media type with TIPC bearer code.  Also register + * with OS for notifications about device state changes. 
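+ * Returns 0 on success and a negative errno otherwise; a second start
+ * without an intervening stop fails with -EINVAL.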
*/ +int tipc_ib_media_start(void) +{ +	int res; + +	if (ib_started) +		return -EINVAL; + +	res = tipc_register_media(&ib_media_info); +	if (res) +		return res; + +	res = register_netdevice_notifier(&notifier); +	if (!res) +		ib_started = 1; +	return res; +} + +/** + * tipc_ib_media_stop - deactivate InfiniBand bearer support + */ +void tipc_ib_media_stop(void) +{ +	if (!ib_started) +		return; + +	flush_scheduled_work(); +	unregister_netdevice_notifier(&notifier); +	ib_started = 0; +} diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 6675914dc59..8bcd4985d0f 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -44,7 +44,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)  	struct nlmsghdr *rep_nlh;  	struct nlmsghdr *req_nlh = info->nlhdr;  	struct tipc_genlmsghdr *req_userhdr = info->userhdr; -	int hdr_space = NLMSG_SPACE(GENL_HDRLEN + TIPC_GENL_HDRLEN); +	int hdr_space = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN);  	u16 cmd;  	if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN))) @@ -53,8 +53,8 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info)  		cmd = req_userhdr->cmd;  	rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd, -			NLMSG_DATA(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, -			NLMSG_PAYLOAD(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), +			nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, +			nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN),  			hdr_space);  	if (rep_buf) { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2db702d82e7..826e09938bf 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1340,7 +1340,6 @@ static void unix_destruct_scm(struct sk_buff *skb)  	struct scm_cookie scm;  	memset(&scm, 0, sizeof(scm));  	scm.pid  = UNIXCB(skb).pid; -	scm.cred = UNIXCB(skb).cred;  	if (UNIXCB(skb).fp)  		unix_detach_fds(&scm, skb); @@ -1391,8 +1390,8 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen  	int err = 0;  	UNIXCB(skb).pid  = get_pid(scm->pid); -	if (scm->cred) -		UNIXCB(skb).cred = get_cred(scm->cred); +	UNIXCB(skb).uid = scm->creds.uid; +	UNIXCB(skb).gid = scm->creds.gid;  	UNIXCB(skb).fp = NULL;  	if (scm->fp && send_fds)  		err = unix_attach_fds(scm, skb); @@ -1409,13 +1408,13 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen  static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,  			    const struct sock *other)  { -	if (UNIXCB(skb).cred) +	if (UNIXCB(skb).pid)  		return;  	if (test_bit(SOCK_PASSCRED, &sock->flags) ||  	    !other->sk_socket ||  	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {  		UNIXCB(skb).pid  = get_pid(task_tgid(current)); -		UNIXCB(skb).cred = get_current_cred(); +		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);  	}  } @@ -1819,7 +1818,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,  		siocb->scm = &tmp_scm;  		memset(&tmp_scm, 0, sizeof(tmp_scm));  	} -	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); +	scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);  	unix_set_secdata(siocb->scm, skb);  	if (!(flags & MSG_PEEK)) { @@ -1859,10 +1858,10 @@ out:  }  /* - *	Sleep until data has arrive. But check for races.. + *	Sleep until more data has arrived. But check for races..   
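+ *	The caller passes in the last skb it has already seen; the wait ends
+ *	once the queue tail moves past it, or on error, shutdown or signal.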
*/ - -static long unix_stream_data_wait(struct sock *sk, long timeo) +static long unix_stream_data_wait(struct sock *sk, long timeo, +				  struct sk_buff *last)  {  	DEFINE_WAIT(wait); @@ -1871,7 +1870,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)  	for (;;) {  		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); -		if (!skb_queue_empty(&sk->sk_receive_queue) || +		if (skb_peek_tail(&sk->sk_receive_queue) != last ||  		    sk->sk_err ||  		    (sk->sk_shutdown & RCV_SHUTDOWN) ||  		    signal_pending(current) || @@ -1890,8 +1889,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)  	return timeo;  } - -  static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,  			       struct msghdr *msg, size_t size,  			       int flags) @@ -1936,14 +1933,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,  		goto out;  	} -	skip = sk_peek_offset(sk, flags); -  	do {  		int chunk; -		struct sk_buff *skb; +		struct sk_buff *skb, *last;  		unix_state_lock(sk); -		skb = skb_peek(&sk->sk_receive_queue); +		last = skb = skb_peek(&sk->sk_receive_queue);  again:  		if (skb == NULL) {  			unix_sk(sk)->recursion_level = 0; @@ -1966,7 +1961,7 @@ again:  				break;  			mutex_unlock(&u->readlock); -			timeo = unix_stream_data_wait(sk, timeo); +			timeo = unix_stream_data_wait(sk, timeo, last);  			if (signal_pending(current)  			    ||  mutex_lock_interruptible(&u->readlock)) { @@ -1980,10 +1975,13 @@ again:  			break;  		} -		if (skip >= skb->len) { +		skip = sk_peek_offset(sk, flags); +		while (skip >= skb->len) {  			skip -= skb->len; +			last = skb;  			skb = skb_peek_next(skb, &sk->sk_receive_queue); -			goto again; +			if (!skb) +				goto again;  		}  		unix_state_unlock(sk); @@ -1991,11 +1989,12 @@ again:  		if (check_creds) {  			/* Never glue messages from different writers */  			if ((UNIXCB(skb).pid  != siocb->scm->pid) || -			    (UNIXCB(skb).cred != siocb->scm->cred)) +			    !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) || +			    !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid))  				break;  		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {  			/* Copy credentials */ -			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); +			scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);  			check_creds = 1;  		} @@ -2196,7 +2195,9 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,  	/* exceptional events? */  	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) -		mask |= POLLERR; +		mask |= POLLERR | +			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
POLLPRI : 0); +  	if (sk->sk_shutdown & RCV_SHUTDOWN)  		mask |= POLLRDHUP | POLLIN | POLLRDNORM;  	if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index d0f6545b001..9bc73f87f64 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -185,7 +185,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),  					 * have been added to the queues after  					 * starting the garbage collection  					 */ -					if (u->gc_candidate) { +					if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {  						hit = true;  						func(u);  					} @@ -254,7 +254,7 @@ static void inc_inflight_move_tail(struct unix_sock *u)  	 * of the list, so that it's checked even if it was already  	 * passed over  	 */ -	if (u->gc_maybe_cycle) +	if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags))  		list_move_tail(&u->link, &gc_candidates);  } @@ -315,8 +315,8 @@ void unix_gc(void)  		BUG_ON(total_refs < inflight_refs);  		if (total_refs == inflight_refs) {  			list_move_tail(&u->link, &gc_candidates); -			u->gc_candidate = 1; -			u->gc_maybe_cycle = 1; +			__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags); +			__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);  		}  	} @@ -344,7 +344,7 @@ void unix_gc(void)  		if (atomic_long_read(&u->inflight) > 0) {  			list_move_tail(&u->link, &not_cycle_list); -			u->gc_maybe_cycle = 0; +			__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);  			scan_children(&u->sk, inc_inflight_move_tail, NULL);  		}  	} @@ -356,7 +356,7 @@ void unix_gc(void)  	 */  	while (!list_empty(&not_cycle_list)) {  		u = list_entry(not_cycle_list.next, struct unix_sock, link); -		u->gc_candidate = 0; +		__clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags);  		list_move_tail(&u->link, &gc_inflight_list);  	}
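
Illustration (not part of the patch): the garbage.c hunks above fold two single-bit structure fields into one gc_flags word driven by the kernel bitops API. A minimal sketch of that pattern with made-up names; the double-underscore helpers are the non-atomic variants, which is safe when, as in unix_gc(), a lock already serializes every update:

#include <linux/bitops.h>
#include <linux/types.h>

#define DEMO_CANDIDATE		0	/* bit numbers, not masks */
#define DEMO_MAYBE_CYCLE	1

struct demo_node {
	unsigned long flags;		/* replaces several 1-bit bitfields */
};

static void demo_mark(struct demo_node *n)
{
	__set_bit(DEMO_CANDIDATE, &n->flags);
	__set_bit(DEMO_MAYBE_CYCLE, &n->flags);
}

static void demo_settle(struct demo_node *n)
{
	__clear_bit(DEMO_MAYBE_CYCLE, &n->flags);
}

static bool demo_is_candidate(struct demo_node *n)
{
	return test_bit(DEMO_CANDIDATE, &n->flags);
}

Packing the flags this way gives every update a well-defined word-sized access (and atomic set_bit()/clear_bit() variants if ever needed), avoiding the read-modify-write hazards adjacent C bitfields can hit when updated concurrently.

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 7f93e2a42d7..3f77f42a3b5 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -165,7 +165,7 @@ static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];  static struct list_head vsock_connected_table[VSOCK_HASH_SIZE];  static DEFINE_SPINLOCK(vsock_table_lock); -static __init void vsock_init_tables(void) +static void vsock_init_tables(void)  {  	int i; @@ -1932,7 +1932,6 @@ static const struct file_operations vsock_device_ops = {  static struct miscdevice vsock_device = {  	.name		= "vsock", -	.minor		= MISC_DYNAMIC_MINOR,  	.fops		= &vsock_device_ops,  }; @@ -1942,6 +1941,7 @@ static int __vsock_core_init(void)  	vsock_init_tables(); +	vsock_device.minor = MISC_DYNAMIC_MINOR;  	err = misc_register(&vsock_device);  	if (err) {  		pr_err("Failed to register misc device\n"); diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 5e04d3d9628..daff75200e2 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -123,6 +123,14 @@ static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)  	return err > 0 ? 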
-err : err;  } +static u32 vmci_transport_peer_rid(u32 peer_cid) +{ +	if (VMADDR_CID_HYPERVISOR == peer_cid) +		return VMCI_TRANSPORT_HYPERVISOR_PACKET_RID; + +	return VMCI_TRANSPORT_PACKET_RID; +} +  static inline void  vmci_transport_packet_init(struct vmci_transport_packet *pkt,  			   struct sockaddr_vm *src, @@ -140,7 +148,7 @@ vmci_transport_packet_init(struct vmci_transport_packet *pkt,  	pkt->dg.src = vmci_make_handle(VMADDR_CID_ANY,  				       VMCI_TRANSPORT_PACKET_RID);  	pkt->dg.dst = vmci_make_handle(dst->svm_cid, -				       VMCI_TRANSPORT_PACKET_RID); +				       vmci_transport_peer_rid(dst->svm_cid));  	pkt->dg.payload_size = sizeof(*pkt) - sizeof(pkt->dg);  	pkt->version = VMCI_TRANSPORT_PACKET_VERSION;  	pkt->type = type; @@ -508,6 +516,9 @@ static bool vmci_transport_is_trusted(struct vsock_sock *vsock, u32 peer_cid)  static bool vmci_transport_allow_dgram(struct vsock_sock *vsock, u32 peer_cid)  { +	if (VMADDR_CID_HYPERVISOR == peer_cid) +		return true; +  	if (vsock->cached_peer != peer_cid) {  		vsock->cached_peer = peer_cid;  		if (!vmci_transport_is_trusted(vsock, peer_cid) && @@ -628,7 +639,6 @@ static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg)  static bool vmci_transport_stream_allow(u32 cid, u32 port)  {  	static const u32 non_socket_contexts[] = { -		VMADDR_CID_HYPERVISOR,  		VMADDR_CID_RESERVED,  	};  	int i; @@ -667,7 +677,7 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)  	 */  	if (!vmci_transport_stream_allow(dg->src.context, -1) -	    || VMCI_TRANSPORT_PACKET_RID != dg->src.resource) +	    || vmci_transport_peer_rid(dg->src.context) != dg->src.resource)  		return VMCI_ERROR_NO_ACCESS;  	if (VMCI_DG_SIZE(dg) < sizeof(*pkt)) diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h index 1bf991803ec..fd88ea8924e 100644 --- a/net/vmw_vsock/vmci_transport.h +++ b/net/vmw_vsock/vmci_transport.h @@ -28,6 +28,9 @@  /* The resource ID on which control packets are sent. */  #define VMCI_TRANSPORT_PACKET_RID 1 +/* The resource ID on which control packets are sent to the hypervisor. 
*/ +#define VMCI_TRANSPORT_HYPERVISOR_PACKET_RID 15 +  #define VSOCK_PROTO_INVALID        0  #define VSOCK_PROTO_PKT_ON_NOTIFY (1 << 0)  #define VSOCK_PROTO_ALL_SUPPORTED (VSOCK_PROTO_PKT_ON_NOTIFY) diff --git a/net/wireless/ap.c b/net/wireless/ap.c index a4a14e8f55c..324e8d851dc 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -46,65 +46,3 @@ int cfg80211_stop_ap(struct cfg80211_registered_device *rdev,  	return err;  } - -void cfg80211_ch_switch_notify(struct net_device *dev, -			       struct cfg80211_chan_def *chandef) -{ -	struct wireless_dev *wdev = dev->ieee80211_ptr; -	struct wiphy *wiphy = wdev->wiphy; -	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - -	trace_cfg80211_ch_switch_notify(dev, chandef); - -	wdev_lock(wdev); - -	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && -		    wdev->iftype != NL80211_IFTYPE_P2P_GO)) -		goto out; - -	wdev->channel = chandef->chan; -	nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL); -out: -	wdev_unlock(wdev); -	return; -} -EXPORT_SYMBOL(cfg80211_ch_switch_notify); - -bool cfg80211_rx_spurious_frame(struct net_device *dev, -				const u8 *addr, gfp_t gfp) -{ -	struct wireless_dev *wdev = dev->ieee80211_ptr; -	bool ret; - -	trace_cfg80211_rx_spurious_frame(dev, addr); - -	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && -		    wdev->iftype != NL80211_IFTYPE_P2P_GO)) { -		trace_cfg80211_return_bool(false); -		return false; -	} -	ret = nl80211_unexpected_frame(dev, addr, gfp); -	trace_cfg80211_return_bool(ret); -	return ret; -} -EXPORT_SYMBOL(cfg80211_rx_spurious_frame); - -bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev, -					const u8 *addr, gfp_t gfp) -{ -	struct wireless_dev *wdev = dev->ieee80211_ptr; -	bool ret; - -	trace_cfg80211_rx_unexpected_4addr_frame(dev, addr); - -	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && -		    wdev->iftype != NL80211_IFTYPE_P2P_GO && -		    wdev->iftype != NL80211_IFTYPE_AP_VLAN)) { -		trace_cfg80211_return_bool(false); -		return false; -	} -	ret = nl80211_unexpected_4addr_frame(dev, addr, gfp); -	trace_cfg80211_return_bool(ret); -	return ret; -} -EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame); diff --git a/net/wireless/core.c b/net/wireless/core.c index 6ddf74f0ae1..84c9ad7e1dc 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -842,6 +842,46 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,  		rdev->num_running_monitor_ifaces += num;  } +void cfg80211_leave(struct cfg80211_registered_device *rdev, +		   struct wireless_dev *wdev) +{ +	struct net_device *dev = wdev->netdev; + +	switch (wdev->iftype) { +	case NL80211_IFTYPE_ADHOC: +		cfg80211_leave_ibss(rdev, dev, true); +		break; +	case NL80211_IFTYPE_P2P_CLIENT: +	case NL80211_IFTYPE_STATION: +		mutex_lock(&rdev->sched_scan_mtx); +		__cfg80211_stop_sched_scan(rdev, false); +		mutex_unlock(&rdev->sched_scan_mtx); + +		wdev_lock(wdev); +#ifdef CONFIG_CFG80211_WEXT +		kfree(wdev->wext.ie); +		wdev->wext.ie = NULL; +		wdev->wext.ie_len = 0; +		wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; +#endif +		__cfg80211_disconnect(rdev, dev, +				      WLAN_REASON_DEAUTH_LEAVING, true); +		cfg80211_mlme_down(rdev, dev); +		wdev_unlock(wdev); +		break; +	case NL80211_IFTYPE_MESH_POINT: +		cfg80211_leave_mesh(rdev, dev); +		break; +	case NL80211_IFTYPE_AP: +		cfg80211_stop_ap(rdev, dev); +		break; +	default: +		break; +	} + +	wdev->beacon_interval = 0; +} +  static int cfg80211_netdev_notifier_call(struct notifier_block *nb,  					 unsigned long state,  					 void *ndev) @@ 
-910,38 +950,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,  			dev->priv_flags |= IFF_DONT_BRIDGE;  		break;  	case NETDEV_GOING_DOWN: -		switch (wdev->iftype) { -		case NL80211_IFTYPE_ADHOC: -			cfg80211_leave_ibss(rdev, dev, true); -			break; -		case NL80211_IFTYPE_P2P_CLIENT: -		case NL80211_IFTYPE_STATION: -			mutex_lock(&rdev->sched_scan_mtx); -			__cfg80211_stop_sched_scan(rdev, false); -			mutex_unlock(&rdev->sched_scan_mtx); - -			wdev_lock(wdev); -#ifdef CONFIG_CFG80211_WEXT -			kfree(wdev->wext.ie); -			wdev->wext.ie = NULL; -			wdev->wext.ie_len = 0; -			wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; -#endif -			__cfg80211_disconnect(rdev, dev, -					      WLAN_REASON_DEAUTH_LEAVING, true); -			cfg80211_mlme_down(rdev, dev); -			wdev_unlock(wdev); -			break; -		case NL80211_IFTYPE_MESH_POINT: -			cfg80211_leave_mesh(rdev, dev); -			break; -		case NL80211_IFTYPE_AP: -			cfg80211_stop_ap(rdev, dev); -			break; -		default: -			break; -		} -		wdev->beacon_interval = 0; +		cfg80211_leave(rdev, wdev);  		break;  	case NETDEV_DOWN:  		cfg80211_update_iface_num(rdev, wdev->iftype, -1); @@ -1117,8 +1126,10 @@ static int __init cfg80211_init(void)  		goto out_fail_reg;  	cfg80211_wq = create_singlethread_workqueue("cfg80211"); -	if (!cfg80211_wq) +	if (!cfg80211_wq) { +		err = -ENOMEM;  		goto out_fail_wq; +	}  	return 0; diff --git a/net/wireless/core.h b/net/wireless/core.h index 5845c2b37aa..fd35dae547c 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -88,6 +88,9 @@ struct cfg80211_registered_device {  	struct delayed_work dfs_update_channels_wk; +	/* netlink port which started critical protocol (0 means not started) */ +	u32 crit_proto_nlportid; +  	/* must be last because of the way we do wiphy_priv(),  	 * and it should at least be aligned to NETDEV_ALIGN */  	struct wiphy wiphy __aligned(NETDEV_ALIGN); @@ -330,20 +333,15 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,  int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,  			  struct net_device *dev,  			  struct ieee80211_channel *chan, -			  const u8 *bssid, const u8 *prev_bssid, +			  const u8 *bssid,  			  const u8 *ssid, int ssid_len, -			  const u8 *ie, int ie_len, bool use_mfp, -			  struct cfg80211_crypto_settings *crypt, -			  u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, -			  struct ieee80211_ht_cap *ht_capa_mask); +			  struct cfg80211_assoc_request *req);  int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, -			struct net_device *dev, struct ieee80211_channel *chan, -			const u8 *bssid, const u8 *prev_bssid, +			struct net_device *dev, +			struct ieee80211_channel *chan, +			const u8 *bssid,  			const u8 *ssid, int ssid_len, -			const u8 *ie, int ie_len, bool use_mfp, -			struct cfg80211_crypto_settings *crypt, -			u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, -			struct ieee80211_ht_cap *ht_capa_mask); +			struct cfg80211_assoc_request *req);  int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,  			   struct net_device *dev, const u8 *bssid,  			   const u8 *ie, int ie_len, u16 reason, @@ -375,6 +373,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,  			  bool no_cck, bool dont_wait_for_ack, u64 *cookie);  void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,  			       const struct ieee80211_ht_cap *ht_capa_mask); +void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, +				const struct ieee80211_vht_cap *vht_capa_mask);  /* SME */  int 
__cfg80211_connect(struct cfg80211_registered_device *rdev, @@ -503,6 +503,9 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,  void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev,  			       enum nl80211_iftype iftype, int num); +void cfg80211_leave(struct cfg80211_registered_device *rdev, +		    struct wireless_dev *wdev); +  void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev,  			      struct wireless_dev *wdev); diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 55957a284f6..0bb93f3061a 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -85,6 +85,7 @@ const struct mesh_setup default_mesh_setup = {  	.ie = NULL,  	.ie_len = 0,  	.is_secure = false, +	.user_mpm = false,  	.beacon_interval = MESH_DEFAULT_BEACON_INTERVAL,  	.dtim_period = MESH_DEFAULT_DTIM_PERIOD,  }; @@ -233,20 +234,6 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev,  	return 0;  } -void cfg80211_notify_new_peer_candidate(struct net_device *dev, -		const u8 *macaddr, const u8* ie, u8 ie_len, gfp_t gfp) -{ -	struct wireless_dev *wdev = dev->ieee80211_ptr; - -	trace_cfg80211_notify_new_peer_candidate(dev, macaddr); -	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_MESH_POINT)) -		return; - -	nl80211_send_new_peer_candidate(wiphy_to_dev(wdev->wiphy), dev, -			macaddr, ie, ie_len, gfp); -} -EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate); -  static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev,  				 struct net_device *dev)  { diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index caddca35d68..0c7b7dd855f 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -187,30 +187,6 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len)  }  EXPORT_SYMBOL(cfg80211_send_disassoc); -void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf, -				 size_t len) -{ -	struct wireless_dev *wdev = dev->ieee80211_ptr; -	struct wiphy *wiphy = wdev->wiphy; -	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - -	trace_cfg80211_send_unprot_deauth(dev); -	nl80211_send_unprot_deauth(rdev, dev, buf, len, GFP_ATOMIC); -} -EXPORT_SYMBOL(cfg80211_send_unprot_deauth); - -void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf, -				   size_t len) -{ -	struct wireless_dev *wdev = dev->ieee80211_ptr; -	struct wiphy *wiphy = wdev->wiphy; -	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - -	trace_cfg80211_send_unprot_disassoc(dev); -	nl80211_send_unprot_disassoc(rdev, dev, buf, len, GFP_ATOMIC); -} -EXPORT_SYMBOL(cfg80211_send_unprot_disassoc); -  void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr)  {  	struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -367,27 +343,38 @@ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,  		p1[i] &= p2[i];  } +/*  Do a logical vht_capa &= vht_capa_mask.  
*/ +void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa, +				const struct ieee80211_vht_cap *vht_capa_mask) +{ +	int i; +	u8 *p1, *p2; +	if (!vht_capa_mask) { +		memset(vht_capa, 0, sizeof(*vht_capa)); +		return; +	} + +	p1 = (u8*)(vht_capa); +	p2 = (u8*)(vht_capa_mask); +	for (i = 0; i < sizeof(*vht_capa); i++) +		p1[i] &= p2[i]; +} +  int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,  			  struct net_device *dev,  			  struct ieee80211_channel *chan, -			  const u8 *bssid, const u8 *prev_bssid, +			  const u8 *bssid,  			  const u8 *ssid, int ssid_len, -			  const u8 *ie, int ie_len, bool use_mfp, -			  struct cfg80211_crypto_settings *crypt, -			  u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, -			  struct ieee80211_ht_cap *ht_capa_mask) +			  struct cfg80211_assoc_request *req)  {  	struct wireless_dev *wdev = dev->ieee80211_ptr; -	struct cfg80211_assoc_request req;  	int err;  	bool was_connected = false;  	ASSERT_WDEV_LOCK(wdev); -	memset(&req, 0, sizeof(req)); - -	if (wdev->current_bss && prev_bssid && -	    ether_addr_equal(wdev->current_bss->pub.bssid, prev_bssid)) { +	if (wdev->current_bss && req->prev_bssid && +	    ether_addr_equal(wdev->current_bss->pub.bssid, req->prev_bssid)) {  		/*  		 * Trying to reassociate: Allow this to proceed and let the old  		 * association to be dropped when the new one is completed. @@ -399,40 +386,30 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,  	} else if (wdev->current_bss)  		return -EALREADY; -	req.ie = ie; -	req.ie_len = ie_len; -	memcpy(&req.crypto, crypt, sizeof(req.crypto)); -	req.use_mfp = use_mfp; -	req.prev_bssid = prev_bssid; -	req.flags = assoc_flags; -	if (ht_capa) -		memcpy(&req.ht_capa, ht_capa, sizeof(req.ht_capa)); -	if (ht_capa_mask) -		memcpy(&req.ht_capa_mask, ht_capa_mask, -		       sizeof(req.ht_capa_mask)); -	cfg80211_oper_and_ht_capa(&req.ht_capa_mask, +	cfg80211_oper_and_ht_capa(&req->ht_capa_mask,  				  rdev->wiphy.ht_capa_mod_mask); +	cfg80211_oper_and_vht_capa(&req->vht_capa_mask, +				   rdev->wiphy.vht_capa_mod_mask); -	req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, -				   WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); -	if (!req.bss) { +	req->bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, +				    WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); +	if (!req->bss) {  		if (was_connected)  			wdev->sme_state = CFG80211_SME_CONNECTED;  		return -ENOENT;  	} -	err = cfg80211_can_use_chan(rdev, wdev, req.bss->channel, -				    CHAN_MODE_SHARED); +	err = cfg80211_can_use_chan(rdev, wdev, chan, CHAN_MODE_SHARED);  	if (err)  		goto out; -	err = rdev_assoc(rdev, dev, &req); +	err = rdev_assoc(rdev, dev, req);  out:  	if (err) {  		if (was_connected)  			wdev->sme_state = CFG80211_SME_CONNECTED; -		cfg80211_put_bss(&rdev->wiphy, req.bss); +		cfg80211_put_bss(&rdev->wiphy, req->bss);  	}  	return err; @@ -441,21 +418,17 @@ out:  int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,  			struct net_device *dev,  			struct ieee80211_channel *chan, -			const u8 *bssid, const u8 *prev_bssid, +			const u8 *bssid,  			const u8 *ssid, int ssid_len, -			const u8 *ie, int ie_len, bool use_mfp, -			struct cfg80211_crypto_settings *crypt, -			u32 assoc_flags, struct ieee80211_ht_cap *ht_capa, -			struct ieee80211_ht_cap *ht_capa_mask) +			struct cfg80211_assoc_request *req)  {  	struct wireless_dev *wdev = dev->ieee80211_ptr;  	int err;  	mutex_lock(&rdev->devlist_mtx);  	wdev_lock(wdev); -	err = __cfg80211_mlme_assoc(rdev, dev, 
chan, bssid, prev_bssid, -				    ssid, ssid_len, ie, ie_len, use_mfp, crypt, -				    assoc_flags, ht_capa, ht_capa_mask); +	err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, +				    ssid, ssid_len, req);  	wdev_unlock(wdev);  	mutex_unlock(&rdev->devlist_mtx); @@ -577,62 +550,6 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,  	}  } -void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, -			       struct ieee80211_channel *chan, -			       unsigned int duration, gfp_t gfp) -{ -	struct wiphy *wiphy = wdev->wiphy; -	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - -	trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration); -	nl80211_send_remain_on_channel(rdev, wdev, cookie, chan, duration, gfp); -} -EXPORT_SYMBOL(cfg80211_ready_on_channel); - -void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, -					struct ieee80211_channel *chan, -					gfp_t gfp) -{ -	struct wiphy *wiphy = wdev->wiphy; -	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - -	trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan); -	nl80211_send_remain_on_channel_cancel(rdev, wdev, cookie, chan, gfp); -} -EXPORT_SYMBOL(cfg80211_remain_on_channel_expired); - -void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, -		      struct station_info *sinfo, gfp_t gfp) -{ -	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; -	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - -	trace_cfg80211_new_sta(dev, mac_addr, sinfo); -	nl80211_send_sta_event(rdev, dev, mac_addr, sinfo, gfp); -} -EXPORT_SYMBOL(cfg80211_new_sta); - -void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) -{ -	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; -	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - -	trace_cfg80211_del_sta(dev, mac_addr); -	nl80211_send_sta_del_event(rdev, dev, mac_addr, gfp); -} -EXPORT_SYMBOL(cfg80211_del_sta); - -void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, -			  enum nl80211_connect_failed_reason reason, -			  gfp_t gfp) -{ -	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; -	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - -	nl80211_send_conn_failed_event(rdev, dev, mac_addr, reason, gfp); -} -EXPORT_SYMBOL(cfg80211_conn_failed); -  struct cfg80211_mgmt_registration {  	struct list_head list; @@ -731,6 +648,11 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)  	spin_unlock_bh(&wdev->mgmt_registrations_lock); +	if (nlportid && rdev->crit_proto_nlportid == nlportid) { +		rdev->crit_proto_nlportid = 0; +		rdev_crit_proto_stop(rdev, wdev); +	} +  	if (nlportid == wdev->ap_unexpected_nlportid)  		wdev->ap_unexpected_nlportid = 0;  } @@ -909,85 +831,6 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm,  }  EXPORT_SYMBOL(cfg80211_rx_mgmt); -void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, -			     const u8 *buf, size_t len, bool ack, gfp_t gfp) -{ -	struct wiphy *wiphy = wdev->wiphy; -	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - -	trace_cfg80211_mgmt_tx_status(wdev, cookie, ack); - -	/* Indicate TX status of the Action frame to user space */ -	nl80211_send_mgmt_tx_status(rdev, wdev, cookie, buf, len, ack, gfp); -} -EXPORT_SYMBOL(cfg80211_mgmt_tx_status); - -void cfg80211_cqm_rssi_notify(struct net_device *dev, -			      enum nl80211_cqm_rssi_threshold_event rssi_event, -			      gfp_t gfp) -{ -	struct wireless_dev *wdev = dev->ieee80211_ptr; -	
struct wiphy *wiphy = wdev->wiphy; -	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - -	trace_cfg80211_cqm_rssi_notify(dev, rssi_event); - -	/* Indicate roaming trigger event to user space */ -	nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp); -} -EXPORT_SYMBOL(cfg80211_cqm_rssi_notify); - -void cfg80211_cqm_pktloss_notify(struct net_device *dev, -				 const u8 *peer, u32 num_packets, gfp_t gfp) -{ -	struct wireless_dev *wdev = dev->ieee80211_ptr; -	struct wiphy *wiphy = wdev->wiphy; -	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - -	trace_cfg80211_cqm_pktloss_notify(dev, peer, num_packets); - -	/* Indicate roaming trigger event to user space */ -	nl80211_send_cqm_pktloss_notify(rdev, dev, peer, num_packets, gfp); -} -EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify); - -void cfg80211_cqm_txe_notify(struct net_device *dev, -			     const u8 *peer, u32 num_packets, -			     u32 rate, u32 intvl, gfp_t gfp) -{ -	struct wireless_dev *wdev = dev->ieee80211_ptr; -	struct wiphy *wiphy = wdev->wiphy; -	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - -	nl80211_send_cqm_txe_notify(rdev, dev, peer, num_packets, -				    rate, intvl, gfp); -} -EXPORT_SYMBOL(cfg80211_cqm_txe_notify); - -void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid, -			       const u8 *replay_ctr, gfp_t gfp) -{ -	struct wireless_dev *wdev = dev->ieee80211_ptr; -	struct wiphy *wiphy = wdev->wiphy; -	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - -	trace_cfg80211_gtk_rekey_notify(dev, bssid); -	nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp); -} -EXPORT_SYMBOL(cfg80211_gtk_rekey_notify); - -void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index, -				     const u8 *bssid, bool preauth, gfp_t gfp) -{ -	struct wireless_dev *wdev = dev->ieee80211_ptr; -	struct wiphy *wiphy = wdev->wiphy; -	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - -	trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth); -	nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp); -} -EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify); -  void cfg80211_dfs_channels_update_work(struct work_struct *work)  {  	struct delayed_work *delayed_work; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 58e13a8c95f..afa283841e8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -370,6 +370,14 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {  	[NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED },  	[NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 },  	[NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, }, +	[NL80211_ATTR_SPLIT_WIPHY_DUMP] = { .type = NLA_FLAG, }, +	[NL80211_ATTR_DISABLE_VHT] = { .type = NLA_FLAG }, +	[NL80211_ATTR_VHT_CAPABILITY_MASK] = { +		.len = NL80211_VHT_CAPABILITY_LEN, +	}, +	[NL80211_ATTR_MDID] = { .type = NLA_U16 }, +	[NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, +				  .len = IEEE80211_MAX_DATA_LEN },  };  /* policy for the key attributes */ @@ -439,62 +447,69 @@ nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = {  	[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 },  }; -/* ifidx get helper */ -static int nl80211_get_ifidx(struct netlink_callback *cb) +static int nl80211_prepare_wdev_dump(struct sk_buff *skb, +				     struct netlink_callback *cb, +				     struct cfg80211_registered_device **rdev, +				     struct wireless_dev **wdev)  { -	int res; - -	res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, -		
	  nl80211_fam.attrbuf, nl80211_fam.maxattr, -			  nl80211_policy); -	if (res) -		return res; - -	if (!nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]) -		return -EINVAL; +	int err; -	res = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]); -	if (!res) -		return -EINVAL; -	return res; -} +	rtnl_lock(); +	mutex_lock(&cfg80211_mutex); -static int nl80211_prepare_netdev_dump(struct sk_buff *skb, -				       struct netlink_callback *cb, -				       struct cfg80211_registered_device **rdev, -				       struct net_device **dev) -{ -	int ifidx = cb->args[0]; -	int err; +	if (!cb->args[0]) { +		err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, +				  nl80211_fam.attrbuf, nl80211_fam.maxattr, +				  nl80211_policy); +		if (err) +			goto out_unlock; -	if (!ifidx) -		ifidx = nl80211_get_ifidx(cb); -	if (ifidx < 0) -		return ifidx; +		*wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), +						   nl80211_fam.attrbuf); +		if (IS_ERR(*wdev)) { +			err = PTR_ERR(*wdev); +			goto out_unlock; +		} +		*rdev = wiphy_to_dev((*wdev)->wiphy); +		cb->args[0] = (*rdev)->wiphy_idx; +		cb->args[1] = (*wdev)->identifier; +	} else { +		struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0]); +		struct wireless_dev *tmp; -	cb->args[0] = ifidx; +		if (!wiphy) { +			err = -ENODEV; +			goto out_unlock; +		} +		*rdev = wiphy_to_dev(wiphy); +		*wdev = NULL; -	rtnl_lock(); +		mutex_lock(&(*rdev)->devlist_mtx); +		list_for_each_entry(tmp, &(*rdev)->wdev_list, list) { +			if (tmp->identifier == cb->args[1]) { +				*wdev = tmp; +				break; +			} +		} +		mutex_unlock(&(*rdev)->devlist_mtx); -	*dev = __dev_get_by_index(sock_net(skb->sk), ifidx); -	if (!*dev) { -		err = -ENODEV; -		goto out_rtnl; +		if (!*wdev) { +			err = -ENODEV; +			goto out_unlock; +		}  	} -	*rdev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx); -	if (IS_ERR(*rdev)) { -		err = PTR_ERR(*rdev); -		goto out_rtnl; -	} +	cfg80211_lock_rdev(*rdev); +	mutex_unlock(&cfg80211_mutex);  	return 0; - out_rtnl: + out_unlock: +	mutex_unlock(&cfg80211_mutex);  	rtnl_unlock();  	return err;  } -static void nl80211_finish_netdev_dump(struct cfg80211_registered_device *rdev) +static void nl80211_finish_wdev_dump(struct cfg80211_registered_device *rdev)  {  	cfg80211_unlock_rdev(rdev);  	rtnl_unlock(); @@ -539,7 +554,8 @@ static inline void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq,  }  static int nl80211_msg_put_channel(struct sk_buff *msg, -				   struct ieee80211_channel *chan) +				   struct ieee80211_channel *chan, +				   bool large)  {  	if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_FREQ,  			chan->center_freq)) @@ -554,9 +570,37 @@ static int nl80211_msg_put_channel(struct sk_buff *msg,  	if ((chan->flags & IEEE80211_CHAN_NO_IBSS) &&  	    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_IBSS))  		goto nla_put_failure; -	if ((chan->flags & IEEE80211_CHAN_RADAR) && -	    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) -		goto nla_put_failure; +	if (chan->flags & IEEE80211_CHAN_RADAR) { +		if (nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR)) +			goto nla_put_failure; +		if (large) { +			u32 time; + +			time = elapsed_jiffies_msecs(chan->dfs_state_entered); + +			if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_STATE, +					chan->dfs_state)) +				goto nla_put_failure; +			if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME, +					time)) +				goto nla_put_failure; +		} +	} + +	if (large) { +		if ((chan->flags & IEEE80211_CHAN_NO_HT40MINUS) && +		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_MINUS)) +			goto nla_put_failure; +		if 
((chan->flags & IEEE80211_CHAN_NO_HT40PLUS) && +		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_PLUS)) +			goto nla_put_failure; +		if ((chan->flags & IEEE80211_CHAN_NO_80MHZ) && +		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_80MHZ)) +			goto nla_put_failure; +		if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) && +		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ)) +			goto nla_put_failure; +	}  	if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,  			DBM_TO_MBM(chan->max_power))) @@ -832,7 +876,8 @@ nla_put_failure:  }  static int nl80211_put_iface_combinations(struct wiphy *wiphy, -					  struct sk_buff *msg) +					  struct sk_buff *msg, +					  bool large)  {  	struct nlattr *nl_combis;  	int i, j; @@ -881,6 +926,10 @@ static int nl80211_put_iface_combinations(struct wiphy *wiphy,  		    nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM,  				c->max_interfaces))  			goto nla_put_failure; +		if (large && +		    nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, +				c->radar_detect_widths)) +			goto nla_put_failure;  		nla_nest_end(msg, nl_combi);  	} @@ -892,412 +941,615 @@ nla_put_failure:  	return -ENOBUFS;  } -static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags, -			      struct cfg80211_registered_device *dev) +#ifdef CONFIG_PM +static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, +					struct sk_buff *msg)  { -	void *hdr; -	struct nlattr *nl_bands, *nl_band; -	struct nlattr *nl_freqs, *nl_freq; -	struct nlattr *nl_rates, *nl_rate; -	struct nlattr *nl_cmds; -	enum ieee80211_band band; -	struct ieee80211_channel *chan; -	struct ieee80211_rate *rate; -	int i; -	const struct ieee80211_txrx_stypes *mgmt_stypes = -				dev->wiphy.mgmt_stypes; +	const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan.tcp; +	struct nlattr *nl_tcp; -	hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); -	if (!hdr) -		return -1; +	if (!tcp) +		return 0; -	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) || -	    nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)) || -	    nla_put_u32(msg, NL80211_ATTR_GENERATION, -			cfg80211_rdev_list_generation) || -	    nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, -		       dev->wiphy.retry_short) || -	    nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG, -		       dev->wiphy.retry_long) || -	    nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD, -			dev->wiphy.frag_threshold) || -	    nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, -			dev->wiphy.rts_threshold) || -	    nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, -		       dev->wiphy.coverage_class) || -	    nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, -		       dev->wiphy.max_scan_ssids) || -	    nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS, -		       dev->wiphy.max_sched_scan_ssids) || -	    nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, -			dev->wiphy.max_scan_ie_len) || -	    nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, -			dev->wiphy.max_sched_scan_ie_len) || -	    nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS, -		       dev->wiphy.max_match_sets)) -		goto nla_put_failure; +	nl_tcp = nla_nest_start(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); +	if (!nl_tcp) +		return -ENOBUFS; -	if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && -	    nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN)) -		goto nla_put_failure; -	if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) && -	    nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH)) -		goto nla_put_failure; -	if ((dev->wiphy.flags & 
WIPHY_FLAG_AP_UAPSD) && -	    nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD)) -		goto nla_put_failure; -	if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) && -	    nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT)) -		goto nla_put_failure; -	if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) && -	    nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT)) -		goto nla_put_failure; -	if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && -	    nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) -		goto nla_put_failure; +	if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, +			tcp->data_payload_max)) +		return -ENOBUFS; -	if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES, -		    sizeof(u32) * dev->wiphy.n_cipher_suites, -		    dev->wiphy.cipher_suites)) -		goto nla_put_failure; +	if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, +			tcp->data_payload_max)) +		return -ENOBUFS; -	if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, -		       dev->wiphy.max_num_pmkids)) -		goto nla_put_failure; +	if (tcp->seq && nla_put_flag(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ)) +		return -ENOBUFS; -	if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && -	    nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE)) -		goto nla_put_failure; +	if (tcp->tok && nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, +				sizeof(*tcp->tok), tcp->tok)) +		return -ENOBUFS; -	if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, -			dev->wiphy.available_antennas_tx) || -	    nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, -			dev->wiphy.available_antennas_rx)) -		goto nla_put_failure; +	if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL, +			tcp->data_interval_max)) +		return -ENOBUFS; -	if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) && -	    nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD, -			dev->wiphy.probe_resp_offload)) -		goto nla_put_failure; +	if (nla_put_u32(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD, +			tcp->wake_payload_max)) +		return -ENOBUFS; -	if ((dev->wiphy.available_antennas_tx || -	     dev->wiphy.available_antennas_rx) && dev->ops->get_antenna) { -		u32 tx_ant = 0, rx_ant = 0; -		int res; -		res = rdev_get_antenna(dev, &tx_ant, &rx_ant); -		if (!res) { -			if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, -					tx_ant) || -			    nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_RX, -					rx_ant)) -				goto nla_put_failure; -		} +	nla_nest_end(msg, nl_tcp); +	return 0; +} + +static int nl80211_send_wowlan(struct sk_buff *msg, +			       struct cfg80211_registered_device *dev, +			       bool large) +{ +	struct nlattr *nl_wowlan; + +	if (!dev->wiphy.wowlan.flags && !dev->wiphy.wowlan.n_patterns) +		return 0; + +	nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); +	if (!nl_wowlan) +		return -ENOBUFS; + +	if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) && +	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || +	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) && +	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || +	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) && +	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || +	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && +	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) || +	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && +	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || +	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && +	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || +	    
((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && +	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || +	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) && +	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) +		return -ENOBUFS; + +	if (dev->wiphy.wowlan.n_patterns) { +		struct nl80211_wowlan_pattern_support pat = { +			.max_patterns = dev->wiphy.wowlan.n_patterns, +			.min_pattern_len = dev->wiphy.wowlan.pattern_min_len, +			.max_pattern_len = dev->wiphy.wowlan.pattern_max_len, +			.max_pkt_offset = dev->wiphy.wowlan.max_pkt_offset, +		}; + +		if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, +			    sizeof(pat), &pat)) +			return -ENOBUFS;  	} -	if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, -				dev->wiphy.interface_modes)) -		goto nla_put_failure; +	if (large && nl80211_send_wowlan_tcp_caps(dev, msg)) +		return -ENOBUFS; -	nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); -	if (!nl_bands) -		goto nla_put_failure; +	nla_nest_end(msg, nl_wowlan); -	for (band = 0; band < IEEE80211_NUM_BANDS; band++) { -		if (!dev->wiphy.bands[band]) -			continue; +	return 0; +} +#endif -		nl_band = nla_nest_start(msg, band); -		if (!nl_band) -			goto nla_put_failure; +static int nl80211_send_band_rateinfo(struct sk_buff *msg, +				      struct ieee80211_supported_band *sband) +{ +	struct nlattr *nl_rates, *nl_rate; +	struct ieee80211_rate *rate; +	int i; -		/* add HT info */ -		if (dev->wiphy.bands[band]->ht_cap.ht_supported && -		    (nla_put(msg, NL80211_BAND_ATTR_HT_MCS_SET, -			     sizeof(dev->wiphy.bands[band]->ht_cap.mcs), -			     &dev->wiphy.bands[band]->ht_cap.mcs) || -		     nla_put_u16(msg, NL80211_BAND_ATTR_HT_CAPA, -				 dev->wiphy.bands[band]->ht_cap.cap) || -		     nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR, -				dev->wiphy.bands[band]->ht_cap.ampdu_factor) || -		     nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY, -				dev->wiphy.bands[band]->ht_cap.ampdu_density))) -			goto nla_put_failure; +	/* add HT info */ +	if (sband->ht_cap.ht_supported && +	    (nla_put(msg, NL80211_BAND_ATTR_HT_MCS_SET, +		     sizeof(sband->ht_cap.mcs), +		     &sband->ht_cap.mcs) || +	     nla_put_u16(msg, NL80211_BAND_ATTR_HT_CAPA, +			 sband->ht_cap.cap) || +	     nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR, +			sband->ht_cap.ampdu_factor) || +	     nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY, +			sband->ht_cap.ampdu_density))) +		return -ENOBUFS; -		/* add VHT info */ -		if (dev->wiphy.bands[band]->vht_cap.vht_supported && -		    (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET, -			     sizeof(dev->wiphy.bands[band]->vht_cap.vht_mcs), -			     &dev->wiphy.bands[band]->vht_cap.vht_mcs) || -		     nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA, -				 dev->wiphy.bands[band]->vht_cap.cap))) -			goto nla_put_failure; +	/* add VHT info */ +	if (sband->vht_cap.vht_supported && +	    (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET, +		     sizeof(sband->vht_cap.vht_mcs), +		     &sband->vht_cap.vht_mcs) || +	     nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA, +			 sband->vht_cap.cap))) +		return -ENOBUFS; -		/* add frequencies */ -		nl_freqs = nla_nest_start(msg, NL80211_BAND_ATTR_FREQS); -		if (!nl_freqs) -			goto nla_put_failure; +	/* add bitrates */ +	nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES); +	if (!nl_rates) +		return -ENOBUFS; -		for (i = 0; i < dev->wiphy.bands[band]->n_channels; i++) { -			nl_freq = nla_nest_start(msg, i); -			if (!nl_freq) -				goto nla_put_failure; +	for (i = 0; i < sband->n_bitrates; i++) { +		
nl_rate = nla_nest_start(msg, i); +		if (!nl_rate) +			return -ENOBUFS; + +		rate = &sband->bitrates[i]; +		if (nla_put_u32(msg, NL80211_BITRATE_ATTR_RATE, +				rate->bitrate)) +			return -ENOBUFS; +		if ((rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) && +		    nla_put_flag(msg, +				 NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE)) +			return -ENOBUFS; -			chan = &dev->wiphy.bands[band]->channels[i]; +		nla_nest_end(msg, nl_rate); +	} -			if (nl80211_msg_put_channel(msg, chan)) -				goto nla_put_failure; +	nla_nest_end(msg, nl_rates); -			nla_nest_end(msg, nl_freq); -		} +	return 0; +} -		nla_nest_end(msg, nl_freqs); +static int +nl80211_send_mgmt_stypes(struct sk_buff *msg, +			 const struct ieee80211_txrx_stypes *mgmt_stypes) +{ +	u16 stypes; +	struct nlattr *nl_ftypes, *nl_ifs; +	enum nl80211_iftype ift; +	int i; -		/* add bitrates */ -		nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES); -		if (!nl_rates) -			goto nla_put_failure; +	if (!mgmt_stypes) +		return 0; -		for (i = 0; i < dev->wiphy.bands[band]->n_bitrates; i++) { -			nl_rate = nla_nest_start(msg, i); -			if (!nl_rate) -				goto nla_put_failure; +	nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES); +	if (!nl_ifs) +		return -ENOBUFS; -			rate = &dev->wiphy.bands[band]->bitrates[i]; -			if (nla_put_u32(msg, NL80211_BITRATE_ATTR_RATE, -					rate->bitrate)) -				goto nla_put_failure; -			if ((rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) && -			    nla_put_flag(msg, -					 NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE)) -				goto nla_put_failure; +	for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { +		nl_ftypes = nla_nest_start(msg, ift); +		if (!nl_ftypes) +			return -ENOBUFS; +		i = 0; +		stypes = mgmt_stypes[ift].tx; +		while (stypes) { +			if ((stypes & 1) && +			    nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, +					(i << 4) | IEEE80211_FTYPE_MGMT)) +				return -ENOBUFS; +			stypes >>= 1; +			i++; +		} +		nla_nest_end(msg, nl_ftypes); +	} + +	nla_nest_end(msg, nl_ifs); + +	nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES); +	if (!nl_ifs) +		return -ENOBUFS; -			nla_nest_end(msg, nl_rate); +	for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { +		nl_ftypes = nla_nest_start(msg, ift); +		if (!nl_ftypes) +			return -ENOBUFS; +		i = 0; +		stypes = mgmt_stypes[ift].rx; +		while (stypes) { +			if ((stypes & 1) && +			    nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, +					(i << 4) | IEEE80211_FTYPE_MGMT)) +				return -ENOBUFS; +			stypes >>= 1; +			i++;  		} +		nla_nest_end(msg, nl_ftypes); +	} +	nla_nest_end(msg, nl_ifs); -		nla_nest_end(msg, nl_rates); +	return 0; +} -		nla_nest_end(msg, nl_band); +static int nl80211_send_wiphy(struct cfg80211_registered_device *dev, +			      struct sk_buff *msg, u32 portid, u32 seq, +			      int flags, bool split, long *split_start, +			      long *band_start, long *chan_start) +{ +	void *hdr; +	struct nlattr *nl_bands, *nl_band; +	struct nlattr *nl_freqs, *nl_freq; +	struct nlattr *nl_cmds; +	enum ieee80211_band band; +	struct ieee80211_channel *chan; +	int i; +	const struct ieee80211_txrx_stypes *mgmt_stypes = +				dev->wiphy.mgmt_stypes; +	long start = 0, start_chan = 0, start_band = 0; +	u32 features; + +	hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); +	if (!hdr) +		return -ENOBUFS; + +	/* allow always using the variables */ +	if (!split) { +		split_start = &start; +		band_start = &start_band; +		chan_start = &start_chan;  	} -	nla_nest_end(msg, nl_bands); -	nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS); -	if (!nl_cmds) +	if (nla_put_u32(msg, 
NL80211_ATTR_WIPHY, dev->wiphy_idx) || +	    nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, +			   wiphy_name(&dev->wiphy)) || +	    nla_put_u32(msg, NL80211_ATTR_GENERATION, +			cfg80211_rdev_list_generation))  		goto nla_put_failure; -	i = 0; -#define CMD(op, n)						\ -	 do {							\ -		if (dev->ops->op) {				\ -			i++;					\ -			if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ -				goto nla_put_failure;		\ -		}						\ -	} while (0) +	switch (*split_start) { +	case 0: +		if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, +			       dev->wiphy.retry_short) || +		    nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG, +			       dev->wiphy.retry_long) || +		    nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD, +				dev->wiphy.frag_threshold) || +		    nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, +				dev->wiphy.rts_threshold) || +		    nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, +			       dev->wiphy.coverage_class) || +		    nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, +			       dev->wiphy.max_scan_ssids) || +		    nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS, +			       dev->wiphy.max_sched_scan_ssids) || +		    nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, +				dev->wiphy.max_scan_ie_len) || +		    nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, +				dev->wiphy.max_sched_scan_ie_len) || +		    nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS, +			       dev->wiphy.max_match_sets)) +			goto nla_put_failure; -	CMD(add_virtual_intf, NEW_INTERFACE); -	CMD(change_virtual_intf, SET_INTERFACE); -	CMD(add_key, NEW_KEY); -	CMD(start_ap, START_AP); -	CMD(add_station, NEW_STATION); -	CMD(add_mpath, NEW_MPATH); -	CMD(update_mesh_config, SET_MESH_CONFIG); -	CMD(change_bss, SET_BSS); -	CMD(auth, AUTHENTICATE); -	CMD(assoc, ASSOCIATE); -	CMD(deauth, DEAUTHENTICATE); -	CMD(disassoc, DISASSOCIATE); -	CMD(join_ibss, JOIN_IBSS); -	CMD(join_mesh, JOIN_MESH); -	CMD(set_pmksa, SET_PMKSA); -	CMD(del_pmksa, DEL_PMKSA); -	CMD(flush_pmksa, FLUSH_PMKSA); -	if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) -		CMD(remain_on_channel, REMAIN_ON_CHANNEL); -	CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); -	CMD(mgmt_tx, FRAME); -	CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); -	if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { -		i++; -		if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) +		if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && +		    nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN))  			goto nla_put_failure; -	} -	if (dev->ops->set_monitor_channel || dev->ops->start_ap || -	    dev->ops->join_mesh) { -		i++; -		if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) +		if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) && +		    nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH))  			goto nla_put_failure; -	} -	CMD(set_wds_peer, SET_WDS_PEER); -	if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { -		CMD(tdls_mgmt, TDLS_MGMT); -		CMD(tdls_oper, TDLS_OPER); -	} -	if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) -		CMD(sched_scan_start, START_SCHED_SCAN); -	CMD(probe_client, PROBE_CLIENT); -	CMD(set_noack_map, SET_NOACK_MAP); -	if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { -		i++; -		if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) +		if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && +		    nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD)) +			goto nla_put_failure; +		if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) && +		    nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT)) +			goto nla_put_failure; +		if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) && +		    nla_put_flag(msg, 
NL80211_ATTR_TDLS_SUPPORT)) +			goto nla_put_failure; +		if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && +		    nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP))  			goto nla_put_failure; -	} -	CMD(start_p2p_device, START_P2P_DEVICE); -	CMD(set_mcast_rate, SET_MCAST_RATE); -#ifdef CONFIG_NL80211_TESTMODE -	CMD(testmode_cmd, TESTMODE); -#endif +		(*split_start)++; +		if (split) +			break; +	case 1: +		if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES, +			    sizeof(u32) * dev->wiphy.n_cipher_suites, +			    dev->wiphy.cipher_suites)) +			goto nla_put_failure; -#undef CMD +		if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, +			       dev->wiphy.max_num_pmkids)) +			goto nla_put_failure; -	if (dev->ops->connect || dev->ops->auth) { -		i++; -		if (nla_put_u32(msg, i, NL80211_CMD_CONNECT)) +		if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && +		    nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE))  			goto nla_put_failure; -	} -	if (dev->ops->disconnect || dev->ops->deauth) { -		i++; -		if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) +		if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, +				dev->wiphy.available_antennas_tx) || +		    nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, +				dev->wiphy.available_antennas_rx))  			goto nla_put_failure; -	} -	nla_nest_end(msg, nl_cmds); +		if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) && +		    nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD, +				dev->wiphy.probe_resp_offload)) +			goto nla_put_failure; -	if (dev->ops->remain_on_channel && -	    (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) && -	    nla_put_u32(msg, NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, -			dev->wiphy.max_remain_on_channel_duration)) -		goto nla_put_failure; +		if ((dev->wiphy.available_antennas_tx || +		     dev->wiphy.available_antennas_rx) && +		    dev->ops->get_antenna) { +			u32 tx_ant = 0, rx_ant = 0; +			int res; +			res = rdev_get_antenna(dev, &tx_ant, &rx_ant); +			if (!res) { +				if (nla_put_u32(msg, +						NL80211_ATTR_WIPHY_ANTENNA_TX, +						tx_ant) || +				    nla_put_u32(msg, +						NL80211_ATTR_WIPHY_ANTENNA_RX, +						rx_ant)) +					goto nla_put_failure; +			} +		} -	if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) && -	    nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK)) -		goto nla_put_failure; +		(*split_start)++; +		if (split) +			break; +	case 2: +		if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, +					dev->wiphy.interface_modes)) +				goto nla_put_failure; +		(*split_start)++; +		if (split) +			break; +	case 3: +		nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); +		if (!nl_bands) +			goto nla_put_failure; -	if (mgmt_stypes) { -		u16 stypes; -		struct nlattr *nl_ftypes, *nl_ifs; -		enum nl80211_iftype ift; +		for (band = *band_start; band < IEEE80211_NUM_BANDS; band++) { +			struct ieee80211_supported_band *sband; -		nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES); -		if (!nl_ifs) -			goto nla_put_failure; +			sband = dev->wiphy.bands[band]; -		for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { -			nl_ftypes = nla_nest_start(msg, ift); -			if (!nl_ftypes) +			if (!sband) +				continue; + +			nl_band = nla_nest_start(msg, band); +			if (!nl_band)  				goto nla_put_failure; -			i = 0; -			stypes = mgmt_stypes[ift].tx; -			while (stypes) { -				if ((stypes & 1) && -				    nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, -						(i << 4) | IEEE80211_FTYPE_MGMT)) + +			switch (*chan_start) { +			case 0: +				if (nl80211_send_band_rateinfo(msg, sband))  					goto 
nla_put_failure; -				stypes >>= 1; -				i++; +				(*chan_start)++; +				if (split) +					break; +			default: +				/* add frequencies */ +				nl_freqs = nla_nest_start( +					msg, NL80211_BAND_ATTR_FREQS); +				if (!nl_freqs) +					goto nla_put_failure; + +				for (i = *chan_start - 1; +				     i < sband->n_channels; +				     i++) { +					nl_freq = nla_nest_start(msg, i); +					if (!nl_freq) +						goto nla_put_failure; + +					chan = &sband->channels[i]; + +					if (nl80211_msg_put_channel(msg, chan, +								    split)) +						goto nla_put_failure; + +					nla_nest_end(msg, nl_freq); +					if (split) +						break; +				} +				if (i < sband->n_channels) +					*chan_start = i + 2; +				else +					*chan_start = 0; +				nla_nest_end(msg, nl_freqs); +			} + +			nla_nest_end(msg, nl_band); + +			if (split) { +				/* start again here */ +				if (*chan_start) +					band--; +				break;  			} -			nla_nest_end(msg, nl_ftypes);  		} +		nla_nest_end(msg, nl_bands); -		nla_nest_end(msg, nl_ifs); +		if (band < IEEE80211_NUM_BANDS) +			*band_start = band + 1; +		else +			*band_start = 0; -		nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES); -		if (!nl_ifs) +		/* if bands & channels are done, continue outside */ +		if (*band_start == 0 && *chan_start == 0) +			(*split_start)++; +		if (split) +			break; +	case 4: +		nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS); +		if (!nl_cmds)  			goto nla_put_failure; -		for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { -			nl_ftypes = nla_nest_start(msg, ift); -			if (!nl_ftypes) +		i = 0; +#define CMD(op, n)							\ +		 do {							\ +			if (dev->ops->op) {				\ +				i++;					\ +				if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ +					goto nla_put_failure;		\ +			}						\ +		} while (0) + +		CMD(add_virtual_intf, NEW_INTERFACE); +		CMD(change_virtual_intf, SET_INTERFACE); +		CMD(add_key, NEW_KEY); +		CMD(start_ap, START_AP); +		CMD(add_station, NEW_STATION); +		CMD(add_mpath, NEW_MPATH); +		CMD(update_mesh_config, SET_MESH_CONFIG); +		CMD(change_bss, SET_BSS); +		CMD(auth, AUTHENTICATE); +		CMD(assoc, ASSOCIATE); +		CMD(deauth, DEAUTHENTICATE); +		CMD(disassoc, DISASSOCIATE); +		CMD(join_ibss, JOIN_IBSS); +		CMD(join_mesh, JOIN_MESH); +		CMD(set_pmksa, SET_PMKSA); +		CMD(del_pmksa, DEL_PMKSA); +		CMD(flush_pmksa, FLUSH_PMKSA); +		if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) +			CMD(remain_on_channel, REMAIN_ON_CHANNEL); +		CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); +		CMD(mgmt_tx, FRAME); +		CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); +		if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { +			i++; +			if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS))  				goto nla_put_failure; -			i = 0; -			stypes = mgmt_stypes[ift].rx; -			while (stypes) { -				if ((stypes & 1) && -				    nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, -						(i << 4) | IEEE80211_FTYPE_MGMT)) -					goto nla_put_failure; -				stypes >>= 1; -				i++; -			} -			nla_nest_end(msg, nl_ftypes);  		} -		nla_nest_end(msg, nl_ifs); -	} +		if (dev->ops->set_monitor_channel || dev->ops->start_ap || +		    dev->ops->join_mesh) { +			i++; +			if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) +				goto nla_put_failure; +		} +		CMD(set_wds_peer, SET_WDS_PEER); +		if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { +			CMD(tdls_mgmt, TDLS_MGMT); +			CMD(tdls_oper, TDLS_OPER); +		} +		if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) +			CMD(sched_scan_start, START_SCHED_SCAN); +		CMD(probe_client, PROBE_CLIENT); +		CMD(set_noack_map, SET_NOACK_MAP); +		if (dev->wiphy.flags & 
WIPHY_FLAG_REPORTS_OBSS) { +			i++; +			if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) +				goto nla_put_failure; +		} +		CMD(start_p2p_device, START_P2P_DEVICE); +		CMD(set_mcast_rate, SET_MCAST_RATE); +		if (split) { +			CMD(crit_proto_start, CRIT_PROTOCOL_START); +			CMD(crit_proto_stop, CRIT_PROTOCOL_STOP); +		} -#ifdef CONFIG_PM -	if (dev->wiphy.wowlan.flags || dev->wiphy.wowlan.n_patterns) { -		struct nlattr *nl_wowlan; +#ifdef CONFIG_NL80211_TESTMODE +		CMD(testmode_cmd, TESTMODE); +#endif -		nl_wowlan = nla_nest_start(msg, -				NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); -		if (!nl_wowlan) -			goto nla_put_failure; +#undef CMD -		if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) && -		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || -		    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) && -		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || -		    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) && -		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || -		    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && -		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) || -		    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && -		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || -		    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && -		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || -		    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && -		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || -		    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) && -		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) -		    goto nla_put_failure; -		if (dev->wiphy.wowlan.n_patterns) { -			struct nl80211_wowlan_pattern_support pat = { -				.max_patterns = dev->wiphy.wowlan.n_patterns, -				.min_pattern_len = -					dev->wiphy.wowlan.pattern_min_len, -				.max_pattern_len = -					dev->wiphy.wowlan.pattern_max_len, -				.max_pkt_offset = -					dev->wiphy.wowlan.max_pkt_offset, -			}; -			if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, -				    sizeof(pat), &pat)) +		if (dev->ops->connect || dev->ops->auth) { +			i++; +			if (nla_put_u32(msg, i, NL80211_CMD_CONNECT))  				goto nla_put_failure;  		} -		nla_nest_end(msg, nl_wowlan); -	} +		if (dev->ops->disconnect || dev->ops->deauth) { +			i++; +			if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) +				goto nla_put_failure; +		} + +		nla_nest_end(msg, nl_cmds); +		(*split_start)++; +		if (split) +			break; +	case 5: +		if (dev->ops->remain_on_channel && +		    (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) && +		    nla_put_u32(msg, +				NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, +				dev->wiphy.max_remain_on_channel_duration)) +			goto nla_put_failure; + +		if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) && +		    nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK)) +			goto nla_put_failure; + +		if (nl80211_send_mgmt_stypes(msg, mgmt_stypes)) +			goto nla_put_failure; +		(*split_start)++; +		if (split) +			break; +	case 6: +#ifdef CONFIG_PM +		if (nl80211_send_wowlan(msg, dev, split)) +			goto nla_put_failure; +		(*split_start)++; +		if (split) +			break; +#else +		(*split_start)++;  #endif +	case 7: +		if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, +					dev->wiphy.software_iftypes)) +			goto nla_put_failure; -	if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, -				dev->wiphy.software_iftypes)) -		goto nla_put_failure; +		if (nl80211_put_iface_combinations(&dev->wiphy, 
msg, split)) +			goto nla_put_failure; -	if (nl80211_put_iface_combinations(&dev->wiphy, msg)) -		goto nla_put_failure; +		(*split_start)++; +		if (split) +			break; +	case 8: +		if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && +		    nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME, +				dev->wiphy.ap_sme_capa)) +			goto nla_put_failure; -	if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && -	    nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME, -			dev->wiphy.ap_sme_capa)) -		goto nla_put_failure; +		features = dev->wiphy.features; +		/* +		 * We can only add the per-channel limit information if the +		 * dump is split, otherwise it makes it too big. Therefore +		 * only advertise it in that case. +		 */ +		if (split) +			features |= NL80211_FEATURE_ADVERTISE_CHAN_LIMITS; +		if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, features)) +			goto nla_put_failure; -	if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, -			dev->wiphy.features)) -		goto nla_put_failure; +		if (dev->wiphy.ht_capa_mod_mask && +		    nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK, +			    sizeof(*dev->wiphy.ht_capa_mod_mask), +			    dev->wiphy.ht_capa_mod_mask)) +			goto nla_put_failure; -	if (dev->wiphy.ht_capa_mod_mask && -	    nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK, -		    sizeof(*dev->wiphy.ht_capa_mod_mask), -		    dev->wiphy.ht_capa_mod_mask)) -		goto nla_put_failure; +		if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && +		    dev->wiphy.max_acl_mac_addrs && +		    nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, +				dev->wiphy.max_acl_mac_addrs)) +			goto nla_put_failure; -	if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && -	    dev->wiphy.max_acl_mac_addrs && -	    nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, -			dev->wiphy.max_acl_mac_addrs)) -		goto nla_put_failure; +		/* +		 * Any information below this point is only available to +		 * applications that can deal with it being split. This +		 * helps ensure that newly added capabilities don't break +		 * older tools by overrunning their buffers. +		 * +		 * We still increment split_start so that in the split +		 * case we'll continue with more data in the next round, +		 * but break unconditionally so unsplit data stops here. 
+		 */ +		(*split_start)++; +		break; +	case 9: +		if (dev->wiphy.extended_capabilities && +		    (nla_put(msg, NL80211_ATTR_EXT_CAPA, +			     dev->wiphy.extended_capabilities_len, +			     dev->wiphy.extended_capabilities) || +		     nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK, +			     dev->wiphy.extended_capabilities_len, +			     dev->wiphy.extended_capabilities_mask))) +			goto nla_put_failure; +		if (dev->wiphy.vht_capa_mod_mask && +		    nla_put(msg, NL80211_ATTR_VHT_CAPABILITY_MASK, +			    sizeof(*dev->wiphy.vht_capa_mod_mask), +			    dev->wiphy.vht_capa_mod_mask)) +			goto nla_put_failure; + +		/* done */ +		*split_start = 0; +		break; +	}  	return genlmsg_end(msg, hdr);   nla_put_failure: @@ -1310,39 +1562,80 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)  	int idx = 0, ret;  	int start = cb->args[0];  	struct cfg80211_registered_device *dev; +	s64 filter_wiphy = -1; +	bool split = false; +	struct nlattr **tb = nl80211_fam.attrbuf; +	int res;  	mutex_lock(&cfg80211_mutex); +	res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, +			  tb, nl80211_fam.maxattr, nl80211_policy); +	if (res == 0) { +		split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP]; +		if (tb[NL80211_ATTR_WIPHY]) +			filter_wiphy = nla_get_u32(tb[NL80211_ATTR_WIPHY]); +		if (tb[NL80211_ATTR_WDEV]) +			filter_wiphy = nla_get_u64(tb[NL80211_ATTR_WDEV]) >> 32; +		if (tb[NL80211_ATTR_IFINDEX]) { +			struct net_device *netdev; +			int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); + +			netdev = dev_get_by_index(sock_net(skb->sk), ifidx); +			if (!netdev) { +				mutex_unlock(&cfg80211_mutex); +				return -ENODEV; +			} +			if (netdev->ieee80211_ptr) { +				dev = wiphy_to_dev( +					netdev->ieee80211_ptr->wiphy); +				filter_wiphy = dev->wiphy_idx; +			} +			dev_put(netdev); +		} +	} +  	list_for_each_entry(dev, &cfg80211_rdev_list, list) {  		if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk)))  			continue;  		if (++idx <= start)  			continue; -		ret = nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid, -					 cb->nlh->nlmsg_seq, NLM_F_MULTI, -					 dev); -		if (ret < 0) { -			/* -			 * If sending the wiphy data didn't fit (ENOBUFS or -			 * EMSGSIZE returned), this SKB is still empty (so -			 * it's not too big because another wiphy dataset is -			 * already in the skb) and we've not tried to adjust -			 * the dump allocation yet ... then adjust the alloc -			 * size to be bigger, and return 1 but with the empty -			 * skb. This results in an empty message being RX'ed -			 * in userspace, but that is ignored. -			 * -			 * We can then retry with the larger buffer. -			 */ -			if ((ret == -ENOBUFS || ret == -EMSGSIZE) && -			    !skb->len && -			    cb->min_dump_alloc < 4096) { -				cb->min_dump_alloc = 4096; -				mutex_unlock(&cfg80211_mutex); -				return 1; +		if (filter_wiphy != -1 && dev->wiphy_idx != filter_wiphy) +			continue; +		/* attempt to fit multiple wiphy data chunks into the skb */ +		do { +			ret = nl80211_send_wiphy(dev, skb, +						 NETLINK_CB(cb->skb).portid, +						 cb->nlh->nlmsg_seq, +						 NLM_F_MULTI, +						 split, &cb->args[1], +						 &cb->args[2], +						 &cb->args[3]); +			if (ret < 0) { +				/* +				 * If sending the wiphy data didn't fit (ENOBUFS +				 * or EMSGSIZE returned), this SKB is still +				 * empty (so it's not too big because another +				 * wiphy dataset is already in the skb) and +				 * we've not tried to adjust the dump allocation +				 * yet ... then adjust the alloc size to be +				 * bigger, and return 1 but with the empty skb. 
+				 * This results in an empty message being RX'ed +				 * in userspace, but that is ignored. +				 * +				 * We can then retry with the larger buffer. +				 */ +				if ((ret == -ENOBUFS || ret == -EMSGSIZE) && +				    !skb->len && +				    cb->min_dump_alloc < 4096) { +					cb->min_dump_alloc = 4096; +					mutex_unlock(&cfg80211_mutex); +					return 1; +				} +				idx--; +				break;  			} -			idx--; -			break; -		} +		} while (cb->args[1] > 0); +		break;  	}  	mutex_unlock(&cfg80211_mutex); @@ -1360,7 +1653,8 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info)  	if (!msg)  		return -ENOMEM; -	if (nl80211_send_wiphy(msg, info->snd_portid, info->snd_seq, 0, dev) < 0) { +	if (nl80211_send_wiphy(dev, msg, info->snd_portid, info->snd_seq, 0, +			       false, NULL, NULL, NULL) < 0) {  		nlmsg_free(msg);  		return -ENOBUFS;  	} @@ -2967,6 +3261,7 @@ static int parse_station_flags(struct genl_info *info,  		sta_flags = nla_data(nla);  		params->sta_flags_mask = sta_flags->mask;  		params->sta_flags_set = sta_flags->set; +		params->sta_flags_set &= params->sta_flags_mask;  		if ((params->sta_flags_mask |  		     params->sta_flags_set) & BIT(__NL80211_STA_FLAG_INVALID))  			return -EINVAL; @@ -3241,15 +3536,20 @@ static int nl80211_dump_station(struct sk_buff *skb,  {  	struct station_info sinfo;  	struct cfg80211_registered_device *dev; -	struct net_device *netdev; +	struct wireless_dev *wdev;  	u8 mac_addr[ETH_ALEN]; -	int sta_idx = cb->args[1]; +	int sta_idx = cb->args[2];  	int err; -	err = nl80211_prepare_netdev_dump(skb, cb, &dev, &netdev); +	err = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev);  	if (err)  		return err; +	if (!wdev->netdev) { +		err = -EINVAL; +		goto out_err; +	} +  	if (!dev->ops->dump_station) {  		err = -EOPNOTSUPP;  		goto out_err; @@ -3257,7 +3557,7 @@ static int nl80211_dump_station(struct sk_buff *skb,  	while (1) {  		memset(&sinfo, 0, sizeof(sinfo)); -		err = rdev_dump_station(dev, netdev, sta_idx, +		err = rdev_dump_station(dev, wdev->netdev, sta_idx,  					mac_addr, &sinfo);  		if (err == -ENOENT)  			break; @@ -3267,7 +3567,7 @@ static int nl80211_dump_station(struct sk_buff *skb,  		if (nl80211_send_station(skb,  				NETLINK_CB(cb->skb).portid,  				cb->nlh->nlmsg_seq, NLM_F_MULTI, -				dev, netdev, mac_addr, +				dev, wdev->netdev, mac_addr,  				&sinfo) < 0)  			goto out; @@ -3276,10 +3576,10 @@ static int nl80211_dump_station(struct sk_buff *skb,   out: -	cb->args[1] = sta_idx; +	cb->args[2] = sta_idx;  	err = skb->len;   out_err: -	nl80211_finish_netdev_dump(dev); +	nl80211_finish_wdev_dump(dev);  	return err;  } @@ -3320,6 +3620,136 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)  	return genlmsg_reply(msg, info);  } +int cfg80211_check_station_change(struct wiphy *wiphy, +				  struct station_parameters *params, +				  enum cfg80211_station_type statype) +{ +	if (params->listen_interval != -1) +		return -EINVAL; +	if (params->aid) +		return -EINVAL; + +	/* When you run into this, adjust the code below for the new flag */ +	BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); + +	switch (statype) { +	case CFG80211_STA_MESH_PEER_KERNEL: +	case CFG80211_STA_MESH_PEER_USER: +		/* +		 * No ignoring the TDLS flag here -- the userspace mesh +		 * code doesn't have the bug of including TDLS in the +		 * mask everywhere. 
+		 */ +		if (params->sta_flags_mask & +				~(BIT(NL80211_STA_FLAG_AUTHENTICATED) | +				  BIT(NL80211_STA_FLAG_MFP) | +				  BIT(NL80211_STA_FLAG_AUTHORIZED))) +			return -EINVAL; +		break; +	case CFG80211_STA_TDLS_PEER_SETUP: +	case CFG80211_STA_TDLS_PEER_ACTIVE: +		if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) +			return -EINVAL; +		/* ignore since it can't change */ +		params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); +		break; +	default: +		/* disallow mesh-specific things */ +		if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION) +			return -EINVAL; +		if (params->local_pm) +			return -EINVAL; +		if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) +			return -EINVAL; +	} + +	if (statype != CFG80211_STA_TDLS_PEER_SETUP && +	    statype != CFG80211_STA_TDLS_PEER_ACTIVE) { +		/* TDLS can't be set, ... */ +		if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) +			return -EINVAL; +		/* +		 * ... but don't bother the driver with it. This works around +		 * a hostapd/wpa_supplicant issue -- it always includes the +		 * TDLS_PEER flag in the mask even for AP mode. +		 */ +		params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); +	} + +	if (statype != CFG80211_STA_TDLS_PEER_SETUP) { +		/* reject other things that can't change */ +		if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD) +			return -EINVAL; +		if (params->sta_modify_mask & STATION_PARAM_APPLY_CAPABILITY) +			return -EINVAL; +		if (params->supported_rates) +			return -EINVAL; +		if (params->ext_capab || params->ht_capa || params->vht_capa) +			return -EINVAL; +	} + +	if (statype != CFG80211_STA_AP_CLIENT) { +		if (params->vlan) +			return -EINVAL; +	} + +	switch (statype) { +	case CFG80211_STA_AP_MLME_CLIENT: +		/* Use this only for authorizing/unauthorizing a station */ +		if (!(params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED))) +			return -EOPNOTSUPP; +		break; +	case CFG80211_STA_AP_CLIENT: +		/* accept only the listed bits */ +		if (params->sta_flags_mask & +				~(BIT(NL80211_STA_FLAG_AUTHORIZED) | +				  BIT(NL80211_STA_FLAG_AUTHENTICATED) | +				  BIT(NL80211_STA_FLAG_ASSOCIATED) | +				  BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | +				  BIT(NL80211_STA_FLAG_WME) | +				  BIT(NL80211_STA_FLAG_MFP))) +			return -EINVAL; + +		/* but authenticated/associated only if driver handles it */ +		if (!(wiphy->features & NL80211_FEATURE_FULL_AP_CLIENT_STATE) && +		    params->sta_flags_mask & +				(BIT(NL80211_STA_FLAG_AUTHENTICATED) | +				 BIT(NL80211_STA_FLAG_ASSOCIATED))) +			return -EINVAL; +		break; +	case CFG80211_STA_IBSS: +	case CFG80211_STA_AP_STA: +		/* reject any changes other than AUTHORIZED */ +		if (params->sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) +			return -EINVAL; +		break; +	case CFG80211_STA_TDLS_PEER_SETUP: +		/* reject any changes other than AUTHORIZED or WME */ +		if (params->sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | +					       BIT(NL80211_STA_FLAG_WME))) +			return -EINVAL; +		/* force (at least) rates when authorizing */ +		if (params->sta_flags_set & BIT(NL80211_STA_FLAG_AUTHORIZED) && +		    !params->supported_rates) +			return -EINVAL; +		break; +	case CFG80211_STA_TDLS_PEER_ACTIVE: +		/* reject any changes */ +		return -EINVAL; +	case CFG80211_STA_MESH_PEER_KERNEL: +		if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) +			return -EINVAL; +		break; +	case CFG80211_STA_MESH_PEER_USER: +		if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION) +			return -EINVAL; +		break; +	} + +	return 0; +}
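/*
 * Illustrative sketch, not part of this patch: with the checks
 * consolidated in cfg80211_check_station_change() above, a driver's
 * ->change_station() op is expected to classify the target station
 * and let cfg80211 reject invalid transitions before touching driver
 * state. The callback name and the CFG80211_STA_AP_CLIENT choice
 * below are hypothetical; it assumes the usual cfg80211 headers.
 */
static int drv_change_station(struct wiphy *wiphy, struct net_device *dev,
			      u8 *mac, struct station_parameters *params)
{
	/* classification is driver-specific; AP client chosen as example */
	enum cfg80211_station_type statype = CFG80211_STA_AP_CLIENT;
	int err;

	/* reject transitions the core knows are invalid for this type */
	err = cfg80211_check_station_change(wiphy, params, statype);
	if (err)
		return err;

	/* ... apply the validated parameters to driver state ... */
	return 0;
}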
+EXPORT_SYMBOL(cfg80211_check_station_change); +  /*   * Get vlan interface making sure it is running and on the right wiphy.   */ @@ -3342,6 +3772,13 @@ static struct net_device *get_vlan(struct genl_info *info,  		goto error;  	} +	if (v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && +	    v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && +	    v->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) { +		ret = -EINVAL; +		goto error; +	} +  	if (!netif_running(v)) {  		ret = -ENETDOWN;  		goto error; @@ -3359,21 +3796,13 @@ nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] __read_mostly = {  	[NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 },  }; -static int nl80211_set_station_tdls(struct genl_info *info, -				    struct station_parameters *params) +static int nl80211_parse_sta_wme(struct genl_info *info, +				 struct station_parameters *params)  {  	struct nlattr *tb[NL80211_STA_WME_MAX + 1];  	struct nlattr *nla;  	int err; -	/* Dummy STA entry gets updated once the peer capabilities are known */ -	if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) -		params->ht_capa = -			nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); -	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) -		params->vht_capa = -			nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); -  	/* parse WME attributes if present */  	if (!info->attrs[NL80211_ATTR_STA_WME])  		return 0; @@ -3401,18 +3830,34 @@ static int nl80211_set_station_tdls(struct genl_info *info,  	return 0; } +static int nl80211_set_station_tdls(struct genl_info *info, +				    struct station_parameters *params) +{ +	/* Dummy STA entry gets updated once the peer capabilities are known */ +	if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) +		params->ht_capa = +			nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); +	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) +		params->vht_capa = +			nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); + +	return nl80211_parse_sta_wme(info, params); +} +  static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)  {  	struct cfg80211_registered_device *rdev = info->user_ptr[0]; -	int err;  	struct net_device *dev = info->user_ptr[1];  	struct station_parameters params; -	u8 *mac_addr = NULL; +	u8 *mac_addr; +	int err;  	memset(&params, 0, sizeof(params));  	params.listen_interval = -1; -	params.plink_state = -1; + +	if (!rdev->ops->change_station) +		return -EOPNOTSUPP;  	if (info->attrs[NL80211_ATTR_STA_AID])  		return -EINVAL; @@ -3445,19 +3890,23 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)  	if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])  		return -EINVAL; -	if (!rdev->ops->change_station) -		return -EOPNOTSUPP; -  	if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))  		return -EINVAL; -	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) +	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) {  		params.plink_action = -		    nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); +			nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); +		if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS) +			return -EINVAL; +	} -	if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) +	if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) {  		params.plink_state = -		    nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); +			nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]); +		if (params.plink_state >= NUM_NL80211_PLINK_STATES) +			return -EINVAL; +		params.sta_modify_mask |= STATION_PARAM_APPLY_PLINK_STATE; +	}  	if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) {  		enum 
nl80211_mesh_power_mode pm = nla_get_u32( @@ -3470,127 +3919,33 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)  		params.local_pm = pm;  	} +	/* Include parameters for TDLS peer (will check later) */ +	err = nl80211_set_station_tdls(info, &params); +	if (err) +		return err; + +	params.vlan = get_vlan(info, rdev); +	if (IS_ERR(params.vlan)) +		return PTR_ERR(params.vlan); +  	switch (dev->ieee80211_ptr->iftype) {  	case NL80211_IFTYPE_AP:  	case NL80211_IFTYPE_AP_VLAN:  	case NL80211_IFTYPE_P2P_GO: -		/* disallow mesh-specific things */ -		if (params.plink_action) -			return -EINVAL; -		if (params.local_pm) -			return -EINVAL; - -		/* TDLS can't be set, ... */ -		if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) -			return -EINVAL; -		/* -		 * ... but don't bother the driver with it. This works around -		 * a hostapd/wpa_supplicant issue -- it always includes the -		 * TDLS_PEER flag in the mask even for AP mode. -		 */ -		params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); - -		/* accept only the listed bits */ -		if (params.sta_flags_mask & -				~(BIT(NL80211_STA_FLAG_AUTHORIZED) | -				  BIT(NL80211_STA_FLAG_AUTHENTICATED) | -				  BIT(NL80211_STA_FLAG_ASSOCIATED) | -				  BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | -				  BIT(NL80211_STA_FLAG_WME) | -				  BIT(NL80211_STA_FLAG_MFP))) -			return -EINVAL; - -		/* but authenticated/associated only if driver handles it */ -		if (!(rdev->wiphy.features & -				NL80211_FEATURE_FULL_AP_CLIENT_STATE) && -		    params.sta_flags_mask & -				(BIT(NL80211_STA_FLAG_AUTHENTICATED) | -				 BIT(NL80211_STA_FLAG_ASSOCIATED))) -			return -EINVAL; - -		/* reject other things that can't change */ -		if (params.supported_rates) -			return -EINVAL; -		if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) -			return -EINVAL; -		if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) -			return -EINVAL; -		if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || -		    info->attrs[NL80211_ATTR_VHT_CAPABILITY]) -			return -EINVAL; - -		/* must be last in here for error handling */ -		params.vlan = get_vlan(info, rdev); -		if (IS_ERR(params.vlan)) -			return PTR_ERR(params.vlan); -		break;  	case NL80211_IFTYPE_P2P_CLIENT:  	case NL80211_IFTYPE_STATION: -		/* -		 * Don't allow userspace to change the TDLS_PEER flag, -		 * but silently ignore attempts to change it since we -		 * don't have state here to verify that it doesn't try -		 * to change the flag. 
-		 */ -		params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); -		/* Include parameters for TDLS peer (driver will check) */ -		err = nl80211_set_station_tdls(info, &params); -		if (err) -			return err; -		/* disallow things sta doesn't support */ -		if (params.plink_action) -			return -EINVAL; -		if (params.local_pm) -			return -EINVAL; -		/* reject any changes other than AUTHORIZED or WME (for TDLS) */ -		if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | -					      BIT(NL80211_STA_FLAG_WME))) -			return -EINVAL; -		break;  	case NL80211_IFTYPE_ADHOC: -		/* disallow things sta doesn't support */ -		if (params.plink_action) -			return -EINVAL; -		if (params.local_pm) -			return -EINVAL; -		if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || -		    info->attrs[NL80211_ATTR_VHT_CAPABILITY]) -			return -EINVAL; -		/* reject any changes other than AUTHORIZED */ -		if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) -			return -EINVAL; -		break;  	case NL80211_IFTYPE_MESH_POINT: -		/* disallow things mesh doesn't support */ -		if (params.vlan) -			return -EINVAL; -		if (params.supported_rates) -			return -EINVAL; -		if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) -			return -EINVAL; -		if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) -			return -EINVAL; -		if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || -		    info->attrs[NL80211_ATTR_VHT_CAPABILITY]) -			return -EINVAL; -		/* -		 * No special handling for TDLS here -- the userspace -		 * mesh code doesn't have this bug. -		 */ -		if (params.sta_flags_mask & -				~(BIT(NL80211_STA_FLAG_AUTHENTICATED) | -				  BIT(NL80211_STA_FLAG_MFP) | -				  BIT(NL80211_STA_FLAG_AUTHORIZED))) -			return -EINVAL;  		break;  	default: -		return -EOPNOTSUPP; +		err = -EOPNOTSUPP; +		goto out_put_vlan;  	} -	/* be aware of params.vlan when changing code here */ - +	/* driver will call cfg80211_check_station_change() */  	err = rdev_change_station(rdev, dev, mac_addr, &params); + out_put_vlan:  	if (params.vlan)  		dev_put(params.vlan); @@ -3607,6 +3962,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)  	memset(&params, 0, sizeof(params)); +	if (!rdev->ops->add_station) +		return -EOPNOTSUPP; +  	if (!info->attrs[NL80211_ATTR_MAC])  		return -EINVAL; @@ -3652,50 +4010,32 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)  		params.vht_capa =  			nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); -	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) +	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) {  		params.plink_action = -		    nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); +			nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); +		if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS) +			return -EINVAL; +	} -	if (!rdev->ops->add_station) -		return -EOPNOTSUPP; +	err = nl80211_parse_sta_wme(info, &params); +	if (err) +		return err;  	if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))  		return -EINVAL; +	/* When you run into this, adjust the code below for the new flag */ +	BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); +  	switch (dev->ieee80211_ptr->iftype) {  	case NL80211_IFTYPE_AP:  	case NL80211_IFTYPE_AP_VLAN:  	case NL80211_IFTYPE_P2P_GO: -		/* parse WME attributes if sta is WME capable */ -		if ((rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && -		    (params.sta_flags_set & BIT(NL80211_STA_FLAG_WME)) && -		    info->attrs[NL80211_ATTR_STA_WME]) { -			struct nlattr *tb[NL80211_STA_WME_MAX + 1]; -			struct nlattr *nla; - -			nla = info->attrs[NL80211_ATTR_STA_WME]; -			err 
= nla_parse_nested(tb, NL80211_STA_WME_MAX, nla, -					       nl80211_sta_wme_policy); -			if (err) -				return err; - -			if (tb[NL80211_STA_WME_UAPSD_QUEUES]) -				params.uapsd_queues = -				     nla_get_u8(tb[NL80211_STA_WME_UAPSD_QUEUES]); -			if (params.uapsd_queues & -					~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK) -				return -EINVAL; - -			if (tb[NL80211_STA_WME_MAX_SP]) -				params.max_sp = -				     nla_get_u8(tb[NL80211_STA_WME_MAX_SP]); - -			if (params.max_sp & -					~IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK) -				return -EINVAL; +		/* ignore WME attributes if iface/sta is not capable */ +		if (!(rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) || +		    !(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME))) +			params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD; -			params.sta_modify_mask |= STATION_PARAM_APPLY_UAPSD; -		}  		/* TDLS peers cannot be added */  		if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))  			return -EINVAL; @@ -3716,6 +4056,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)  			return PTR_ERR(params.vlan);  		break;  	case NL80211_IFTYPE_MESH_POINT: +		/* ignore uAPSD data */ +		params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD; +  		/* associated is disallowed */  		if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED))  			return -EINVAL; @@ -3724,8 +4067,14 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)  			return -EINVAL;  		break;  	case NL80211_IFTYPE_STATION: -		/* associated is disallowed */ -		if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) +	case NL80211_IFTYPE_P2P_CLIENT: +		/* ignore uAPSD data */ +		params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD; + +		/* these are disallowed */ +		if (params.sta_flags_mask & +				(BIT(NL80211_STA_FLAG_ASSOCIATED) | +				 BIT(NL80211_STA_FLAG_AUTHENTICATED)))  			return -EINVAL;  		/* Only TDLS peers can be added */  		if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) @@ -3736,6 +4085,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)  		/* ... with external setup is supported */  		if (!(rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP))  			return -EOPNOTSUPP; +		/* +		 * Older wpa_supplicant versions always mark the TDLS peer +		 * as authorized, but it shouldn't yet be. 
+		 */ +		params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_AUTHORIZED);  		break;  	default:  		return -EOPNOTSUPP; @@ -3829,13 +4183,13 @@ static int nl80211_dump_mpath(struct sk_buff *skb,  {  	struct mpath_info pinfo;  	struct cfg80211_registered_device *dev; -	struct net_device *netdev; +	struct wireless_dev *wdev;  	u8 dst[ETH_ALEN];  	u8 next_hop[ETH_ALEN]; -	int path_idx = cb->args[1]; +	int path_idx = cb->args[2];  	int err; -	err = nl80211_prepare_netdev_dump(skb, cb, &dev, &netdev); +	err = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev);  	if (err)  		return err; @@ -3844,14 +4198,14 @@ static int nl80211_dump_mpath(struct sk_buff *skb,  		goto out_err;  	} -	if (netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { +	if (wdev->iftype != NL80211_IFTYPE_MESH_POINT) {  		err = -EOPNOTSUPP;  		goto out_err;  	}  	while (1) { -		err = rdev_dump_mpath(dev, netdev, path_idx, dst, next_hop, -				      &pinfo); +		err = rdev_dump_mpath(dev, wdev->netdev, path_idx, dst, +				      next_hop, &pinfo);  		if (err == -ENOENT)  			break;  		if (err) @@ -3859,7 +4213,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb,  		if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).portid,  				       cb->nlh->nlmsg_seq, NLM_F_MULTI, -				       netdev, dst, next_hop, +				       wdev->netdev, dst, next_hop,  				       &pinfo) < 0)  			goto out; @@ -3868,10 +4222,10 @@ static int nl80211_dump_mpath(struct sk_buff *skb,   out: -	cb->args[1] = path_idx; +	cb->args[2] = path_idx;  	err = skb->len;   out_err: -	nl80211_finish_netdev_dump(dev); +	nl80211_finish_wdev_dump(dev);  	return err;  } @@ -4280,6 +4634,7 @@ static const struct nla_policy  	[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 },  	[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 },  	[NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, +	[NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG },  	[NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY,  				    .len = IEEE80211_MAX_DATA_LEN },  	[NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG }, @@ -4418,6 +4773,7 @@ do {									    \  static int nl80211_parse_mesh_setup(struct genl_info *info,  				     struct mesh_setup *setup)  { +	struct cfg80211_registered_device *rdev = info->user_ptr[0];  	struct nlattr *tb[NL80211_MESH_SETUP_ATTR_MAX + 1];  	if (!info->attrs[NL80211_ATTR_MESH_SETUP]) @@ -4454,8 +4810,14 @@ static int nl80211_parse_mesh_setup(struct genl_info *info,  		setup->ie = nla_data(ieattr);  		setup->ie_len = nla_len(ieattr);  	} +	if (tb[NL80211_MESH_SETUP_USERSPACE_MPM] && +	    !(rdev->wiphy.features & NL80211_FEATURE_USERSPACE_MPM)) +		return -EINVAL; +	setup->user_mpm = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_MPM]);  	setup->is_authenticated = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]);  	setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AMPE]); +	if (setup->is_secure) +		setup->user_mpm = true;  	return 0;  } @@ -5219,9 +5581,13 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,  	genl_dump_check_consistent(cb, hdr, &nl80211_fam); -	if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation) || +	if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation)) +		goto nla_put_failure; +	if (wdev->netdev &&  	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex))  		goto nla_put_failure; +	if (nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) +		goto nla_put_failure;  	bss = nla_nest_start(msg, NL80211_ATTR_BSS);  	if (!bss) @@ -5301,22 +5667,18 
@@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,  	return -EMSGSIZE;  } -static int nl80211_dump_scan(struct sk_buff *skb, -			     struct netlink_callback *cb) +static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)  {  	struct cfg80211_registered_device *rdev; -	struct net_device *dev;  	struct cfg80211_internal_bss *scan;  	struct wireless_dev *wdev; -	int start = cb->args[1], idx = 0; +	int start = cb->args[2], idx = 0;  	int err; -	err = nl80211_prepare_netdev_dump(skb, cb, &rdev, &dev); +	err = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev);  	if (err)  		return err; -	wdev = dev->ieee80211_ptr; -  	wdev_lock(wdev);  	spin_lock_bh(&rdev->bss_lock);  	cfg80211_bss_expire(rdev); @@ -5337,8 +5699,8 @@ static int nl80211_dump_scan(struct sk_buff *skb,  	spin_unlock_bh(&rdev->bss_lock);  	wdev_unlock(wdev); -	cb->args[1] = idx; -	nl80211_finish_netdev_dump(rdev); +	cb->args[2] = idx; +	nl80211_finish_wdev_dump(rdev);  	return skb->len;  } @@ -5407,14 +5769,19 @@ static int nl80211_dump_survey(struct sk_buff *skb,  {  	struct survey_info survey;  	struct cfg80211_registered_device *dev; -	struct net_device *netdev; -	int survey_idx = cb->args[1]; +	struct wireless_dev *wdev; +	int survey_idx = cb->args[2];  	int res; -	res = nl80211_prepare_netdev_dump(skb, cb, &dev, &netdev); +	res = nl80211_prepare_wdev_dump(skb, cb, &dev, &wdev);  	if (res)  		return res; +	if (!wdev->netdev) { +		res = -EINVAL; +		goto out_err; +	} +  	if (!dev->ops->dump_survey) {  		res = -EOPNOTSUPP;  		goto out_err; @@ -5423,7 +5790,7 @@ static int nl80211_dump_survey(struct sk_buff *skb,  	while (1) {  		struct ieee80211_channel *chan; -		res = rdev_dump_survey(dev, netdev, survey_idx, &survey); +		res = rdev_dump_survey(dev, wdev->netdev, survey_idx, &survey);  		if (res == -ENOENT)  			break;  		if (res) @@ -5445,17 +5812,16 @@ static int nl80211_dump_survey(struct sk_buff *skb,  		if (nl80211_send_survey(skb,  				NETLINK_CB(cb->skb).portid,  				cb->nlh->nlmsg_seq, NLM_F_MULTI, -				netdev, -				&survey) < 0) +				wdev->netdev, &survey) < 0)  			goto out;  		survey_idx++;  	}   out: -	cb->args[1] = survey_idx; +	cb->args[2] = survey_idx;  	res = skb->len;   out_err: -	nl80211_finish_netdev_dump(dev); +	nl80211_finish_wdev_dump(dev);  	return res;  } @@ -5663,14 +6029,10 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)  {  	struct cfg80211_registered_device *rdev = info->user_ptr[0];  	struct net_device *dev = info->user_ptr[1]; -	struct cfg80211_crypto_settings crypto;  	struct ieee80211_channel *chan; -	const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL; -	int err, ssid_len, ie_len = 0; -	bool use_mfp = false; -	u32 flags = 0; -	struct ieee80211_ht_cap *ht_capa = NULL; -	struct ieee80211_ht_cap *ht_capa_mask = NULL; +	struct cfg80211_assoc_request req = {}; +	const u8 *bssid, *ssid; +	int err, ssid_len = 0;  	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))  		return -EINVAL; @@ -5698,41 +6060,58 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)  	ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);  	if (info->attrs[NL80211_ATTR_IE]) { -		ie = nla_data(info->attrs[NL80211_ATTR_IE]); -		ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); +		req.ie = nla_data(info->attrs[NL80211_ATTR_IE]); +		req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);  	}  	if (info->attrs[NL80211_ATTR_USE_MFP]) {  		enum nl80211_mfp mfp =  			nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);  		if (mfp == 
NL80211_MFP_REQUIRED) -			use_mfp = true; +			req.use_mfp = true;  		else if (mfp != NL80211_MFP_NO)  			return -EINVAL;  	}  	if (info->attrs[NL80211_ATTR_PREV_BSSID]) -		prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); +		req.prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);  	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT])) -		flags |= ASSOC_REQ_DISABLE_HT; +		req.flags |= ASSOC_REQ_DISABLE_HT;  	if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]) -		ht_capa_mask = -			nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]); +		memcpy(&req.ht_capa_mask, +		       nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]), +		       sizeof(req.ht_capa_mask));  	if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) { -		if (!ht_capa_mask) +		if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]) +			return -EINVAL; +		memcpy(&req.ht_capa, +		       nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]), +		       sizeof(req.ht_capa)); +	} + +	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT])) +		req.flags |= ASSOC_REQ_DISABLE_VHT; + +	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) +		memcpy(&req.vht_capa_mask, +		       nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]), +		       sizeof(req.vht_capa_mask)); + +	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) { +		if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])  			return -EINVAL; -		ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); +		memcpy(&req.vht_capa, +		       nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]), +		       sizeof(req.vht_capa));  	} -	err = nl80211_crypto_settings(rdev, info, &crypto, 1); +	err = nl80211_crypto_settings(rdev, info, &req.crypto, 1);  	if (!err) -		err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, -					  ssid, ssid_len, ie, ie_len, use_mfp, -					  &crypto, flags, ht_capa, -					  ht_capa_mask); +		err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, +					  ssid, ssid_len, &req);  	return err;  } @@ -6312,6 +6691,24 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)  		       sizeof(connect.ht_capa));  	} +	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT])) +		connect.flags |= ASSOC_REQ_DISABLE_VHT; + +	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) +		memcpy(&connect.vht_capa_mask, +		       nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]), +		       sizeof(connect.vht_capa_mask)); + +	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) { +		if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) { +			kfree(connkeys); +			return -EINVAL; +		} +		memcpy(&connect.vht_capa, +		       nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]), +		       sizeof(connect.vht_capa)); +	} +  	err = cfg80211_connect(rdev, dev, &connect, connkeys);  	if (err)  		kfree(connkeys); @@ -7085,6 +7482,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)  			return err;  	} +	if (setup.user_mpm) +		cfg.auto_open_plinks = false; +  	if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {  		err = nl80211_parse_chandef(rdev, info, &setup.chandef);  		if (err) @@ -7284,7 +7684,8 @@ static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev,  		return -EINVAL;  	if (nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) > -			rdev->wiphy.wowlan.tcp->data_interval_max) +			rdev->wiphy.wowlan.tcp->data_interval_max || +	    nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) == 0)  		return -EINVAL;  	wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]); @@ -7762,13 +8163,121 @@ static int nl80211_stop_p2p_device(struct sk_buff 
*skb, struct genl_info *info)  	if (!rdev->ops->stop_p2p_device)  		return -EOPNOTSUPP; +	mutex_lock(&rdev->devlist_mtx);  	mutex_lock(&rdev->sched_scan_mtx);  	cfg80211_stop_p2p_device(rdev, wdev);  	mutex_unlock(&rdev->sched_scan_mtx); +	mutex_unlock(&rdev->devlist_mtx);  	return 0;  } +static int nl80211_get_protocol_features(struct sk_buff *skb, +					 struct genl_info *info) +{ +	void *hdr; +	struct sk_buff *msg; + +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); +	if (!msg) +		return -ENOMEM; + +	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, +			     NL80211_CMD_GET_PROTOCOL_FEATURES); +	if (!hdr) +		goto nla_put_failure; + +	if (nla_put_u32(msg, NL80211_ATTR_PROTOCOL_FEATURES, +			NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP)) +		goto nla_put_failure; + +	genlmsg_end(msg, hdr); +	return genlmsg_reply(msg, info); + + nla_put_failure: +	kfree_skb(msg); +	return -ENOBUFS; +} + +static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info) +{ +	struct cfg80211_registered_device *rdev = info->user_ptr[0]; +	struct cfg80211_update_ft_ies_params ft_params; +	struct net_device *dev = info->user_ptr[1]; + +	if (!rdev->ops->update_ft_ies) +		return -EOPNOTSUPP; + +	if (!info->attrs[NL80211_ATTR_MDID] || +	    !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) +		return -EINVAL; + +	memset(&ft_params, 0, sizeof(ft_params)); +	ft_params.md = nla_get_u16(info->attrs[NL80211_ATTR_MDID]); +	ft_params.ie = nla_data(info->attrs[NL80211_ATTR_IE]); +	ft_params.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + +	return rdev_update_ft_ies(rdev, dev, &ft_params); +} + +static int nl80211_crit_protocol_start(struct sk_buff *skb, +				       struct genl_info *info) +{ +	struct cfg80211_registered_device *rdev = info->user_ptr[0]; +	struct wireless_dev *wdev = info->user_ptr[1]; +	enum nl80211_crit_proto_id proto = NL80211_CRIT_PROTO_UNSPEC; +	u16 duration; +	int ret; + +	if (!rdev->ops->crit_proto_start) +		return -EOPNOTSUPP; + +	if (WARN_ON(!rdev->ops->crit_proto_stop)) +		return -EINVAL; + +	if (rdev->crit_proto_nlportid) +		return -EBUSY; + +	/* determine protocol if provided */ +	if (info->attrs[NL80211_ATTR_CRIT_PROT_ID]) +		proto = nla_get_u16(info->attrs[NL80211_ATTR_CRIT_PROT_ID]); + +	if (proto >= NUM_NL80211_CRIT_PROTO) +		return -EINVAL; + +	/* timeout must be provided */ +	if (!info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION]) +		return -EINVAL; + +	duration = +		nla_get_u16(info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION]); + +	if (duration > NL80211_CRIT_PROTO_MAX_DURATION) +		return -ERANGE; + +	ret = rdev_crit_proto_start(rdev, wdev, proto, duration); +	if (!ret) +		rdev->crit_proto_nlportid = info->snd_portid; + +	return ret; +} + +static int nl80211_crit_protocol_stop(struct sk_buff *skb, +				      struct genl_info *info) +{ +	struct cfg80211_registered_device *rdev = info->user_ptr[0]; +	struct wireless_dev *wdev = info->user_ptr[1]; + +	if (!rdev->ops->crit_proto_stop) +		return -EOPNOTSUPP; + +	if (rdev->crit_proto_nlportid) { +		rdev->crit_proto_nlportid = 0; +		rdev_crit_proto_stop(rdev, wdev); +	} +	return 0; +} +  #define NL80211_FLAG_NEED_WIPHY		0x01  #define NL80211_FLAG_NEED_NETDEV	0x02  #define NL80211_FLAG_NEED_RTNL		0x04 @@ -8445,6 +8954,35 @@ static struct genl_ops nl80211_ops[] = {  		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |  				  NL80211_FLAG_NEED_RTNL,  	}, +	{ +		.cmd = NL80211_CMD_GET_PROTOCOL_FEATURES, +		.doit = nl80211_get_protocol_features, +		.policy = nl80211_policy, +	}, +	{ +		.cmd = NL80211_CMD_UPDATE_FT_IES, +		.doit = 
nl80211_update_ft_ies, +		.policy = nl80211_policy, +		.flags = GENL_ADMIN_PERM, +		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP | +				  NL80211_FLAG_NEED_RTNL, +	}, +	{ +		.cmd = NL80211_CMD_CRIT_PROTOCOL_START, +		.doit = nl80211_crit_protocol_start, +		.policy = nl80211_policy, +		.flags = GENL_ADMIN_PERM, +		.internal_flags = NL80211_FLAG_NEED_WDEV_UP | +				  NL80211_FLAG_NEED_RTNL, +	}, +	{ +		.cmd = NL80211_CMD_CRIT_PROTOCOL_STOP, +		.doit = nl80211_crit_protocol_stop, +		.policy = nl80211_policy, +		.flags = GENL_ADMIN_PERM, +		.internal_flags = NL80211_FLAG_NEED_WDEV_UP | +				  NL80211_FLAG_NEED_RTNL, +	}  };  static struct genl_multicast_group nl80211_mlme_mcgrp = { @@ -8472,7 +9010,8 @@ void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev)  	if (!msg)  		return; -	if (nl80211_send_wiphy(msg, 0, 0, 0, rdev) < 0) { +	if (nl80211_send_wiphy(rdev, msg, 0, 0, 0, +			       false, NULL, NULL, NULL) < 0) {  		nlmsg_free(msg);  		return;  	} @@ -8796,21 +9335,31 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,  				NL80211_CMD_DISASSOCIATE, gfp);  } -void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev, -				struct net_device *netdev, const u8 *buf, -				size_t len, gfp_t gfp) +void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf, +				 size_t len)  { -	nl80211_send_mlme_event(rdev, netdev, buf, len, -				NL80211_CMD_UNPROT_DEAUTHENTICATE, gfp); +	struct wireless_dev *wdev = dev->ieee80211_ptr; +	struct wiphy *wiphy = wdev->wiphy; +	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + +	trace_cfg80211_send_unprot_deauth(dev); +	nl80211_send_mlme_event(rdev, dev, buf, len, +				NL80211_CMD_UNPROT_DEAUTHENTICATE, GFP_ATOMIC);  } +EXPORT_SYMBOL(cfg80211_send_unprot_deauth); -void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev, -				  struct net_device *netdev, const u8 *buf, -				  size_t len, gfp_t gfp) +void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf, +				   size_t len)  { -	nl80211_send_mlme_event(rdev, netdev, buf, len, -				NL80211_CMD_UNPROT_DISASSOCIATE, gfp); +	struct wireless_dev *wdev = dev->ieee80211_ptr; +	struct wiphy *wiphy = wdev->wiphy; +	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + +	trace_cfg80211_send_unprot_disassoc(dev); +	nl80211_send_mlme_event(rdev, dev, buf, len, +				NL80211_CMD_UNPROT_DISASSOCIATE, GFP_ATOMIC);  } +EXPORT_SYMBOL(cfg80211_send_unprot_disassoc);  static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev,  				      struct net_device *netdev, int cmd, @@ -9013,14 +9562,19 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,  	nlmsg_free(msg);  } -void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev, -		struct net_device *netdev, -		const u8 *macaddr, const u8* ie, u8 ie_len, -		gfp_t gfp) +void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr, +					const u8* ie, u8 ie_len, gfp_t gfp)  { +	struct wireless_dev *wdev = dev->ieee80211_ptr; +	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);  	struct sk_buff *msg;  	void *hdr; +	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_MESH_POINT)) +		return; + +	trace_cfg80211_notify_new_peer_candidate(dev, addr); +  	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);  	if (!msg)  		return; @@ -9032,8 +9586,8 @@ void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev,  	}  	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || -	    
nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || -	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, macaddr) || +	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || +	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||  	    (ie_len && ie &&  	     nla_put(msg, NL80211_ATTR_IE, ie_len , ie)))  		goto nla_put_failure; @@ -9048,6 +9602,7 @@ void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev,  	genlmsg_cancel(msg, hdr);  	nlmsg_free(msg);  } +EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate);  void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,  				 struct net_device *netdev, const u8 *addr, @@ -9116,7 +9671,7 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy,  	nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_BEFORE);  	if (!nl_freq)  		goto nla_put_failure; -	if (nl80211_msg_put_channel(msg, channel_before)) +	if (nl80211_msg_put_channel(msg, channel_before, false))  		goto nla_put_failure;  	nla_nest_end(msg, nl_freq); @@ -9124,7 +9679,7 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy,  	nl_freq = nla_nest_start(msg, NL80211_ATTR_FREQ_AFTER);  	if (!nl_freq)  		goto nla_put_failure; -	if (nl80211_msg_put_channel(msg, channel_after)) +	if (nl80211_msg_put_channel(msg, channel_after, false))  		goto nla_put_failure;  	nla_nest_end(msg, nl_freq); @@ -9186,31 +9741,42 @@ static void nl80211_send_remain_on_chan_event(  	nlmsg_free(msg);  } -void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, -				    struct wireless_dev *wdev, u64 cookie, -				    struct ieee80211_channel *chan, -				    unsigned int duration, gfp_t gfp) +void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, +			       struct ieee80211_channel *chan, +			       unsigned int duration, gfp_t gfp)  { +	struct wiphy *wiphy = wdev->wiphy; +	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + +	trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration);  	nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL,  					  rdev, wdev, cookie, chan,  					  duration, gfp);  } +EXPORT_SYMBOL(cfg80211_ready_on_channel); -void nl80211_send_remain_on_channel_cancel( -	struct cfg80211_registered_device *rdev, -	struct wireless_dev *wdev, -	u64 cookie, struct ieee80211_channel *chan, gfp_t gfp) +void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, +					struct ieee80211_channel *chan, +					gfp_t gfp)  { +	struct wiphy *wiphy = wdev->wiphy; +	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + +	trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan);  	nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,  					  rdev, wdev, cookie, chan, 0, gfp);  } +EXPORT_SYMBOL(cfg80211_remain_on_channel_expired); -void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, -			    struct net_device *dev, const u8 *mac_addr, -			    struct station_info *sinfo, gfp_t gfp) +void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, +		      struct station_info *sinfo, gfp_t gfp)  { +	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; +	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);  	struct sk_buff *msg; +	trace_cfg80211_new_sta(dev, mac_addr, sinfo); +  	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);  	if (!msg)  		return; @@ -9224,14 +9790,17 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,  	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,  				nl80211_mlme_mcgrp.id, gfp);  } 
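/*
 * Illustrative sketch, not part of this patch: with the event helper
 * now exported as cfg80211_new_sta(), a driver reports a newly
 * associated station directly instead of going through the internal
 * nl80211_send_sta_event(). The function below is hypothetical; only
 * the cfg80211_new_sta() call reflects the exported API.
 */
static void drv_report_new_sta(struct net_device *dev, const u8 *mac)
{
	struct station_info sinfo = {};

	/* fill in whatever is known about the peer before reporting it */
	cfg80211_new_sta(dev, mac, &sinfo, GFP_KERNEL);
}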
+EXPORT_SYMBOL(cfg80211_new_sta); -void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev, -				struct net_device *dev, const u8 *mac_addr, -				gfp_t gfp) +void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp)  { +	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; +	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);  	struct sk_buff *msg;  	void *hdr; +	trace_cfg80211_del_sta(dev, mac_addr); +  	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);  	if (!msg)  		return; @@ -9256,12 +9825,14 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,  	genlmsg_cancel(msg, hdr);  	nlmsg_free(msg);  } +EXPORT_SYMBOL(cfg80211_del_sta); -void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev, -				    struct net_device *dev, const u8 *mac_addr, -				    enum nl80211_connect_failed_reason reason, -				    gfp_t gfp) +void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, +			  enum nl80211_connect_failed_reason reason, +			  gfp_t gfp)  { +	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; +	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);  	struct sk_buff *msg;  	void *hdr; @@ -9290,6 +9861,7 @@ void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev,  	genlmsg_cancel(msg, hdr);  	nlmsg_free(msg);  } +EXPORT_SYMBOL(cfg80211_conn_failed);  static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,  				       const u8 *addr, gfp_t gfp) @@ -9334,19 +9906,47 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,  	return true;  } -bool nl80211_unexpected_frame(struct net_device *dev, const u8 *addr, gfp_t gfp) +bool cfg80211_rx_spurious_frame(struct net_device *dev, +				const u8 *addr, gfp_t gfp)  { -	return __nl80211_unexpected_frame(dev, NL80211_CMD_UNEXPECTED_FRAME, -					  addr, gfp); +	struct wireless_dev *wdev = dev->ieee80211_ptr; +	bool ret; + +	trace_cfg80211_rx_spurious_frame(dev, addr); + +	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && +		    wdev->iftype != NL80211_IFTYPE_P2P_GO)) { +		trace_cfg80211_return_bool(false); +		return false; +	} +	ret = __nl80211_unexpected_frame(dev, NL80211_CMD_UNEXPECTED_FRAME, +					 addr, gfp); +	trace_cfg80211_return_bool(ret); +	return ret;  } +EXPORT_SYMBOL(cfg80211_rx_spurious_frame); -bool nl80211_unexpected_4addr_frame(struct net_device *dev, -				    const u8 *addr, gfp_t gfp) +bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev, +					const u8 *addr, gfp_t gfp)  { -	return __nl80211_unexpected_frame(dev, -					  NL80211_CMD_UNEXPECTED_4ADDR_FRAME, -					  addr, gfp); +	struct wireless_dev *wdev = dev->ieee80211_ptr; +	bool ret; + +	trace_cfg80211_rx_unexpected_4addr_frame(dev, addr); + +	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && +		    wdev->iftype != NL80211_IFTYPE_P2P_GO && +		    wdev->iftype != NL80211_IFTYPE_AP_VLAN)) { +		trace_cfg80211_return_bool(false); +		return false; +	} +	ret = __nl80211_unexpected_frame(dev, +					 NL80211_CMD_UNEXPECTED_4ADDR_FRAME, +					 addr, gfp); +	trace_cfg80211_return_bool(ret); +	return ret;  } +EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame);  int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,  		      struct wireless_dev *wdev, u32 nlportid, @@ -9386,15 +9986,17 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,  	return -ENOBUFS;  } -void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, -				 struct wireless_dev *wdev, u64 cookie, -				 const u8 *buf, size_t len, bool ack, -				 
gfp_t gfp) +void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, +			     const u8 *buf, size_t len, bool ack, gfp_t gfp)  { +	struct wiphy *wiphy = wdev->wiphy; +	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);  	struct net_device *netdev = wdev->netdev;  	struct sk_buff *msg;  	void *hdr; +	trace_cfg80211_mgmt_tx_status(wdev, cookie, ack); +  	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);  	if (!msg)  		return; @@ -9422,17 +10024,21 @@ void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,  	genlmsg_cancel(msg, hdr);  	nlmsg_free(msg);  } +EXPORT_SYMBOL(cfg80211_mgmt_tx_status); -void -nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, -			     struct net_device *netdev, -			     enum nl80211_cqm_rssi_threshold_event rssi_event, -			     gfp_t gfp) +void cfg80211_cqm_rssi_notify(struct net_device *dev, +			      enum nl80211_cqm_rssi_threshold_event rssi_event, +			      gfp_t gfp)  { +	struct wireless_dev *wdev = dev->ieee80211_ptr; +	struct wiphy *wiphy = wdev->wiphy; +	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);  	struct sk_buff *msg;  	struct nlattr *pinfoattr;  	void *hdr; +	trace_cfg80211_cqm_rssi_notify(dev, rssi_event); +  	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);  	if (!msg)  		return; @@ -9444,7 +10050,7 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,  	}  	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || -	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) +	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))  		goto nla_put_failure;  	pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM); @@ -9467,10 +10073,11 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,  	genlmsg_cancel(msg, hdr);  	nlmsg_free(msg);  } +EXPORT_SYMBOL(cfg80211_cqm_rssi_notify); -void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, -			      struct net_device *netdev, const u8 *bssid, -			      const u8 *replay_ctr, gfp_t gfp) +static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev, +				     struct net_device *netdev, const u8 *bssid, +				     const u8 *replay_ctr, gfp_t gfp)  {  	struct sk_buff *msg;  	struct nlattr *rekey_attr; @@ -9512,9 +10119,22 @@ void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,  	nlmsg_free(msg);  } -void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, -				    struct net_device *netdev, int index, -				    const u8 *bssid, bool preauth, gfp_t gfp) +void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid, +			       const u8 *replay_ctr, gfp_t gfp) +{ +	struct wireless_dev *wdev = dev->ieee80211_ptr; +	struct wiphy *wiphy = wdev->wiphy; +	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + +	trace_cfg80211_gtk_rekey_notify(dev, bssid); +	nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp); +} +EXPORT_SYMBOL(cfg80211_gtk_rekey_notify); + +static void +nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev, +			       struct net_device *netdev, int index, +			       const u8 *bssid, bool preauth, gfp_t gfp)  {  	struct sk_buff *msg;  	struct nlattr *attr; @@ -9557,9 +10177,22 @@ void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,  	nlmsg_free(msg);  } -void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, -			      struct net_device *netdev, -			      struct cfg80211_chan_def *chandef, gfp_t gfp) +void cfg80211_pmksa_candidate_notify(struct net_device 
*dev, int index, +				     const u8 *bssid, bool preauth, gfp_t gfp) +{ +	struct wireless_dev *wdev = dev->ieee80211_ptr; +	struct wiphy *wiphy = wdev->wiphy; +	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + +	trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth); +	nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp); +} +EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify); + +static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, +				     struct net_device *netdev, +				     struct cfg80211_chan_def *chandef, +				     gfp_t gfp)  {  	struct sk_buff *msg;  	void *hdr; @@ -9591,11 +10224,36 @@ void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,  	nlmsg_free(msg);  } -void -nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev, -			    struct net_device *netdev, const u8 *peer, -			    u32 num_packets, u32 rate, u32 intvl, gfp_t gfp) +void cfg80211_ch_switch_notify(struct net_device *dev, +			       struct cfg80211_chan_def *chandef) +{ +	struct wireless_dev *wdev = dev->ieee80211_ptr; +	struct wiphy *wiphy = wdev->wiphy; +	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + +	trace_cfg80211_ch_switch_notify(dev, chandef); + +	wdev_lock(wdev); + +	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && +		    wdev->iftype != NL80211_IFTYPE_P2P_GO)) +		goto out; + +	wdev->channel = chandef->chan; +	nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL); +out: +	wdev_unlock(wdev); +	return; +} +EXPORT_SYMBOL(cfg80211_ch_switch_notify); + +void cfg80211_cqm_txe_notify(struct net_device *dev, +			     const u8 *peer, u32 num_packets, +			     u32 rate, u32 intvl, gfp_t gfp)  { +	struct wireless_dev *wdev = dev->ieee80211_ptr; +	struct wiphy *wiphy = wdev->wiphy; +	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);  	struct sk_buff *msg;  	struct nlattr *pinfoattr;  	void *hdr; @@ -9611,7 +10269,7 @@ nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev,  	}  	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || -	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || +	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||  	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer))  		goto nla_put_failure; @@ -9640,6 +10298,7 @@ nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev,  	genlmsg_cancel(msg, hdr);  	nlmsg_free(msg);  } +EXPORT_SYMBOL(cfg80211_cqm_txe_notify);  void  nl80211_radar_notify(struct cfg80211_registered_device *rdev, @@ -9692,15 +10351,18 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,  	nlmsg_free(msg);  } -void -nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, -				struct net_device *netdev, const u8 *peer, -				u32 num_packets, gfp_t gfp) +void cfg80211_cqm_pktloss_notify(struct net_device *dev, +				 const u8 *peer, u32 num_packets, gfp_t gfp)  { +	struct wireless_dev *wdev = dev->ieee80211_ptr; +	struct wiphy *wiphy = wdev->wiphy; +	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);  	struct sk_buff *msg;  	struct nlattr *pinfoattr;  	void *hdr; +	trace_cfg80211_cqm_pktloss_notify(dev, peer, num_packets); +  	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);  	if (!msg)  		return; @@ -9712,7 +10374,7 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,  	}  	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || -	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || +	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) 
||  	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer))  		goto nla_put_failure; @@ -9735,6 +10397,7 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,  	genlmsg_cancel(msg, hdr);  	nlmsg_free(msg);  } +EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify);  void cfg80211_probe_status(struct net_device *dev, const u8 *addr,  			   u64 cookie, bool acked, gfp_t gfp) @@ -10021,6 +10684,89 @@ static struct notifier_block nl80211_netlink_notifier = {  	.notifier_call = nl80211_netlink_notify,  }; +void cfg80211_ft_event(struct net_device *netdev, +		       struct cfg80211_ft_event_params *ft_event) +{ +	struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy; +	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); +	struct sk_buff *msg; +	void *hdr; +	int err; + +	trace_cfg80211_ft_event(wiphy, netdev, ft_event); + +	if (!ft_event->target_ap) +		return; + +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); +	if (!msg) +		return; + +	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FT_EVENT); +	if (!hdr) { +		nlmsg_free(msg); +		return; +	} + +	nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); +	nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); +	nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap); +	if (ft_event->ies) +		nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies); +	if (ft_event->ric_ies) +		nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len, +			ft_event->ric_ies); + +	err = genlmsg_end(msg, hdr); +	if (err < 0) { +		nlmsg_free(msg); +		return; +	} + +	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, +				nl80211_mlme_mcgrp.id, GFP_KERNEL); +} +EXPORT_SYMBOL(cfg80211_ft_event); + +void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp) +{ +	struct cfg80211_registered_device *rdev; +	struct sk_buff *msg; +	void *hdr; +	u32 nlportid; + +	rdev = wiphy_to_dev(wdev->wiphy); +	if (!rdev->crit_proto_nlportid) +		return; + +	nlportid = rdev->crit_proto_nlportid; +	rdev->crit_proto_nlportid = 0; + +	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); +	if (!msg) +		return; + +	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CRIT_PROTOCOL_STOP); +	if (!hdr) +		goto nla_put_failure; + +	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || +	    nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev))) +		goto nla_put_failure; + +	genlmsg_end(msg, hdr); + +	genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); +	return; + + nla_put_failure: +	if (hdr) +		genlmsg_cancel(msg, hdr); +	nlmsg_free(msg); + +} +EXPORT_SYMBOL(cfg80211_crit_proto_stopped); +  /* initialisation/exit functions */  int nl80211_init(void) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index b061da4919e..a4073e808c1 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -29,12 +29,6 @@ void nl80211_send_deauth(struct cfg80211_registered_device *rdev,  void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,  			   struct net_device *netdev,  			   const u8 *buf, size_t len, gfp_t gfp); -void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev, -				struct net_device *netdev, -				const u8 *buf, size_t len, gfp_t gfp); -void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev, -				  struct net_device *netdev, -				  const u8 *buf, size_t len, gfp_t gfp);  void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev,  			       struct net_device *netdev,  			       const u8 *addr, gfp_t gfp); @@ -54,10 +48,6 @@ void nl80211_send_disconnected(struct cfg80211_registered_device 
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index b061da4919e..a4073e808c1 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -29,12 +29,6 @@ void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
 void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
 			   struct net_device *netdev,
 			   const u8 *buf, size_t len, gfp_t gfp);
-void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev,
-				struct net_device *netdev,
-				const u8 *buf, size_t len, gfp_t gfp);
-void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev,
-				  struct net_device *netdev,
-				  const u8 *buf, size_t len, gfp_t gfp);
 void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev,
 			       struct net_device *netdev,
 			       const u8 *addr, gfp_t gfp);
@@ -54,10 +48,6 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
 			       struct net_device *netdev, u16 reason,
 			       const u8 *ie, size_t ie_len, bool from_ap);
-void nl80211_send_new_peer_candidate(struct cfg80211_registered_device *rdev,
-				     struct net_device *netdev,
-				     const u8 *macaddr, const u8* ie, u8 ie_len,
-				     gfp_t gfp);
 void
 nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
 			    struct net_device *netdev, const u8 *addr,
@@ -73,41 +63,10 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev,
 			     struct net_device *netdev, const u8 *bssid,
 			     gfp_t gfp);
-void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev,
-				    struct wireless_dev *wdev, u64 cookie,
-				    struct ieee80211_channel *chan,
-				    unsigned int duration, gfp_t gfp);
-void nl80211_send_remain_on_channel_cancel(
-	struct cfg80211_registered_device *rdev,
-	struct wireless_dev *wdev,
-	u64 cookie, struct ieee80211_channel *chan, gfp_t gfp);
-
-void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
-			    struct net_device *dev, const u8 *mac_addr,
-			    struct station_info *sinfo, gfp_t gfp);
-void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,
-				struct net_device *dev, const u8 *mac_addr,
-				gfp_t gfp);
-
-void nl80211_send_conn_failed_event(struct cfg80211_registered_device *rdev,
-				    struct net_device *dev, const u8 *mac_addr,
-				    enum nl80211_connect_failed_reason reason,
-				    gfp_t gfp);
-
 int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
 		      struct wireless_dev *wdev, u32 nlpid,
 		      int freq, int sig_dbm,
 		      const u8 *buf, size_t len, gfp_t gfp);
-void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
-				 struct wireless_dev *wdev, u64 cookie,
-				 const u8 *buf, size_t len, bool ack,
-				 gfp_t gfp);
-
-void
-nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
-			     struct net_device *netdev,
-			     enum nl80211_cqm_rssi_threshold_event rssi_event,
-			     gfp_t gfp);
 
 void
 nl80211_radar_notify(struct cfg80211_registered_device *rdev,
@@ -115,31 +74,4 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
 		     enum nl80211_radar_event event,
 		     struct net_device *netdev, gfp_t gfp);
-void
-nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
-				struct net_device *netdev, const u8 *peer,
-				u32 num_packets, gfp_t gfp);
-
-void
-nl80211_send_cqm_txe_notify(struct cfg80211_registered_device *rdev,
-			    struct net_device *netdev, const u8 *peer,
-			    u32 num_packets, u32 rate, u32 intvl, gfp_t gfp);
-
-void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
-			      struct net_device *netdev, const u8 *bssid,
-			      const u8 *replay_ctr, gfp_t gfp);
-
-void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
-				    struct net_device *netdev, int index,
-				    const u8 *bssid, bool preauth, gfp_t gfp);
-
-void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
-			      struct net_device *dev,
-			      struct cfg80211_chan_def *chandef, gfp_t gfp);
-
-bool nl80211_unexpected_frame(struct net_device *dev,
-			      const u8 *addr, gfp_t gfp);
-bool nl80211_unexpected_4addr_frame(struct net_device *dev,
-				    const u8 *addr, gfp_t gfp);
-
 #endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 422d38291d6..9f15f0ac824 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -6,11 +6,12 @@
 #include "core.h"
 #include "trace.h"
 
-static inline int rdev_suspend(struct cfg80211_registered_device *rdev)
+static inline int rdev_suspend(struct cfg80211_registered_device *rdev,
+			       struct cfg80211_wowlan *wowlan)
 {
 	int ret;
-	trace_rdev_suspend(&rdev->wiphy, rdev->wowlan);
-	ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan);
+	trace_rdev_suspend(&rdev->wiphy, wowlan);
+	ret = rdev->ops->suspend(&rdev->wiphy, wowlan);
 	trace_rdev_return_int(&rdev->wiphy, ret);
 	return ret;
 }
@@ -874,7 +875,7 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev,
 	trace_rdev_stop_p2p_device(&rdev->wiphy, wdev);
 	rdev->ops->stop_p2p_device(&rdev->wiphy, wdev);
 	trace_rdev_return_void(&rdev->wiphy);
-}					
+}
 
 static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev,
 				   struct net_device *dev,
@@ -887,4 +888,39 @@ static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev,
 	trace_rdev_return_int(&rdev->wiphy, ret);
 	return ret;
 }
+
+static inline int rdev_update_ft_ies(struct cfg80211_registered_device *rdev,
+				     struct net_device *dev,
+				     struct cfg80211_update_ft_ies_params *ftie)
+{
+	int ret;
+
+	trace_rdev_update_ft_ies(&rdev->wiphy, dev, ftie);
+	ret = rdev->ops->update_ft_ies(&rdev->wiphy, dev, ftie);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
+static inline int rdev_crit_proto_start(struct cfg80211_registered_device *rdev,
+					struct wireless_dev *wdev,
+					enum nl80211_crit_proto_id protocol,
+					u16 duration)
+{
+	int ret;
+
+	trace_rdev_crit_proto_start(&rdev->wiphy, wdev, protocol, duration);
+	ret = rdev->ops->crit_proto_start(&rdev->wiphy, wdev,
+					  protocol, duration);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
+static inline void rdev_crit_proto_stop(struct cfg80211_registered_device *rdev,
+				       struct wireless_dev *wdev)
+{
+	trace_rdev_crit_proto_stop(&rdev->wiphy, wdev);
+	rdev->ops->crit_proto_stop(&rdev->wiphy, wdev);
+	trace_rdev_return_void(&rdev->wiphy);
+}
+
 #endif /* __CFG80211_RDEV_OPS */
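A sketch of the driver side these new wrappers dispatch to. The op signatures are inferred from the wrapper bodies above, and the "mydrv" names are hypothetical; a real driver would fill in hardware-specific behaviour.

#include <net/cfg80211.h>

static int mydrv_crit_proto_start(struct wiphy *wiphy,
				  struct wireless_dev *wdev,
				  enum nl80211_crit_proto_id protocol,
				  u16 duration)
{
	/* e.g. suspend scans and power save for "duration" ms so the
	 * critical protocol (DHCP and the like) gets reliable air time */
	return 0;
}

static void mydrv_crit_proto_stop(struct wiphy *wiphy,
				  struct wireless_dev *wdev)
{
	/* restore normal power-save and scan behaviour */
}

static const struct cfg80211_ops mydrv_cfg80211_ops = {
	/* ... other ops elided ... */
	.crit_proto_start = mydrv_crit_proto_start,
	.crit_proto_stop = mydrv_crit_proto_stop,
};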
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 98532c00242..cc35fbaa457 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -184,14 +184,14 @@ static const struct ieee80211_regdomain world_regdom = {
 			NL80211_RRF_NO_IBSS |
 			NL80211_RRF_NO_OFDM),
 		/* IEEE 802.11a, channel 36..48 */
-		REG_RULE(5180-10, 5240+10, 40, 6, 20,
+		REG_RULE(5180-10, 5240+10, 80, 6, 20,
                        NL80211_RRF_PASSIVE_SCAN |
                        NL80211_RRF_NO_IBSS),
 
-		/* NB: 5260 MHz - 5700 MHz requies DFS */
+		/* NB: 5260 MHz - 5700 MHz requires DFS */
 
 		/* IEEE 802.11a, channel 149..165 */
-		REG_RULE(5745-10, 5825+10, 40, 6, 20,
+		REG_RULE(5745-10, 5825+10, 80, 6, 20,
 			NL80211_RRF_PASSIVE_SCAN |
 			NL80211_RRF_NO_IBSS),
 
@@ -855,7 +855,7 @@ static void handle_channel(struct wiphy *wiphy,
 			return;
 
 		REG_DBG_PRINT("Disabling freq %d MHz\n", chan->center_freq);
-		chan->flags = IEEE80211_CHAN_DISABLED;
+		chan->flags |= IEEE80211_CHAN_DISABLED;
 		return;
 	}
 
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 482c70e7012..a9dc5c736df 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -160,7 +160,7 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
 	struct cfg80211_connect_params *params;
-	const u8 *prev_bssid = NULL;
+	struct cfg80211_assoc_request req = {};
 	int err;
 
 	ASSERT_WDEV_LOCK(wdev);
@@ -187,16 +187,20 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
 		BUG_ON(!rdev->ops->assoc);
 		wdev->conn->state = CFG80211_CONN_ASSOCIATING;
 		if (wdev->conn->prev_bssid_valid)
-			prev_bssid = wdev->conn->prev_bssid;
-		err = __cfg80211_mlme_assoc(rdev, wdev->netdev,
-					    params->channel, params->bssid,
-					    prev_bssid,
-					    params->ssid, params->ssid_len,
-					    params->ie, params->ie_len,
-					    params->mfp != NL80211_MFP_NO,
-					    &params->crypto,
-					    params->flags, &params->ht_capa,
-					    &params->ht_capa_mask);
+			req.prev_bssid = wdev->conn->prev_bssid;
+		req.ie = params->ie;
+		req.ie_len = params->ie_len;
+		req.use_mfp = params->mfp != NL80211_MFP_NO;
+		req.crypto = params->crypto;
+		req.flags = params->flags;
+		req.ht_capa = params->ht_capa;
+		req.ht_capa_mask = params->ht_capa_mask;
+		req.vht_capa = params->vht_capa;
+		req.vht_capa_mask = params->vht_capa_mask;
+
+		err = __cfg80211_mlme_assoc(rdev, wdev->netdev, params->channel,
+					    params->bssid, params->ssid,
+					    params->ssid_len, &req);
 		if (err)
 			__cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
 					       NULL, 0,
@@ -232,7 +236,7 @@ void cfg80211_conn_work(struct work_struct *work)
 			wdev_unlock(wdev);
 			continue;
 		}
-		if (wdev->sme_state != CFG80211_SME_CONNECTING) {
+		if (wdev->sme_state != CFG80211_SME_CONNECTING || !wdev->conn) {
 			wdev_unlock(wdev);
 			continue;
 		}
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 238ee49b386..8f28b9f798d 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -83,6 +83,14 @@ static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env)
 	return 0;
 }
 
+static void cfg80211_leave_all(struct cfg80211_registered_device *rdev)
+{
+	struct wireless_dev *wdev;
+
+	list_for_each_entry(wdev, &rdev->wdev_list, list)
+		cfg80211_leave(rdev, wdev);
+}
+
 static int wiphy_suspend(struct device *dev, pm_message_t state)
 {
 	struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
@@ -90,12 +98,19 @@ static int wiphy_suspend(struct device *dev, pm_message_t state)
 
 	rdev->suspend_at = get_seconds();
 
-	if (rdev->ops->suspend) {
-		rtnl_lock();
-		if (rdev->wiphy.registered)
-			ret = rdev_suspend(rdev);
-		rtnl_unlock();
+	rtnl_lock();
+	if (rdev->wiphy.registered) {
+		if (!rdev->wowlan)
+			cfg80211_leave_all(rdev);
+		if (rdev->ops->suspend)
+			ret = rdev_suspend(rdev, rdev->wowlan);
+		if (ret == 1) {
+			/* Driver refused to configure wowlan */
+			cfg80211_leave_all(rdev);
+			ret = rdev_suspend(rdev, NULL);
+		}
 	}
+	rtnl_unlock();
 
 	return ret;
 }
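A sketch of a driver ->suspend() op under the convention visible in wiphy_suspend() above: a return value of 1 means "cannot honour this WoWLAN configuration", after which cfg80211 disconnects every interface and calls suspend() again with wowlan == NULL. The "mydrv" names and the capability check are hypothetical.

#include <net/cfg80211.h>

static bool mydrv_fw_can_wowlan(struct cfg80211_wowlan *wowlan)
{
	/* hypothetical capability check against firmware limits */
	return wowlan->n_patterns == 0;
}

static int mydrv_suspend(struct wiphy *wiphy, struct cfg80211_wowlan *wowlan)
{
	if (wowlan && !mydrv_fw_can_wowlan(wowlan))
		return 1;	/* refuse: cfg80211 retries a plain suspend */

	/* program wakeup triggers (if any) and quiesce the device */
	return 0;
}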
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 7586de77a2f..ecd4fcec3c9 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1786,6 +1786,61 @@ TRACE_EVENT(rdev_set_mac_acl,
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->acl_policy)
 );
 
+TRACE_EVENT(rdev_update_ft_ies,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 struct cfg80211_update_ft_ies_params *ftie),
+	TP_ARGS(wiphy, netdev, ftie),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		__field(u16, md)
+		__dynamic_array(u8, ie, ftie->ie_len)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		__entry->md = ftie->md;
+		memcpy(__get_dynamic_array(ie), ftie->ie, ftie->ie_len);
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", md: 0x%x",
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->md)
+);
+
+TRACE_EVENT(rdev_crit_proto_start,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+		 enum nl80211_crit_proto_id protocol, u16 duration),
+	TP_ARGS(wiphy, wdev, protocol, duration),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		WDEV_ENTRY
+		__field(u16, proto)
+		__field(u16, duration)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		WDEV_ASSIGN;
+		__entry->proto = protocol;
+		__entry->duration = duration;
+	),
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", proto=%x, duration=%u",
+		  WIPHY_PR_ARG, WDEV_PR_ARG, __entry->proto, __entry->duration)
+);
+
+TRACE_EVENT(rdev_crit_proto_stop,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
+	TP_ARGS(wiphy, wdev),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		WDEV_ENTRY
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		WDEV_ASSIGN;
+	),
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT,
+		  WIPHY_PR_ARG, WDEV_PR_ARG)
+);
+
 /*************************************************************
  *	     cfg80211 exported functions traces		     *
  *************************************************************/
@@ -2414,6 +2469,32 @@ TRACE_EVENT(cfg80211_report_wowlan_wakeup,
 	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG)
 );
 
+TRACE_EVENT(cfg80211_ft_event,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 struct cfg80211_ft_event_params *ft_event),
+	TP_ARGS(wiphy, netdev, ft_event),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		__dynamic_array(u8, ies, ft_event->ies_len)
+		MAC_ENTRY(target_ap)
+		__dynamic_array(u8, ric_ies, ft_event->ric_ies_len)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		if (ft_event->ies)
+			memcpy(__get_dynamic_array(ies), ft_event->ies,
+			       ft_event->ies_len);
+		MAC_ASSIGN(target_ap, ft_event->target_ap);
+		if (ft_event->ric_ies)
+			memcpy(__get_dynamic_array(ric_ies), ft_event->ric_ies,
+			       ft_event->ric_ies_len);
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", target_ap: " MAC_PR_FMT,
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(target_ap))
+);
+
 #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 37a56ee1e1e..f5ad4d94ba8 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -511,7 +511,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
 		encaps_data = bridge_tunnel_header;
 		encaps_len = sizeof(bridge_tunnel_header);
 		skip_header_bytes -= 2;
-	} else if (ethertype > 0x600) {
+	} else if (ethertype >= ETH_P_802_3_MIN) {
 		encaps_data = rfc1042_header;
 		encaps_len = sizeof(rfc1042_header);
 		skip_header_bytes -= 2;
@@ -1155,6 +1155,26 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len,
 }
 EXPORT_SYMBOL(cfg80211_get_p2p_attr);
 
+bool ieee80211_operating_class_to_band(u8 operating_class,
+				       enum ieee80211_band *band)
+{
+	switch (operating_class) {
+	case 112:
+	case 115 ... 127:
+		*band = IEEE80211_BAND_5GHZ;
+		return true;
+	case 81:
+	case 82:
+	case 83:
+	case 84:
+		*band = IEEE80211_BAND_2GHZ;
+		return true;
+	}
+
+	return false;
+}
+EXPORT_SYMBOL(ieee80211_operating_class_to_band);
+
 int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev,
 				 u32 beacon_int)
 {
@@ -1258,12 +1278,12 @@ int cfg80211_can_use_iftype_chan(struct cfg80211_registered_device *rdev,
 	list_for_each_entry(wdev_iter, &rdev->wdev_list, list) {
 		if (wdev_iter == wdev)
 			continue;
-		if (wdev_iter->netdev) {
-			if (!netif_running(wdev_iter->netdev))
-				continue;
-		} else if (wdev_iter->iftype == NL80211_IFTYPE_P2P_DEVICE) {
+		if (wdev_iter->iftype == NL80211_IFTYPE_P2P_DEVICE) {
+			if (!wdev_iter->p2p_started)
+				continue;
+		} else if (wdev_iter->netdev) {
+			if (!netif_running(wdev_iter->netdev))
+				continue;
 		} else {
 			WARN_ON(1);
 		}
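A usage sketch for the new ieee80211_operating_class_to_band() helper added above: callers must handle the "false" return, since the mapping only covers the listed 2.4/5 GHz global operating classes. The wrapper name is hypothetical.

#include <net/cfg80211.h>

static bool peer_advertises_5ghz(u8 op_class)
{
	enum ieee80211_band band;

	return ieee80211_operating_class_to_band(op_class, &band) &&
	       band == IEEE80211_BAND_5GHZ;
}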
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 167c67d46c6..23cea0f7433 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1037,6 +1037,24 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir
 	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
 }
 
+static int flow_to_policy_dir(int dir)
+{
+	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
+	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
+	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
+		return dir;
+
+	switch (dir) {
+	default:
+	case FLOW_DIR_IN:
+		return XFRM_POLICY_IN;
+	case FLOW_DIR_OUT:
+		return XFRM_POLICY_OUT;
+	case FLOW_DIR_FWD:
+		return XFRM_POLICY_FWD;
+	}
+}
+
 static struct flow_cache_object *
 xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
 		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
@@ -1046,7 +1064,7 @@ xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
 	if (old_obj)
 		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
 
-	pol = __xfrm_policy_lookup(net, fl, family, dir);
+	pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
 	if (IS_ERR_OR_NULL(pol))
 		return ERR_CAST(pol);
 
@@ -1932,7 +1950,8 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
 	 * previous cache entry */
 	if (xdst == NULL) {
 		num_pols = 1;
-		pols[0] = __xfrm_policy_lookup(net, fl, family, dir);
+		pols[0] = __xfrm_policy_lookup(net, fl, family,
+					       flow_to_policy_dir(dir));
 		err = xfrm_expand_policies(fl, family, pols,
 					   &num_pols, &num_xfrms);
 		if (err < 0)
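Since every operand in flow_to_policy_dir()'s first if is a compile-time constant, the comparison folds away: when the FLOW_DIR_* and XFRM_POLICY_* enum spaces coincide numerically the helper compiles down to "return dir;", and the switch is only emitted if they diverge. A hypothetical alternative, not what the patch does, would pin the equivalence at build time instead:

#include <linux/bug.h>
#include <net/flow.h>
#include <net/xfrm.h>

/* Fail the build if the two direction enum spaces ever diverge,
 * instead of converting at run time. */
static inline int flow_to_policy_dir_strict(int dir)
{
	BUILD_BUG_ON(XFRM_POLICY_IN != FLOW_DIR_IN ||
		     XFRM_POLICY_OUT != FLOW_DIR_OUT ||
		     XFRM_POLICY_FWD != FLOW_DIR_FWD);
	return dir;
}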
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 2c341bdaf47..78f66fa9244 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1187,6 +1187,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
 		goto error;
 
 	x->props.flags = orig->props.flags;
+	x->props.extra_flags = orig->props.extra_flags;
 
 	x->curlft.add_time = orig->curlft.add_time;
 	x->km.state = orig->km.state;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index fbd9e6cd0fd..aa778748c56 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -515,6 +515,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 
 	copy_from_user_state(x, p);
 
+	if (attrs[XFRMA_SA_EXTRA_FLAGS])
+		x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
+
 	if ((err = attach_aead(&x->aead, &x->props.ealgo,
 			       attrs[XFRMA_ALG_AEAD])))
 		goto error;
@@ -779,6 +782,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 
 	copy_to_user_state(x, p);
 
+	if (x->props.extra_flags) {
+		ret = nla_put_u32(skb, XFRMA_SA_EXTRA_FLAGS,
+				  x->props.extra_flags);
+		if (ret)
+			goto out;
+	}
+
 	if (x->coaddr) {
 		ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr);
 		if (ret)
@@ -2302,9 +2312,10 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_MARK]		= { .len = sizeof(struct xfrm_mark) },
 	[XFRMA_TFCPAD]		= { .type = NLA_U32 },
 	[XFRMA_REPLAY_ESN_VAL]	= { .len = sizeof(struct xfrm_replay_state_esn) },
+	[XFRMA_SA_EXTRA_FLAGS]	= { .type = NLA_U32 },
 };
 
-static struct xfrm_link {
+static const struct xfrm_link {
 	int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
 	int (*dump)(struct sk_buff *, struct netlink_callback *);
 	int (*done)(struct netlink_callback *);
@@ -2338,7 +2349,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	struct net *net = sock_net(skb->sk);
 	struct nlattr *attrs[XFRMA_MAX+1];
-	struct xfrm_link *link;
+	const struct xfrm_link *link;
 	int type, err;
 
 	type = nlh->nlmsg_type;
@@ -2495,6 +2506,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
 				    x->security->ctx_len);
 	if (x->coaddr)
 		l += nla_total_size(sizeof(*x->coaddr));
+	if (x->props.extra_flags)
+		l += nla_total_size(sizeof(x->props.extra_flags));
 
 	/* Must count x->lastused as it may become non-zero behind our back. */
 	l += nla_total_size(sizeof(u64));
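The XFRMA_SA_EXTRA_FLAGS additions follow the usual checklist for a new netlink attribute: policy entry, parse on construct, emit on dump, account in xfrm_sa_len(), and copy on state clone. A userspace-side sketch (not from the patch; error handling elided, buffer assumed large enough) of appending the new u32 attribute to a hand-built XFRM_MSG_NEWSA request:

#include <string.h>
#include <linux/netlink.h>
#include <linux/xfrm.h>

static void add_extra_flags(struct nlmsghdr *nlh, __u32 extra_flags)
{
	/* next attribute goes at the aligned end of the message */
	struct nlattr *nla = (void *)nlh + NLMSG_ALIGN(nlh->nlmsg_len);

	nla->nla_type = XFRMA_SA_EXTRA_FLAGS;
	nla->nla_len = NLA_HDRLEN + sizeof(extra_flags);
	memcpy((void *)nla + NLA_HDRLEN, &extra_flags, sizeof(extra_flags));
	nlh->nlmsg_len = NLMSG_ALIGN(nlh->nlmsg_len) + NLA_ALIGN(nla->nla_len);
}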