diff options
| author | Tomasz Grobelny <tomasz@grobelny.oswiecenia.net> | 2010-12-04 13:38:01 +0100 | 
|---|---|---|
| committer | Gerrit Renker <gerrit@erg.abdn.ac.uk> | 2010-12-07 13:47:12 +0100 | 
| commit | 871a2c16c21b988688b4ab1a78eadd969765c0a3 (patch) | |
| tree | 34ffb3be1402747ef3b7fdb754fb99778bd45728 | |
| parent | cfa969e385a23e4c85f50e0ed5de25a2e18bf9d4 (diff) | |
| download | olio-linux-3.10-871a2c16c21b988688b4ab1a78eadd969765c0a3.tar.xz olio-linux-3.10-871a2c16c21b988688b4ab1a78eadd969765c0a3.zip  | |
dccp: Policy-based packet dequeueing infrastructure
This patch adds a generic infrastructure for policy-based dequeueing of
TX packets and provides two policies:
 * a simple FIFO policy (which is the default) and
 * a priority-based policy (set via socket options).
Both policies honour the tx_qlen sysctl for the maximum size of the write
queue (which can be overridden per socket via DCCP_SOCKOPT_QPOLICY_TXQLEN).
The priority policy uses skb->priority internally to assign a u32 priority
identifier, using the same ranking as SO_PRIORITY. The skb->priority field
is set to 0 when the packet leaves DCCP. The priority is supplied as ancillary
data using cmsg(3); the patch also provides the requisite parsing routines.
Signed-off-by: Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
| -rw-r--r-- | Documentation/networking/dccp.txt | 20 | ||||
| -rw-r--r-- | include/linux/dccp.h | 21 | ||||
| -rw-r--r-- | net/dccp/Makefile | 4 | ||||
| -rw-r--r-- | net/dccp/dccp.h | 12 | ||||
| -rw-r--r-- | net/dccp/output.c | 7 | ||||
| -rw-r--r-- | net/dccp/proto.c | 67 | ||||
| -rw-r--r-- | net/dccp/qpolicy.c | 126 | 
7 files changed, 248 insertions, 9 deletions
diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt index 271d524a4c8..b395ca6a49f 100644 --- a/Documentation/networking/dccp.txt +++ b/Documentation/networking/dccp.txt @@ -47,6 +47,26 @@ http://linux-net.osdl.org/index.php/DCCP_Testing#Experimental_DCCP_source_tree  Socket options  ============== +DCCP_SOCKOPT_QPOLICY_ID sets the dequeuing policy for outgoing packets. It takes +a policy ID as argument and can only be set before the connection (i.e. changes +during an established connection are not supported). Currently, two policies are +defined: the "simple" policy (DCCPQ_POLICY_SIMPLE), which does nothing special, +and a priority-based variant (DCCPQ_POLICY_PRIO). The latter allows to pass an +u32 priority value as ancillary data to sendmsg(), where higher numbers indicate +a higher packet priority (similar to SO_PRIORITY). This ancillary data needs to +be formatted using a cmsg(3) message header filled in as follows: +	cmsg->cmsg_level = SOL_DCCP; +	cmsg->cmsg_type	 = DCCP_SCM_PRIORITY; +	cmsg->cmsg_len	 = CMSG_LEN(sizeof(uint32_t));	/* or CMSG_LEN(4) */ + +DCCP_SOCKOPT_QPOLICY_TXQLEN sets the maximum length of the output queue. A zero +value is always interpreted as unbounded queue length. If different from zero, +the interpretation of this parameter depends on the current dequeuing policy +(see above): the "simple" policy will enforce a fixed queue size by returning +EAGAIN, whereas the "prio" policy enforces a fixed queue length by dropping the +lowest-priority packet first. The default value for this parameter is +initialised from /proc/sys/net/dccp/default/tx_qlen. +  DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of  service codes (RFC 4340, sec. 
8.1.2); if this socket option is not set,  the socket will fall back to 0 (which means that no meaningful service code diff --git a/include/linux/dccp.h b/include/linux/dccp.h index eed52bcd35d..010e2d87ed7 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -197,6 +197,21 @@ enum dccp_feature_numbers {  	DCCPF_MAX_CCID_SPECIFIC = 255,  }; +/* DCCP socket control message types for cmsg */ +enum dccp_cmsg_type { +	DCCP_SCM_PRIORITY = 1, +	DCCP_SCM_QPOLICY_MAX = 0xFFFF, +	/* ^-- Up to here reserved exclusively for qpolicy parameters */ +	DCCP_SCM_MAX +}; + +/* DCCP priorities for outgoing/queued packets */ +enum dccp_packet_dequeueing_policy { +	DCCPQ_POLICY_SIMPLE, +	DCCPQ_POLICY_PRIO, +	DCCPQ_POLICY_MAX +}; +  /* DCCP socket options */  #define DCCP_SOCKOPT_PACKET_SIZE	1 /* XXX deprecated, without effect */  #define DCCP_SOCKOPT_SERVICE		2 @@ -210,6 +225,8 @@ enum dccp_feature_numbers {  #define DCCP_SOCKOPT_CCID		13  #define DCCP_SOCKOPT_TX_CCID		14  #define DCCP_SOCKOPT_RX_CCID		15 +#define DCCP_SOCKOPT_QPOLICY_ID		16 +#define DCCP_SOCKOPT_QPOLICY_TXQLEN	17  #define DCCP_SOCKOPT_CCID_RX_INFO	128  #define DCCP_SOCKOPT_CCID_TX_INFO	192 @@ -458,6 +475,8 @@ struct dccp_ackvec;   * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection)   * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection)   * @dccps_options_received - parsed set of retrieved options + * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy + * @dccps_tx_qlen - maximum length of the TX queue   * @dccps_role - role of this sock, one of %dccp_role   * @dccps_hc_rx_insert_options - receiver wants to add options when acking   * @dccps_hc_tx_insert_options - sender wants to add options when sending @@ -500,6 +519,8 @@ struct dccp_sock {  	struct ccid			*dccps_hc_rx_ccid;  	struct ccid			*dccps_hc_tx_ccid;  	struct dccp_options_received	dccps_options_received; +	__u8				dccps_qpolicy; +	__u32				dccps_tx_qlen;  	enum dccp_role		
	dccps_role:2;  	__u8				dccps_hc_rx_insert_options:1;  	__u8				dccps_hc_tx_insert_options:1; diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 2991efcc8de..5c8362b037e 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -1,7 +1,7 @@  obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o -dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o - +dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \ +	  qpolicy.o  #  # CCID algorithms to be used by dccp.ko  # diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 19fafd59746..d008da91cec 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -243,6 +243,18 @@ extern void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,  extern void dccp_send_sync(struct sock *sk, const u64 seq,  			   const enum dccp_pkt_type pkt_type); +/* + * TX Packet Dequeueing Interface + */ +extern void		dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb); +extern bool		dccp_qpolicy_full(struct sock *sk); +extern void		dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb); +extern struct sk_buff	*dccp_qpolicy_top(struct sock *sk); +extern struct sk_buff	*dccp_qpolicy_pop(struct sock *sk); + +/* + * TX Packet Output and TX Timers + */  extern void   dccp_write_xmit(struct sock *sk);  extern void   dccp_write_space(struct sock *sk);  extern void   dccp_flush_write_queue(struct sock *sk, long *time_budget); diff --git a/net/dccp/output.c b/net/dccp/output.c index d96dd9d362a..784d3021054 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -242,7 +242,7 @@ static void dccp_xmit_packet(struct sock *sk)  {  	int err, len;  	struct dccp_sock *dp = dccp_sk(sk); -	struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue); +	struct sk_buff *skb = dccp_qpolicy_pop(sk);  	if (unlikely(skb == NULL))  		return; @@ -345,7 +345,7 @@ void dccp_write_xmit(struct sock *sk)  	struct dccp_sock *dp = dccp_sk(sk);  	struct sk_buff *skb; -	while ((skb = skb_peek(&sk->sk_write_queue))) { +	while 
((skb = dccp_qpolicy_top(sk))) {  		int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);  		switch (ccid_packet_dequeue_eval(rc)) { @@ -359,8 +359,7 @@ void dccp_write_xmit(struct sock *sk)  			dccp_xmit_packet(sk);  			break;  		case CCID_PACKET_ERR: -			skb_dequeue(&sk->sk_write_queue); -			kfree_skb(skb); +			dccp_qpolicy_drop(sk, skb);  			dccp_pr_debug("packet discarded due to err=%d\n", rc);  		}  	} diff --git a/net/dccp/proto.c b/net/dccp/proto.c index ef343d53fce..d6a224982bb 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -185,6 +185,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)  	dp->dccps_role		= DCCP_ROLE_UNDEFINED;  	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;  	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1; +	dp->dccps_tx_qlen	= sysctl_dccp_tx_qlen;  	dccp_init_xmit_timers(sk); @@ -532,6 +533,20 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,  	case DCCP_SOCKOPT_RECV_CSCOV:  		err = dccp_setsockopt_cscov(sk, val, true);  		break; +	case DCCP_SOCKOPT_QPOLICY_ID: +		if (sk->sk_state != DCCP_CLOSED) +			err = -EISCONN; +		else if (val < 0 || val >= DCCPQ_POLICY_MAX) +			err = -EINVAL; +		else +			dp->dccps_qpolicy = val; +		break; +	case DCCP_SOCKOPT_QPOLICY_TXQLEN: +		if (val < 0) +			err = -EINVAL; +		else +			dp->dccps_tx_qlen = val; +		break;  	default:  		err = -ENOPROTOOPT;  		break; @@ -639,6 +654,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,  	case DCCP_SOCKOPT_RECV_CSCOV:  		val = dp->dccps_pcrlen;  		break; +	case DCCP_SOCKOPT_QPOLICY_ID: +		val = dp->dccps_qpolicy; +		break; +	case DCCP_SOCKOPT_QPOLICY_TXQLEN: +		val = dp->dccps_tx_qlen; +		break;  	case 128 ... 
191:  		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,  					     len, (u32 __user *)optval, optlen); @@ -681,6 +702,43 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname,  EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);  #endif +static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) +{ +	struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg); + +	/* +	 * Assign an (opaque) qpolicy priority value to skb->priority. +	 * +	 * We are overloading this skb field for use with the qpolicy subystem. +	 * The skb->priority is normally used for the SO_PRIORITY option, which +	 * is initialised from sk_priority. Since the assignment of sk_priority +	 * to skb->priority happens later (on layer 3), we overload this field +	 * for use with queueing priorities as long as the skb is on layer 4. +	 * The default priority value (if nothing is set) is 0. +	 */ +	skb->priority = 0; + +	for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) { + +		if (!CMSG_OK(msg, cmsg)) +			return -EINVAL; + +		if (cmsg->cmsg_level != SOL_DCCP) +			continue; + +		switch (cmsg->cmsg_type) { +		case DCCP_SCM_PRIORITY: +			if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32))) +				return -EINVAL; +			skb->priority = *(__u32 *)CMSG_DATA(cmsg); +			break; +		default: +			return -EINVAL; +		} +	} +	return 0; +} +  int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  		 size_t len)  { @@ -696,8 +754,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  	lock_sock(sk); -	if (sysctl_dccp_tx_qlen && -	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) { +	if (dccp_qpolicy_full(sk)) {  		rc = -EAGAIN;  		goto out_release;  	} @@ -725,7 +782,11 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  	if (rc != 0)  		goto out_discard; -	skb_queue_tail(&sk->sk_write_queue, skb); +	rc = dccp_msghdr_parse(msg, skb); +	if (rc != 0) +		goto out_discard; + +	dccp_qpolicy_push(sk, skb);  	/*  	 * The xmit_timer is set if the 
TX CCID is rate-based and will expire  	 * when congestion control permits to release further packets into the diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c new file mode 100644 index 00000000000..4b0fd6b11f6 --- /dev/null +++ b/net/dccp/qpolicy.c @@ -0,0 +1,126 @@ +/* + *  net/dccp/qpolicy.c + * + *  Policy-based packet dequeueing interface for DCCP. + * + *  Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net> + * + *  This program is free software; you can redistribute it and/or + *  modify it under the terms of the GNU General Public License v2 + *  as published by the Free Software Foundation. + */ +#include "dccp.h" + +/* + *	Simple Dequeueing Policy: + *	If tx_qlen is different from 0, enqueue up to tx_qlen elements. + */ +static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb) +{ +	skb_queue_tail(&sk->sk_write_queue, skb); +} + +static bool qpolicy_simple_full(struct sock *sk) +{ +	return dccp_sk(sk)->dccps_tx_qlen && +	       sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen; +} + +static struct sk_buff *qpolicy_simple_top(struct sock *sk) +{ +	return skb_peek(&sk->sk_write_queue); +} + +/* + *	Priority-based Dequeueing Policy: + *	If tx_qlen is different from 0 and the queue has reached its upper bound + *	of tx_qlen elements, replace older packets lowest-priority-first. 
+ */ +static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk) +{ +	struct sk_buff *skb, *best = NULL; + +	skb_queue_walk(&sk->sk_write_queue, skb) +		if (best == NULL || skb->priority > best->priority) +			best = skb; +	return best; +} + +static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk) +{ +	struct sk_buff *skb, *worst = NULL; + +	skb_queue_walk(&sk->sk_write_queue, skb) +		if (worst == NULL || skb->priority < worst->priority) +			worst = skb; +	return worst; +} + +static bool qpolicy_prio_full(struct sock *sk) +{ +	if (qpolicy_simple_full(sk)) +		dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk)); +	return false; +} + +/** + * struct dccp_qpolicy_operations  -  TX Packet Dequeueing Interface + * @push: add a new @skb to the write queue + * @full: indicates that no more packets will be admitted + * @top:  peeks at whatever the queueing policy defines as its `top' + */ +static struct dccp_qpolicy_operations { +	void		(*push)	(struct sock *sk, struct sk_buff *skb); +	bool		(*full) (struct sock *sk); +	struct sk_buff*	(*top)  (struct sock *sk); + +} qpol_table[DCCPQ_POLICY_MAX] = { +	[DCCPQ_POLICY_SIMPLE] = { +		.push = qpolicy_simple_push, +		.full = qpolicy_simple_full, +		.top  = qpolicy_simple_top, +	}, +	[DCCPQ_POLICY_PRIO] = { +		.push = qpolicy_simple_push, +		.full = qpolicy_prio_full, +		.top  = qpolicy_prio_best_skb, +	}, +}; + +/* + *	Externally visible interface + */ +void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb) +{ +	qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb); +} + +bool dccp_qpolicy_full(struct sock *sk) +{ +	return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk); +} + +void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb) +{ +	if (skb != NULL) { +		skb_unlink(skb, &sk->sk_write_queue); +		kfree_skb(skb); +	} +} + +struct sk_buff *dccp_qpolicy_top(struct sock *sk) +{ +	return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk); +} + +struct sk_buff *dccp_qpolicy_pop(struct sock *sk) +{ +	struct sk_buff *skb = 
dccp_qpolicy_top(sk); + +	if (skb != NULL) { +		/* Clear any skb fields that we used internally */ +		skb->priority = 0; +		skb_unlink(skb, &sk->sk_write_queue); +	} +	return skb; +}  |