patch-2.1.68 linux/net/sched/sch_red.c
Next file: linux/net/sched/sch_sfq.c
Previous file: linux/net/sched/sch_prio.c
Back to the patch index
Back to the overall index
- Lines: 304
- Date: Sun Nov 30 14:00:40 1997
- Orig file: v2.1.67/linux/net/sched/sch_red.c
- Orig date: Wed Dec 31 16:00:00 1969
diff -u --recursive --new-file v2.1.67/linux/net/sched/sch_red.c linux/net/sched/sch_red.c
@@ -0,0 +1,303 @@
+/*
+ * net/sched/sch_red.c Random Early Detection scheduler.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+
+/* Random Early Detection (RED) algorithm.
+ =======================================
+
+ Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways
+ for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking.
+
+ This file codes a "divisionless" version of RED algorithm
+ written down in Fig.17 of the paper.
+
+Short description.
+------------------
+
+ When new packet arrives we calculate average queue length:
+
+ avg = (1-W)*avg + W*current_queue_len,
+
+ W is the filter time constant (chosen as 2^(-Wlog)), controlling
+ the inertia of the algorithm. To allow larger bursts, W should be
+ decreased.
+
+ if (avg > th_max) -> packet marked (dropped).
+ if (avg < th_min) -> packet passes.
+ if (th_min < avg < th_max) we calculate probability:
+
+ Pb = max_P * (avg - th_min)/(th_max-th_min)
+
+ and mark (drop) packet with this probability.
+ Pb changes from 0 (at avg==th_min) to max_P (avg==th_max).
+ max_P should be small (not 1!).
+
+ NB. SF&VJ assumed that Pb[avg] is linear function. I think it
+ is wrong. I'd make:
+ P[th_min] = 0, P[th_max] = 1;
+ dP/davg[th_min] = 0, dP/davg[th_max] = infinity, or a large number.
+
+ I choose max_P as a number between 0.01 and 0.1, so that
+ C1 = max_P/(th_max-th_min) is power of two: C1 = 2^(-C1log)
+
+ Parameters, settable by user (with default values):
+
+ qmaxbytes=256K - hard limit on queue length, should be chosen >qth_max
+ to allow packet bursts. This parameter does not
+ affect algorithm behaviour and can be chosen
+ arbitrarily high (well, less than ram size)
+ Really, this limit will never be achieved
+ if RED works correctly.
+ qth_min=32K
+ qth_max=128K - qth_max should be at least 2*qth_min
+ Wlog=8 - log(1/W).
+ Alog=Wlog - fixed point position in th_min and th_max.
+ Rlog=10
+ C1log=24 - C1log = trueC1log+Alog-Rlog
+ so that trueC1log=22 and max_P~0.02
+
+
+NOTES:
+
+Upper bound on W.
+-----------------
+
+ If you want to allow bursts of L packets of size S,
+ you should choose W:
+
+ L + 1 -th_min/S < (1-(1-W)^L)/W
+
+ For th_min/S = 32
+
+ log(W) L
+ -1 33
+ -2 35
+ -3 39
+ -4 46
+ -5 57
+ -6 75
+ -7 101
+ -8 135
+ -9 190
+ etc.
+ */
+
+/* Per-qdisc private state for one RED queue.
+ * "A scaled" means a fixed-point value with Alog fractional bits.
+ */
+struct red_sched_data
+{
+/* Parameters */
+	unsigned long qmaxbytes; /* HARD maximal queue length */
+	unsigned long qth_min; /* Min average length threshold: A scaled */
+	unsigned long qth_max; /* Max average length threshold: A scaled */
+	char Alog; /* Point position in average lengths */
+	char Wlog; /* log(W) */
+	char Rlog; /* random number bits */
+	char C1log; /* log(1/C1) */
+	char Slog; /* Scale of idle-time index into Stab[] (see red_enqueue) */
+	char Stab[256]; /* Idle aging: qave >>= Stab[(us_idle>>Slog)&0xFF] */
+
+/* Variables */
+	unsigned long qbytes; /* Queue length in bytes */
+	unsigned long qave; /* Average queue length: A scaled */
+	int qcount; /* Packets since last random number generation */
+	unsigned qR; /* Cached random number [0..2^Rlog) */
+	psched_time_t qidlestart; /* Start of idle period */
+};
+
+/* Cheap pseudo-random generator (borrowed from igmp.c): one linear
+ * congruential step, whitened with the jiffies counter.  Returns a
+ * value in [0, 2^log).
+ */
+static __inline__ unsigned red_random(int log)
+{
+	static unsigned long lcg_state = 152L;
+
+	lcg_state = lcg_state*69069L + 1;
+	return (lcg_state ^ jiffies) & ((1<<log) - 1);
+}
+
+/* Enqueue one packet, applying the RED mark/drop test.
+ *
+ * Returns 1 if the packet was queued, 0 if it was dropped.  The
+ * average queue length q->qave is updated on every arrival; after an
+ * idle period it is aged via the Stab[] shift table instead.
+ */
+static int
+red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+	struct red_sched_data *q = (struct red_sched_data *)sch->data;
+
+	psched_time_t now;
+
+	if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
+		long us_idle;
+		PSCHED_GET_TIME(now);
+		/* BUGFIX: measure the idle interval *before* resetting
+		 * qidlestart; the original cleared the timestamp first,
+		 * so us_idle was computed against the reset value. */
+		us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, (256<<q->Slog)-1, 0);
+		PSCHED_SET_PASTPERFECT(q->qidlestart);
+
+/* It is wrong, but I do not think that SF+VJ proposal is reasonable
+   and have not invented anything more clever 8)
+
+   The problem: ideally, average queue length recalculation should
+   be done over constant clock intervals. It is too expensive, so that
+   the calculation is driven by outgoing packets.
+   When the queue is idle we have to model this clock by hand.
+
+   SF+VJ proposed to "generate" m = (idletime/bandwidth)*average_pkt_size
+   dummy packets as a burst after idle time, i.e.
+
+          q->qave *= (1-W)^m
+
+   It is an apparently overcomplicated solution (f.e. we have to precompute
+   a table to make this calculation in reasonable time)
+   I believe that a simpler model may be used here,
+   but it is a field for experiments.
+*/
+		q->qave >>= q->Stab[(us_idle>>q->Slog)&0xFF];
+	}
+
+	/* avg = (1-W)*avg + W*qlen, in fixed point (A scaled). */
+	q->qave += ((q->qbytes<<q->Alog) - q->qave) >> q->Wlog;
+
+	if (q->qave < q->qth_min) {
+		q->qcount = -1;
+		/* BUGFIX: the label belongs *after* the counter reset;
+		 * with the label first, every probabilistic pass reset
+		 * qcount and the drop test could never trigger. */
+enqueue:
+		/* NOTE(review): this admits one packet past qmaxbytes;
+		 * presumably deliberate slack -- confirm. */
+		if (q->qbytes <= q->qmaxbytes) {
+			skb_queue_tail(&sch->q, skb);
+			q->qbytes += skb->len;
+			return 1;
+		}
+drop:
+		kfree_skb(skb, FREE_WRITE);
+		return 0;
+	}
+	if (q->qave >= q->qth_max) {
+		q->qcount = -1;
+		goto drop;
+	}
+	/* BUGFIX: count each packet exactly once.  The original executed
+	 * "q->qcount++; if (q->qcount++)", advancing the counter twice
+	 * per packet and skewing the drop probability. */
+	if (++q->qcount) {
+		/* Divisionless drop test: drop when
+		 * (qave - qth_min) * qcount * C1 >= qR  (Fig.17 of the
+		 * Floyd/Jacobson paper). */
+		if ((((q->qave - q->qth_min)*q->qcount)>>q->C1log) < q->qR)
+			goto enqueue;
+		q->qcount = 0;
+		q->qR = red_random(q->Rlog);
+		goto drop;
+	}
+	q->qR = red_random(q->Rlog);
+	goto enqueue;
+}
+
+/* Hand the head packet to the driver; record the start of an idle
+ * period when the queue goes empty so red_enqueue can age the average.
+ */
+static struct sk_buff *
+red_dequeue(struct Qdisc* sch)
+{
+	struct red_sched_data *q = (struct red_sched_data *)sch->data;
+	struct sk_buff *skb = skb_dequeue(&sch->q);
+
+	if (skb == NULL) {
+		PSCHED_GET_TIME(q->qidlestart);
+		return NULL;
+	}
+
+	q->qbytes -= skb->len;
+	return skb;
+}
+
+/* Throw away every queued packet and restart the estimator. */
+static void
+red_reset(struct Qdisc* sch)
+{
+	struct red_sched_data *q = (struct red_sched_data *)sch->data;
+	struct sk_buff *skb;
+
+	/* Drain and free everything still queued. */
+	for (;;) {
+		skb = skb_dequeue(&sch->q);
+		if (skb == NULL)
+			break;
+		q->qbytes -= skb->len;
+		kfree_skb(skb,FREE_WRITE);
+	}
+
+	/* Byte accounting must be zero now; complain if it drifted. */
+	if (q->qbytes) {
+		printk("red_reset: qbytes=%lu\n", q->qbytes);
+		q->qbytes = 0;
+	}
+
+	PSCHED_SET_PASTPERFECT(q->qidlestart);
+	q->qave = 0;
+	q->qcount = -1;
+}
+
+/* Copy the user-supplied RED parameters into the private area and
+ * reset the estimator.  Returns -EINVAL on a truncated control block.
+ */
+static int red_init(struct Qdisc *sch, struct pschedctl *pctl)
+{
+	struct red_sched_data *q = (struct red_sched_data *)sch->data;
+	struct redctl *ctl = (struct redctl*)pctl->args;
+
+	if (pctl->arglen < sizeof(struct redctl))
+		return -EINVAL;
+
+	/* Queue limits and thresholds. */
+	q->qmaxbytes = ctl->qmaxbytes;
+	q->qth_min = ctl->qth_min;
+	q->qth_max = ctl->qth_max;
+
+	/* Fixed-point scaling constants. */
+	q->Wlog = ctl->Wlog;
+	q->Alog = ctl->Alog;
+	q->Rlog = ctl->Rlog;
+	q->C1log = ctl->C1log;
+	q->Slog = ctl->Slog;
+	memcpy(q->Stab, ctl->Stab, 256);
+
+	/* Fresh estimator state. */
+	q->qcount = -1;
+	PSCHED_SET_PASTPERFECT(q->qidlestart);
+	return 0;
+}
+
+/* Operations vector registered with the packet scheduler core.
+ * NOTE(review): field comments below are positional guesses from how
+ * this file uses them -- confirm against struct Qdisc_ops in
+ * net/pkt_sched.h. */
+struct Qdisc_ops red_ops =
+{
+	NULL,	/* presumably the next-qdisc link, filled by register_qdisc */
+	"red",	/* identifier used to select this discipline */
+	0,
+	sizeof(struct red_sched_data),	/* private data size */
+	red_enqueue,
+	red_dequeue,
+	red_reset,
+	NULL,
+	red_init,
+	NULL
+};
+
+
+#ifdef MODULE
+#include <linux/module.h>
+
+/* Register the RED discipline.  The module is pinned forever: the use
+ * count is raised before registration and only dropped again if
+ * registration fails. */
+int init_module(void)
+{
+	int err;
+
+	/* Load once and never free it. */
+	MOD_INC_USE_COUNT;
+	if ((err = register_qdisc(&red_ops)) != 0)
+		MOD_DEC_USE_COUNT;	/* registration failed: allow unload */
+	return err;
+}
+
+void cleanup_module(void)
+{
+	/* Intentionally empty: the raised use count keeps us resident. */
+}
+#endif
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov